From 67aa14c8e29271f640c8f1df368308403575c95f Mon Sep 17 00:00:00 2001 From: dwelch-spike <53876192+dwelch-spike@users.noreply.github.com> Date: Wed, 21 Dec 2022 11:29:39 -0800 Subject: [PATCH] asbench 1.7.0 (#83) * build: TOOLS-2257 Upgrade build std to C11 (#78) * feat: [TOOLS-2274] build on m1 (#82) * build: [TOOLS-2291] statically link openssl in mac builds (#81) Co-authored-by: Jesse S --- .../workflows/benchmark-artifact-linux.yml | 2 +- .github/workflows/benchmark-build.yml | 10 +- .github/workflows/mac-artifact.yml | 6 +- .gitignore | 1 + .vscode/c_cpp_properties.json | 18 - Makefile | 67 +- README.md | 6 +- src/include/benchmark.h | 28 +- src/include/coordinator.h | 2 +- src/include/histogram.h | 8 +- src/include/osx_pthread_barrier.h | 4 +- src/include/queue.h | 4 +- src/main/benchmark.c | 21 +- src/main/coordinator.c | 32 +- src/main/histogram.c | 38 +- src/main/latency_output.c | 25 +- src/main/osx_pthread_barrier.c | 18 +- src/main/queue.c | 24 +- src/main/transaction.c | 69 +- src/test/unit/histogram_test.c | 22 +- tso/Makefile | 39 - tso/exclude_ce.txt | 34 - tso/test.c | 834 ------------------ tso/tso.cc | 751 ---------------- 24 files changed, 200 insertions(+), 1863 deletions(-) delete mode 100644 .vscode/c_cpp_properties.json delete mode 100644 tso/Makefile delete mode 100644 tso/exclude_ce.txt delete mode 100644 tso/test.c delete mode 100644 tso/tso.cc diff --git a/.github/workflows/benchmark-artifact-linux.yml b/.github/workflows/benchmark-artifact-linux.yml index 8a49cce1..89f5d641 100644 --- a/.github/workflows/benchmark-artifact-linux.yml +++ b/.github/workflows/benchmark-artifact-linux.yml @@ -1,7 +1,7 @@ name: Build:Main on: push: - branches: [ main, mac_build ] + branches: [ main ] jobs: build-libraries: runs-on: ubuntu-20.04 diff --git a/.github/workflows/benchmark-build.yml b/.github/workflows/benchmark-build.yml index d7769136..073371e6 100644 --- a/.github/workflows/benchmark-build.yml +++ b/.github/workflows/benchmark-build.yml @@ -2,9 +2,9 @@ name: Build and Test All on: push: - branches: [ main, test-ready ] + branches: [ main, test-ready, "*-bugfix" ] pull_request: - branches: [ main, test-ready ] + branches: [ main, test-ready, "*-bugfix" ] jobs: build: runs-on: ubuntu-latest @@ -28,9 +28,9 @@ jobs: fi - name: install lcov run: | - wget "https://github.com/linux-test-project/lcov/archive/master.zip" - unzip master.zip - cd lcov-master + wget "https://github.com/linux-test-project/lcov/archive/refs/tags/v1.16.zip" + unzip v1.16.zip + cd lcov-1.16 sudo make install - name: Download libcheck uses: actions/checkout@v2 diff --git a/.github/workflows/mac-artifact.yml b/.github/workflows/mac-artifact.yml index 81a6cf36..7e4c5dca 100644 --- a/.github/workflows/mac-artifact.yml +++ b/.github/workflows/mac-artifact.yml @@ -1,9 +1,9 @@ name: Mac Artifact on: push: - branches: [actionsHub, main, test-ready ] + branches: [ main, test-ready, "bugfix-*" ] pull_request: - branches: [actionsHub] + branches: [] workflow_call: inputs: submodule: @@ -55,7 +55,7 @@ jobs: git describe --tags --always - name: Build asbench run: | - make EVENT_LIB=${{ matrix.ev-lib }} LIBUV_STATIC_PATH=/usr/local/lib LIBEVENT_STATIC_PATH=/usr/local/lib LIBEV_STATIC_PATH=/usr/local/lib + make EVENT_LIB=${{ matrix.ev-lib }} LIBUV_STATIC_PATH=/usr/local/lib LIBEVENT_STATIC_PATH=/usr/local/lib LIBEV_STATIC_PATH=/usr/local/lib OPENSSL_STATIC_PATH=/usr/local/opt/openssl/lib working-directory: ${{ steps.working-dir.outputs.value }} - name: Sanity test asbench artifact run: | diff --git a/.gitignore b/.gitignore index b29aa0bb..a26ce860 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ Version.java modules/.* target/ client_test-valgrind +.vscode # Compiled source # ################### diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json deleted file mode 100644 index 1840a0c5..00000000 --- a/.vscode/c_cpp_properties.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "configurations": [ - { - "name": "Linux", - "includePath": [ - "${workspaceFolder}/**", - "${env:CLIENTREPO}/**", - "${env:CLIENTREPO}/modules/common/**" - ], - "defines": [], - "compilerPath": "/usr/bin/gcc", - "cStandard": "c11", - "cppStandard": "c++17", - "intelliSenseMode": "gcc-x86" - } - ], - "version": 4 -} \ No newline at end of file diff --git a/Makefile b/Makefile index 1cca4251..4df19461 100644 --- a/Makefile +++ b/Makefile @@ -14,10 +14,20 @@ VERSION := $(shell git describe 2>/dev/null; if [ $${?} != 0 ]; then echo 'unkno ROOT = $(CURDIR) NAME = $(shell basename $(ROOT)) OS = $(shell uname) +ARCH = $(shell uname -m) + +M1_HOME_BREW = ifeq ($(OS),Darwin) - ARCH = $(shell uname -m) -else - ARCH = $(shell uname -m) + ifneq ($(wildcard /opt/homebrew),) + M1_HOME_BREW = true + endif +endif + +# M1 macs brew install openssl under /opt/homebrew/opt/openssl +# set OPENSSL_PREFIX to the prefix for your openssl if it is installed elsewhere +OPENSSL_PREFIX ?= /usr/local/opt/openssl +ifdef M1_HOME_BREW + OPENSSL_PREFIX = /opt/homebrew/opt/openssl endif CMAKE3_CHECK := $(shell cmake3 --help > /dev/null 2>&1 || (echo "cmake3 not found")) @@ -33,7 +43,7 @@ else endif endif -CFLAGS = -std=gnu99 -Wall -fPIC -O3 -MMD -MP +CFLAGS = -std=gnu11 -Wall -fPIC -O3 -MMD -MP CFLAGS += -fno-common -fno-strict-aliasing CFLAGS += -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_GNU_SOURCE CFLAGS += -DTOOL_VERSION=\"$(VERSION)\" @@ -47,24 +57,6 @@ DIR_LIBCYAML_BUILD ?= $(ROOT)/modules/libcyaml/$(DIR_LIBCYAML_BUILD_REL) DIR_C_CLIENT ?= $(ROOT)/modules/c-client C_CLIENT_LIB := $(DIR_C_CLIENT)/target/$(PLATFORM)/lib/libaerospike.a -DIR_TSO := $(ROOT)/tso -TSO_LIB := $(DIR_TSO)/tso.so - -ifeq ($(ARCH),aarch64) - # Plugin configuration. - PLUGIN_ENABLE = yes - PLUGIN_FIX_ASM = yes - PLUGIN_FIX_BUILT_IN = yes - PLUGIN_PROFILING = no - - TSO_FLAGS = -fplugin=$(TSO_LIB) -fplugin-arg-tso-enable=$(PLUGIN_ENABLE) \ - -fplugin-arg-tso-exclude=$(DIR_TSO)/exclude_ce.txt -fplugin-arg-tso-exclude=$(DIR_TSO)/exclude_ce.txt \ - -fplugin-arg-tso-track-deps=yes -fplugin-arg-tso-fix-asm=$(PLUGIN_FIX_ASM) \ - -fplugin-arg-tso-fix-built-in=$(PLUGIN_FIX_BUILT_IN) -fplugin-arg-tso-profiling=$(PLUGIN_PROFILING) - - CFLAGS += $(TSO_FLAGS) -endif - DIR_INCLUDE = $(ROOT)/src/include DIR_INCLUDE += $(ROOT)/modules DIR_INCLUDE += $(DIR_LIBYAML)/include @@ -77,7 +69,7 @@ INCLUDES = $(DIR_INCLUDE:%=-I%) DIR_ENV = $(ROOT)/env -ifneq ($(ARCH),$(filter $(ARCH),ppc64 ppc64le aarch64)) +ifneq ($(ARCH),$(filter $(ARCH),ppc64 ppc64le aarch64 arm64)) CFLAGS += -march=nocona endif @@ -103,7 +95,7 @@ LDFLAGS = -L/usr/local/lib ifeq ($(OPENSSL_STATIC_PATH),) ifeq ($(OS),Darwin) - LDFLAGS += -L/usr/local/opt/openssl/lib + LDFLAGS += -L$(OPENSSL_PREFIX)/lib endif LDFLAGS += -lssl LDFLAGS += -lcrypto @@ -146,6 +138,15 @@ endif LDFLAGS += -lm -lz +# if this is an m1 mac using homebrew +# add the new homebrew lib and include path +# incase dependencies are installed there +# NOTE: /usr/local/include will be checked first +ifdef M1_HOME_BREW + LDFLAGS += -L/opt/homebrew/lib + INCLUDES += -I/opt/homebrew/include +endif + TEST_LDFLAGS = $(LDFLAGS) -Ltest_target/lib -lcheck BUILD_LDFLAGS = $(LDFLAGS) -Ltarget/lib @@ -231,7 +232,6 @@ target/libbench.a: $(OBJECTS) clean: rm -rf target test_target $(DIR_ENV) $(MAKE) clean -C $(DIR_LIBCYAML) - $(MAKE) clean -C $(DIR_TSO) if [ -d $(DIR_LIBYAML_BUILD) ]; then $(MAKE) clean -C $(DIR_LIBYAML_BUILD); fi rm -rf $(DIR_LIBYAML_BUILD) $(MAKE) -C $(DIR_C_CLIENT) clean @@ -251,15 +251,10 @@ target/lib: | target target/obj/hdr_histogram: | target/obj mkdir $@ -$(TSO_LIB): - if [ $(ARCH) = "aarch64" ]; then \ - $(MAKE) -C $(DIR_TSO); \ - fi - -target/obj/%.o: src/main/%.c | $(TSO_LIB) target/obj +target/obj/%.o: src/main/%.c | target/obj $(CC) $(BUILD_CFLAGS) -o $@ -c $< $(INCLUDES) -target/obj/hdr_histogram%.o: modules/hdr_histogram/%.c | $(TSO_LIB) target/obj/hdr_histogram +target/obj/hdr_histogram%.o: modules/hdr_histogram/%.c | target/obj/hdr_histogram $(CC) $(BUILD_CFLAGS) -o $@ -c $< $(INCLUDES) target/lib/libyaml.a: $(DIR_LIBYAML_BUILD)/libyaml.a | target/lib @@ -297,7 +292,7 @@ test: unit integration # unit testing .PHONY: unit -unit: | $(TSO_LIB) test_target/test +unit: | test_target/test @echo @#valgrind --tool=memcheck --leak-check=full --track-origins=yes ./test_target/test @./test_target/test @@ -317,13 +312,13 @@ test_target/obj/hdr_histogram: | test_target/obj test_target/lib: | test_target mkdir $@ -test_target/obj/unit/%.o: src/test/unit/%.c | $(TSO_LIB) test_target/obj/unit +test_target/obj/unit/%.o: src/test/unit/%.c | test_target/obj/unit $(CC) $(TEST_CFLAGS) -o $@ -c $< $(INCLUDES) -test_target/obj/%.o: src/main/%.c | $(TSO_LIB) test_target/obj +test_target/obj/%.o: src/main/%.c | test_target/obj $(CC) $(TEST_CFLAGS) -fprofile-arcs -ftest-coverage -coverage -o $@ -c $< $(INCLUDES) -test_target/obj/hdr_histogram%.o: modules/hdr_histogram/%.c | $(TSO_LIB) test_target/obj/hdr_histogram +test_target/obj/hdr_histogram%.o: modules/hdr_histogram/%.c | test_target/obj/hdr_histogram $(CC) $(TEST_CFLAGS) -fprofile-arcs -ftest-coverage -coverage -o $@ -c $< $(INCLUDES) test_target/test: $(TEST_OBJECTS) test_target/lib/libcyaml.a test_target/lib/libyaml.a $(C_CLIENT_LIB) | test_target diff --git a/README.md b/README.md index aefee88d..912e3355 100644 --- a/README.md +++ b/README.md @@ -12,16 +12,18 @@ For more information on how to use the benchmark tool and configure it to your n ### Dependencies -Before building, you need to have a local copy of the [Aerospike C Client](https://github.com/aerospike/aerospike-client-c) and to have built it. After this, set the environment variable `CLIENTREPO` to point to the directory containing the built C client. +Asbackup builds the [Aerospike C Client](https://github.com/aerospike/aerospike-client-c) as a submodule. +Make sure all the C clients build dependencies are installed before starting the asbackup build. Additional external dependencies: * OpenSSL (libssl and libcrypto) * libyaml-devel * libev, libuv, or libevent, if an event library is used + * If on macOS install via `brew` This project uses git submodules, so you will need to initialize and update submodules before building this project. - $ git submodule update --init + $ git submodule update --init --recursive ### Build diff --git a/src/include/benchmark.h b/src/include/benchmark.h index 7fe6d501..84ed5299 100644 --- a/src/include/benchmark.h +++ b/src/include/benchmark.h @@ -21,6 +21,8 @@ ******************************************************************************/ #pragma once +#include + #include #include #include @@ -111,18 +113,18 @@ typedef struct clientdata_s { aerospike client; // TODO make all these counts thread-local to reduce contention - uint64_t read_hit_count; - uint64_t read_miss_count; - uint64_t read_timeout_count; - uint64_t read_error_count; + _Atomic(uint64_t) read_hit_count; + _Atomic(uint64_t) read_miss_count; + _Atomic(uint64_t) read_timeout_count; + _Atomic(uint64_t) read_error_count; - uint64_t write_count; - uint64_t write_timeout_count; - uint64_t write_error_count; + _Atomic(uint64_t) write_count; + _Atomic(uint64_t) write_timeout_count; + _Atomic(uint64_t) write_error_count; - uint64_t udf_count; - uint64_t udf_timeout_count; - uint64_t udf_error_count; + _Atomic(uint64_t) udf_count; + _Atomic(uint64_t) udf_timeout_count; + _Atomic(uint64_t) udf_error_count; FILE* hdr_comp_read_output; FILE* hdr_text_read_output; @@ -162,7 +164,7 @@ typedef struct threaddata_s { // thread index: [0, n_threads) uint32_t t_idx; // which workload stage we're currrently on - uint32_t stage_idx; + _Atomic(uint32_t) stage_idx; /* * note: to stop threads, tdata->finished must be set before tdata->do_work @@ -170,10 +172,10 @@ typedef struct threaddata_s { */ // when true, things continue as normal, when set to false, worker // threads will stop doing what they're doing and await orders - bool do_work; + atomic_bool do_work; // when true, all threads will stop doing work and close (note that do_work // must also be set to false for this to work) - bool finished; + atomic_bool finished; // the following arguments are initialized for each stage as_record fixed_full_record; diff --git a/src/include/coordinator.h b/src/include/coordinator.h index 75266d88..d10464e3 100644 --- a/src/include/coordinator.h +++ b/src/include/coordinator.h @@ -60,7 +60,7 @@ typedef struct thr_coordinator_s { // stage, plus this thread (which decrements this variable after returning // from the as_sleep call, i.e. once the minimum required duration of the // stage has elapsed) - uint32_t unfinished_threads; + _Atomic(uint32_t) unfinished_threads; } thr_coord_t; struct coordinator_worker_args_s { diff --git a/src/include/histogram.h b/src/include/histogram.h index c7522e9c..6eaaeb8a 100644 --- a/src/include/histogram.h +++ b/src/include/histogram.h @@ -36,7 +36,7 @@ typedef struct rangespec_s { typedef struct histogram_s { - uint32_t* buckets; + _Atomic(uint32_t)* buckets; struct bucket_range_desc_s* bounds; // name to be printed before each output line of this histogram @@ -48,9 +48,9 @@ typedef struct histogram_s { delay_t range_max; // a count of the number of data points below the minimum bucket - uint32_t underflow_cnt; + _Atomic(uint32_t) underflow_cnt; // a count of the number of data points above the maximum bucket - uint32_t overflow_cnt; + _Atomic(uint32_t) overflow_cnt; // the number of elements in the bounds array uint32_t n_bounds; @@ -102,7 +102,7 @@ uint64_t histogram_calc_total(const histogram_t* h); /* * insert the delay into the histogram in a thread-safe manner */ -void histogram_add(histogram_t* h, delay_t elapsed_us); +void histogram_incr(histogram_t* h, delay_t elapsed_us); /* * returns the count in the bucket of the given index diff --git a/src/include/osx_pthread_barrier.h b/src/include/osx_pthread_barrier.h index 0cceb917..7ff90357 100644 --- a/src/include/osx_pthread_barrier.h +++ b/src/include/osx_pthread_barrier.h @@ -42,10 +42,10 @@ typedef struct pthread_barrier { // the number of threads that must meet at the barrier uint32_t count; // the count of threads that have met the barrier - uint32_t in; + _Atomic(uint32_t) in; // the round number (starts at 0, incremented each time all threads reach // the barrier) - uint32_t current_round; + _Atomic(uint32_t) current_round; } pthread_barrier_t; int32_t pthread_barrier_init(pthread_barrier_t*, void* attr, diff --git a/src/include/queue.h b/src/include/queue.h index bf6d176a..2e188d50 100644 --- a/src/include/queue.h +++ b/src/include/queue.h @@ -29,12 +29,12 @@ * queue */ typedef struct queue_s { - void** items; + _Atomic(void*)* items; // length of items - 1 (length is always a power of 2) uint32_t len_mask; // the position of the next element to be inserted (modulo len) - uint32_t __attribute__((aligned(8))) pos; + _Atomic(uint32_t) __attribute__((aligned(8))) pos; // the position of the head, i.e. the next element to be popped (modulo len) uint32_t __attribute__((aligned(8))) head; diff --git a/src/main/benchmark.c b/src/main/benchmark.c index a392f385..9ecffa27 100644 --- a/src/main/benchmark.c +++ b/src/main/benchmark.c @@ -76,6 +76,17 @@ run_benchmark(args_t* args) data.latency = args->latency; data.debug = args->debug; data.async_max_commands = args->async_max_commands; + + atomic_init(&data.read_hit_count, 0); + atomic_init(&data.read_miss_count, 0); + atomic_init(&data.read_timeout_count, 0); + atomic_init(&data.read_error_count, 0); + atomic_init(&data.write_count, 0); + atomic_init(&data.write_timeout_count, 0); + atomic_init(&data.write_error_count, 0); + atomic_init(&data.udf_count, 0); + atomic_init(&data.udf_timeout_count, 0); + atomic_init(&data.udf_error_count, 0); time_t start_time; hdr_timespec start_timespec; @@ -310,10 +321,10 @@ init_tdata(const args_t* args, cdata_t* cdata, thr_coord_t* coord, tdata->random = as_random_instance(); tdata->t_idx = t_idx; // always start on the first stage - tdata->stage_idx = 0; + atomic_init(&tdata->stage_idx, 0); - tdata->do_work = true; - tdata->finished = false; + atomic_init(&tdata->do_work, true); + atomic_init(&tdata->finished, false); as_policies* p = &tdata->policies; as_policies_init(p); @@ -475,8 +486,8 @@ _run(const args_t* args, cdata_t* cdata) // make thread exit // note that we must update finished before do_work, since we don't // want any of the threads to enter the pthread barrier - as_store_uint8((uint8_t*) &tdatas[i]->finished, true); - as_store_uint8((uint8_t*) &tdatas[i]->do_work, false); + tdatas[i]->finished = true; + tdatas[i]->do_work = false; } pthread_join(threads[i], NULL); destroy_tdata(tdatas[i]); diff --git a/src/main/coordinator.c b/src/main/coordinator.c index 59ca4911..443afe41 100644 --- a/src/main/coordinator.c +++ b/src/main/coordinator.c @@ -7,8 +7,6 @@ #include -#include - #include #include @@ -59,7 +57,7 @@ thr_coordinator_init(thr_coord_t* coord, uint32_t n_threads) pthread_barrier_init(&coord->barrier, NULL, n_threads + 1); coord->n_threads = n_threads; // unfinished threads includes this thread - coord->unfinished_threads = n_threads + 1; + atomic_init(&coord->unfinished_threads, n_threads + 1); return 0; } @@ -88,11 +86,7 @@ thr_coordinator_complete(thr_coord_t* coord) // first acquire the complete lock pthread_mutex_lock(&coord->c_lock); - uint32_t rem_threads = coord->unfinished_threads - 1; - coord->unfinished_threads = rem_threads; - // commit this write before signaling the condition variable and releasing - // the lock, since it was not atomic - as_fence_seq(); + uint32_t rem_threads = atomic_fetch_sub(&coord->unfinished_threads, 1) - 1; if (rem_threads == 0) { pthread_cond_broadcast(&coord->complete); @@ -107,11 +101,11 @@ thr_coordinator_sleep(thr_coord_t* coord, uint32_t rem_threads; pthread_mutex_lock(&coord->c_lock); - // since condition variable waits may supriously return, we have to check + // since condition variable waits may spuriously return, we have to check // that the time hasn't expired each time. we also want to check that there // are still unfinished threads left, since if this value is 0, we don't // want to continue waiting any longer - while ((rem_threads = as_load_uint32(&coord->unfinished_threads)) != 0 && + while ((rem_threads = coord->unfinished_threads) != 0 && _has_not_happened(wakeup_time)) { pthread_cond_timedwait(&coord->complete, &coord->c_lock, wakeup_time); @@ -168,7 +162,7 @@ coordinator_worker(void* udata) stage_random_pause(&random, &cdata->stages.stages[stage_idx]); // reset unfinished_threads count - as_store_uint32(&coord->unfinished_threads, n_threads + 1); + coord->unfinished_threads = n_threads + 1; _release_threads(coord, tdatas, n_threads); } @@ -223,7 +217,7 @@ _halt_threads(thr_coord_t* coord, tdata_t** tdatas, uint32_t n_threads) { for (uint32_t i = 0; i < n_threads; i++) { - as_store_uint8((uint8_t*) &tdatas[i]->do_work, false); + tdatas[i]->do_work = false; } pthread_barrier_wait(&coord->barrier); } @@ -237,7 +231,7 @@ _terminate_threads(thr_coord_t* coord, tdata_t** tdatas, uint32_t n_threads) { for (uint32_t i = 0; i < n_threads; i++) { - as_store_uint8((uint8_t*) &tdatas[i]->finished, true); + tdatas[i]->finished = true; } pthread_barrier_wait(&coord->barrier); } @@ -251,7 +245,7 @@ _release_threads(thr_coord_t* coord, tdata_t** tdatas, uint32_t n_threads) { for (uint32_t i = 0; i < n_threads; i++) { - as_store_uint8((uint8_t*) &tdatas[i]->do_work, true); + tdatas[i]->do_work = true; } pthread_barrier_wait(&coord->barrier); } @@ -267,11 +261,7 @@ _finish_req_duration(thr_coord_t* coord) { pthread_mutex_lock(&coord->c_lock); - uint32_t rem_threads = coord->unfinished_threads - 1; - coord->unfinished_threads = rem_threads; - // commit this write before signaling the condition variable and releasing - // the lock, since it was not atomic - as_fence_seq(); + uint32_t rem_threads = atomic_fetch_sub(&coord->unfinished_threads, 1) - 1; // if we're the last thread finishing, notify any threads waiting on the // complete condition variable @@ -282,7 +272,7 @@ _finish_req_duration(thr_coord_t* coord) // wait for the rest of the threads to complete while (rem_threads != 0) { pthread_cond_wait(&coord->complete, &coord->c_lock); - rem_threads = as_load_uint32(&coord->unfinished_threads); + rem_threads = coord->unfinished_threads; } pthread_mutex_unlock(&coord->c_lock); } @@ -308,7 +298,5 @@ clear_cdata_counts(cdata_t* cdata) cdata->udf_count = 0; cdata->udf_timeout_count = 0; cdata->udf_error_count = 0; - - as_fence_seq(); } diff --git a/src/main/histogram.c b/src/main/histogram.c index a5375a64..1254d036 100644 --- a/src/main/histogram.c +++ b/src/main/histogram.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include "histogram.h" #include "common.h" @@ -68,10 +68,10 @@ LOCAL_HELPER void _print_header(const histogram_t* h, uint64_t period_duration_u STATIC_ASSERT(offsetof(histogram_t, underflow_cnt) + sizeof(uint32_t) == offsetof(histogram_t, overflow_cnt)); -inline uint32_t* +inline _Atomic(uint32_t)* __attribute__((always_inline)) __histogram_get_bucket(histogram_t* h, int64_t idx) { - return (idx < 0) ? (((uint32_t*) (((ptr_int_t) h) + offsetof(histogram_t, underflow_cnt) + return (idx < 0) ? (((_Atomic(uint32_t)*) (((ptr_int_t) h) + offsetof(histogram_t, underflow_cnt) + 2 * sizeof(uint32_t))) + idx) : &h->buckets[idx]; } @@ -112,13 +112,18 @@ histogram_init(histogram_t* h, size_t n_ranges, delay_t lowb, rangespec_t* range range_start = range_end; } - h->buckets = (uint32_t*) cf_calloc(total_buckets, sizeof(uint32_t)); + h->buckets = (_Atomic(uint32_t)*) cf_calloc(total_buckets, sizeof(_Atomic(uint32_t))); + + for (uint32_t i = 0; i < total_buckets; i++) { + atomic_init(&h->buckets[i], 0); + } + h->bounds = b; h->name = NULL; h->range_min = lowb; h->range_max = range_start; - h->underflow_cnt = 0; - h->overflow_cnt = 0; + atomic_init(&h->underflow_cnt, 0); + atomic_init(&h->overflow_cnt, 0); h->n_bounds = n_ranges; h->n_buckets = total_buckets; return 0; @@ -137,7 +142,10 @@ histogram_free(histogram_t* h) void histogram_clear(histogram_t* h) { - memset(h->buckets, 0, h->n_buckets * sizeof(uint32_t)); + for (uint32_t i = 0; i < h->n_buckets; i++) { + h->buckets[i] = 0; + } + h->underflow_cnt = 0; h->overflow_cnt = 0; } @@ -153,20 +161,20 @@ histogram_set_name(histogram_t* h, const char* name) } void -histogram_add(histogram_t* h, delay_t elapsed_us) +histogram_incr(histogram_t* h, delay_t elapsed_us) { int32_t bucket_idx = _histogram_get_index(h, elapsed_us); - uint32_t* bucket = __histogram_get_bucket(h, bucket_idx); + _Atomic(uint32_t)* bucket = __histogram_get_bucket(h, bucket_idx); - as_incr_uint32(bucket); + (*bucket)++; } delay_t histogram_get_count(histogram_t* h, uint64_t bucket_idx) { - uint32_t* bucket = __histogram_get_bucket(h, bucket_idx); + _Atomic(uint32_t)* bucket = __histogram_get_bucket(h, bucket_idx); - return as_load_uint32(bucket); + return *bucket; } uint64_t @@ -229,16 +237,16 @@ histogram_print_clear(histogram_t* h, uint64_t period_duration_us, FILE* out_fil * counts that were read in and storing the bucket values in an array * of counts (to be read later when the individual buckets are printed) */ - uint32_t underflow_cnt = as_fas_uint32(&h->underflow_cnt, 0); + uint32_t underflow_cnt = atomic_exchange(&h->underflow_cnt, 0); total_cnt += underflow_cnt; for (uint32_t idx = 0; idx < h->n_buckets; idx++) { // atomic swap 0 in for the bucket value - uint32_t cnt = as_fas_uint32(&h->buckets[idx], 0); + uint32_t cnt = atomic_exchange(&h->buckets[idx], 0); cnts[idx] = cnt; total_cnt += cnt; } - uint32_t overflow_cnt = as_fas_uint32(&h->overflow_cnt, 0); + uint32_t overflow_cnt = atomic_exchange(&h->overflow_cnt, 0); total_cnt += overflow_cnt; _print_header(h, period_duration_us, total_cnt, out_file); diff --git a/src/main/latency_output.c b/src/main/latency_output.c index a85f9f8e..cd8a9512 100644 --- a/src/main/latency_output.c +++ b/src/main/latency_output.c @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -397,7 +396,7 @@ periodic_output_worker(void* udata) goto do_sleep; - while (!as_load_uint8((uint8_t*) &tdata->finished)) { + while (!tdata->finished) { clock_gettime(COORD_CLOCK, &wake_up); time = timespec_to_us(&wake_up); @@ -405,16 +404,16 @@ periodic_output_worker(void* udata) int64_t elapsed = time - prev_time; prev_time = time; - uint64_t write_current = as_fas_uint64(&cdata->write_count, 0); - uint64_t write_timeout_current = as_fas_uint64(&cdata->write_timeout_count, 0); - uint64_t write_error_current = as_fas_uint64(&cdata->write_error_count, 0); - uint64_t read_hit_current = as_fas_uint64(&cdata->read_hit_count, 0); - uint64_t read_miss_current = as_fas_uint64(&cdata->read_miss_count, 0); - uint64_t read_timeout_current = as_fas_uint64(&cdata->read_timeout_count, 0); - uint64_t read_error_current = as_fas_uint64(&cdata->read_error_count, 0); - uint64_t udf_current = as_fas_uint64(&cdata->udf_count, 0); - uint64_t udf_timeout_current = as_fas_uint64(&cdata->udf_timeout_count, 0); - uint64_t udf_error_current = as_fas_uint64(&cdata->udf_error_count, 0); + uint64_t write_current = atomic_exchange(&cdata->write_count, 0); + uint64_t write_timeout_current = atomic_exchange(&cdata->write_timeout_count, 0); + uint64_t write_error_current = atomic_exchange(&cdata->write_error_count, 0); + uint64_t read_hit_current = atomic_exchange(&cdata->read_hit_count, 0); + uint64_t read_miss_current = atomic_exchange(&cdata->read_miss_count, 0); + uint64_t read_timeout_current = atomic_exchange(&cdata->read_timeout_count, 0); + uint64_t read_error_current = atomic_exchange(&cdata->read_error_count, 0); + uint64_t udf_current = atomic_exchange(&cdata->udf_count, 0); + uint64_t udf_timeout_current = atomic_exchange(&cdata->udf_timeout_count, 0); + uint64_t udf_error_current = atomic_exchange(&cdata->udf_error_count, 0); cdata->period_begin = time; @@ -512,7 +511,7 @@ periodic_output_worker(void* udata) thr_coordinator_wait(coord); // check to make sure we're not finished before resetting everything - if (!as_load_uint8((uint8_t*) &tdata->finished)) { + if (!tdata->finished) { // first indicate that this thread has no required work to do thr_coordinator_complete(coord); // so the logger doesn't immediately go back to waiting again diff --git a/src/main/osx_pthread_barrier.c b/src/main/osx_pthread_barrier.c index c49ba671..d4afb104 100644 --- a/src/main/osx_pthread_barrier.c +++ b/src/main/osx_pthread_barrier.c @@ -3,11 +3,13 @@ #include #include #include +#include #include #define BARRIER_IN_THRESHOLD ((2147483647 * 2U + 1U) / 2) +#define atomic_add_fetch_explicit(object, operand, order) atomic_fetch_add_explicit(object, operand, order) + operand int32_t pthread_barrier_init(pthread_barrier_t* barrier, void* attr, @@ -24,8 +26,8 @@ pthread_barrier_init(pthread_barrier_t* barrier, void* attr, pthread_mutex_init(&barrier->lock, NULL); barrier->count = count; - barrier->in = 0; - barrier->current_round = 0; + atomic_init(&barrier->in, 0); + atomic_init(&barrier->current_round, 0); return 0; } @@ -44,10 +46,10 @@ pthread_barrier_wait(pthread_barrier_t* barrier) // read the current round before incrementing the in variable, since we // require that current round be correct, and incrementing in before reading // current_round would induce a race - uint32_t round = __atomic_load_n(&barrier->current_round, __ATOMIC_ACQUIRE); + uint32_t round = atomic_load_explicit(&barrier->current_round, memory_order_acquire); // increment the in variable with relaxed memory ordering since this is the // only modification we've made to memory - uint32_t i = __atomic_add_fetch(&barrier->in, 1, __ATOMIC_RELAXED); + uint32_t i = atomic_add_fetch_explicit(&barrier->in, 1, memory_order_relaxed); uint32_t count = barrier->count; if (i < count) { @@ -57,11 +59,11 @@ pthread_barrier_wait(pthread_barrier_t* barrier) pthread_mutex_lock(&barrier->lock); // read the current round before waiting on the condition variable uint32_t cur_round = - __atomic_load_n(&barrier->current_round, __ATOMIC_ACQUIRE); + atomic_load_explicit(&barrier->current_round, memory_order_acquire); while (cur_round == round) { pthread_cond_wait(&barrier->cond, &barrier->lock); - cur_round = __atomic_load_n(&barrier->current_round, __ATOMIC_ACQUIRE); + cur_round = atomic_load_explicit(&barrier->current_round, memory_order_acquire); } pthread_mutex_unlock(&barrier->lock); @@ -69,11 +71,11 @@ pthread_barrier_wait(pthread_barrier_t* barrier) else { // reset the in-thread count to zero, preventing any of the other // threads at the barrier from leaving before the round is incremented - __atomic_store_n(&barrier->in, 0, __ATOMIC_RELAXED); + atomic_store_explicit(&barrier->in, 0, memory_order_relaxed); // go to the next round, allowing all other threads waiting at the // barrier to leave. At this point, the state of the barrier is // completely reset - __atomic_store_n(&barrier->current_round, round + 1, __ATOMIC_RELEASE); + atomic_store_explicit(&barrier->current_round, round + 1, memory_order_release); // acquire the condition lock so no thread can wait after checking the // condition and after the broadcast wakeup is executed diff --git a/src/main/queue.c b/src/main/queue.c index c041ae64..42df6caa 100644 --- a/src/main/queue.c +++ b/src/main/queue.c @@ -4,8 +4,10 @@ // #include +#include + +#include -#include #include #include @@ -33,10 +35,15 @@ queue_init(queue_t* q, uint32_t q_len) // len is the next power of 2 strictly greater than q_len uint32_t len = next_pow2(q_len); - q->items = (void**) cf_calloc(len, sizeof(void*)); + q->items = (_Atomic(void*)*) cf_calloc(len, sizeof(_Atomic(void*))); + + for(uint32_t i = 0; i < len; i++) { + atomic_init(&q->items[i], NULL); + } + q->len_mask = len - 1; - q->pos = 0; q->head = 0; + atomic_init(&q->pos, 0); return 0; } @@ -50,8 +57,9 @@ queue_free(queue_t* q) void queue_push(queue_t* q, void* item) { - uint32_t pos = as_faa_uint32(&q->pos, 1); - as_store_ptr(&q->items[pos & q->len_mask], item); + uint32_t pos = atomic_fetch_add(&q->pos, 1); + // no race condition because 'head' is only incremented in pop if item is not null + q->items[pos & q->len_mask] = item; } @@ -60,12 +68,10 @@ queue_pop(queue_t* q) { void* item; uint32_t head = q->head; - uint32_t pos = as_load_uint32(&q->pos); // Since uint32_t can overflow with only ~4B transactions, use !=, not < - if (head != pos) { - item = (void*) as_fas_uint64((uint64_t*) &q->items[head & q->len_mask], - (uint64_t) NULL); + if (head != q->pos) { + item = atomic_exchange(&q->items[head & q->len_mask], NULL); // can be non-atomic since this thread is the only modifier of head q->head += (item != NULL); return item; diff --git a/src/main/transaction.c b/src/main/transaction.c index 8b781b4a..bb0a6306 100644 --- a/src/main/transaction.c +++ b/src/main/transaction.c @@ -10,7 +10,6 @@ #include #endif -#include #include #include #include @@ -172,8 +171,8 @@ transaction_worker(void* udata) cdata_t* cdata = tdata->cdata; thr_coord_t* coord = tdata->coord; - while (!as_load_uint8((uint8_t*) &tdata->finished)) { - uint32_t stage_idx = as_load_uint32(&tdata->stage_idx); + while (!tdata->finished) { + uint32_t stage_idx = tdata->stage_idx; stage_t* stage = &cdata->stages.stages[stage_idx]; init_stage(cdata, tdata, stage); @@ -185,7 +184,7 @@ transaction_worker(void* udata) do_sync_workload(tdata, cdata, coord, stage); } // check tdata->finished before locking - if (as_load_uint8((uint8_t*) &tdata->finished)) { + if (tdata->finished) { break; } terminate_stage(cdata, tdata, stage); @@ -232,9 +231,9 @@ _record_read(cdata_t* cdata, uint64_t dt_us) hdr_record_value_atomic(cdata->read_hdr, dt_us); } if (cdata->histogram_output != NULL || cdata->hdr_comp_read_output != NULL) { - histogram_add(&cdata->read_histogram, dt_us); + histogram_incr(&cdata->read_histogram, dt_us); } - as_incr_uint64(&cdata->read_hit_count); + cdata->read_hit_count++; } LOCAL_HELPER void @@ -244,9 +243,9 @@ _record_write(cdata_t* cdata, uint64_t dt_us) hdr_record_value_atomic(cdata->write_hdr, dt_us); } if (cdata->histogram_output != NULL || cdata->hdr_comp_write_output != NULL) { - histogram_add(&cdata->write_histogram, dt_us); + histogram_incr(&cdata->write_histogram, dt_us); } - as_incr_uint64(&cdata->write_count); + cdata->write_count++; } LOCAL_HELPER void @@ -256,9 +255,9 @@ _record_udf(cdata_t* cdata, uint64_t dt_us) hdr_record_value_atomic(cdata->udf_hdr, dt_us); } if (cdata->histogram_output != NULL || cdata->hdr_comp_udf_output != NULL) { - histogram_add(&cdata->udf_histogram, dt_us); + histogram_incr(&cdata->udf_histogram, dt_us); } - as_incr_uint64(&cdata->udf_count); + cdata->udf_count++; } @@ -285,10 +284,10 @@ _write_record_sync(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, // Handle error conditions. if (status == AEROSPIKE_ERR_TIMEOUT) { - as_incr_uint64(&cdata->write_timeout_count); + cdata->write_timeout_count++; } else { - as_incr_uint64(&cdata->write_error_count); + cdata->write_error_count++; if (cdata->debug) { blog_error("Write error: ns=%s set=%s key=%d bin=%s code=%d " @@ -332,13 +331,13 @@ _read_record_sync(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, // Handle error conditions. if (status == AEROSPIKE_ERR_RECORD_NOT_FOUND) { - as_incr_uint64(&cdata->read_miss_count); + cdata->read_miss_count++; } else if (status == AEROSPIKE_ERR_TIMEOUT) { - as_incr_uint64(&cdata->read_timeout_count); + cdata->read_timeout_count++; } else { - as_incr_uint64(&cdata->read_error_count); + cdata->read_error_count++; if (cdata->debug) { blog_error("Read error: ns=%s set=%s key=%d bin=%s code=%d " @@ -373,10 +372,10 @@ _batch_read_record_sync(tdata_t* tdata, cdata_t* cdata, // Handle error conditions. if (status == AEROSPIKE_ERR_TIMEOUT) { - as_incr_uint64(&cdata->read_timeout_count); + cdata->read_timeout_count++; } else { - as_incr_uint64(&cdata->read_error_count); + cdata->read_error_count++; if (cdata->debug) { blog_error("Batch read error: ns=%s set=%s bin=%s code=%d " @@ -426,10 +425,10 @@ _apply_udf_sync(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, // Handle error conditions. if (status == AEROSPIKE_ERR_TIMEOUT) { - as_incr_uint64(&cdata->udf_timeout_count); + cdata->udf_timeout_count++; } else { - as_incr_uint64(&cdata->udf_error_count); + cdata->udf_error_count++; if (cdata->debug) { blog_error("UDF error: ns=%s set=%s key=%d bin=%s code=%d " @@ -778,7 +777,7 @@ linear_writes(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, cdata->transaction_worker_threads, &start_key, &end_key); key_val = start_key; - while (as_load_uint8((uint8_t*) &tdata->do_work) && + while (tdata->do_work && key_val < end_key) { // create a record with given key @@ -810,7 +809,7 @@ random_read_write(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, // to finish as soon as the timer runs out thr_coordinator_complete(coord); - while (as_load_uint8((uint8_t*) &tdata->do_work)) { + while (tdata->do_work) { // roll the die uint32_t die = _random_fp(tdata->random); @@ -838,7 +837,7 @@ random_read_write_udf(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, // to finish as soon as the timer runs out thr_coordinator_complete(coord); - while (as_load_uint8((uint8_t*) &tdata->do_work)) { + while (tdata->do_work) { // roll the die uint32_t die = _random_fp(tdata->random); @@ -871,7 +870,7 @@ linear_deletes(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, cdata->transaction_worker_threads, &start_key, &end_key); key_val = start_key; - while (as_load_uint8((uint8_t*) &tdata->do_work) && + while (tdata->do_work && key_val < end_key) { // create a record with given key @@ -906,7 +905,7 @@ LOCAL_HELPER void random_read_write_delete(tdata_t* tdata, cdata_t* cdata, // to finish as soon as the timer runs out thr_coordinator_complete(coord); - while (as_load_uint8((uint8_t*) &tdata->do_work)) { + while (tdata->do_work) { // roll the die uint32_t die = _random_fp(tdata->random); @@ -1042,24 +1041,24 @@ _async_listener(as_error* err, void* udata, as_event_loop* event_loop) else { if (err->code == AEROSPIKE_ERR_TIMEOUT) { if (adata->op == read_op) { - as_incr_uint64(&cdata->read_timeout_count); + cdata->read_timeout_count++; } else if (adata->op == udf_op) { - as_incr_uint64(&cdata->udf_timeout_count); + cdata->udf_timeout_count++; } else { - as_incr_uint64(&cdata->write_timeout_count); + cdata->write_timeout_count++; } } else { if (adata->op == read_op) { - as_incr_uint64(&cdata->read_error_count); + cdata->read_error_count++; } else if (adata->op == udf_op) { - as_incr_uint64(&cdata->udf_error_count); + cdata->udf_error_count++; } else { - as_incr_uint64(&cdata->write_error_count); + cdata->write_error_count++; } if (cdata->debug) { @@ -1153,7 +1152,7 @@ linear_writes_async(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, key_val = stage->key_start; end_key = stage->key_end; - while (as_load_uint8((uint8_t*) &tdata->do_work) && + while (tdata->do_work && key_val < end_key) { adata = queue_pop_wait(adata_q); @@ -1200,7 +1199,7 @@ random_read_write_async(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, // are finished with our required tasks and can be stopped whenever thr_coordinator_complete(coord); - while (as_load_uint8((uint8_t*) &tdata->do_work)) { + while (tdata->do_work) { adata = queue_pop_wait(adata_q); @@ -1245,7 +1244,7 @@ random_read_write_udf_async(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, // are finished with our required tasks and can be stopped whenever thr_coordinator_complete(coord); - while (as_load_uint8((uint8_t*) &tdata->do_work)) { + while (tdata->do_work) { adata = queue_pop_wait(adata_q); @@ -1285,7 +1284,7 @@ linear_deletes_async(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coord, key_val = stage->key_start; end_key = stage->key_end; - while (as_load_uint8((uint8_t*) &tdata->do_work) && + while (tdata->do_work && key_val < end_key) { adata = queue_pop_wait(adata_q); @@ -1336,7 +1335,7 @@ random_read_write_delete_async(tdata_t* tdata, cdata_t* cdata, thr_coord_t* coor // are finished with our required tasks and can be stopped whenever thr_coordinator_complete(coord); - while (as_load_uint8((uint8_t*) &tdata->do_work)) { + while (tdata->do_work) { adata = queue_pop_wait(adata_q); diff --git a/src/test/unit/histogram_test.c b/src/test/unit/histogram_test.c index b2eb6b20..114ba128 100644 --- a/src/test/unit/histogram_test.c +++ b/src/test/unit/histogram_test.c @@ -134,7 +134,7 @@ END_TEST START_TEST(simple_insert_one) { histogram_t* h = &hist; - histogram_add(h, 1); + histogram_incr(h, 1); } END_TEST @@ -145,7 +145,7 @@ END_TEST START_TEST(simple_query_one) { histogram_t* h = &hist; - histogram_add(h, 1); + histogram_incr(h, 1); ck_assert_int_eq(histogram_get_count(h, 0), 1); } END_TEST @@ -157,7 +157,7 @@ END_TEST START_TEST(simple_query_total) { histogram_t* h = &hist; - histogram_add(h, 1); + histogram_incr(h, 1); ck_assert_int_eq(histogram_calc_total(h), 1); } END_TEST @@ -169,7 +169,7 @@ END_TEST START_TEST(simple_query_below_range) { histogram_t* h = &hist; - histogram_add(h, 0); + histogram_incr(h, 0); ck_assert_int_eq(h->underflow_cnt, 1); } END_TEST @@ -181,7 +181,7 @@ END_TEST START_TEST(simple_query_above_range) { histogram_t* h = &hist; - histogram_add(h, 10); + histogram_incr(h, 10); ck_assert_int_eq(h->overflow_cnt, 1); } END_TEST @@ -193,7 +193,7 @@ END_TEST START_TEST(simple_clear) { histogram_t* h = &hist; - histogram_add(h, 2); + histogram_incr(h, 2); ck_assert_int_eq(histogram_get_count(h, 1), 1); histogram_clear(h); ck_assert_int_eq(histogram_get_count(h, 1), 0); @@ -239,7 +239,7 @@ END_TEST START_TEST(simple_print) { histogram_t* h = &hist; - histogram_add(h, 3); + histogram_incr(h, 3); FILE* out_file = tmpfile(); @@ -269,7 +269,7 @@ END_TEST START_TEST(simple_print_lowb) { histogram_t* h = &hist; - histogram_add(h, 0); + histogram_incr(h, 0); FILE* out_file = tmpfile(); @@ -299,7 +299,7 @@ END_TEST START_TEST(simple_print_upb) { histogram_t* h = &hist; - histogram_add(h, 20); + histogram_incr(h, 20); FILE* out_file = tmpfile(); @@ -329,7 +329,7 @@ END_TEST START_TEST(simple_print_clear) { histogram_t* h = &hist; - histogram_add(h, 3); + histogram_incr(h, 3); FILE* out_file = tmpfile(); @@ -709,7 +709,7 @@ default_setup(void) // insert a bunch of elements for (delay_t us = 1; us < 128500; us++) { - histogram_add(&hist, us); + histogram_incr(&hist, us); } } diff --git a/tso/Makefile b/tso/Makefile deleted file mode 100644 index edb4fddd..00000000 --- a/tso/Makefile +++ /dev/null @@ -1,39 +0,0 @@ -HOST := $(CXX) -CROSS := $(CC) - -INCL := $(shell $(CROSS) -print-file-name=plugin)/include -FLAGS := -std=c++17 -Os -gdwarf-4 -I$(INCL) -Wall -Wextra \ - -fno-strict-aliasing -fno-inline -fno-omit-frame-pointer \ - -fno-rtti -fno-exceptions - -all: tso.so - -tso.so: tso.cc - $(HOST) $(FLAGS) -fPIC -shared -o tso.so tso.cc - -dump: tso.so test.c - $(CROSS) -std=c99 -O2 -Wall -Wextra -fdump-tree-all \ - -gdwarf-4 -fno-strict-aliasing \ - -fplugin=./tso.so \ - -fplugin-arg-tso-enable=yes \ - -fplugin-arg-tso-exclude=exclude_ce.txt \ - -fplugin-arg-tso-track-deps=yes \ - -fplugin-arg-tso-fix-asm=yes \ - -fplugin-arg-tso-fix-built-in=yes \ - -fplugin-arg-tso-profiling=no \ - -o test test.c - -test: tso.so test.c - $(CROSS) -std=c99 -O2 -Wall -Wextra \ - -gdwarf-4 -fno-strict-aliasing \ - -fplugin=./tso.so \ - -fplugin-arg-tso-enable=yes \ - -fplugin-arg-tso-exclude=exclude_ce.txt \ - -fplugin-arg-tso-track-deps=yes \ - -fplugin-arg-tso-fix-asm=yes \ - -fplugin-arg-tso-fix-built-in=yes \ - -fplugin-arg-tso-profiling=no \ - -o test test.c - -clean: - rm -f tso.so test test.c.* diff --git a/tso/exclude_ce.txt b/tso/exclude_ce.txt deleted file mode 100644 index dcd057b4..00000000 --- a/tso/exclude_ce.txt +++ /dev/null @@ -1,34 +0,0 @@ -# Note: submodules are all excluded, so their .c files are not instrumented. -# However naming .h files will exempt their inlines. And yes, we're specifically -# NOT exempting the as_atomic headers in common. - -as_aerospike.h -as_boolean.h -as_bytes.h -as_double.h -as_geojson.h -as_integer.h -as_iterator.h -as_list.h -as_log.h -as_map.h -as_msgpack.h -as_random.h -as_rec.h -as_serializer.h -as_stream.h -as_string.h -as_val.h - -cf_b64.h -cf_byte_order.h -cf_clock.h -cf_digest.h -cf_hash_math.h -cf_ll.h -cf_queue.h - -jansson.h -lookup3.h - -#------------------------------ diff --git a/tso/test.c b/tso/test.c deleted file mode 100644 index da6378d7..00000000 --- a/tso/test.c +++ /dev/null @@ -1,834 +0,0 @@ -// vim: set noet ts=4 sw=4: - -// --- dependencies ------------------------------------------------------------ - -int* g_p; - -int -addr_dep(int i) -{ - int x = g_p[i]; - - return x; -} - -int** g_pp; - -int -addr_dep_2(int i, int k) -{ - int x = g_pp[i][k]; - - return x; -} - -void -data_dep(int* p1, int* p2, int i) -{ - p1[i] = p2[i] + 123; -} - -// --- assignment -------------------------------------------------------------- - -void -as_1(int* x) -{ - g_p = x; -} - -void -as_2(int x) -{ - *g_p = x; -} - -void -as_3(int* x) -{ - *g_pp = x; -} - -void -as_4(int x) -{ - **g_pp = x; -} - -// --- built-ins, inline assembly ---------------------------------------------- - -int -built_in_1(int* p, int x, int y) -{ - int r = __sync_val_compare_and_swap(p, x, y); - - return r; -} - -int -built_in_2(int* p) -{ - return __atomic_load_n(p, __ATOMIC_RELAXED); -} - -void -assembly(void) -{ - __asm__ volatile ("yield" : : : "memory"); -} - -// --- struct ------------------------------------------------------------------ - -typedef struct str_s { - int x; - int y; - struct str_s* p; -} str_t; - -str_t g_str; -str_t* g_str_p; -str_t g_strs[10]; - -int -str_1(str_t* p) -{ - return p->x; -} - -int -str_2(str_t* p) -{ - return p->p->x; -} - -int -str_3(void) -{ - return g_str.x; -} - -int -str_4(void) -{ - return g_str.p->x; -} - -int -str_5(void) -{ - return g_str_p->x; -} - -int -str_6(int i) -{ - return g_strs[i].x; -} - -int -str_7(void) -{ - static str_t str; - - return str.x; -} - -int -str_8(void) -{ - static str_t str; - - return str.p->x; -} - -int -str_9(void) -{ - str_t str = g_str; - - return str.x; -} - -int -str_10(void) -{ - str_t* p = &g_str; - - return p->x; -} - -void -str_11(void) -{ - g_str = (str_t){ 0, 0, 0 }; -} - -void -str_12(void) -{ - g_str.x = 0; -} - -void -str_13(void) -{ - g_str.p->x = 0; -} - -void -str_14(void) -{ - g_str_p->x = 0; -} - -void -str_15(void) -{ - g_str_p->p->x = 0; -} - -void -str_16(int i) -{ - g_strs[i].x = 0; -} - -void -str_17(void) -{ - static str_t str; - - str.x = 0; - - (void)str.x; -} - -void -str_18(void) -{ - str_t* str = &(str_t){ 0, 0, 0 }; - - (void)str; -} - -// --- union ------------------------------------------------------------------- - -typedef union un_u { - int x; - int y; - union un_u* p; -} un_t; - -un_t g_un; -un_t* g_un_p; -un_t g_uns[10]; - -int -un_1(un_t* p) -{ - return p->x; -} - -int -un_2(un_t* p) -{ - return p->p->x; -} - -int -un_3(void) -{ - return g_un.x; -} - -int -un_4(void) -{ - return g_un.p->x; -} - -int -un_5(void) -{ - return g_un_p->x; -} - -int -un_6(int i) -{ - return g_uns[i].x; -} - -int -un_7(void) -{ - static un_t un; - - return un.x; -} - -int -un_8(void) -{ - static un_t un; - - return un.p->x; -} - -int -un_9(void) -{ - un_t un = g_un; - - return un.x; -} - -int -un_10(void) -{ - un_t* p = &g_un; - - return p->x; -} - -void -un_11(void) -{ - g_un = (un_t){ 0 }; -} - -void -un_12(void) -{ - g_un.x = 0; -} - -void -un_13(void) -{ - g_un.p->x = 0; -} - -void -un_14(void) -{ - g_un_p->x = 0; -} - -void -un_15(void) -{ - g_un_p->p->x = 0; -} - -void -un_16(int i) -{ - g_uns[i].x = 0; -} - -void -un_17(void) -{ - static un_t un; - - un.x = 0; - - (void)un.x; -} - -void -un_18(void) -{ - un_t* un = &(un_t){ 0 }; - - (void)un; -} - -// --- bitmap ------------------------------------------------------------------ - -typedef struct { - int x : 9; - int y : 23; -} bm_t; - -bm_t g_bm; -bm_t* g_bm_p; -bm_t g_bms[10]; - -int -bm_1(bm_t* p) -{ - return p->x; -} - -int -bm_2(void) -{ - return g_bm.x; -} - -int -bm_3(void) -{ - return g_bm_p->x; -} - -int -bm_4(int i) -{ - return g_bms[i].x; -} - -int -bm_5(void) -{ - static bm_t bm; - - return bm.x; -} - -int -bm_6(void) -{ - bm_t bm = g_bm; - - return bm.x; -} - -int -bm_7(void) -{ - bm_t* p = &g_bm; - - return p->x; -} - -void -bm_11(void) -{ - g_bm = (bm_t){ 0, 0 }; -} - -void -bm_12(void) -{ - g_bm.x = 0; -} - -void -bm_13(void) -{ - g_bm_p->x = 0; -} - -void -bm_14(int i) -{ - g_bms[i].x = 0; -} - -void -bm_15(void) -{ - static bm_t bm; - - bm.x = 0; - - (void)bm.x; -} - -void -bm_16(void) -{ - bm_t* bm = &(bm_t){ 0, 0 }; - - (void)bm; -} - -// --- nested ------------------------------------------------------------------ - -typedef struct str_n_s { - str_t s; - un_t u; - bm_t b; -} str_n_t; - -typedef union un_n_u { - str_t s; - un_t u; - bm_t b; -} un_n_t; - -str_n_t g_str_n; -un_n_t g_un_n; - -int -nest_1(void) -{ - return g_str_n.s.x; -} - -int -nest_2(void) -{ - return g_str_n.u.x; -} - -int -nest_3(void) -{ - return g_str_n.b.x; -} - -int -nest_4(void) -{ - return g_un_n.s.x; -} - -int -nest_5(void) -{ - return g_un_n.u.x; -} - -int -nest_6(void) -{ - return g_un_n.b.x; -} - -int -nest_7(str_n_t* str, un_n_t* un) -{ - return str->s.x + un->s.x + g_str_n.s.y + g_un_n.s.y; -} - -void -nest_8(void) -{ - g_str_n.s.x = 0; -} - -void -nest_9(void) -{ - g_str_n.u.x = 0; -} - -void -nest_10(void) -{ - g_str_n.b.x = 0; -} - -void -nest_11(void) -{ - g_str_n.s = (str_t){ 0 , 0 , 0 }; -} - -void -nest_12(void) -{ - g_str_n.u = (un_t){ 0 }; -} - -void -nest_13(void) -{ - g_str_n.b = (bm_t){ 0 , 0 }; -} - -// --- & operator -------------------------------------------------------------- - -int g_x; - -int -addr_1(void) -{ - int* p = &g_x; - - return *p; -} - -int -addr_2(void) -{ - return *&g_x; -} - -int -addr_3(void) -{ - int x = *&g_x; - - return x; -} - -int -addr_4(void) -{ - int* x = &*g_p; - - return *x; -} - -int -addr_5(void) -{ - int x = *&*g_p; - - return x; -} - -int g_xs[10]; - -int -addr_6(int i) -{ - int* p = &g_xs[i]; - - return *p; -} - -int -addr_7(int i) -{ - int x = *&g_xs[i]; - - return x; -} - -void -addr_8(int x) -{ - *&g_x = x; -} - -void -addr_9(int i, int x) -{ - *&g_xs[i] = x; -} - -// --- pointer + array --------------------------------------------------------- - -typedef int (*pa_t)[10]; -pa_t g_pa[10]; - -int -pa_1(int i, int k) -{ - return (*g_pa[i])[k]; -} - -int -pa_2(int i) -{ - int* p = g_xs + i; - int x = *p; - - return x; -} - -void -pa_3(int i, int k, int x) -{ - (*g_pa[i])[k] = x; -} - -void -pa_4(int i, int x) -{ - int* p = g_xs + i; - - *p = x; -} - -int (*g_ia)[10]; - -int -pa_5(int i) -{ - int x = (*g_ia)[i]; - - return x; -} - -int -pa_6(int i) -{ - int* p = *g_ia + i; - int x = *p; - - return x; -} - -int -pa_7(int i) -{ - int* p = &(*g_ia)[i]; - int x = *p; - - return x; -} - -void -pa_8(int i, int x) -{ - (*g_ia)[i] = x; -} - -void -pa_9(int i, int x) -{ - int* p = *g_ia + i; - - *p = x; -} - -void -pa_10(int i, int x) -{ - int* p = &(*g_ia)[i]; - - *p = x; -} - -// --- conversion -------------------------------------------------------------- - -int -con_1(float x) -{ - return *(int*)&x; -} - -float -con_2(void) -{ - return *(float*)&g_x; -} - -long -con_3(void) -{ - return (long)&g_x; -} - -int -con_4(bm_t* bm1, bm_t* bm2) -{ - return bm1->x == bm2->x; // VIEW_CONVERT_EXPR -} - -// --- arithmetic -------------------------------------------------------------- - -void -ari_1(int x) -{ - g_x += x; -} - -void -ari_2(void) -{ - ++g_x; -} - -void -ari_3(void) -{ - g_x++; -} - -void -ari_4(int x) -{ - *g_p += x; -} - -void -ari_5(void) -{ - ++*g_p; -} - -void -ari_6(void) -{ - (*g_p)++; -} - -// --- arguments --------------------------------------------------------------- - -int -arg_read(int* p) -{ - return *p; -} - -int -arg_1(int* p) -{ - return arg_read(p); -} - -int -arg_2(void) -{ - return arg_read(g_p); -} - -int -arg_3(void) -{ - return arg_read(*g_pp); -} - -// --- redundancy -------------------------------------------------------------- - -int -red(int* p) -{ - int x = g_x; - int y = *p + 1; - - (void)x; - (void)y; - - return 1234; -} - -// --- fake file name ---------------------------------------------------------- - -# 1 "fake.h" 1 - -int -fake(int* p) -{ - return *p; -} - -# 814 "test.c" 2 - -// --- main -------------------------------------------------------------------- - -int g_n_barriers = 0; - -void -cf_tso_count_barrier(void) -{ - __sync_synchronize(); - - ++g_n_barriers; -} - -int -main(int argc, char* argv[]) -{ - (void)argc; - (void)argv; - - return 0; -} diff --git a/tso/tso.cc b/tso/tso.cc deleted file mode 100644 index 6092d70d..00000000 --- a/tso/tso.cc +++ /dev/null @@ -1,751 +0,0 @@ -// vim: set noet ts=4 sw=4: - -/* - * tso.cc - * - * Copyright (C) 2022 Aerospike, Inc. - * - * Portions may be licensed to Aerospike, Inc. under one or more contributor - * license agreements. - * - * This program is free software: you can redistribute it and/or modify it under - * the terms of the GNU Affero General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more - * details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see http://www.gnu.org/licenses/ - */ - -//========================================================== -// Includes. -// - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -#pragma GCC diagnostic warning "-Wshadow" -#pragma GCC diagnostic warning "-Wcast-align" -#pragma GCC diagnostic warning "-Wcast-qual" -#pragma GCC diagnostic warning "-Wconversion" -#pragma GCC diagnostic warning "-Wsign-conversion" -#pragma GCC diagnostic warning "-Wmissing-declarations" -#pragma GCC diagnostic warning "-Wredundant-decls" - -#include -#include -#include -#include -#include -#include - - -//========================================================== -// Typedefs & constants. -// - -#define DEBUG 0 -#define COUNT_BARRIER "cf_tso_count_barrier" -#define RELAXED 0 -#define SEQ_CST 5 - -class tso_1 : public gimple_opt_pass -{ -public: - tso_1(const pass_data& pd) : gimple_opt_pass(pd, g) {} - virtual tso_1* clone() override; - virtual unsigned int execute(function* f) override; -}; - -class tso_2 : public gimple_opt_pass -{ -public: - tso_2(const pass_data& pd) : gimple_opt_pass(pd, g) {} - virtual tso_2* clone() override; - virtual unsigned int execute(function* f) override; -}; - - -//========================================================== -// Globals. -// - -int plugin_is_GPL_compatible; - -static bool g_enable = true; -static std::set g_excluded; -static bool g_track_deps = true; -static bool g_fix_asm = true; -static bool g_fix_built_in = true; -static bool g_profiling = false; - - -//========================================================== -// Forward declarations. -// - -static bool handle_argument(const plugin_argument* arg); -static bool add_excluded(const std::string& path); -static std::string strip_line(const std::string& line); -static bool set_boolean(bool& b, const std::string& value); -static std::string strip_path(const std::string& path); -static bool skip_function(const std::string& top_file, const std::string& file, - const std::string& ident); -static bool is_excluded(const std::string& key); -static bool gimple_needs_ordering(const gimple* st); -static bool tree_needs_ordering(const tree t); -static bool gimple_depends(const gimple* st, std::vector& deps, bool ordered); -static bool tree_contains(const tree t, const tree x); -static void insert_barrier(gimple_stmt_iterator& gsi, location_t loc); -static gcall* make_barrier(void); -static gcall* make_profiling_call(void); -static bool is_barrier(const gimple* st); -static void neutralize_barrier(gimple* st); -#if DEBUG > 0 -static void debug_func(const std::string& top_file, const std::string& file, - const std::string& ident); -static void debug_stmt(/* const */ gimple* st, const std::vector& deps); -static void debug_stmt(/* const */ gimple* st); -#endif -#if DEBUG > 1 -static void debug_ops(const gimple* st); -#endif - - -//========================================================== -// Public API. -// - -int -plugin_init(plugin_name_args* info, plugin_gcc_version* ver) -{ - if (! plugin_default_version_check (ver, &gcc_version)) { - return 1; - } - - for (int i = 0; i < info->argc; ++i) { - plugin_argument* arg = info->argv + i; - - if (! handle_argument(arg)) { - return 1; - } - } - - pass_data pd_1 = { - .type = GIMPLE_PASS, - .name = "tso_1", - .optinfo_flags = OPTGROUP_NONE, - .tv_id = TV_NONE, - .properties_required = PROP_gimple_lcf, - .properties_provided = 0, - .properties_destroyed = 0, - .todo_flags_start = 0, - .todo_flags_finish = 0 - }; - - pass_data pd_2 = { - .type = GIMPLE_PASS, - .name = "tso_2", - .optinfo_flags = OPTGROUP_NONE, - .tv_id = TV_NONE, - .properties_required = PROP_cfg, - .properties_provided = 0, - .properties_destroyed = 0, - .todo_flags_start = 0, - .todo_flags_finish = 0 - }; - - register_pass_info pi_1 = { - .pass = new tso_1(pd_1), - .reference_pass_name = "cfg", - .ref_pass_instance_number = 1, - .pos_op = PASS_POS_INSERT_BEFORE - }; - - register_pass_info pi_2 = { - .pass = new tso_2(pd_2), - .reference_pass_name = "sanopt", - .ref_pass_instance_number = 1, - .pos_op = PASS_POS_INSERT_AFTER - }; - - register_callback(info->base_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pi_1); - register_callback(info->base_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pi_2); - - return 0; -} - -tso_1* -tso_1::clone() { - return this; -} - -unsigned int -tso_1::execute(function* fn) -{ - tree decl = fn->decl; - const std::string top_path{main_input_filename}; - const std::string& top_file = strip_path(top_path); - const std::string path{DECL_SOURCE_FILE(decl)}; - const std::string& file = strip_path(path); - const std::string ident{fndecl_name(decl)}; - -#if DEBUG > 0 - debug_func(top_file, file, ident); -#endif - - if (! g_enable || skip_function(top_file, file, ident)) { - return 0; - } - - gimple_seq& body = fn->gimple_body; - gimple_stmt_iterator gsi = gsi_start(body); - std::vector deps; - - while (! gsi_end_p(gsi)) { - gimple* st = gsi_stmt(gsi); - - if (gimple_has_substatements(st)) { - std::cerr << "substatements not supported" << std::endl; - ::abort(); - } - -#if DEBUG > 0 - debug_stmt(st, deps); -#endif - - bool ordered = gimple_needs_ordering(st); - bool depends = g_track_deps && gimple_depends(st, deps, ordered); - - if (ordered) { - if (depends) { -#if DEBUG > 0 - std::cerr << " depends" << std::endl; -#endif - } - else { - location_t loc = gimple_location(st); - - insert_barrier(gsi, loc); -#if DEBUG > 0 - std::cerr << " barrier" << std::endl; -#endif - } - } - -#if DEBUG > 1 - debug_ops(st); -#endif - - gsi_next(&gsi); - } - - return 0; -} - -tso_2* -tso_2::clone() { - return this; -} - -unsigned int -tso_2::execute(function* fn) -{ - tree decl = fn->decl; - const std::string top_path{main_input_filename}; - const std::string& top_file = strip_path(top_path); - const std::string path{DECL_SOURCE_FILE(decl)}; - const std::string& file = strip_path(path); - const std::string ident{fndecl_name(decl)}; - -#if DEBUG > 0 - debug_func(top_file, file, ident); -#endif - - if (! g_enable || skip_function(top_file, file, ident)) { - return 0; - } - - basic_block bb; - - FOR_ALL_BB_FN(bb, fn) { - gimple_stmt_iterator gsi = gsi_start_nondebug_bb(bb); - bool safe = false; - - while (! gsi_end_p(gsi)) { - gimple* st = gsi_stmt(gsi); - -#if DEBUG > 0 - debug_stmt(st); -#endif - - if (is_barrier(st)) { - if (safe) { - neutralize_barrier(st); -#if DEBUG > 0 - std::cerr << " neutralize" << std::endl; -#endif - } - else { - safe = true; -#if DEBUG > 0 - std::cerr << " keep" << std::endl; -#endif - } - } - else { - safe = false; - } - - gsi_next_nondebug(&gsi); - } - } - - return 0; -} - -//========================================================== -// Local helpers. -// - -static bool -handle_argument(const plugin_argument* arg) -{ - const std::string key{arg->key}; - const std::string value{arg->value}; - - if (key == "enable") { - return set_boolean(g_enable, value); - } - - if (key == "exclude") { - return add_excluded(value); - } - - if (key == "track-deps") { - return set_boolean(g_track_deps, value); - } - - if (key == "fix-asm") { - return set_boolean(g_fix_asm, value); - } - - if (key == "fix-built-in") { - return set_boolean(g_fix_built_in, value); - } - - if (key == "profiling") { - return set_boolean(g_profiling, value); - } - - std::cerr << "invalid plugin argument \"" << key << "\"" << std::endl; - return false; -} - -static bool -add_excluded(const std::string& path) -{ - std::ifstream ifs{path}; - - if (! ifs.is_open()) { - std::cerr << "failed to open \"" << path << "\"" << std::endl; - return false; - } - - std::string line; - - while (std::getline(ifs, line)) { - const std::string& stripped = strip_line(line); - - if (stripped.size() != 0) { - g_excluded.insert(stripped); - } - } - - return true; -} - -static std::string -strip_line(const std::string& line) -{ - size_t i_comment = line.find('#'); - const std::string& no_comment = line.substr(0, i_comment); - size_t i_begin = no_comment.find_first_not_of(" \t"); - - if (i_begin == std::string::npos) { - return ""; - } - - size_t i_end = no_comment.find_last_not_of(" \t"); - - return std::string{no_comment.substr(i_begin, i_end + 1 - i_begin)}; -} - -static bool -set_boolean(bool& b, const std::string& value) -{ - if (value == "on" || value == "yes" || value == "1") { - b = true; - return true; - } - - if (value == "off" || value == "no" || value == "0") { - b = false; - return true; - } - - std::cerr << "invalid Boolean \"" << value << "\"" << std::endl; - return false; -} - -static std::string -strip_path(const std::string& path) -{ - size_t i = path.rfind('/'); - - return i == std::string::npos ? - path : path.substr(i + 1, std::string::npos); -} - -static bool -skip_function(const std::string& top_file, const std::string& file, - const std::string& ident) -{ - if (ident == COUNT_BARRIER) { - return true; - } - - if (is_excluded(file) || is_excluded(ident)) { - return true; - } - - if (is_excluded(file + ":" + ident)) { - return true; - } - - if (file == top_file) { - return false; - } - - if (is_excluded(top_file)) { - return true; - } - - if (is_excluded(top_file + ":" + file)) { - return true; - } - - if (is_excluded(top_file + ":" + ident)) { - return true; - } - - if (is_excluded(top_file + ":" + file + ":" + ident)) { - return true; - } - - return false; -} - -static -bool is_excluded(const std::string& key) -{ - return g_excluded.find(key) != g_excluded.end(); -} - -static bool -gimple_needs_ordering(const gimple* st) -{ - enum gimple_code code = gimple_code(st); - - if (code == GIMPLE_ASM && g_fix_asm) { - return true; - } - - if (code == GIMPLE_CALL && g_fix_built_in) { - tree decl = gimple_call_fndecl(st); - - if (decl != NULL_TREE && DECL_BUILT_IN_CLASS(decl) != NOT_BUILT_IN) { - return true; - } - - // fall through - } - - unsigned n_ops = gimple_num_ops(st); - - for (unsigned i = 0; i < n_ops; ++i) { - tree op = gimple_op(st, i); - - if (tree_needs_ordering(op)) { - return true; - } - } - - return false; -} - -static bool -tree_needs_ordering(const tree t) -{ - if (t == NULL_TREE) { - return false; - } - - tree_code code = TREE_CODE(t); - - if (TREE_CODE_CLASS(code) == tcc_reference) { - switch (code) { - case COMPONENT_REF: - case BIT_FIELD_REF: - case ARRAY_REF: - case VIEW_CONVERT_EXPR: - // recurse - check operands for pointer dereferences and globals - break; - - case ARRAY_RANGE_REF: - case REALPART_EXPR: - case IMAGPART_EXPR: - case INDIRECT_REF: - case TARGET_MEM_REF: - std::cerr << "unsupported tree code " << code << std::endl; - ::abort(); - - case MEM_REF: - // pointer dereference - done - return true; - - default: - std::cerr << "unknown tree code " << code << std::endl; - ::abort(); - } - } - else if (code == ADDR_EXPR) { - // & operator - done - // (otherwise taking the address of a global variable - an ADDR_EXPR - // on a VAR_DECL with TREE_STATIC() - would result in a barrier) - return false; - } - else if (code == VAR_DECL && TREE_STATIC(t)) { - // global - done - return true; - } - // else - recurse - - int n_ops = TREE_OPERAND_LENGTH(t); - - for (int i = 0; i < n_ops; ++i) { - tree op = TREE_OPERAND(t, i); - - if (tree_needs_ordering(op)) { - return true; - } - } - - return false; -} - -static bool -gimple_depends(const gimple* st, std::vector& deps, bool ordered) -{ - enum gimple_code code = gimple_code(st); - - if (code != GIMPLE_ASSIGN) { - deps.clear(); - return false; - } - - tree lhs = gimple_assign_lhs(st); - tree rhs1 = gimple_assign_rhs1(st); - tree rhs2 = gimple_assign_rhs2(st); // optional, may be NULL_TREE - tree rhs3 = gimple_assign_rhs3(st); // optional, may be NULL_TREE - - bool depends = false; - - for (tree& dep : deps) { - // deps only contains SSA_NAMEs - if something in deps appears on the - // LHS, then we can be sure that it is being read (by definition of SSA, - // each SSA_NAME is only written to once, which is what got it into - // deps, so subsequent occurrences must be read occurrences) - if (tree_contains(lhs, dep) || tree_contains(rhs1, dep) || - tree_contains(rhs2, dep) || tree_contains(rhs3, dep)) { - depends = true; - break; - } - } - - if (ordered) { - deps.clear(); - } - - if ((ordered || depends) && TREE_CODE(lhs) == SSA_NAME) { - deps.push_back(lhs); - } - - return depends; -} - -static bool -tree_contains(const tree t, const tree x) -{ - if (t == NULL_TREE) { - return false; - } - - if (t == x) { - return true; - } - - int n_ops = TREE_OPERAND_LENGTH(t); - - for (int i = 0; i < n_ops; ++i) { - tree op = TREE_OPERAND(t, i); - - if (tree_contains(op, x)) { - return true; - } - } - - return false; -} - -static void -insert_barrier(gimple_stmt_iterator& gsi, location_t loc) -{ - gcall* b = g_profiling ? make_profiling_call() : make_barrier(); - - gimple_set_location(b, loc); - gsi_insert_before(&gsi, b, GSI_SAME_STMT); -} - -static gcall* -make_barrier(void) -{ - tree decl = builtin_decl_explicit(BUILT_IN_ATOMIC_THREAD_FENCE); - tree order = build_int_cst(integer_type_node, SEQ_CST); - gcall* call = gimple_build_call(decl, 1, order); - - return call; -} - -static gcall* -make_profiling_call(void) -{ - tree type = build_function_type_list(void_type_node, NULL_TREE); - tree decl = build_fn_decl(COUNT_BARRIER, type); - gcall* call = gimple_build_call(decl, 0); - - return call; -} - -static bool -is_barrier(const gimple* st) -{ - if (! is_gimple_call(st)) { - return false; - } - - tree decl = builtin_decl_explicit(BUILT_IN_ATOMIC_THREAD_FENCE); - - if (gimple_call_fndecl(st) != decl) { - return false; - } - - if (gimple_call_num_args(st) != 1) { - std::cerr << "unexpected number of arguments" << std::endl; - ::abort(); - } - - tree order = gimple_call_arg(st, 0); - - if (TREE_CODE(order) != INTEGER_CST) { - std::cerr << "non-constant memory order argument" << std::endl; - ::abort(); - } - - if (tree_to_uhwi(order) != SEQ_CST) { - return false; - } - - return true; -} - -static void -neutralize_barrier(gimple* st) -{ - tree order = build_int_cst(integer_type_node, RELAXED); - - gimple_call_set_arg(st, 0, order); -} - -#if DEBUG > 0 -static void -debug_func(const std::string& top_file, const std::string& file, - const std::string& ident) -{ - size_t sz = ident.size() + file.size() + top_file.size() + 3; - size_t n = sz > 70 ? 10 : 80 - sz; - - std::cerr << std::string(n, '-') << " " << top_file << ":" << file << - ":" << ident << std::endl; -} - -static void -debug_stmt(/* const */ gimple* st, const std::vector& deps) -{ - enum gimple_code code = gimple_code(st); - - std::cerr << "- " << code; - - for (const tree& dep : deps) { - std::cerr << " _" << SSA_NAME_VERSION(dep); - } - - std::cerr << " | "; - debug_gimple_stmt(st); -} - -static void -debug_stmt(/* const */ gimple* st) -{ - enum gimple_code code = gimple_code(st); - - std::cerr << "- " << code; - - std::cerr << " | "; - debug_gimple_stmt(st); -} -#endif - -#if DEBUG > 1 -static void -debug_ops(const gimple* st) -{ - unsigned n_ops = gimple_num_ops(st); - - for (unsigned i = 0; i < n_ops; ++i) { - tree op = gimple_op(st, i); - - std::cerr << "op " << i << std::endl; - debug_tree(op); - } -} -#endif