From 57f9209dcca48fbe70d397dba67511f4def3ae49 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Mon, 4 Nov 2024 16:58:36 +0000 Subject: [PATCH] Port version of tskit's blk_alloc using c11 atomics Minimal changes to use c11 Update setup.py for Windows C11 --- Makefile | 6 +- lib/ancestor_builder.c | 22 ++++---- lib/ancestor_matcher.c | 12 ++-- lib/err.c | 108 ++++++++++++++++++++++++++++++++++++ lib/err.h | 25 +++++++++ lib/meson.build | 2 +- lib/tree_sequence_builder.c | 10 ++-- lib/tsinfer.h | 6 +- setup.py | 9 ++- 9 files changed, 169 insertions(+), 31 deletions(-) diff --git a/Makefile b/Makefile index 37c38db1..3d48f5ab 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,12 @@ CC?=gcc -CFLAGS=-std=c99 -g -O3 -march=native -funroll-loops -ffast-math \ +CFLAGS=-std=c11 -g -O3 -march=native -funroll-loops -ffast-math \ # -ftree-vectorize \ # -ftree-vectorizer-verbose=6 \ # -fopt-info-vec-missed -all: _tsinfer.cpython-34m.so +all: _tsinfer.cpython-34m.so -_tsinfer.cpython-34m.so: _tsinfermodule.c +_tsinfer.cpython-34m.so: _tsinfermodule.c CC="${CC}" CFLAGS="${CFLAGS}" python3 setup.py build_ext --inplace ctags: diff --git a/lib/ancestor_builder.c b/lib/ancestor_builder.c index da939e95..41d6e946 100644 --- a/lib/ancestor_builder.c +++ b/lib/ancestor_builder.c @@ -115,7 +115,7 @@ ancestor_builder_print_state(ancestor_builder_t *self, FILE *out) } fprintf(out, "\n"); } - tsk_blkalloc_print_state(&self->allocator, out); + tsi_blkalloc_print_state(&self->allocator, out); ancestor_builder_check_state(self); return 0; } @@ -144,12 +144,12 @@ ancestor_builder_alloc( goto out; } /* Pre-calculate the maximum sizes asked for in other methods when calling - * tsk_blkalloc_get(&self->allocator, ...) */ + * tsi_blkalloc_get(&self->allocator, ...) */ max_size = TSK_MAX(self->num_samples * sizeof(allele_t), max_size); /* NB: using self->max_sites below is probably overkill: the real number should be * the maximum number of focal sites in a single ancestor, usually << max_sites */ max_size = TSK_MAX(self->max_sites * sizeof(tsk_id_t), max_size); - ret = tsk_blkalloc_init(&self->allocator, max_size); + ret = tsi_blkalloc_init(&self->allocator, max_size); if (ret != 0) { goto out; } @@ -163,7 +163,7 @@ ancestor_builder_free(ancestor_builder_t *self) { tsi_safe_free(self->sites); tsi_safe_free(self->descriptors); - tsk_blkalloc_free(&self->allocator); + tsi_blkalloc_free(&self->allocator); return 0; } @@ -177,8 +177,8 @@ ancestor_builder_get_time_map(ancestor_builder_t *self, double time) search.time = time; avl_node = avl_search(&self->time_map, &search); if (avl_node == NULL) { - avl_node = tsk_blkalloc_get(&self->allocator, sizeof(*avl_node)); - time_map = tsk_blkalloc_get(&self->allocator, sizeof(*time_map)); + avl_node = tsi_blkalloc_get(&self->allocator, sizeof(*avl_node)); + time_map = tsi_blkalloc_get(&self->allocator, sizeof(*time_map)); if (avl_node == NULL || time_map == NULL) { goto out; } @@ -439,10 +439,10 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot search.num_samples = self->num_samples; avl_node = avl_search(pattern_map, &search); if (avl_node == NULL) { - avl_node = tsk_blkalloc_get(&self->allocator, sizeof(avl_node_t)); - map_elem = tsk_blkalloc_get(&self->allocator, sizeof(pattern_map_t)); + avl_node = tsi_blkalloc_get(&self->allocator, sizeof(avl_node_t)); + map_elem = tsi_blkalloc_get(&self->allocator, sizeof(pattern_map_t)); site->genotypes - = tsk_blkalloc_get(&self->allocator, self->num_samples * sizeof(allele_t)); + = tsi_blkalloc_get(&self->allocator, self->num_samples * sizeof(allele_t)); if (avl_node == NULL || map_elem == NULL || site->genotypes == NULL) { ret = TSI_ERR_NO_MEMORY; goto out; @@ -465,7 +465,7 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot } map_elem->num_sites++; - list_node = tsk_blkalloc_get(&self->allocator, sizeof(site_list_t)); + list_node = tsi_blkalloc_get(&self->allocator, sizeof(site_list_t)); if (list_node == NULL) { ret = TSI_ERR_NO_MEMORY; goto out; @@ -538,7 +538,7 @@ ancestor_builder_finalise(ancestor_builder_t *self) descriptor = self->descriptors + self->num_ancestors; self->num_ancestors++; descriptor->time = time_map->time; - focal_sites = tsk_blkalloc_get( + focal_sites = tsi_blkalloc_get( &self->allocator, pattern_map->num_sites * sizeof(tsk_id_t)); if (focal_sites == NULL) { ret = TSI_ERR_NO_MEMORY; diff --git a/lib/ancestor_matcher.c b/lib/ancestor_matcher.c index 8fb7ea5d..3cd18cab 100644 --- a/lib/ancestor_matcher.c +++ b/lib/ancestor_matcher.c @@ -94,7 +94,7 @@ ancestor_matcher_print_state(ancestor_matcher_t *self, FILE *out) } fprintf(out, "\n"); } - tsk_blkalloc_print_state(&self->traceback_allocator, out); + tsi_blkalloc_print_state(&self->traceback_allocator, out); /* ancestor_matcher_check_state(self); */ return 0; @@ -132,7 +132,7 @@ ancestor_matcher_alloc(ancestor_matcher_t *self, ret = TSI_ERR_NO_MEMORY; goto out; } - ret = tsk_blkalloc_init(&self->traceback_allocator, traceback_block_size); + ret = tsi_blkalloc_init(&self->traceback_allocator, traceback_block_size); if (ret != 0) { goto out; } @@ -165,7 +165,7 @@ ancestor_matcher_free(ancestor_matcher_t *self) tsi_safe_free(self->output.left); tsi_safe_free(self->output.right); tsi_safe_free(self->output.parent); - tsk_blkalloc_free(&self->traceback_allocator); + tsi_blkalloc_free(&self->traceback_allocator); return 0; } @@ -229,9 +229,9 @@ ancestor_matcher_store_traceback(ancestor_matcher_t *self, const tsk_id_t site_i T[site_id].node = T[site_id - 1].node; T[site_id].recombination_required = T[site_id - 1].recombination_required; } else { - list_node = tsk_blkalloc_get(&self->traceback_allocator, + list_node = tsi_blkalloc_get(&self->traceback_allocator, (size_t) num_likelihood_nodes * sizeof(tsk_id_t)); - list_R = tsk_blkalloc_get( + list_R = tsi_blkalloc_get( &self->traceback_allocator, (size_t) num_likelihood_nodes * sizeof(int8_t)); if (list_node == NULL || list_R == NULL) { ret = TSI_ERR_NO_MEMORY; @@ -554,7 +554,7 @@ ancestor_matcher_reset(ancestor_matcher_t *self) assert(self->num_nodes <= self->max_nodes); memset(self->allelic_state, 0xff, self->num_nodes * sizeof(*self->allelic_state)); - ret = tsk_blkalloc_reset(&self->traceback_allocator); + ret = tsi_blkalloc_reset(&self->traceback_allocator); if (ret != 0) { goto out; } diff --git a/lib/err.c b/lib/err.c index cd71b5c8..ce01c80a 100644 --- a/lib/err.c +++ b/lib/err.c @@ -105,3 +105,111 @@ tsi_strerror(int err) } return ret; } + +/* Temporary hack. See notes in err.h for why this code is here. */ + +#include +#include + +void +tsi_blkalloc_print_state(tsi_blkalloc_t *self, FILE *out) +{ + fprintf(out, "Block allocator%p::\n", (void *) self); + fprintf(out, "\ttop = %lld\n", (long long) self->top); + fprintf(out, "\tchunk_size = %lld\n", (long long) self->chunk_size); + fprintf(out, "\tnum_chunks = %lld\n", (long long) self->num_chunks); + fprintf(out, "\ttotal_allocated = %lld\n", (long long) self->total_allocated); + fprintf(out, "\ttotal_size = %lld\n", (long long) self->total_size); +} + +int TSK_WARN_UNUSED +tsi_blkalloc_reset(tsi_blkalloc_t *self) +{ + int ret = 0; + + self->top = 0; + self->current_chunk = 0; + self->total_allocated = 0; + return ret; +} + +int TSK_WARN_UNUSED +tsi_blkalloc_init(tsi_blkalloc_t *self, size_t chunk_size) +{ + int ret = 0; + + tsk_memset(self, 0, sizeof(tsi_blkalloc_t)); + if (chunk_size < 1) { + ret = TSK_ERR_BAD_PARAM_VALUE; + goto out; + } + self->chunk_size = chunk_size; + self->top = 0; + self->current_chunk = 0; + self->total_allocated = 0; + self->total_size = 0; + self->num_chunks = 0; + self->mem_chunks = malloc(sizeof(char *)); + if (self->mem_chunks == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } + self->mem_chunks[0] = malloc(chunk_size); + if (self->mem_chunks[0] == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } + self->num_chunks = 1; + self->total_size = chunk_size + sizeof(void *); +out: + return ret; +} + +void *TSK_WARN_UNUSED +tsi_blkalloc_get(tsi_blkalloc_t *self, size_t size) +{ + void *ret = NULL; + void *p; + + if (size > self->chunk_size) { + goto out; + } + if ((self->top + size) > self->chunk_size) { + if (self->current_chunk == (self->num_chunks - 1)) { + p = realloc(self->mem_chunks, (self->num_chunks + 1) * sizeof(void *)); + if (p == NULL) { + goto out; + } + self->mem_chunks = p; + p = malloc(self->chunk_size); + if (p == NULL) { + goto out; + } + self->mem_chunks[self->num_chunks] = p; + self->num_chunks++; + self->total_size += self->chunk_size + sizeof(void *); + } + self->current_chunk++; + self->top = 0; + } + ret = self->mem_chunks[self->current_chunk] + self->top; + self->top += size; + self->total_allocated += size; +out: + return ret; +} + +void +tsi_blkalloc_free(tsi_blkalloc_t *self) +{ + size_t j; + + for (j = 0; j < self->num_chunks; j++) { + if (self->mem_chunks[j] != NULL) { + free(self->mem_chunks[j]); + } + } + if (self->mem_chunks != NULL) { + free(self->mem_chunks); + } +} diff --git a/lib/err.h b/lib/err.h index 52afa27b..91568b01 100644 --- a/lib/err.h +++ b/lib/err.h @@ -41,4 +41,29 @@ const char *tsi_strerror(int err); +/* FIXME! Including a custom version of the tsk_blkalloc struct here so that + * we can use c11 atomics on the total_size attribute. Including it in this + * file and err.c as this is the least noisy place to put it, for now + * See https://github.com/jeromekelleher/sc2ts/issues/381 for reasoning. + */ + +#include "tskit.h" +#include + +typedef struct { + size_t chunk_size; /* number of bytes per chunk */ + size_t top; /* the offset of the next available byte in the current chunk */ + size_t current_chunk; /* the index of the chunk currently being used */ + _Atomic size_t total_size; /* the total number of bytes allocated + overhead. */ + size_t total_allocated; /* the total number of bytes allocated. */ + size_t num_chunks; /* the number of memory chunks. */ + char **mem_chunks; /* the memory chunks */ +} tsi_blkalloc_t; + +extern void tsi_blkalloc_print_state(tsi_blkalloc_t *self, FILE *out); +extern int tsi_blkalloc_reset(tsi_blkalloc_t *self); +extern int tsi_blkalloc_init(tsi_blkalloc_t *self, size_t chunk_size); +extern void *tsi_blkalloc_get(tsi_blkalloc_t *self, size_t size); +extern void tsi_blkalloc_free(tsi_blkalloc_t *self); + #endif /*__ERR_H__*/ diff --git a/lib/meson.build b/lib/meson.build index c4d4a8af..4baea8ab 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -8,7 +8,7 @@ m_dep = cc.find_library('m', required : false) cunit_dep = dependency('cunit') extra_c_args = [ - '-std=c99', '-Wall', '-Wextra', '-Werror', '-Wpedantic', '-W', + '-std=c11', '-Wall', '-Wextra', '-Werror', '-Wpedantic', '-W', '-Wmissing-prototypes', '-Wstrict-prototypes', '-Wconversion', '-Wshadow', '-Wpointer-arith', '-Wcast-align', '-Wcast-qual', '-Wwrite-strings', '-Wnested-externs', diff --git a/lib/tree_sequence_builder.c b/lib/tree_sequence_builder.c index 0876f701..77e86b2a 100644 --- a/lib/tree_sequence_builder.c +++ b/lib/tree_sequence_builder.c @@ -194,8 +194,8 @@ tree_sequence_builder_print_state(tree_sequence_builder_t *self, FILE *out) out, "%d\t%d\t%d\t%d\n", edge->left, edge->right, edge->parent, edge->child); } - fprintf(out, "tsk_blkalloc = \n"); - tsk_blkalloc_print_state(&self->tsk_blkalloc, out); + fprintf(out, "tsi_blkalloc = \n"); + tsi_blkalloc_print_state(&self->tsi_blkalloc, out); fprintf(out, "avl_node_heap = \n"); object_heap_print_state(&self->avl_node_heap, out); fprintf(out, "edge_heap = \n"); @@ -244,7 +244,7 @@ tree_sequence_builder_alloc(tree_sequence_builder_t *self, size_t num_sites, if (ret != 0) { goto out; } - ret = tsk_blkalloc_init(&self->tsk_blkalloc, + ret = tsi_blkalloc_init(&self->tsi_blkalloc, TSK_MAX(8192, num_sites * sizeof(mutation_list_node_t) / 4)); if (ret != 0) { goto out; @@ -278,7 +278,7 @@ tree_sequence_builder_free(tree_sequence_builder_t *self) tsi_safe_free(self->sites.num_alleles); tsi_safe_free(self->left_index_edges); tsi_safe_free(self->right_index_edges); - tsk_blkalloc_free(&self->tsk_blkalloc); + tsi_blkalloc_free(&self->tsi_blkalloc); object_heap_free(&self->avl_node_heap); object_heap_free(&self->edge_heap); return 0; @@ -418,7 +418,7 @@ tree_sequence_builder_add_mutation( } } - list_node = tsk_blkalloc_get(&self->tsk_blkalloc, sizeof(mutation_list_node_t)); + list_node = tsi_blkalloc_get(&self->tsi_blkalloc, sizeof(mutation_list_node_t)); if (list_node == NULL) { ret = TSI_ERR_NO_MEMORY; goto out; diff --git a/lib/tsinfer.h b/lib/tsinfer.h index f8176009..7b877708 100644 --- a/lib/tsinfer.h +++ b/lib/tsinfer.h @@ -108,7 +108,7 @@ typedef struct { int flags; site_t *sites; avl_tree_t time_map; - tsk_blkalloc_t allocator; + tsi_blkalloc_t allocator; ancestor_descriptor_t *descriptors; } ancestor_builder_t; @@ -141,7 +141,7 @@ typedef struct { size_t num_nodes; size_t num_match_nodes; size_t num_mutations; - tsk_blkalloc_t tsk_blkalloc; + tsi_blkalloc_t tsi_blkalloc; object_heap_t avl_node_heap; object_heap_t edge_heap; /* Dynamic edge indexes used for tree generation and path compression. The @@ -184,7 +184,7 @@ typedef struct { tsk_id_t *likelihood_nodes_tmp; tsk_id_t *likelihood_nodes; node_state_list_t *traceback; - tsk_blkalloc_t traceback_allocator; + tsi_blkalloc_t traceback_allocator; size_t total_traceback_size; struct { tsk_id_t *left; diff --git a/setup.py b/setup.py index 02a63bba..a1e05589 100644 --- a/setup.py +++ b/setup.py @@ -31,12 +31,17 @@ + [os.path.join(kasdir, f) for f in kas_source_files] ) -libraries = ["Advapi32"] if IS_WINDOWS else [] +if IS_WINDOWS: + libraries = ["Advapi32"] + extra_compile_args = ["/std:c11"] +else: + libraries = [] + extra_compile_args = ["-std=c11"] _tsinfer_module = Extension( "_tsinfer", sources=sources, - extra_compile_args=["-std=c99"], + extra_compile_args=extra_compile_args, libraries=libraries, undef_macros=["NDEBUG"], include_dirs=includes + [numpy.get_include()],