Skip to content

Commit

Permalink
Port version of tskit's blk_alloc using c11 atomics
Browse files Browse the repository at this point in the history
Minimal changes to use c11

Update setup.py for Windows C11
  • Loading branch information
jeromekelleher committed Nov 5, 2024
1 parent d575342 commit 57f9209
Show file tree
Hide file tree
Showing 9 changed files with 169 additions and 31 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
CC?=gcc
CFLAGS=-std=c99 -g -O3 -march=native -funroll-loops -ffast-math \
CFLAGS=-std=c11 -g -O3 -march=native -funroll-loops -ffast-math \
# -ftree-vectorize \
# -ftree-vectorizer-verbose=6 \
# -fopt-info-vec-missed

all: _tsinfer.cpython-34m.so
all: _tsinfer.cpython-34m.so

_tsinfer.cpython-34m.so: _tsinfermodule.c
_tsinfer.cpython-34m.so: _tsinfermodule.c
CC="${CC}" CFLAGS="${CFLAGS}" python3 setup.py build_ext --inplace

ctags:
Expand Down
22 changes: 11 additions & 11 deletions lib/ancestor_builder.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ ancestor_builder_print_state(ancestor_builder_t *self, FILE *out)
}
fprintf(out, "\n");
}
tsk_blkalloc_print_state(&self->allocator, out);
tsi_blkalloc_print_state(&self->allocator, out);
ancestor_builder_check_state(self);
return 0;
}
Expand Down Expand Up @@ -144,12 +144,12 @@ ancestor_builder_alloc(
goto out;
}
/* Pre-calculate the maximum sizes asked for in other methods when calling
* tsk_blkalloc_get(&self->allocator, ...) */
* tsi_blkalloc_get(&self->allocator, ...) */
max_size = TSK_MAX(self->num_samples * sizeof(allele_t), max_size);
/* NB: using self->max_sites below is probably overkill: the real number should be
* the maximum number of focal sites in a single ancestor, usually << max_sites */
max_size = TSK_MAX(self->max_sites * sizeof(tsk_id_t), max_size);
ret = tsk_blkalloc_init(&self->allocator, max_size);
ret = tsi_blkalloc_init(&self->allocator, max_size);
if (ret != 0) {
goto out;
}
Expand All @@ -163,7 +163,7 @@ ancestor_builder_free(ancestor_builder_t *self)
{
tsi_safe_free(self->sites);
tsi_safe_free(self->descriptors);
tsk_blkalloc_free(&self->allocator);
tsi_blkalloc_free(&self->allocator);
return 0;
}

Expand All @@ -177,8 +177,8 @@ ancestor_builder_get_time_map(ancestor_builder_t *self, double time)
search.time = time;
avl_node = avl_search(&self->time_map, &search);
if (avl_node == NULL) {
avl_node = tsk_blkalloc_get(&self->allocator, sizeof(*avl_node));
time_map = tsk_blkalloc_get(&self->allocator, sizeof(*time_map));
avl_node = tsi_blkalloc_get(&self->allocator, sizeof(*avl_node));
time_map = tsi_blkalloc_get(&self->allocator, sizeof(*time_map));
if (avl_node == NULL || time_map == NULL) {
goto out;
}
Expand Down Expand Up @@ -439,10 +439,10 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot
search.num_samples = self->num_samples;
avl_node = avl_search(pattern_map, &search);
if (avl_node == NULL) {
avl_node = tsk_blkalloc_get(&self->allocator, sizeof(avl_node_t));
map_elem = tsk_blkalloc_get(&self->allocator, sizeof(pattern_map_t));
avl_node = tsi_blkalloc_get(&self->allocator, sizeof(avl_node_t));
map_elem = tsi_blkalloc_get(&self->allocator, sizeof(pattern_map_t));
site->genotypes
= tsk_blkalloc_get(&self->allocator, self->num_samples * sizeof(allele_t));
= tsi_blkalloc_get(&self->allocator, self->num_samples * sizeof(allele_t));
if (avl_node == NULL || map_elem == NULL || site->genotypes == NULL) {
ret = TSI_ERR_NO_MEMORY;
goto out;
Expand All @@ -465,7 +465,7 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot
}
map_elem->num_sites++;

list_node = tsk_blkalloc_get(&self->allocator, sizeof(site_list_t));
list_node = tsi_blkalloc_get(&self->allocator, sizeof(site_list_t));
if (list_node == NULL) {
ret = TSI_ERR_NO_MEMORY;
goto out;
Expand Down Expand Up @@ -538,7 +538,7 @@ ancestor_builder_finalise(ancestor_builder_t *self)
descriptor = self->descriptors + self->num_ancestors;
self->num_ancestors++;
descriptor->time = time_map->time;
focal_sites = tsk_blkalloc_get(
focal_sites = tsi_blkalloc_get(
&self->allocator, pattern_map->num_sites * sizeof(tsk_id_t));
if (focal_sites == NULL) {
ret = TSI_ERR_NO_MEMORY;
Expand Down
12 changes: 6 additions & 6 deletions lib/ancestor_matcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ ancestor_matcher_print_state(ancestor_matcher_t *self, FILE *out)
}
fprintf(out, "\n");
}
tsk_blkalloc_print_state(&self->traceback_allocator, out);
tsi_blkalloc_print_state(&self->traceback_allocator, out);

/* ancestor_matcher_check_state(self); */
return 0;
Expand Down Expand Up @@ -132,7 +132,7 @@ ancestor_matcher_alloc(ancestor_matcher_t *self,
ret = TSI_ERR_NO_MEMORY;
goto out;
}
ret = tsk_blkalloc_init(&self->traceback_allocator, traceback_block_size);
ret = tsi_blkalloc_init(&self->traceback_allocator, traceback_block_size);
if (ret != 0) {
goto out;
}
Expand Down Expand Up @@ -165,7 +165,7 @@ ancestor_matcher_free(ancestor_matcher_t *self)
tsi_safe_free(self->output.left);
tsi_safe_free(self->output.right);
tsi_safe_free(self->output.parent);
tsk_blkalloc_free(&self->traceback_allocator);
tsi_blkalloc_free(&self->traceback_allocator);
return 0;
}

Expand Down Expand Up @@ -229,9 +229,9 @@ ancestor_matcher_store_traceback(ancestor_matcher_t *self, const tsk_id_t site_i
T[site_id].node = T[site_id - 1].node;
T[site_id].recombination_required = T[site_id - 1].recombination_required;
} else {
list_node = tsk_blkalloc_get(&self->traceback_allocator,
list_node = tsi_blkalloc_get(&self->traceback_allocator,
(size_t) num_likelihood_nodes * sizeof(tsk_id_t));
list_R = tsk_blkalloc_get(
list_R = tsi_blkalloc_get(
&self->traceback_allocator, (size_t) num_likelihood_nodes * sizeof(int8_t));
if (list_node == NULL || list_R == NULL) {
ret = TSI_ERR_NO_MEMORY;
Expand Down Expand Up @@ -554,7 +554,7 @@ ancestor_matcher_reset(ancestor_matcher_t *self)
assert(self->num_nodes <= self->max_nodes);

memset(self->allelic_state, 0xff, self->num_nodes * sizeof(*self->allelic_state));
ret = tsk_blkalloc_reset(&self->traceback_allocator);
ret = tsi_blkalloc_reset(&self->traceback_allocator);
if (ret != 0) {
goto out;
}
Expand Down
108 changes: 108 additions & 0 deletions lib/err.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,111 @@ tsi_strerror(int err)
}
return ret;
}

/* Temporary hack. See notes in err.h for why this code is here. */

#include <stdlib.h>
#include <stdio.h>

void
tsi_blkalloc_print_state(tsi_blkalloc_t *self, FILE *out)
{
fprintf(out, "Block allocator%p::\n", (void *) self);
fprintf(out, "\ttop = %lld\n", (long long) self->top);
fprintf(out, "\tchunk_size = %lld\n", (long long) self->chunk_size);
fprintf(out, "\tnum_chunks = %lld\n", (long long) self->num_chunks);
fprintf(out, "\ttotal_allocated = %lld\n", (long long) self->total_allocated);
fprintf(out, "\ttotal_size = %lld\n", (long long) self->total_size);
}

int TSK_WARN_UNUSED
tsi_blkalloc_reset(tsi_blkalloc_t *self)
{
int ret = 0;

self->top = 0;
self->current_chunk = 0;
self->total_allocated = 0;
return ret;
}

int TSK_WARN_UNUSED
tsi_blkalloc_init(tsi_blkalloc_t *self, size_t chunk_size)
{
int ret = 0;

tsk_memset(self, 0, sizeof(tsi_blkalloc_t));
if (chunk_size < 1) {
ret = TSK_ERR_BAD_PARAM_VALUE;
goto out;
}
self->chunk_size = chunk_size;
self->top = 0;
self->current_chunk = 0;
self->total_allocated = 0;
self->total_size = 0;
self->num_chunks = 0;
self->mem_chunks = malloc(sizeof(char *));
if (self->mem_chunks == NULL) {
ret = TSK_ERR_NO_MEMORY;
goto out;
}
self->mem_chunks[0] = malloc(chunk_size);
if (self->mem_chunks[0] == NULL) {
ret = TSK_ERR_NO_MEMORY;
goto out;
}
self->num_chunks = 1;
self->total_size = chunk_size + sizeof(void *);
out:
return ret;
}

void *TSK_WARN_UNUSED
tsi_blkalloc_get(tsi_blkalloc_t *self, size_t size)
{
void *ret = NULL;
void *p;

if (size > self->chunk_size) {
goto out;
}
if ((self->top + size) > self->chunk_size) {
if (self->current_chunk == (self->num_chunks - 1)) {
p = realloc(self->mem_chunks, (self->num_chunks + 1) * sizeof(void *));
if (p == NULL) {
goto out;
}
self->mem_chunks = p;
p = malloc(self->chunk_size);
if (p == NULL) {
goto out;
}
self->mem_chunks[self->num_chunks] = p;
self->num_chunks++;
self->total_size += self->chunk_size + sizeof(void *);
}
self->current_chunk++;
self->top = 0;
}
ret = self->mem_chunks[self->current_chunk] + self->top;
self->top += size;
self->total_allocated += size;
out:
return ret;
}

void
tsi_blkalloc_free(tsi_blkalloc_t *self)
{
size_t j;

for (j = 0; j < self->num_chunks; j++) {
if (self->mem_chunks[j] != NULL) {
free(self->mem_chunks[j]);
}
}
if (self->mem_chunks != NULL) {
free(self->mem_chunks);
}
}
25 changes: 25 additions & 0 deletions lib/err.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,29 @@

const char *tsi_strerror(int err);

/* FIXME! Including a custom version of the tsk_blkalloc struct here so that
* we can use c11 atomics on the total_size attribute. Including it in this
* file and err.c as this is the least noisy place to put it, for now
* See https://github.com/jeromekelleher/sc2ts/issues/381 for reasoning.
*/

#include "tskit.h"
#include <stdatomic.h>

typedef struct {
size_t chunk_size; /* number of bytes per chunk */
size_t top; /* the offset of the next available byte in the current chunk */
size_t current_chunk; /* the index of the chunk currently being used */
_Atomic size_t total_size; /* the total number of bytes allocated + overhead. */
size_t total_allocated; /* the total number of bytes allocated. */
size_t num_chunks; /* the number of memory chunks. */
char **mem_chunks; /* the memory chunks */
} tsi_blkalloc_t;

extern void tsi_blkalloc_print_state(tsi_blkalloc_t *self, FILE *out);
extern int tsi_blkalloc_reset(tsi_blkalloc_t *self);
extern int tsi_blkalloc_init(tsi_blkalloc_t *self, size_t chunk_size);
extern void *tsi_blkalloc_get(tsi_blkalloc_t *self, size_t size);
extern void tsi_blkalloc_free(tsi_blkalloc_t *self);

#endif /*__ERR_H__*/
2 changes: 1 addition & 1 deletion lib/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ m_dep = cc.find_library('m', required : false)
cunit_dep = dependency('cunit')

extra_c_args = [
'-std=c99', '-Wall', '-Wextra', '-Werror', '-Wpedantic', '-W',
'-std=c11', '-Wall', '-Wextra', '-Werror', '-Wpedantic', '-W',
'-Wmissing-prototypes', '-Wstrict-prototypes',
'-Wconversion', '-Wshadow', '-Wpointer-arith', '-Wcast-align',
'-Wcast-qual', '-Wwrite-strings', '-Wnested-externs',
Expand Down
10 changes: 5 additions & 5 deletions lib/tree_sequence_builder.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ tree_sequence_builder_print_state(tree_sequence_builder_t *self, FILE *out)
out, "%d\t%d\t%d\t%d\n", edge->left, edge->right, edge->parent, edge->child);
}

fprintf(out, "tsk_blkalloc = \n");
tsk_blkalloc_print_state(&self->tsk_blkalloc, out);
fprintf(out, "tsi_blkalloc = \n");
tsi_blkalloc_print_state(&self->tsi_blkalloc, out);
fprintf(out, "avl_node_heap = \n");
object_heap_print_state(&self->avl_node_heap, out);
fprintf(out, "edge_heap = \n");
Expand Down Expand Up @@ -244,7 +244,7 @@ tree_sequence_builder_alloc(tree_sequence_builder_t *self, size_t num_sites,
if (ret != 0) {
goto out;
}
ret = tsk_blkalloc_init(&self->tsk_blkalloc,
ret = tsi_blkalloc_init(&self->tsi_blkalloc,
TSK_MAX(8192, num_sites * sizeof(mutation_list_node_t) / 4));
if (ret != 0) {
goto out;
Expand Down Expand Up @@ -278,7 +278,7 @@ tree_sequence_builder_free(tree_sequence_builder_t *self)
tsi_safe_free(self->sites.num_alleles);
tsi_safe_free(self->left_index_edges);
tsi_safe_free(self->right_index_edges);
tsk_blkalloc_free(&self->tsk_blkalloc);
tsi_blkalloc_free(&self->tsi_blkalloc);
object_heap_free(&self->avl_node_heap);
object_heap_free(&self->edge_heap);
return 0;
Expand Down Expand Up @@ -418,7 +418,7 @@ tree_sequence_builder_add_mutation(
}
}

list_node = tsk_blkalloc_get(&self->tsk_blkalloc, sizeof(mutation_list_node_t));
list_node = tsi_blkalloc_get(&self->tsi_blkalloc, sizeof(mutation_list_node_t));
if (list_node == NULL) {
ret = TSI_ERR_NO_MEMORY;
goto out;
Expand Down
6 changes: 3 additions & 3 deletions lib/tsinfer.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ typedef struct {
int flags;
site_t *sites;
avl_tree_t time_map;
tsk_blkalloc_t allocator;
tsi_blkalloc_t allocator;
ancestor_descriptor_t *descriptors;
} ancestor_builder_t;

Expand Down Expand Up @@ -141,7 +141,7 @@ typedef struct {
size_t num_nodes;
size_t num_match_nodes;
size_t num_mutations;
tsk_blkalloc_t tsk_blkalloc;
tsi_blkalloc_t tsi_blkalloc;
object_heap_t avl_node_heap;
object_heap_t edge_heap;
/* Dynamic edge indexes used for tree generation and path compression. The
Expand Down Expand Up @@ -184,7 +184,7 @@ typedef struct {
tsk_id_t *likelihood_nodes_tmp;
tsk_id_t *likelihood_nodes;
node_state_list_t *traceback;
tsk_blkalloc_t traceback_allocator;
tsi_blkalloc_t traceback_allocator;
size_t total_traceback_size;
struct {
tsk_id_t *left;
Expand Down
9 changes: 7 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,17 @@
+ [os.path.join(kasdir, f) for f in kas_source_files]
)

libraries = ["Advapi32"] if IS_WINDOWS else []
if IS_WINDOWS:
libraries = ["Advapi32"]
extra_compile_args = ["/std:c11"]
else:
libraries = []
extra_compile_args = ["-std=c11"]

_tsinfer_module = Extension(
"_tsinfer",
sources=sources,
extra_compile_args=["-std=c99"],
extra_compile_args=extra_compile_args,
libraries=libraries,
undef_macros=["NDEBUG"],
include_dirs=includes + [numpy.get_include()],
Expand Down

0 comments on commit 57f9209

Please sign in to comment.