From a59b69ceb6d6da02ab31ef651fb366eb88d3f499 Mon Sep 17 00:00:00 2001 From: Marcus Harrison Date: Sun, 4 Feb 2024 14:00:04 +0100 Subject: [PATCH 1/2] Add custom buffer constructor --- src/count_min_sketch.c | 39 +++++++++++++++++++++++++++++------ src/count_min_sketch.h | 27 ++++++++++++++++++++++++ tests/count_min_sketch_test.c | 26 +++++++++++++++++++++++ 3 files changed, 86 insertions(+), 6 deletions(-) diff --git a/src/count_min_sketch.c b/src/count_min_sketch.c index 109a6ce..bfe66c7 100644 --- a/src/count_min_sketch.c +++ b/src/count_min_sketch.c @@ -29,12 +29,26 @@ static int __compare(const void * a, const void * b); static int32_t __safe_add(int32_t a, uint32_t b); static int32_t __safe_sub(int32_t a, uint32_t b); static int32_t __safe_add_2(int32_t a, int32_t b); +static int __valid_construct(unsigned int width, unsigned int depth); // Compatibility with non-clang compilers #ifndef __has_builtin #define __has_builtin(x) 0 #endif +// We could probably re-write the other constructors in terms of this one +int cms_init_custom_buffer_alt(CountMinSketch* cms, unsigned int width, unsigned int depth, int32_t* buffer, cms_hash_function hash_function) { + if (!__valid_construct(width, depth)) { + // Should we really be printing from library code? 
+ fprintf(stderr, "Unable to initialize the count-min sketch since either width or depth is 0!\n"); + return CMS_ERROR; + } + double confidence = 1 - (1 / pow(2, depth)); + double error_rate = 2 / (double) width; + cms->managed = 0; + cms->bins = buffer; + return __setup_cms(cms, width, depth, error_rate, confidence, hash_function); +} int cms_init_optimal_alt(CountMinSketch* cms, double error_rate, double confidence, cms_hash_function hash_function) { /* https://cs.stackexchange.com/q/44803 */ @@ -44,21 +58,24 @@ int cms_init_optimal_alt(CountMinSketch* cms, double error_rate, double confiden } uint32_t width = ceil(2 / error_rate); uint32_t depth = ceil((-1 * log(1 - confidence)) / LOG_TWO); + cms->managed = 1; return __setup_cms(cms, width, depth, error_rate, confidence, hash_function); } int cms_init_alt(CountMinSketch* cms, uint32_t width, uint32_t depth, cms_hash_function hash_function) { - if (depth < 1 || width < 1) { + if (!__valid_construct(width, depth)) { fprintf(stderr, "Unable to initialize the count-min sketch since either width or depth is 0!\n"); return CMS_ERROR; } double confidence = 1 - (1 / pow(2, depth)); double error_rate = 2 / (double) width; + cms->managed = 1; return __setup_cms(cms, width, depth, error_rate, confidence, hash_function); } int cms_destroy(CountMinSketch* cms) { - free(cms->bins); + if (cms->managed) + free(cms->bins); cms->width = 0; cms->depth = 0; cms->confidence = 0.0; @@ -245,6 +262,7 @@ int cms_merge(CountMinSketch* cms, int num_sketches, ...) { /* Merge */ va_start(ap, num_sketches); base = (CountMinSketch *) va_arg(ap, CountMinSketch *); + cms->managed = 1; if (CMS_ERROR == __setup_cms(cms, base->width, base->depth, base->error_rate, base->confidence, base->hash_function)) { va_end(ap); return CMS_ERROR; @@ -281,18 +299,27 @@ int cms_merge_into(CountMinSketch* cms, int num_sketches, ...) 
{ /******************************************************************************* * PRIVATE FUNCTIONS *******************************************************************************/ + +static int __valid_construct(unsigned int width, unsigned int depth) +{ + return width > 0 && depth > 0; +} + static int __setup_cms(CountMinSketch* cms, unsigned int width, unsigned int depth, double error_rate, double confidence, cms_hash_function hash_function) { cms->width = width; cms->depth = depth; cms->confidence = confidence; cms->error_rate = error_rate; cms->elements_added = 0; - cms->bins = (int32_t*)calloc((width * depth), sizeof(int32_t)); cms->hash_function = (hash_function == NULL) ? __default_hash : hash_function; - if (NULL == cms->bins) { - fprintf(stderr, "Failed to allocate %zu bytes for bins!", ((width * depth) * sizeof(int32_t))); - return CMS_ERROR; + if (cms->managed) { + cms->bins = (int32_t*)calloc((width * depth), sizeof(int32_t)); + + if (NULL == cms->bins) { + fprintf(stderr, "Failed to allocate %zu bytes for bins!", ((width * depth) * sizeof(int32_t))); + return CMS_ERROR; + } } return CMS_SUCCESS; } diff --git a/src/count_min_sketch.h b/src/count_min_sketch.h index d594009..bca2cc0 100644 --- a/src/count_min_sketch.h +++ b/src/count_min_sketch.h @@ -42,6 +42,7 @@ typedef struct { double error_rate; cms_hash_function hash_function; int32_t* bins; + uint32_t managed: 1; } CountMinSketch, count_min_sketch; @@ -69,6 +70,32 @@ static __inline__ int cms_init_optimal(CountMinSketch* cms, float error_rate, fl return cms_init_optimal_alt(cms, error_rate, confidence, NULL); } +/* Initialize the count-min sketch based on user defined width and depth + This version takes a custom buffer, which **must** be at least + sizeof(int32_t) * width * depth bytes; it is used as-is and is not zeroed + width and depth must be positive integers + + Passing to cms_destroy() is safe; the buffer remains untouched + You must manage your own buffer as required + For advanced users only + + Returns: + CMS_SUCCESS 
+ CMS_ERROR - when width or depth is 0 */ +int cms_init_custom_buffer_alt( + CountMinSketch* cms, + unsigned int width, + unsigned int depth, + int32_t* buffer, + cms_hash_function hash_function); +static __inline__ int cms_init_custom_buffer( + CountMinSketch* cms, + unsigned int width, + unsigned int depth, + int32_t* buffer) { + return cms_init_custom_buffer_alt(cms, width, depth, buffer, NULL); +} + /* Free all memory used in the count-min sketch diff --git a/tests/count_min_sketch_test.c b/tests/count_min_sketch_test.c index eff8f40..fe3c39b 100644 --- a/tests/count_min_sketch_test.c +++ b/tests/count_min_sketch_test.c @@ -215,6 +215,32 @@ int main(int argc, char** argv) { cms_destroy(&cms); + printf("Count-Min Sketch: setup using custom buffer (unmanaged): "); + int32_t buffer[2000 * 17] = { 0 }; + cms_init_custom_buffer(&cms, 2000, 17, buffer); + if (!cms.managed) { + success_or_failure(0); + } else { + success_or_failure(1); + } + + printf("Count-Min Sketch: set up width and depth: "); + if (cms.width == 2000 && cms.depth == 17) { + success_or_failure(0); + } else { + success_or_failure(1); + } + + printf("Count-Min Sketch: buffer is correct: "); + if (cms.bins == buffer) { + success_or_failure(0); + } else { + success_or_failure(1); + } + + // buffer is a stack array: if cms_destroy() called free() on it, this would likely crash + cms_destroy(&cms); + printf("Count-Min Sketch: import: "); result = 0; result = cms_import(&cms, "./dist/test_export.cms"); From d76b55c321c1488539984ad7cbd230349e7a4a71 Mon Sep 17 00:00:00 2001 From: Marcus Harrison Date: Sun, 4 Feb 2024 14:55:40 +0100 Subject: [PATCH 2/2] Make sure bins are cleaned when loading from file --- src/count_min_sketch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/count_min_sketch.c b/src/count_min_sketch.c index bfe66c7..bc4be0e 100644 --- a/src/count_min_sketch.c +++ b/src/count_min_sketch.c @@ -358,6 +358,7 @@ static void __read_from_file(CountMinSketch* cms, FILE *fp, short on_disk, const rewind(fp); 
size_t length = cms->width * cms->depth; if (on_disk == 0) { + cms->managed = 1; cms->bins = (int32_t*)malloc(length * sizeof(int32_t)); size_t read = fread(cms->bins, sizeof(int32_t), length, fp); if (read != length) {