Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add custom buffer constructor #21

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions src/count_min_sketch.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,26 @@
static int32_t __safe_add(int32_t a, uint32_t b);
static int32_t __safe_sub(int32_t a, uint32_t b);
static int32_t __safe_add_2(int32_t a, int32_t b);
static int __valid_construct(unsigned int width, unsigned int depth);

// Compatibility with non-clang compilers
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

/* Initialize a count-min sketch on top of a caller-supplied bin buffer.

   cms           - sketch to initialize
   width, depth  - sketch dimensions; both must be non-zero
   buffer        - caller-owned storage that becomes cms->bins
   hash_function - forwarded unchanged to __setup_cms()

   Returns CMS_ERROR when width or depth is 0; otherwise returns whatever
   __setup_cms() returns.

   NOTE(review): buffer is neither checked for NULL nor cleared in this
   function - confirm callers are expected to pass a valid, zeroed buffer. */
// We could probably re-write the other constructors in terms of this one
int cms_init_custom_buffer_alt(CountMinSketch* cms, unsigned int width, unsigned int depth, int32_t* buffer, cms_hash_function hash_function) {
if (!__valid_construct(width, depth)) {

Check warning on line 41 in src/count_min_sketch.c

View check run for this annotation

Codecov / codecov/patch

src/count_min_sketch.c#L40-L41

Added lines #L40 - L41 were not covered by tests
// Should we really be printing from library code?
fprintf(stderr, "Unable to initialize the count-min sketch since either width or depth is 0!\n");
return CMS_ERROR;

Check warning on line 44 in src/count_min_sketch.c

View check run for this annotation

Codecov / codecov/patch

src/count_min_sketch.c#L43-L44

Added lines #L43 - L44 were not covered by tests
}
/* Derive the reported confidence and error rate from the dimensions */
double confidence = 1 - (1 / pow(2, depth));
double error_rate = 2 / (double) width;
/* managed = 0 marks the bins as caller-owned (not allocated by the library) */
cms->managed = 0;
/* Use the caller's buffer directly as the bin storage */
cms->bins = buffer;
return __setup_cms(cms, width, depth, error_rate, confidence, hash_function);

Check warning on line 50 in src/count_min_sketch.c

View check run for this annotation

Codecov / codecov/patch

src/count_min_sketch.c#L46-L50

Added lines #L46 - L50 were not covered by tests
}

int cms_init_optimal_alt(CountMinSketch* cms, double error_rate, double confidence, cms_hash_function hash_function) {
/* https://cs.stackexchange.com/q/44803 */
Expand All @@ -44,21 +58,24 @@
}
uint32_t width = ceil(2 / error_rate);
uint32_t depth = ceil((-1 * log(1 - confidence)) / LOG_TWO);
cms->managed = 1;
return __setup_cms(cms, width, depth, error_rate, confidence, hash_function);
}

int cms_init_alt(CountMinSketch* cms, uint32_t width, uint32_t depth, cms_hash_function hash_function) {
if (depth < 1 || width < 1) {
if (!__valid_construct(width, depth)) {
fprintf(stderr, "Unable to initialize the count-min sketch since either width or depth is 0!\n");
return CMS_ERROR;
}
double confidence = 1 - (1 / pow(2, depth));
double error_rate = 2 / (double) width;
cms->managed = 1;
return __setup_cms(cms, width, depth, error_rate, confidence, hash_function);
}

int cms_destroy(CountMinSketch* cms) {
free(cms->bins);
if (cms->managed)
free(cms->bins);
cms->width = 0;
cms->depth = 0;
cms->confidence = 0.0;
Expand Down Expand Up @@ -245,6 +262,7 @@
/* Merge */
va_start(ap, num_sketches);
base = (CountMinSketch *) va_arg(ap, CountMinSketch *);
cms->managed = 1;
if (CMS_ERROR == __setup_cms(cms, base->width, base->depth, base->error_rate, base->confidence, base->hash_function)) {
va_end(ap);
return CMS_ERROR;
Expand Down Expand Up @@ -281,18 +299,27 @@
/*******************************************************************************
* PRIVATE FUNCTIONS
*******************************************************************************/

/* Check that the requested sketch dimensions are usable.
   Returns 1 when both width and depth are non-zero, 0 otherwise. */
static int __valid_construct(unsigned int width, unsigned int depth)
{
    /* A zero in either dimension makes the sketch empty, so reject it */
    if (width == 0 || depth == 0) {
        return 0;
    }
    return 1;
}

static int __setup_cms(CountMinSketch* cms, unsigned int width, unsigned int depth, double error_rate, double confidence, cms_hash_function hash_function) {
cms->width = width;
cms->depth = depth;
cms->confidence = confidence;
cms->error_rate = error_rate;
cms->elements_added = 0;
cms->bins = (int32_t*)calloc((width * depth), sizeof(int32_t));
cms->hash_function = (hash_function == NULL) ? __default_hash : hash_function;

if (NULL == cms->bins) {
fprintf(stderr, "Failed to allocate %zu bytes for bins!", ((width * depth) * sizeof(int32_t)));
return CMS_ERROR;
if (cms->managed) {
cms->bins = (int32_t*)calloc((width * depth), sizeof(int32_t));

if (NULL == cms->bins) {
fprintf(stderr, "Failed to allocate %zu bytes for bins!", ((width * depth) * sizeof(int32_t)));
return CMS_ERROR;

Check warning on line 321 in src/count_min_sketch.c

View check run for this annotation

Codecov / codecov/patch

src/count_min_sketch.c#L320-L321

Added lines #L320 - L321 were not covered by tests
}
}
return CMS_SUCCESS;
}
Expand Down Expand Up @@ -331,6 +358,7 @@
rewind(fp);
size_t length = cms->width * cms->depth;
if (on_disk == 0) {
cms->managed = 1;
cms->bins = (int32_t*)malloc(length * sizeof(int32_t));
size_t read = fread(cms->bins, sizeof(int32_t), length, fp);
if (read != length) {
Expand Down
27 changes: 27 additions & 0 deletions src/count_min_sketch.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ typedef struct {
double error_rate;
cms_hash_function hash_function;
int32_t* bins;
uint32_t managed: 1;
} CountMinSketch, count_min_sketch;


Expand Down Expand Up @@ -69,6 +70,32 @@ static __inline__ int cms_init_optimal(CountMinSketch* cms, float error_rate, fl
return cms_init_optimal_alt(cms, error_rate, confidence, NULL);
}

/* Initialize the count-min sketch based on user defined width and depth.
This version takes a caller-supplied buffer, which **must** hold at least
width * depth elements (sizeof(int32_t) * width * depth bytes).
Width and depth must be positive integers.
The buffer is used as-is and is not cleared; zero it beforehand if the
counts should start at 0.

Passing the sketch to cms_destroy() is safe; the buffer is left untouched.
You must manage the lifetime of your own buffer as required.
For advanced users only.

Returns:
CMS_SUCCESS
CMS_ERROR - when width or depth is 0 */
int cms_init_custom_buffer_alt(CountMinSketch* cms, unsigned int width, unsigned int depth, int32_t* buffer, cms_hash_function hash_function);

/* Convenience form of cms_init_custom_buffer_alt() that passes NULL as the
   hash function; all other arguments are forwarded unchanged */
static __inline__ int cms_init_custom_buffer(CountMinSketch* cms, unsigned int width, unsigned int depth, int32_t* buffer) {
    cms_hash_function no_custom_hash = NULL;
    return cms_init_custom_buffer_alt(cms, width, depth, buffer, no_custom_hash);
}


/* Free all memory used in the count-min sketch

Expand Down
26 changes: 26 additions & 0 deletions tests/count_min_sketch_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,32 @@ int main(int argc, char** argv) {

cms_destroy(&cms);

printf("Count-Min Sketch: setup using custom buffer (unmanaged): ");
int32_t buffer[2000 * 17] = { 0 };
cms_init_custom_buffer(&cms, 2000, 17, buffer);
if (!cms.managed) {
success_or_failure(0);
} else {
success_or_failure(1);
}

printf("Count-Min Sketch: set up width and depth: ");
if (cms.width == 2000 && cms.depth == 17) {
success_or_failure(0);
} else {
success_or_failure(1);
}

printf("Count-Min Sketch: buffer is correct: ");
if (cms.bins == buffer) {
success_or_failure(0);
} else {
success_or_failure(1);
}

// buffer is a stack array: if cms_destroy() called free() on it, that would be undefined behavior (most likely a crash)
cms_destroy(&cms);

printf("Count-Min Sketch: import: ");
result = 0;
result = cms_import(&cms, "./dist/test_export.cms");
Expand Down
Loading