diff --git a/src/db.c b/src/db.c index c879b2ffb5..87977d3c56 100644 --- a/src/db.c +++ b/src/db.c @@ -190,7 +190,11 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) { return o; } -/* Add the key to the DB. It's up to the caller to increment the reference +/* Add the key to the DB. + * + * The kvstore stores its own copy of `key`; the caller keeps ownership of `key` and must free it. + * + * It's up to the caller to increment the reference * counter of the value if needed. * * If the update_if_existing argument is false, the program is aborted @@ -204,7 +208,6 @@ static void dbAddInternal(serverDb *db, robj *key, robj *val, int update_if_exis return; } serverAssertWithInfo(NULL, key, de != NULL); - kvstoreDictSetKey(db->keys, slot, de, sdsdup(key->ptr)); initObjectLRUOrLFU(val); kvstoreDictSetVal(db->keys, slot, de, val); signalKeyAsReady(db, key, val->type); @@ -241,15 +244,16 @@ int getKeySlot(sds key) { /* This is a special version of dbAdd() that is used only when loading * keys from the RDB file: the key is passed as an SDS string that is - * retained by the function (and not freed by the caller). + * copied by the function and freed by the caller. * * Moreover this function will not abort if the key is already busy, to * give more control to the caller, nor will signal the key as ready * since it is not useful in this context. * - * The function returns 1 if the key was added to the database, taking - * ownership of the SDS string, otherwise 0 is returned, and is up to the - * caller to free the SDS string. */ + * The function returns 1 if the key was added to the database, otherwise 0 is returned. + * + * In either case the caller keeps ownership of `key` and must free it; on success the kvstore holds its own copy.
+ */ int dbAddRDBLoad(serverDb *db, sds key, robj *val) { int slot = getKeySlot(key); dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key, NULL); diff --git a/src/debug.c b/src/debug.c index c625ab5150..94890d4620 100644 --- a/src/debug.c +++ b/src/debug.c @@ -864,7 +864,7 @@ void debugCommand(client *c) { sds sizes = sdsempty(); sizes = sdscatprintf(sizes, "bits:%d ", (sizeof(void *) == 8) ? 64 : 32); sizes = sdscatprintf(sizes, "robj:%d ", (int)sizeof(robj)); - sizes = sdscatprintf(sizes, "dictentry:%d ", (int)dictEntryMemUsage()); + sizes = sdscatprintf(sizes, "dictentry:%d ", (int)dictEntryMemUsage(NULL)); sizes = sdscatprintf(sizes, "sdshdr5:%d ", (int)sizeof(struct sdshdr5)); sizes = sdscatprintf(sizes, "sdshdr8:%d ", (int)sizeof(struct sdshdr8)); sizes = sdscatprintf(sizes, "sdshdr16:%d ", (int)sizeof(struct sdshdr16)); diff --git a/src/defrag.c b/src/defrag.c index 2de1c061e8..5a54875864 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -41,6 +41,7 @@ typedef struct defragCtx { void *privdata; int slot; + void *aux; } defragCtx; typedef struct defragPubSubCtx { @@ -75,6 +76,36 @@ void *activeDefragAlloc(void *ptr) { return newptr; } +/* Defrag start callback for the keys dict: stores in ctx->aux the db->expires entry found for this key (NULL if none), which refers to the key stored in the keys db dict entry. */ +void defragEntryStartCbForKeys(void *ctx, void *oldptr) { + defragCtx *defragctx = (defragCtx *)ctx; + serverDb *db = defragctx->privdata; + sds oldsds = (sds)dictGetKey((dictEntry *)oldptr); + int slot = defragctx->slot; + defragctx->aux = NULL; /* Never let an entry captured for a previous key or slot reach the finish callback. */ + if (kvstoreDictSize(db->expires, slot)) { + defragctx->aux = kvstoreDictFind(db->expires, slot, oldsds); + } +} + +/* This method updates the key of expiry db dict entry. The key might be no longer valid + * as it could have been cleaned up during the defrag-realloc of the main dictionary.
*/ +void defragEntryFinishCbForKeys(void *ctx, void *newptr) { + defragCtx *defragctx = (defragCtx *)ctx; + dictEntry *expire_de = (dictEntry *)defragctx->aux; + /* Clear the captured entry in the context (not just the local copy) so a stale pointer never leaks into the next callback pair. */ + defragctx->aux = NULL; + /* Item doesn't have TTL associated to it. */ + if (!expire_de) return; + /* No reallocation happened. */ + if (!newptr) return; + /* Point the expires entry at the key of the reallocated keys-dict entry. */ + serverDb *db = defragctx->privdata; + sds newsds = (sds)dictGetKey((dictEntry *)newptr); + int slot = defragctx->slot; + kvstoreDictSetKey(db->expires, slot, expire_de, newsds); +} + /*Defrag helper for sds strings * * returns NULL in case the allocation wasn't moved. @@ -650,25 +681,10 @@ void defragModule(serverDb *db, dictEntry *kde) { /* for each key we scan in the main dict, this function will attempt to defrag * all the various pointers it has. */ void defragKey(defragCtx *ctx, dictEntry *de) { - sds keysds = dictGetKey(de); - robj *newob, *ob; - unsigned char *newzl; - sds newsds; serverDb *db = ctx->privdata; int slot = ctx->slot; - /* Try to defrag the key name. */ - newsds = activeDefragSds(keysds); - if (newsds) { - kvstoreDictSetKey(db->keys, slot, de, newsds); - if (kvstoreDictSize(db->expires, slot)) { - /* We can't search in db->expires for that key after we've released - * the pointer it holds, since it won't be able to do the string - * compare, but we can find the entry using key hash and pointer. */ - uint64_t hash = kvstoreGetHash(db->expires, newsds); - dictEntry *expire_de = kvstoreDictFindEntryByPtrAndHash(db->expires, slot, keysds, hash); - if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds); - } - } + robj *newob, *ob; + unsigned char *newzl; /* Try to defrag robj and / or string value.
*/ ob = dictGetVal(de); @@ -984,7 +1000,9 @@ void activeDefragCycle(void) { endtime = start + timelimit; latencyStartMonitor(latency); - dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc}; + dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc, + .defragEntryStartCb = defragEntryStartCbForKeys, + .defragEntryFinishCb = defragEntryFinishCbForKeys}; do { /* if we're not continuing a scan from the last call or loop, start a new one */ if (!defrag_stage && !defrag_cursor && (slot < 0)) { diff --git a/src/dict.c b/src/dict.c index bc92d49564..b6a06eb36a 100644 --- a/src/dict.c +++ b/src/dict.c @@ -35,6 +35,7 @@ #include "fmacros.h" +#include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <stdint.h> @@ -48,6 +49,10 @@ #include "serverassert.h" #include "monotonic.h" +#ifndef static_assert +#define static_assert(expr, lit) _Static_assert(expr, lit) +#endif + #define UNUSED(V) ((void)V) /* Using dictSetResizeEnabled() we make possible to disable @@ -76,6 +81,33 @@ struct dictEntry { struct dictEntry *next; /* Next entry in the same hash bucket. */ }; +typedef struct { + union { + void *val; + uint64_t u64; + int64_t s64; + double d; + } v; + struct dictEntry *next; /* Next entry in the same hash bucket. */ + uint8_t key_header_size; /* offset into key_buf where the key is located at. */ + unsigned char key_buf[]; /* buffer with embedded key.
*/ +} embeddedDictEntry; + +/* Validation and helper for `embeddedDictEntry` */ + +static_assert(offsetof(embeddedDictEntry, v) == 0, "unexpected field offset"); +static_assert(offsetof(embeddedDictEntry, next) == sizeof(double), "unexpected field offset"); +static_assert(offsetof(embeddedDictEntry, key_header_size) == sizeof(double) + sizeof(void *), + "unexpected field offset"); +/* key_buf is located after a union with a double value `v.d`, a pointer `next` and uint8_t field `key_header_size` */ +static_assert(offsetof(embeddedDictEntry, key_buf) == sizeof(double) + sizeof(void *) + sizeof(uint8_t), + "unexpected field offset"); + +/* The minimum amount of bytes required for embedded dict entry. */ +static inline size_t compactSizeEmbeddedDictEntry(void) { + return offsetof(embeddedDictEntry, key_buf); +} + typedef struct { void *key; dictEntry *next; @@ -91,6 +123,19 @@ static dictEntry *dictGetNext(const dictEntry *de); static dictEntry **dictGetNextRef(dictEntry *de); static void dictSetNext(dictEntry *de, dictEntry *next); +/* -------------------------- Utility functions -------------------------------- */ + +/* Validates dict type members dependencies. */ +static inline void validateDictType(dictType *type) { + if (type->embedded_entry) { + assert(type->embedKey); + assert(!type->keyDup); + assert(!type->keyDestructor); + } else { + assert(!type->embedKey); + } +} + /* -------------------------- hash functions -------------------------------- */ static uint8_t dict_hash_function_seed[16]; @@ -126,6 +171,8 @@ uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) { #define ENTRY_PTR_MASK 7 /* 111 */ #define ENTRY_PTR_NORMAL 0 /* 000 */ #define ENTRY_PTR_NO_VALUE 2 /* 010 */ +#define ENTRY_PTR_EMBEDDED 4 /* 100 */ +/* ENTRY_PTR_IS_KEY xx1 */ /* Returns 1 if the entry pointer is a pointer to a key, rather than to an * allocated entry. Returns 0 otherwise. 
*/ @@ -145,12 +192,9 @@ static inline int entryIsNoValue(const dictEntry *de) { return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_NO_VALUE; } -/* Creates an entry without a value field. */ -static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) { - dictEntryNoValue *entry = zmalloc(sizeof(*entry)); - entry->key = key; - entry->next = next; - return (dictEntry *)(void *)((uintptr_t)(void *)entry | ENTRY_PTR_NO_VALUE); + +static inline int entryIsEmbedded(const dictEntry *de) { + return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_EMBEDDED; } static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) { @@ -163,15 +207,40 @@ static inline void *decodeMaskedPtr(const dictEntry *de) { return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK); } +/* Creates an entry without a value field. */ +static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) { + dictEntryNoValue *entry = zmalloc(sizeof(*entry)); + entry->key = key; + entry->next = next; + return encodeMaskedPtr(entry, ENTRY_PTR_NO_VALUE); +} + +static inline dictEntry *createEmbeddedEntry(void *key, dictEntry *next, dictType *dt) { + size_t key_len = dt->embedKey(NULL, 0, key, NULL); + embeddedDictEntry *entry = zmalloc(compactSizeEmbeddedDictEntry() + key_len); + dt->embedKey(entry->key_buf, key_len, key, &entry->key_header_size); + entry->next = next; + return encodeMaskedPtr(entry, ENTRY_PTR_EMBEDDED); +} + +static inline void *getEmbeddedKey(const dictEntry *de) { + embeddedDictEntry *entry = (embeddedDictEntry *)decodeMaskedPtr(de); + return &entry->key_buf[entry->key_header_size]; +} + /* Decodes the pointer to an entry without value, when you know it is an entry * without value. Hint: Use entryIsNoValue to check. 
*/ static inline dictEntryNoValue *decodeEntryNoValue(const dictEntry *de) { return decodeMaskedPtr(de); } +static inline embeddedDictEntry *decodeEmbeddedEntry(const dictEntry *de) { + return decodeMaskedPtr(de); +} + /* Returns 1 if the entry has a value field and 0 otherwise. */ static inline int entryHasValue(const dictEntry *de) { - return entryIsNormal(de); + return entryIsNormal(de) || entryIsEmbedded(de); } /* ----------------------------- API implementation ------------------------- */ @@ -185,6 +254,7 @@ static void _dictReset(dict *d, int htidx) { /* Create a new hash table */ dict *dictCreate(dictType *type) { + validateDictType(type); size_t metasize = type->dictMetadataBytes ? type->dictMetadataBytes(NULL) : 0; dict *d = zmalloc(sizeof(*d) + metasize); if (metasize > 0) { @@ -473,6 +543,10 @@ int dictAdd(dict *d, void *key, void *val) { * with the existing entry if existing is not NULL. * * If key was added, the hash entry is returned to be manipulated by the caller. + * + * The dict handles `key` based on `dictType` during initialization: + * - If `dictType.embedded-entry` is 1, it clones the `key`. + * - Otherwise, it assumes ownership of the `key`. */ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) { /* Get the position for the new key or NULL if the key already exists. */ @@ -511,6 +585,8 @@ dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) { /* Allocate an entry without value. */ entry = createEntryNoValue(key, *bucket); } + } else if (d->type->embedded_entry) { + entry = createEmbeddedEntry(key, *bucket, d->type); } else { /* Allocate the memory and store the new entry. 
* Insert the element in top, with the assumption that in a database @@ -658,6 +734,7 @@ void dictFreeUnlinkedEntry(dict *d, dictEntry *he) { if (he == NULL) return; dictFreeKey(d, he); dictFreeVal(d, he); + /* Clear the dictEntry */ if (!entryIsKey(he)) zfree(decodeMaskedPtr(he)); } @@ -804,7 +881,11 @@ void dictSetKey(dict *d, dictEntry *de, void *key) { void dictSetVal(dict *d, dictEntry *de, void *val) { UNUSED(d); assert(entryHasValue(de)); - de->v.val = val; + if (entryIsEmbedded(de)) { + decodeEmbeddedEntry(de)->v.val = val; + } else { + de->v.val = val; + } } void dictSetSignedIntegerVal(dictEntry *de, int64_t val) { @@ -840,11 +921,15 @@ double dictIncrDoubleVal(dictEntry *de, double val) { void *dictGetKey(const dictEntry *de) { if (entryIsKey(de)) return (void *)de; if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key; + if (entryIsEmbedded(de)) return getEmbeddedKey(de); return de->key; } void *dictGetVal(const dictEntry *de) { assert(entryHasValue(de)); + if (entryIsEmbedded(de)) { + return decodeEmbeddedEntry(de)->v.val; + } return de->v.val; } @@ -874,6 +959,7 @@ double *dictGetDoubleValPtr(dictEntry *de) { static dictEntry *dictGetNext(const dictEntry *de) { if (entryIsKey(de)) return NULL; /* there's no next */ if (entryIsNoValue(de)) return decodeEntryNoValue(de)->next; + if (entryIsEmbedded(de)) return decodeEmbeddedEntry(de)->next; return de->next; } @@ -882,14 +968,16 @@ static dictEntry *dictGetNext(const dictEntry *de) { static dictEntry **dictGetNextRef(dictEntry *de) { if (entryIsKey(de)) return NULL; if (entryIsNoValue(de)) return &decodeEntryNoValue(de)->next; + if (entryIsEmbedded(de)) return &decodeEmbeddedEntry(de)->next; return &de->next; } static void dictSetNext(dictEntry *de, dictEntry *next) { assert(!entryIsKey(de)); if (entryIsNoValue(de)) { - dictEntryNoValue *entry = decodeEntryNoValue(de); - entry->next = next; + decodeEntryNoValue(de)->next = next; + } else if (entryIsEmbedded(de)) { + decodeEmbeddedEntry(de)->next = 
next; } else { de->next = next; } @@ -901,8 +989,20 @@ size_t dictMemUsage(const dict *d) { return dictSize(d) * sizeof(dictEntry) + dictBuckets(d) * sizeof(dictEntry *); } -size_t dictEntryMemUsage(void) { - return sizeof(dictEntry); +/* Returns the memory usage in bytes of a dictEntry based on its encoding. If `de` is NULL, the size of a + * regular dict entry is returned; a key-only entry uses 0 bytes and an unsupported encoding trips the assertion. */ +size_t dictEntryMemUsage(dictEntry *de) { + if (de == NULL || entryIsNormal(de)) + return sizeof(dictEntry); + else if (entryIsKey(de)) + return 0; + else if (entryIsNoValue(de)) + return sizeof(dictEntryNoValue); + else if (entryIsEmbedded(de)) + return zmalloc_size(decodeEmbeddedEntry(de)); + else + assert(0 && "Entry type not supported"); /* A bare string literal is always true and would never fire. */ + return 0; } /* A fingerprint is a 64 bit number that represents the state of the dictionary @@ -1172,7 +1272,7 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) { /* Reallocate the dictEntry, key and value allocations in a bucket using the * provided allocation functions in order to defrag them.
*/ -static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragfns) { +static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragfns, void *privdata) { dictDefragAllocFunction *defragalloc = defragfns->defragAlloc; dictDefragAllocFunction *defragkey = defragfns->defragKey; dictDefragAllocFunction *defragval = defragfns->defragVal; @@ -1190,6 +1290,17 @@ static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragf entry = newentry; } if (newkey) entry->key = newkey; + } else if (entryIsEmbedded(de)) { + defragfns->defragEntryStartCb(privdata, de); + embeddedDictEntry *entry = decodeEmbeddedEntry(de), *newentry; + if ((newentry = defragalloc(entry))) { + newde = encodeMaskedPtr(newentry, ENTRY_PTR_EMBEDDED); + entry = newentry; + defragfns->defragEntryFinishCb(privdata, newde); + } else { + defragfns->defragEntryFinishCb(privdata, NULL); + } + if (newval) entry->v.val = newval; } else { assert(entryIsNormal(de)); newde = defragalloc(de); @@ -1353,7 +1464,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio /* Emit entries at cursor */ if (defragfns) { - dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns); + dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns, privdata); } de = d->ht_table[htidx0][v & m0]; while (de) { @@ -1386,7 +1497,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio /* Emit entries at cursor */ if (defragfns) { - dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns); + dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns, privdata); } de = d->ht_table[htidx0][v & m0]; while (de) { @@ -1400,7 +1511,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio do { /* Emit entries at cursor */ if (defragfns) { - dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns); + dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns, privdata); } de = d->ht_table[htidx1][v & m1]; while 
(de) { @@ -1573,29 +1684,6 @@ uint64_t dictGetHash(dict *d, const void *key) { return dictHashKey(d, key); } -/* Finds the dictEntry using pointer and pre-calculated hash. - * oldkey is a dead pointer and should not be accessed. - * the hash value should be provided using dictGetHash. - * no string / key comparison is performed. - * return value is a pointer to the dictEntry if found, or NULL if not found. */ -dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) { - dictEntry *he; - unsigned long idx, table; - - if (dictSize(d) == 0) return NULL; /* dict is empty */ - for (table = 0; table <= 1; table++) { - idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]); - if (table == 0 && (long)idx < d->rehashidx) continue; - he = d->ht_table[table][idx]; - while (he) { - if (oldptr == dictGetKey(he)) return he; - he = dictGetNext(he); - } - if (!dictIsRehashing(d)) return NULL; - } - return NULL; -} - /* Provides the old and new ht size for a given dictionary during rehashing. This method * should only be invoked during initialization/rehashing. */ void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size) { diff --git a/src/dict.h b/src/dict.h index 723e5a54c2..a7c5c71826 100644 --- a/src/dict.h +++ b/src/dict.h @@ -66,6 +66,10 @@ typedef struct dictType { /* Allow a dict to carry extra caller-defined metadata. The * extra memory is initialized to 0 when a dict is allocated. */ size_t (*dictMetadataBytes)(dict *d); + /* Method for copying a given key into a buffer of buf_len. Also used for + * computing the length of the key + header when buf is NULL. */ + size_t (*embedKey)(unsigned char *buf, size_t buf_len, const void *key, unsigned char *header_size); + /* Data */ void *userdata; @@ -80,8 +84,9 @@ typedef struct dictType { * enables one more optimization: to store a key without an allocated * dictEntry. 
*/ unsigned int keys_are_odd : 1; - /* TODO: Add a 'keys_are_even' flag and use a similar optimization if that - * flag is set. */ + /* If embedded_entry flag is set, it indicates that a copy of the key is created and the key is embedded + * as part of the dict entry. */ + unsigned int embedded_entry : 1; } dictType; #define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1 << (exp)) @@ -127,10 +132,13 @@ typedef struct dictStats { typedef void(dictScanFunction)(void *privdata, const dictEntry *de); typedef void *(dictDefragAllocFunction)(void *ptr); +typedef void(dictDefragEntryCb)(void *privdata, void *ptr); typedef struct { - dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */ - dictDefragAllocFunction *defragKey; /* Defrag-realloc keys (optional) */ - dictDefragAllocFunction *defragVal; /* Defrag-realloc values (optional) */ + dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */ + dictDefragAllocFunction *defragKey; /* Defrag-realloc keys (optional) */ + dictDefragAllocFunction *defragVal; /* Defrag-realloc values (optional) */ + dictDefragEntryCb *defragEntryStartCb; /* Callback invoked prior to the start of defrag of dictEntry. */ + dictDefragEntryCb *defragEntryFinishCb; /* Callback invoked after the defrag of dictEntry is tried. 
*/ } dictDefragFunctions; /* This is the initial size of every hash table */ @@ -212,7 +220,7 @@ uint64_t dictGetUnsignedIntegerVal(const dictEntry *de); double dictGetDoubleVal(const dictEntry *de); double *dictGetDoubleValPtr(dictEntry *de); size_t dictMemUsage(const dict *d); -size_t dictEntryMemUsage(void); +size_t dictEntryMemUsage(dictEntry *de); dictIterator *dictGetIterator(dict *d); dictIterator *dictGetSafeIterator(dict *d); void dictInitIterator(dictIterator *iter, dict *d); @@ -236,7 +244,6 @@ unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *pri unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata); uint64_t dictGetHash(dict *d, const void *key); -dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash); void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size); size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full); diff --git a/src/kvstore.c b/src/kvstore.c index a43b72e1e1..16cc8e4822 100644 --- a/src/kvstore.c +++ b/src/kvstore.c @@ -241,7 +241,12 @@ static size_t kvstoreDictMetadataSize(dict *d) { /* Create an array of dictionaries * num_dicts_bits is the log2 of the amount of dictionaries needed (e.g. 0 for 1 dict, - * 3 for 8 dicts, etc.) */ + * 3 for 8 dicts, etc.) + * + * The kvstore handles `key` based on `dictType` during initialization: + * - If `dictType.embedded-entry` is 1, it clones the `key`. + * - Otherwise, it assumes ownership of the `key`. 
+ */ kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) { /* We can't support more than 2^16 dicts because we want to save 48 bits * for the dict cursor, see kvstoreScan */ @@ -340,7 +345,7 @@ size_t kvstoreMemUsage(kvstore *kvs) { size_t mem = sizeof(*kvs); unsigned long long keys_count = kvstoreSize(kvs); - mem += keys_count * dictEntryMemUsage() + kvstoreBuckets(kvs) * sizeof(dictEntry *) + + mem += keys_count * dictEntryMemUsage(NULL) + kvstoreBuckets(kvs) * sizeof(dictEntry *) + kvs->allocated_dicts * (sizeof(dict) + kvstoreDictMetadataSize(NULL)); /* Values are dict* shared with kvs->dicts */ @@ -717,12 +722,6 @@ dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx) { return dictGetFairRandomKey(d); } -dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash) { - dict *d = kvstoreGetDict(kvs, didx); - if (!d) return NULL; - return dictFindEntryByPtrAndHash(d, oldptr, hash); -} - unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count) { dict *d = kvstoreGetDict(kvs, didx); if (!d) return 0; @@ -776,6 +775,17 @@ dictEntry *kvstoreDictFind(kvstore *kvs, int didx, void *key) { return dictFind(d, key); } +/* + * The kvstore handles `key` based on `dictType` during initialization: + * - If `dictType.embedded_entry` is 1, it clones the `key`. + * - Otherwise, it assumes ownership of the `key`. + * Only when the key is cloned must the caller ensure the original `key` is properly freed. + * + * kvstore current usage: + * + * 1. keyspace (db.keys) kvstore - creates a copy of the key. + * 2. expiry (db.expires), pubsub_channels and pubsubshard_channels kvstore - takes ownership of the key.
+ */ dictEntry *kvstoreDictAddRaw(kvstore *kvs, int didx, void *key, dictEntry **existing) { dict *d = createDictIfNeeded(kvs, didx); dictEntry *ret = dictAddRaw(d, key, existing); diff --git a/src/kvstore.h b/src/kvstore.h index e7e21f8aa9..a94f366b6b 100644 --- a/src/kvstore.h +++ b/src/kvstore.h @@ -58,7 +58,6 @@ void kvstoreReleaseDictIterator(kvstoreDictIterator *kvs_id); dictEntry *kvstoreDictIteratorNext(kvstoreDictIterator *kvs_di); dictEntry *kvstoreDictGetRandomKey(kvstore *kvs, int didx); dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx); -dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash); unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count); int kvstoreDictExpand(kvstore *kvs, int didx, unsigned long size); unsigned long kvstoreDictScanDefrag(kvstore *kvs, diff --git a/src/object.c b/src/object.c index 73c3de55dd..b366984d06 100644 --- a/src/object.c +++ b/src/object.c @@ -1010,7 +1010,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { asize = sizeof(*o) + sizeof(dict) + (sizeof(struct dictEntry *) * dictBuckets(d)); while ((de = dictNext(di)) != NULL && samples < sample_size) { ele = dictGetKey(de); - elesize += dictEntryMemUsage() + sdsZmallocSize(ele); + elesize += dictEntryMemUsage(de) + sdsZmallocSize(ele); samples++; } dictReleaseIterator(di); @@ -1033,7 +1033,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { (sizeof(struct dictEntry *) * dictBuckets(d)) + zmalloc_size(zsl->header); while (znode != NULL && samples < sample_size) { elesize += sdsZmallocSize(znode->ele); - elesize += dictEntryMemUsage() + zmalloc_size(znode); + elesize += dictEntryMemUsage(NULL) + zmalloc_size(znode); samples++; znode = znode->level[0].forward; } @@ -1052,7 +1052,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { ele = dictGetKey(de); ele2 = dictGetVal(de); elesize 
+= sdsZmallocSize(ele) + sdsZmallocSize(ele2); - elesize += dictEntryMemUsage(); + elesize += dictEntryMemUsage(de); samples++; } dictReleaseIterator(di); @@ -1552,8 +1552,7 @@ NULL return; } size_t usage = objectComputeSize(c->argv[2], dictGetVal(de), samples, c->db->id); - usage += sdsZmallocSize(dictGetKey(de)); - usage += dictEntryMemUsage(); + usage += dictEntryMemUsage(de); addReplyLongLong(c, usage); } else if (!strcasecmp(c->argv[1]->ptr, "stats") && c->argc == 2) { struct serverMemOverhead *mh = getMemoryOverheadData(); diff --git a/src/rdb.c b/src/rdb.c index 07fc70c16d..ad009005dd 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -3324,6 +3324,9 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin /* call key space notification on key loaded for modules only */ moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id); + + /* Release key (sds), dictEntry stores a copy of it in embedded data */ + sdsfree(key); } /* Loading the database more slowly is useful in order to test diff --git a/src/sds.c b/src/sds.c index 1c0ddd559d..ba3362e88a 100644 --- a/src/sds.c +++ b/src/sds.c @@ -192,6 +192,25 @@ sds sdsdup(const sds s) { return sdsnewlen(s, sdslen(s)); } +/* + * This method returns the minimum amount of bytes required to store the sds (header + data + NULL terminator). + */ +static inline size_t sdsminlen(sds s) { + return sdslen(s) + sdsHdrSize(s[-1]) + 1; +} + +/* This method copies the sds `s` into `buf` which is the target character buffer. */ +size_t sdscopytobuffer(unsigned char *buf, size_t buf_len, sds s, uint8_t *hdr_size) { + size_t required_keylen = sdsminlen(s); + if (buf == NULL) { + return required_keylen; + } + assert(buf_len >= required_keylen); + memcpy(buf, sdsAllocPtr(s), required_keylen); + *hdr_size = sdsHdrSize(s[-1]); + return required_keylen; +} + /* Free an sds string. No operation is performed if 's' is NULL. 
*/ void sdsfree(sds s) { if (s == NULL) return; diff --git a/src/sds.h b/src/sds.h index 20d598829a..a12b8dd89e 100644 --- a/src/sds.h +++ b/src/sds.h @@ -182,6 +182,7 @@ sds sdstrynewlen(const void *init, size_t initlen); sds sdsnew(const char *init); sds sdsempty(void); sds sdsdup(const sds s); +size_t sdscopytobuffer(unsigned char *buf, size_t buf_len, sds s, uint8_t *hdr_size); void sdsfree(sds s); sds sdsgrowzero(sds s, size_t len); sds sdscatlen(sds s, const void *t, size_t len); diff --git a/src/server.c b/src/server.c index ee1bcd088f..ac1ccbfb74 100644 --- a/src/server.c +++ b/src/server.c @@ -289,6 +289,10 @@ int dictSdsKeyCompare(dict *d, const void *key1, const void *key2) { return memcmp(key1, key2, l1) == 0; } +size_t dictSdsEmbedKey(unsigned char *buf, size_t buf_len, const void *key, uint8_t *key_offset) { + return sdscopytobuffer(buf, buf_len, (sds)key, key_offset); +} + /* A case insensitive version used for the command lookup table and other * places where case insensitive non binary-safe comparison is needed. */ int dictSdsKeyCaseCompare(dict *d, const void *key1, const void *key2) { @@ -468,9 +472,11 @@ dictType dbDictType = { dictSdsHash, /* hash function */ NULL, /* key dup */ dictSdsKeyCompare, /* key compare */ - dictSdsDestructor, /* key destructor */ + NULL, /* key is embedded in the dictEntry and freed internally */ dictObjectDestructor, /* val destructor */ dictResizeAllowed, /* allow to resize */ + .embedKey = dictSdsEmbedKey, + .embedded_entry = 1, }; /* Db->expires */