diff --git a/src/aof.c b/src/aof.c index 0fd3cf5c26..9b72ab78ae 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1888,30 +1888,28 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) { } } else if (o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = o->ptr; - dictIterator *di = dictGetIterator(zs->dict); - dictEntry *de; - - while ((de = dictNext(di)) != NULL) { - sds ele = dictGetKey(de); - double *score = dictGetVal(de); - + hashtableIterator iter; + hashtableInitIterator(&iter, zs->ht); + zskiplistNode *node; + while (hashtableNext(&iter, (void **)&node)) { if (count == 0) { int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? AOF_REWRITE_ITEMS_PER_CMD : items; if (!rioWriteBulkCount(r, '*', 2 + cmd_items * 2) || !rioWriteBulkString(r, "ZADD", 4) || !rioWriteBulkObject(r, key)) { - dictReleaseIterator(di); + hashtableResetIterator(&iter); return 0; } } - if (!rioWriteBulkDouble(r, *score) || !rioWriteBulkString(r, ele, sdslen(ele))) { - dictReleaseIterator(di); + sds ele = node->ele; + if (!rioWriteBulkDouble(r, node->score) || !rioWriteBulkString(r, ele, sdslen(ele))) { + hashtableResetIterator(&iter); return 0; } if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; items--; } - dictReleaseIterator(di); + hashtableResetIterator(&iter); } else { serverPanic("Unknown sorted zset encoding"); } diff --git a/src/db.c b/src/db.c index 1223d00c8d..171888aa92 100644 --- a/src/db.c +++ b/src/db.c @@ -1003,13 +1003,6 @@ void dictScanCallback(void *privdata, const dictEntry *de) { if (!data->only_keys) { val = dictGetVal(de); } - } else if (o->type == OBJ_ZSET) { - key = sdsdup(keysds); - if (!data->only_keys) { - char buf[MAX_LONG_DOUBLE_CHARS]; - int len = ld2string(buf, sizeof(buf), *(double *)dictGetVal(de), LD_STR_AUTO); - val = sdsnewlen(buf, len); - } } else { serverPanic("Type not handled in dict SCAN callback."); } @@ -1020,13 +1013,26 @@ void dictScanCallback(void *privdata, const dictEntry *de) { void hashtableScanCallback(void *privdata, void *entry) { scanData *data = (scanData *)privdata; + sds val = NULL; + sds key = NULL; + robj *o = data->o; list *keys = data->keys; data->sampled++; - /* currently only implemented for SET scan */ - serverAssert(o && o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HASHTABLE); - sds key = (sds)entry; /* Specific for OBJ_SET */ + /* This callback is only used for scanning elements within a key (hash + * fields, set elements, etc.) so o must be set here. */ + serverAssert(o != NULL); + + /* get key */ + if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HASHTABLE) { + key = (sds)entry; + } else if (o->type == OBJ_ZSET) { + zskiplistNode *node = (zskiplistNode *)entry; + key = node->ele; + } else { + serverPanic("Type not handled in hashset SCAN callback."); + } /* Filter element if it does not match the pattern. */ if (data->pattern) { @@ -1035,7 +1041,23 @@ void hashtableScanCallback(void *privdata, void *entry) { } } + if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HASHTABLE) { + /* no value, key used by reference */ + } else if (o->type == OBJ_ZSET) { + /* zset data is copied */ + zskiplistNode *node = (zskiplistNode *)entry; + key = sdsdup(node->ele); + if (!data->only_keys) { + char buf[MAX_LONG_DOUBLE_CHARS]; + int len = ld2string(buf, sizeof(buf), node->score, LD_STR_AUTO); + val = sdsnewlen(buf, len); + } + } else { + serverPanic("Type not handled in hashset SCAN callback."); + } + listAddNodeTail(keys, key); + if (val) listAddNodeTail(keys, val); } /* Try to parse a SCAN cursor stored at object 'o': @@ -1183,7 +1205,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { shallow_copied_list_items = 1; } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = o->ptr; - dict_table = zs->dict; + hashtable_table = zs->ht; /* scanning ZSET allocates temporary strings even though it's a dict */ shallow_copied_list_items = 0; } diff --git a/src/debug.c b/src/debug.c index 4efe12e237..9e0dd5c411 100644 --- a/src/debug.c +++ b/src/debug.c @@ -205,20 +205,19 @@ void xorObjectDigest(serverDb *db, robj *keyobj, unsigned char *digest, robj *o) } } else if (o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = o->ptr; - dictIterator *di = dictGetIterator(zs->dict); - dictEntry *de; + hashtableIterator iter; + hashtableInitIterator(&iter, zs->ht); - while ((de = dictNext(di)) != NULL) { - sds sdsele = dictGetKey(de); - double *score = dictGetVal(de); - const int len = fpconv_dtoa(*score, buf); + zskiplistNode *node; + while (hashtableNext(&iter, (void **)&node)) { + const int len = fpconv_dtoa(node->score, buf); buf[len] = '\0'; memset(eledigest, 0, 20); - mixDigest(eledigest, sdsele, sdslen(sdsele)); + mixDigest(eledigest, node->ele, sdslen(node->ele)); mixDigest(eledigest, buf, strlen(buf)); xorDigest(digest, eledigest, 20); } - dictReleaseIterator(di); + hashtableResetIterator(&iter); } else { serverPanic("Unknown sorted set encoding"); } @@ -928,7 +927,7 @@ void debugCommand(client *c) { switch (o->encoding) { case OBJ_ENCODING_SKIPLIST: { zset *zs = o->ptr; - d = zs->dict; + ht = zs->ht; } break; case OBJ_ENCODING_HT: d = o->ptr; break; case OBJ_ENCODING_HASHTABLE: ht = o->ptr; break; diff --git a/src/defrag.c b/src/defrag.c index 6522d9aa7b..f38d7bb05f 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -297,54 +297,46 @@ static void zslUpdateNode(zskiplist *zsl, zskiplistNode *oldnode, zskiplistNode } /* Defrag helper for sorted set. - * Update the robj pointer, defrag the skiplist struct and return the new score - * reference. We may not access oldele pointer (not even the pointer stored in - * the skiplist), as it was already freed. Newele may be null, in which case we - * only need to defrag the skiplist, but not update the obj pointer. - * When return value is non-NULL, it is the score reference that must be updated - * in the dict record. */ -static double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) { - zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx; - int i; - sds ele = newele ? newele : oldele; - - /* find the skiplist node referring to the object that was moved, - * and all pointers that need to be updated if we'll end up moving the skiplist node. */ - x = zsl->header; - for (i = zsl->level - 1; i >= 0; i--) { - while (x->level[i].forward && x->level[i].forward->ele != oldele && /* make sure not to access the - ->obj pointer if it matches - oldele */ - (x->level[i].forward->score < score || - (x->level[i].forward->score == score && sdscmp(x->level[i].forward->ele, ele) < 0))) + * Defragment a single skiplist node, update skiplist pointers, and update the + * hashtable pointer to the node */ +static void activeDefragZsetNode(void *privdata, void *entry_ref) { + zskiplist *zsl = privdata; + zskiplistNode **node_ref = (zskiplistNode **)entry_ref; + + /* defragment node internals */ + sds newsds = activeDefragSds((*node_ref)->ele); + if (newsds) (*node_ref)->ele = newsds; + + const double score = (*node_ref)->score; + const sds ele = (*node_ref)->ele; + + /* find skiplist pointers that need to be updated if we end up moving the + * skiplist node. */ + zskiplistNode *update[ZSKIPLIST_MAXLEVEL]; + zskiplistNode *x = zsl->header; + for (int i = zsl->level - 1; i >= 0; i--) { + while (1) { + /* stop when we've reached the end of this level or the next node + * comes after our target in sorted order */ + zskiplistNode *next = x->level[i].forward; + if (!next) break; + if (next->score > score) break; + if (next->score == score && sdscmp(next->ele, ele) >= 0) { + break; + } x = x->level[i].forward; + } update[i] = x; } - - /* update the robj pointer inside the skip list record. */ x = x->level[0].forward; - serverAssert(x && score == x->score && x->ele == oldele); - if (newele) x->ele = newele; + /* should have arrived at intended node */ + serverAssert(x == *node_ref); /* try to defrag the skiplist record itself */ - newx = activeDefragAlloc(x); + zskiplistNode *newx = activeDefragAlloc(x); if (newx) { zslUpdateNode(zsl, x, newx, update); - return &newx->score; - } - return NULL; -} - -/* Defrag helper for sorted set. - * Defrag a single dict entry key name, and corresponding skiplist struct */ -static void activeDefragZsetEntry(zset *zs, dictEntry *de) { - sds newsds; - double *newscore; - sds sdsele = dictGetKey(de); - if ((newsds = activeDefragSds(sdsele))) dictSetKey(zs->dict, de, newsds); - newscore = zslDefrag(zs->zsl, *(double *)dictGetVal(de), sdsele, newsds); - if (newscore) { - dictSetVal(zs->dict, de, newscore); + *node_ref = newx; /* update hashtable pointer */ } } @@ -471,24 +463,15 @@ static long scanLaterList(robj *ob, unsigned long *cursor, monotime endtime) { return bookmark_failed ? 1 : 0; } -typedef struct { - zset *zs; -} scanLaterZsetData; - -static void scanLaterZsetCallback(void *privdata, const dictEntry *_de) { - dictEntry *de = (dictEntry *)_de; - scanLaterZsetData *data = privdata; - activeDefragZsetEntry(data->zs, de); +static void scanLaterZsetCallback(void *privdata, void *element_ref) { + activeDefragZsetNode(privdata, element_ref); server.stat_active_defrag_scanned++; } static void scanLaterZset(robj *ob, unsigned long *cursor) { if (ob->type != OBJ_ZSET || ob->encoding != OBJ_ENCODING_SKIPLIST) return; zset *zs = (zset *)ob->ptr; - dict *d = zs->dict; - scanLaterZsetData data = {zs}; - dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc}; - *cursor = dictScanDefrag(d, *cursor, scanLaterZsetCallback, &defragfns, &data); + *cursor = hashtableScanDefrag(zs->ht, *cursor, scanLaterZsetCallback, zs->zsl, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); } /* Used as hashtable scan callback when all we need is to defrag the hashtable @@ -531,19 +514,22 @@ static void defragQuicklist(robj *ob) { activeDefragQuickListNodes(ql); } -static void defragZsetSkiplist(robj *ob) { +static void defragZsetSkiplist(serverDb *db, valkey *ob) { + serverAssert(ob->type == OBJ_ZSET && ob->encoding == OBJ_ENCODING_SKIPLIST); zset *zs = (zset *)ob->ptr; + zset *newzs; zskiplist *newzsl; - dict *newdict; - dictEntry *de; struct zskiplistNode *newheader; - serverAssert(ob->type == OBJ_ZSET && ob->encoding == OBJ_ENCODING_SKIPLIST); if ((newzs = activeDefragAlloc(zs))) ob->ptr = zs = newzs; if ((newzsl = activeDefragAlloc(zs->zsl))) zs->zsl = newzsl; if ((newheader = activeDefragAlloc(zs->zsl->header))) zs->zsl->header = newheader; - if (dictSize(zs->dict) > server.active_defrag_max_scan_fields) - defragLater(ob); + + hashtable *newtable; + if ((newtable = hashtableDefragTables(zs->ht, activeDefragAlloc))) zs->ht = newtable; + + if (hashtableSize(zs->ht) > server.active_defrag_max_scan_fields) + defragLater(db, ob); else { dictIterator *di = dictGetIterator(zs->dict); while ((de = dictNext(di)) != NULL) { @@ -555,7 +541,7 @@ static void defragZsetSkiplist(robj *ob) { if ((newdict = dictDefragTables(zs->dict))) zs->dict = newdict; } -static void defragHash(robj *ob) { +static void defragHash(serverDb *db, valkey *ob) { dict *d, *newd; serverAssert(ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HT); d = ob->ptr; diff --git a/src/geo.c b/src/geo.c index 75654f85a5..65f17c81db 100644 --- a/src/geo.c +++ b/src/geo.c @@ -774,7 +774,7 @@ void georadiusGeneric(client *c, int srcKeyIndex, int flags) { if (maxelelen < elelen) maxelelen = elelen; totelelen += elelen; znode = zslInsert(zs->zsl, score, gp->member); - serverAssert(dictAdd(zs->dict, gp->member, &znode->score) == DICT_OK); + serverAssert(hashtableAdd(zs->ht, znode)); gp->member = NULL; } diff --git a/src/module.c b/src/module.c index 36283e2c73..076150a70c 100644 --- a/src/module.c +++ b/src/module.c @@ -11023,12 +11023,10 @@ static void moduleScanKeyDictCallback(void *privdata, const dictEntry *de) { robj *o = data->key->value; robj *field = createStringObject(key, sdslen(key)); robj *value = NULL; + if (o->type == OBJ_HASH) { sds val = dictGetVal(de); value = createStringObject(val, sdslen(val)); - } else if (o->type == OBJ_ZSET) { - double *val = (double *)dictGetVal(de); - value = createStringObjectFromLongDouble(*val, 0); } else { serverPanic("unexpected object type"); } @@ -11041,12 +11039,24 @@ static void moduleScanKeyDictCallback(void *privdata, const dictEntry *de) { static void moduleScanKeyHashtableCallback(void *privdata, void *entry) { ScanKeyCBData *data = privdata; robj *o = data->key->value; - serverAssert(o->type == OBJ_SET); - sds key = entry; - robj *field = createStringObject(key, sdslen(key)); + robj *value = NULL; + sds key = NULL; + + if (o->type == OBJ_SET) { + key = entry; + /* no value */ + } else if (o->type == OBJ_ZSET) { + zskiplistNode *node = (zskiplistNode *)entry; + key = node->ele; + value = createStringObjectFromLongDouble(node->score, 0); + } else { + serverPanic("unexpected object type"); + } + robj * field = createStringObject(key, sdslen(key)); - data->fn(data->key, field, NULL, data->user_data); + data->fn(data->key, field, value, data->user_data); decrRefCount(field); + if (value) decrRefCount(value); } /* Scan api that allows a module to scan the elements in a hash, set or sorted set key @@ -11110,7 +11120,7 @@ int VM_ScanKey(ValkeyModuleKey *key, ValkeyModuleScanCursor *cursor, ValkeyModul } else if (o->type == OBJ_HASH) { if (o->encoding == OBJ_ENCODING_HT) d = o->ptr; } else if (o->type == OBJ_ZSET) { - if (o->encoding == OBJ_ENCODING_SKIPLIST) d = ((zset *)o->ptr)->dict; + if (o->encoding == OBJ_ENCODING_SKIPLIST) ht = ((zset *)o->ptr)->ht; } else { errno = EINVAL; return 0; diff --git a/src/object.c b/src/object.c index 15363f31b8..5b04fdfddd 100644 --- a/src/object.c +++ b/src/object.c @@ -460,7 +460,7 @@ robj *createZsetObject(void) { zset *zs = zmalloc(sizeof(*zs)); robj *o; - zs->dict = dictCreate(&zsetDictType); + zs->ht = hashtableCreate(&zsetHashtableType); zs->zsl = zslCreate(); o = createObject(OBJ_ZSET, zs); o->encoding = OBJ_ENCODING_SKIPLIST; @@ -518,7 +518,7 @@ void freeZsetObject(robj *o) { switch (o->encoding) { case OBJ_ENCODING_SKIPLIST: zs = o->ptr; - dictRelease(zs->dict); + hashtableRelease(zs->ht); zslFree(zs->zsl); zfree(zs); break; @@ -664,10 +664,7 @@ void dismissZsetObject(robj *o, size_t size_hint) { } } - /* Dismiss hash table memory. */ - dict *d = zs->dict; - dismissMemory(d->ht_table[0], DICTHT_SIZE(d->ht_size_exp[0]) * sizeof(dictEntry *)); - dismissMemory(d->ht_table[1], DICTHT_SIZE(d->ht_size_exp[1]) * sizeof(dictEntry *)); + dismissHashtable(zs->ht); } else if (o->encoding == OBJ_ENCODING_LISTPACK) { dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr)); } else { @@ -1186,18 +1183,18 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { if (o->encoding == OBJ_ENCODING_LISTPACK) { asize = sizeof(*o) + zmalloc_size(o->ptr); } else if (o->encoding == OBJ_ENCODING_SKIPLIST) { - d = ((zset *)o->ptr)->dict; + hashtable *ht = ((zset *)o->ptr)->ht; zskiplist *zsl = ((zset *)o->ptr)->zsl; zskiplistNode *znode = zsl->header->level[0].forward; - asize = sizeof(*o) + sizeof(zset) + sizeof(zskiplist) + sizeof(dict) + - (sizeof(struct dictEntry *) * dictBuckets(d)) + zmalloc_size(zsl->header); + asize = sizeof(*o) + sizeof(zset) + sizeof(zskiplist) + + hashtableMemUsage(ht) + zmalloc_size(zsl->header); while (znode != NULL && samples < sample_size) { elesize += sdsAllocSize(znode->ele); - elesize += dictEntryMemUsage(NULL) + zmalloc_size(znode); + elesize += zmalloc_size(znode); samples++; znode = znode->level[0].forward; } - if (samples) asize += (double)elesize / samples * dictSize(d); + if (samples) asize += (double)elesize / samples * hashtableSize(ht); } else { serverPanic("Unknown sorted set encoding"); } diff --git a/src/rdb.c b/src/rdb.c index 5fb77a2897..82047c8149 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2003,7 +2003,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { o = createZsetObject(); zs = o->ptr; - if (zsetlen > DICT_HT_INITIAL_SIZE && dictTryExpand(zs->dict, zsetlen) != DICT_OK) { + if (!hashtableTryExpand(zs->ht, zsetlen)) { rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)zsetlen); decrRefCount(o); return NULL; @@ -2046,7 +2046,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { totelelen += sdslen(sdsele); znode = zslInsert(zs->zsl, score, sdsele); - if (dictAdd(zs->dict, sdsele, &znode->score) != DICT_OK) { + if (!hashtableAdd(zs->ht, znode)) { rdbReportCorruptRDB("Duplicate zset fields detected"); decrRefCount(o); /* no need to free 'sdsele', will be released by zslFree together with 'o' */ diff --git a/src/server.c b/src/server.c index 9bd7bdd4a4..ce8f0a6782 100644 --- a/src/server.c +++ b/src/server.c @@ -555,14 +555,16 @@ hashtableType setHashtableType = { .keyCompare = hashtableSdsKeyCompare, .entryDestructor = dictSdsDestructor}; +const void *zsetHashtableGetKey(const void *element) { + zskiplistNode *node = (zskiplistNode *)element; + return node->ele; +} + /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ -dictType zsetDictType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* Note: SDS string shared & freed by skiplist */ - NULL, /* val destructor */ - NULL, /* allow to expand */ +hashtableType zsetHashtableType = { + .hashFunction = dictSdsHash, + .entryGetKey = zsetHashtableGetKey, + .keyCompare = hashtableSdsKeyCompare, }; uint64_t hashtableSdsHash(const void *key) { diff --git a/src/server.h b/src/server.h index 1aafcaeb57..9aaca3471f 100644 --- a/src/server.h +++ b/src/server.h @@ -1475,7 +1475,7 @@ typedef struct zskiplist { } zskiplist; typedef struct zset { - dict *dict; + hashtable *ht; zskiplist *zsl; } zset; @@ -2675,7 +2675,7 @@ extern dictType objectKeyPointerValueDictType; extern dictType objectKeyHeapPointerValueDictType; extern hashtableType setHashtableType; extern dictType BenchmarkDictType; -extern dictType zsetDictType; +extern hashtableType zsetHashtableType; extern hashtableType kvstoreKeysHashtableType; extern hashtableType kvstoreExpiresHashtableType; extern double R_Zero, R_PosInf, R_NegInf, R_Nan; @@ -3749,10 +3749,11 @@ unsigned long LFUDecrAndReturn(robj *o); int performEvictions(void); void startEvictionTimeProc(void); -/* Keys hashing / comparison functions for dict.c hash tables. */ +/* Keys hashing/comparison functions for dict.c and hashtable.c hash tables. */ uint64_t dictSdsHash(const void *key); uint64_t dictSdsCaseHash(const void *key); int dictSdsKeyCompare(const void *key1, const void *key2); +int hashtableSdsKeyCompare(const void *key1, const void *key2); int dictSdsKeyCaseCompare(const void *key1, const void *key2); void dictSdsDestructor(void *val); void dictListDestructor(void *val); diff --git a/src/sort.c b/src/sort.c index b1723daff0..5bda2d61c7 100644 --- a/src/sort.c +++ b/src/sort.c @@ -330,7 +330,7 @@ void sortCommandGeneric(client *c, int readonly) { switch (sortval->type) { case OBJ_LIST: vectorlen = listTypeLength(sortval); break; case OBJ_SET: vectorlen = setTypeSize(sortval); break; - case OBJ_ZSET: vectorlen = dictSize(((zset *)sortval->ptr)->dict); break; + case OBJ_ZSET: vectorlen = hashtableSize(((zset *)sortval->ptr)->ht); break; default: vectorlen = 0; serverPanic("Bad SORT type"); /* Avoid GCC warning */ } @@ -423,7 +423,7 @@ void sortCommandGeneric(client *c, int readonly) { /* Check if starting point is trivial, before doing log(N) lookup. */ if (desc) { - long zsetlen = dictSize(((zset *)sortval->ptr)->dict); + long zsetlen = hashtableSize(((zset *)sortval->ptr)->ht); ln = zsl->tail; if (start > 0) ln = zslGetElementByRank(zsl, zsetlen - start); @@ -445,19 +445,17 @@ void sortCommandGeneric(client *c, int readonly) { end -= start; start = 0; } else if (sortval->type == OBJ_ZSET) { - dict *set = ((zset *)sortval->ptr)->dict; - dictIterator *di; - dictEntry *setele; - sds sdsele; - di = dictGetIterator(set); - while ((setele = dictNext(di)) != NULL) { - sdsele = dictGetKey(setele); - vector[j].obj = createStringObject(sdsele, sdslen(sdsele)); + hashtable *ht = ((zset *)sortval->ptr)->ht; + hashtableIterator iter; + hashtableInitIterator(&iter, ht); + zskiplistNode *node; + while (hashtableNext(&iter, (void **)&node)) { + vector[j].obj = createStringObject(node->ele, sdslen(node->ele)); vector[j].u.score = 0; vector[j].u.cmpobj = NULL; j++; } - dictReleaseIterator(di); + hashtableResetIterator(&iter); } else { serverPanic("Unknown type"); } diff --git a/src/t_zset.c b/src/t_zset.c index e8c5a369b7..ea80a4961a 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -442,7 +442,7 @@ zskiplistNode *zslNthInRange(zskiplist *zsl, zrangespec *range, long n) { * range->maxex). When inclusive a score >= min && score <= max is deleted. * Note that this function takes the reference to the hash table view of the * sorted set, in order to remove the elements from the hash table too. */ -unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dict) { +unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, hashtable *ht) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; unsigned long removed = 0; int i; @@ -460,7 +460,7 @@ unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dic while (x && zslValueLteMax(x->score, range)) { zskiplistNode *next = x->level[0].forward; zslDeleteNode(zsl, x, update); - dictDelete(dict, x->ele); + hashtableDelete(ht, x->ele); zslFreeNode(x); /* Here is where x->ele is actually released. */ removed++; x = next; @@ -468,7 +468,7 @@ unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dic return removed; } -unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *dict) { +unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, hashtable *ht) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; unsigned long removed = 0; int i; @@ -487,7 +487,7 @@ unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *di while (x && zslLexValueLteMax(x->ele, range)) { zskiplistNode *next = x->level[0].forward; zslDeleteNode(zsl, x, update); - dictDelete(dict, x->ele); + hashtableDelete(ht, x->ele); zslFreeNode(x); /* Here is where x->ele is actually released. */ removed++; x = next; @@ -497,7 +497,7 @@ unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *di /* Delete all the elements with rank between start and end from the skiplist. * Start and end are inclusive. Note that start and end need to be 1-based */ -unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) { +unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, hashtable *ht) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; unsigned long traversed = 0, removed = 0; int i; @@ -516,7 +516,7 @@ unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned while (x && traversed <= end) { zskiplistNode *next = x->level[0].forward; zslDeleteNode(zsl, x, update); - dictDelete(dict, x->ele); + hashtableDelete(ht, x->ele); zslFreeNode(x); removed++; traversed++; @@ -1259,7 +1259,7 @@ robj *zsetTypeCreate(size_t size_hint, size_t val_len_hint) { robj *zobj = createZsetObject(); zset *zs = zobj->ptr; - dictExpand(zs->dict, size_hint); + hashtableExpand(zs->ht, size_hint); return zobj; } @@ -1297,11 +1297,11 @@ void zsetConvertAndExpand(robj *zobj, int encoding, unsigned long cap) { if (encoding != OBJ_ENCODING_SKIPLIST) serverPanic("Unknown target encoding"); zs = zmalloc(sizeof(*zs)); - zs->dict = dictCreate(&zsetDictType); + zs->ht = hashtableCreate(&zsetHashtableType); zs->zsl = zslCreate(); /* Presize the dict to avoid rehashing */ - dictExpand(zs->dict, cap); + hashtableExpand(zs->ht, cap); eptr = lpSeek(zl, 0); if (eptr != NULL) { @@ -1318,7 +1318,7 @@ void zsetConvertAndExpand(robj *zobj, int encoding, unsigned long cap) { ele = sdsnewlen((char *)vstr, vlen); node = zslInsert(zs->zsl, score, ele); - serverAssert(dictAdd(zs->dict, ele, &node->score) == DICT_OK); + serverAssert(hashtableAdd(zs->ht, node)); zzlNext(zl, &eptr, &sptr); } @@ -1333,7 +1333,7 @@ void zsetConvertAndExpand(robj *zobj, int encoding, unsigned long cap) { /* Approach similar to zslFree(), since we want to free the skiplist at * the same time as creating the listpack. */ zs = zobj->ptr; - dictRelease(zs->dict); + hashtableRelease(zs->ht); node = zs->zsl->header->level[0].forward; zfree(zs->zsl->header); zfree(zs->zsl); @@ -1377,9 +1377,9 @@ int zsetScore(robj *zobj, sds member, double *score) { if (zzlFind(zobj->ptr, member, score) == NULL) return C_ERR; } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = zobj->ptr; - dictEntry *de = dictFind(zs->dict, member); - if (de == NULL) return C_ERR; - *score = *(double *)dictGetVal(de); + zskiplistNode *setElement; + if (!hashtableFind(zs->ht, member, (void **)&setElement)) return C_ERR; + *score = setElement->score; } else { serverPanic("Unknown sorted set encoding"); } @@ -1504,18 +1504,16 @@ int zsetAdd(robj *zobj, double score, sds ele, int in_flags, int *out_flags, dou * converted the key to skiplist. */ if (zobj->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = zobj->ptr; - zskiplistNode *znode; - dictEntry *de; - de = dictFind(zs->dict, ele); - if (de != NULL) { + zskiplistNode **existing_node = (zskiplistNode **)hashtableFindRef(zs->ht, ele); + if (existing_node != NULL) { /* NX? Return, same element already exists. */ if (nx) { *out_flags |= ZADD_OUT_NOP; return 1; } - curscore = *(double *)dictGetVal(de); + curscore = (*existing_node)->score; /* Prepare the score for the increment if needed. */ if (incr) { @@ -1536,18 +1534,16 @@ int zsetAdd(robj *zobj, double score, sds ele, int in_flags, int *out_flags, dou /* Remove and re-insert when score changes. */ if (score != curscore) { - znode = zslUpdateScore(zs->zsl, curscore, ele, score); - /* Note that we did not removed the original element from - * the hash table representing the sorted set, so we just - * update the score. */ - dictSetVal(zs->dict, de, &znode->score); /* Update score ptr. */ + /* Note that this assignment updates the node pointer stored in + * the hashtable */ + *existing_node = zslUpdateScore(zs->zsl, curscore, ele, score); *out_flags |= ZADD_OUT_UPDATED; } return 1; } else if (!xx) { ele = sdsdup(ele); - znode = zslInsert(zs->zsl, score, ele); - serverAssert(dictAdd(zs->dict, ele, &znode->score) == DICT_OK); + zskiplistNode *new_node = zslInsert(zs->zsl, score, ele); + serverAssert(hashtableAdd(zs->ht, new_node)); *out_flags |= ZADD_OUT_ADDED; if (newscore) *newscore = score; return 1; @@ -1566,29 +1562,16 @@ int zsetAdd(robj *zobj, double score, sds ele, int in_flags, int *out_flags, dou * element was not there). It does not resize the dict after deleting the * element. */ static int zsetRemoveFromSkiplist(zset *zs, sds ele) { - dictEntry *de; - double score; - - de = dictUnlink(zs->dict, ele); - if (de != NULL) { - /* Get the score in order to delete from the skiplist later. */ - score = *(double *)dictGetVal(de); - - /* Delete from the hash table and later from the skiplist. - * Note that the order is important: deleting from the skiplist - * actually releases the SDS string representing the element, - * which is shared between the skiplist and the hash table, so - * we need to delete from the skiplist as the final step. */ - dictFreeUnlinkedEntry(zs->dict, de); + zskiplistNode *node; + if (!hashtablePop(zs->ht, ele, (void **)&node)) return 0; - /* Delete from skiplist. */ - int retval = zslDelete(zs->zsl, score, ele, NULL); - serverAssert(retval); + /* hashtable only contains pointers to skiplist nodes. Nothing to free. */ - return 1; - } + /* Delete from skiplist. */ + int retval = zslDelete(zs->zsl, node->score, ele, NULL); + serverAssert(retval); - return 0; + return 1; } /* Delete the element 'ele' from the sorted set, returning 1 if the element @@ -1656,25 +1639,18 @@ long zsetRank(robj *zobj, sds ele, int reverse, double *output_score) { } } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = zobj->ptr; - zskiplist *zsl = zs->zsl; - dictEntry *de; - double score; - de = dictFind(zs->dict, ele); - if (de != NULL) { - zskiplistNode *n = zsetGetSLNodeByEntry(de); - score = n->score; - rank = zslGetRankByNode(zsl, n); - /* Existing elements always have a rank. */ - serverAssert(rank != 0); - if (output_score) *output_score = score; - if (reverse) - return llen - rank; - else - return rank - 1; - } else { - return -1; - } + zskiplistNode *node; + if (!hashtableFind(zs->ht, ele, (void **)&node)) return -1; + + rank = zslGetRankByNode(zs->zsl, node); + /* Existing elements always have a rank. */ + serverAssert(rank != 0); + if (output_score) *output_score = node->score; + if (reverse) + return llen - rank; + else + return rank - 1; } else { serverPanic("Unknown sorted set encoding"); } @@ -1704,7 +1680,7 @@ robj *zsetDup(robj *o) { zobj = createZsetObject(); zs = o->ptr; new_zs = zobj->ptr; - dictExpand(new_zs->dict, dictSize(zs->dict)); + hashtableExpand(new_zs->ht, hashtableSize(zs->ht)); zskiplist *zsl = zs->zsl; zskiplistNode *ln; sds ele; @@ -1721,7 +1697,7 @@ robj *zsetDup(robj *o) { ele = ln->ele; sds new_ele = sdsdup(ele); zskiplistNode *znode = zslInsert(new_zs->zsl, ln->score, new_ele); - dictAdd(new_zs->dict, new_ele, &znode->score); + hashtableAdd(new_zs->ht, znode); ln = ln->backward; } } else { @@ -1751,11 +1727,11 @@ void zsetReplyFromListpackEntry(client *c, listpackEntry *e) { void zsetTypeRandomElement(robj *zsetobj, unsigned long zsetsize, listpackEntry *key, double *score) { if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = zsetobj->ptr; - dictEntry *de = dictGetFairRandomKey(zs->dict); - sds s = dictGetKey(de); - key->sval = (unsigned char *)s; - key->slen = sdslen(s); - if (score) *score = *(double *)dictGetVal(de); + zskiplistNode *node; + hashtableFairRandomEntry(zs->ht, (void **)&node); + key->sval = (unsigned char *)node->ele; + key->slen = sdslen(node->ele); + if (score) *score = node->score; } else if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) { listpackEntry val; lpRandomPair(zsetobj->ptr, zsetsize, key, &val); @@ -2012,19 +1988,17 @@ void zremrangeGenericCommand(client *c, zrange_type rangetype) { } } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = zobj->ptr; - dictPauseAutoResize(zs->dict); + hashtablePauseAutoShrink(zs->ht); switch (rangetype) { case ZRANGE_AUTO: - case ZRANGE_RANK: deleted = zslDeleteRangeByRank(zs->zsl, start + 1, end + 1, zs->dict); break; - case ZRANGE_SCORE: deleted = zslDeleteRangeByScore(zs->zsl, &range, zs->dict); break; - case ZRANGE_LEX: deleted = zslDeleteRangeByLex(zs->zsl, &lexrange, zs->dict); break; + case ZRANGE_RANK: deleted = zslDeleteRangeByRank(zs->zsl, start + 1, end + 1, zs->ht); break; + case ZRANGE_SCORE: deleted = zslDeleteRangeByScore(zs->zsl, &range, zs->ht); break; + case ZRANGE_LEX: deleted = zslDeleteRangeByLex(zs->zsl, &lexrange, zs->ht); break; } - dictResumeAutoResize(zs->dict); - if (dictSize(zs->dict) == 0) { + hashtableResumeAutoShrink(zs->ht); + if (hashtableSize(zs->ht) == 0) { dbDelete(c->db, key); keyremoved = 1; - } else { - dictShrinkIfNeeded(zs->dict); } } else { serverPanic("Unknown sorted set encoding"); @@ -2357,9 +2331,9 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) { } } else if (op->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = op->subject->ptr; - dictEntry *de; - if ((de = dictFind(zs->dict, val->ele)) != NULL) { - *score = *(double *)dictGetVal(de); + zskiplistNode *node; + if (hashtableFind(zs->ht, val->ele, (void **)&node)) { + *score = node->score; return 1; } else { return 0; @@ -2406,20 +2380,18 @@ inline static void zunionInterAggregate(double *target, double val, int aggregat } } -static size_t zsetDictGetMaxElementLength(dict *d, size_t *totallen) { - dictIterator *di; - dictEntry *de; +static size_t zsetHashtableGetMaxElementLength(hashtable *s, size_t *totallen) { size_t maxelelen = 0; - di = dictGetIterator(d); - - while ((de = dictNext(di)) != NULL) { - sds ele = dictGetKey(de); - if (sdslen(ele) > maxelelen) maxelelen = sdslen(ele); - if (totallen) (*totallen) += sdslen(ele); + hashtableIterator iter; + hashtableInitIterator(&iter, s); + zskiplistNode *node; + while (hashtableNext(&iter, (void **)&node)) { + size_t elelen = sdslen(node->ele); + if (elelen > maxelelen) maxelelen = elelen; + if (totallen) (*totallen) += elelen; } - - dictReleaseIterator(di); + hashtableResetIterator(&iter); return maxelelen; } @@ -2469,7 +2441,7 @@ static void zdiffAlgorithm1(zsetopsrc *src, long setnum, zset *dstzset, size_t * if (!exists) { tmp = zuiNewSdsFromValue(&zval); znode = zslInsert(dstzset->zsl, zval.score, tmp); - dictAdd(dstzset->dict, tmp, &znode->score); + hashtableAdd(dstzset->ht, znode); if (sdslen(tmp) > *maxelelen) *maxelelen = sdslen(tmp); (*totelelen) += sdslen(tmp); } @@ -2500,6 +2472,7 @@ static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t * zskiplistNode *znode; sds tmp; + hashtablePauseAutoShrink(dstzset->ht); for (j = 0; j < setnum; j++) { if (zuiLength(&src[j]) == 0) continue; @@ -2509,15 +2482,13 @@ static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t * if (j == 0) { tmp = zuiNewSdsFromValue(&zval); znode = zslInsert(dstzset->zsl, zval.score, tmp); - dictAdd(dstzset->dict, tmp, &znode->score); + hashtableAdd(dstzset->ht, znode); cardinality++; } else { - dictPauseAutoResize(dstzset->dict); tmp = zuiSdsFromValue(&zval); if (zsetRemoveFromSkiplist(dstzset, tmp)) { cardinality--; } - dictResumeAutoResize(dstzset->dict); } /* Exit if result set is empty as any additional removal @@ -2530,16 +2501,14 @@ static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t * } /* Resize dict if needed after removing multiple elements */ - dictShrinkIfNeeded(dstzset->dict); + hashtableResumeAutoShrink(dstzset->ht); /* Using this algorithm, we can't calculate the max element as we go, * we have to iterate through all elements to find the max one after. */ - *maxelelen = zsetDictGetMaxElementLength(dstzset->dict, totelelen); + *maxelelen = zsetHashtableGetMaxElementLength(dstzset->ht, totelelen); } static int zsetChooseDiffAlgorithm(zsetopsrc *src, long setnum) { - int j; - /* Select what DIFF algorithm to use. * * Algorithm 1 is O(N*M + K*log(K)) where N is the size of the @@ -2554,7 +2523,7 @@ static int zsetChooseDiffAlgorithm(zsetopsrc *src, long setnum) { long long algo_one_work = 0; long long algo_two_work = 0; - for (j = 0; j < setnum; j++) { + for (int j = 0; j < setnum; j++) { /* If any other set is equal to the first set, there is nothing to be * done, since we would remove all elements anyway. */ if (j > 0 && src[0].subject == src[j].subject) { @@ -2585,6 +2554,22 @@ static void zdiff(zsetopsrc *src, long setnum, zset *dstzset, size_t *maxelelen, } } +struct sdsDoublePair { + sds key; + double score; +}; + +const void *sdsDoubleMapGetKey(const void *entry) { + struct sdsDoublePair *keyval = (struct sdsDoublePair *)entry; + return keyval->key; +} + +hashtableType sds_double_map_hashtable = { + .hashFunction = dictSdsHash, + .entryGetKey = sdsDoubleMapGetKey, + .keyCompare = hashtableSdsKeyCompare, + .entryDestructor = zfree}; + /* The zunionInterDiffGenericCommand() function is called in order to implement the * following commands: ZUNION, ZINTER, ZDIFF, ZUNIONSTORE, ZINTERSTORE, ZDIFFSTORE, * ZINTERCARD. @@ -2607,7 +2592,6 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in size_t maxelelen = 0, totelelen = 0; robj *dstobj = NULL; zset *dstzset = NULL; - zskiplistNode *znode; int withscores = 0; unsigned long cardinality = 0; long limit = 0; /* Stop searching after reaching the limit. 0 means unlimited. */ @@ -2762,8 +2746,8 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in } } else if (j == setnum) { tmp = zuiNewSdsFromValue(&zval); - znode = zslInsert(dstzset->zsl, score, tmp); - dictAdd(dstzset->dict, tmp, &znode->score); + zskiplistNode *znode = zslInsert(dstzset->zsl, score, tmp); + hashtableAdd(dstzset->ht, znode); totelelen += sdslen(tmp); if (sdslen(tmp) > maxelelen) maxelelen = sdslen(tmp); } @@ -2771,64 +2755,62 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in zuiClearIterator(&src[0]); } } else if (op == SET_OP_UNION) { - dictIterator *di; - dictEntry *de, *existing; - double score; - + /* Step 1: Create a hash table of elements -> aggregated-scores + * by iterating one sorted set after the other. */ + hashtable *temp_ht = hashtableCreate(&sds_double_map_hashtable); if (setnum) { /* Our union is at least as large as the largest set. * Resize the dictionary ASAP to avoid useless rehashing. */ - dictExpand(dstzset->dict, zuiLength(&src[setnum - 1])); + hashtableExpand(temp_ht, zuiLength(&src[setnum - 1])); } - - /* Step 1: Create a dictionary of elements -> aggregated-scores - * by iterating one sorted set after the other. */ for (i = 0; i < setnum; i++) { if (zuiLength(&src[i]) == 0) continue; zuiInitIterator(&src[i]); while (zuiNext(&src[i], &zval)) { /* Initialize value */ - score = src[i].weight * zval.score; + double score = src[i].weight * zval.score; if (isnan(score)) score = 0; /* Search for this element in the accumulating dictionary. */ - de = dictAddRaw(dstzset->dict, zuiSdsFromValue(&zval), &existing); + struct sdsDoublePair *existing; + sds sdsval = zuiSdsFromValue(&zval); + hashtablePosition position; /* If we don't have it, we need to create a new entry. */ - if (!existing) { - tmp = zuiNewSdsFromValue(&zval); + if (hashtableFindPositionForInsert(temp_ht, sdsval, &position, (void **)&existing)) { + struct sdsDoublePair *new_element = zmalloc(sizeof(struct sdsDoublePair)); + new_element->key = zuiNewSdsFromValue(&zval); + new_element->score = score; + hashtableInsertAtPosition(temp_ht, new_element, &position); /* Remember the longest single element encountered, * to understand if it's possible to convert to listpack * at the end. */ - totelelen += sdslen(tmp); - if (sdslen(tmp) > maxelelen) maxelelen = sdslen(tmp); - /* Update the element with its initial score. */ - dictSetKey(dstzset->dict, de, tmp); - dictSetDoubleVal(de, score); + totelelen += sdslen(new_element->key); + if (sdslen(new_element->key) > maxelelen) { + maxelelen = sdslen(new_element->key); + } } else { /* Update the score with the score of the new instance - * of the element found in the current sorted set. - * - * Here we access directly the dictEntry double - * value inside the union as it is a big speedup - * compared to using the getDouble/setDouble API. */ - double *existing_score_ptr = dictGetDoubleValPtr(existing); - zunionInterAggregate(existing_score_ptr, score, aggregate); + * of the element found in the current sorted set. */ + zunionInterAggregate(&existing->score, score, aggregate); } } zuiClearIterator(&src[i]); } /* Step 2: convert the dictionary into the final sorted set. */ - di = dictGetIterator(dstzset->dict); - - while ((de = dictNext(di)) != NULL) { - sds ele = dictGetKey(de); - score = dictGetDoubleVal(de); - znode = zslInsert(dstzset->zsl, score, ele); - dictSetVal(dstzset->dict, de, &znode->score); + hashtableExpand(dstzset->ht, hashtableSize(temp_ht)); + hashtableIterator iter; + hashtableInitIterator(&iter, temp_ht); + struct sdsDoublePair *element; + + while (hashtableNext(&iter, (void **)&element)) { + zskiplistNode *znode = zslInsert(dstzset->zsl, element->score, element->key); + element->key = NULL; /* passed ownership to skiplist node */ + hashtableAdd(dstzset->ht, znode); } - dictReleaseIterator(di); + hashtableResetIterator(&iter); + hashtableRelease(temp_ht); } else if (op == SET_OP_DIFF) { zdiff(src, setnum, dstzset, &maxelelen, &totelelen); } else { @@ -4169,12 +4151,12 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) { addReplyArrayLen(c, count); if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = zsetobj->ptr; + zskiplistNode *node; while (count--) { - dictEntry *de = dictGetFairRandomKey(zs->dict); - sds key = dictGetKey(de); + serverAssert(hashtableFairRandomEntry(zs->ht, (void **)&node)); if (withscores && c->resp > 2) addReplyArrayLen(c, 2); - addReplyBulkCBuffer(c, key, sdslen(key)); - if (withscores) addReplyDouble(c, *(double *)dictGetVal(de)); + addReplyBulkCBuffer(c, node->ele, sdslen(node->ele)); + if (withscores) addReplyDouble(c, node->score); if (c->flag.close_asap) break; } } else if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) {