Skip to content

Commit

Permalink
Major revision.
Browse files Browse the repository at this point in the history
- Updated the aggregation to bypass nested commands, and instead
  account for wrapper commands (EXEC, EVAL, FCALL).
- Added slot invalidation for cross-slot lua scripting.
- Added resetClusterStats().
- Added documentation to valkey.conf

Signed-off-by: Kyle Kim <[email protected]>
  • Loading branch information
kyle-yh-kim committed Jul 20, 2024
1 parent ba492ba commit 99ed2cb
Show file tree
Hide file tree
Showing 10 changed files with 60 additions and 54 deletions.
10 changes: 10 additions & 0 deletions src/cluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

#include "server.h"
#include "cluster.h"
#include "cluster_slot_stats.h"

#include <ctype.h>

Expand Down Expand Up @@ -1460,3 +1461,12 @@ void readwriteCommand(client *c) {
c->flags &= ~CLIENT_READONLY;
addReply(c, shared.ok);
}

/* Resets transient cluster stats that we expose via INFO or other means that we want
* to reset via CONFIG RESETSTAT. The function is also used in order to
* initialize these fields in clusterInit() at server startup. */
void resetClusterStats(void) {
if (!server.cluster_enabled) return;

clusterSlotStatResetAll();
}
1 change: 1 addition & 0 deletions src/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,5 @@ ConnectionType *connTypeOfCluster(void);
int isNodeAvailable(clusterNode *node);
long long getNodeReplicationOffset(clusterNode *node);
sds aggregateClientOutputBuffer(client *c);
void resetClusterStats(void);
#endif /* __CLUSTER_H */
2 changes: 1 addition & 1 deletion src/cluster_legacy.c
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,7 @@ void clusterInit(void) {
clusterUpdateMyselfIp();
clusterUpdateMyselfHostname();
clusterUpdateMyselfHumanNodename();
clusterSlotStatResetAll();
resetClusterStats();
}

void clusterInitLast(void) {
Expand Down
31 changes: 19 additions & 12 deletions src/cluster_slot_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@

#define UNASSIGNED_SLOT 0

typedef enum {
INVALID,
KEY_COUNT,
CPU_USEC,
} slotStatTypes;
typedef enum { KEY_COUNT, CPU_USEC, SLOT_STAT_COUNT, INVALID } slotStatTypes;

/* -----------------------------------------------------------------------------
* CLUSTER SLOT-STATS command
Expand Down Expand Up @@ -82,7 +78,8 @@ static void addReplySlotStat(client *c, int slot) {
addReplyArrayLen(c, 2); /* Array of size 2, where 0th index represents (int) slot,
* and 1st index represents (map) usage statistics. */
addReplyLongLong(c, slot);
addReplyMapLen(c, (server.cluster_slot_stats_enabled) ? 2 : 1); /* Nested map representing slot usage statistics. */
addReplyMapLen(c, (server.cluster_slot_stats_enabled) ? SLOT_STAT_COUNT
: 1); /* Nested map representing slot usage statistics. */
addReplyBulkCString(c, "key-count");
addReplyLongLong(c, countKeysInSlot(slot));

Expand Down Expand Up @@ -129,20 +126,22 @@ void clusterSlotStatReset(int slot) {
}

void clusterSlotStatResetAll(void) {
if (server.cluster == NULL) return;

memset(server.cluster->slot_stats, 0, sizeof(server.cluster->slot_stats));
}

/* For cpu-usec accumulation, EXEC, EVAl and FCALL commands are skipped.
/* For cpu-usec accumulation, nested commands within EXEC, EVAL, FCALL are skipped.
* This is due to their unique callstack, where the c->duration for
* EXEC, EVAL and FCALL already includes all of its nested commands.
* Meaning, the accumulation of cpu-usec for these wrapper commands
* Meaning, the accumulation of cpu-usec for these nested commands
* would equate to repeating the same calculation twice.
*/
static int canAddCpuDuration(client *c) {
return server.cluster_slot_stats_enabled && server.cluster_enabled && c->slot != -1 &&
c->cmd->proc != execCommand && c->cmd->proc != evalCommand && c->cmd->proc != fcallCommand;
return server.cluster_slot_stats_enabled && // Config should be enabled.
server.cluster_enabled && // Cluster mode should be enbaled.
c->slot != -1 && // Command should be slot specific.
(!server.execution_nesting || // Either;
(server.execution_nesting && // 1) Command should not be nested, or
c->realcmd->flags & CMD_BLOCKING)); // 2) If command is nested, it must be due to unblocking.
}

void clusterSlotStatsAddCpuDuration(client *c, ustime_t duration) {
Expand All @@ -152,6 +151,14 @@ void clusterSlotStatsAddCpuDuration(client *c, ustime_t duration) {
server.cluster->slot_stats[c->slot].cpu_usec += duration;
}

/* For cross-slot scripting, its caller client's slot must be invalidated,
* such that its slot-stats aggregation is bypassed. */
void clusterSlotStatsInvalidateSlotIfApplicable(scriptRunCtx *ctx) {
if (!(ctx->flags & SCRIPT_ALLOW_CROSS_SLOT)) return;

ctx->original_client->slot = -1;
}

void clusterSlotStatsCommand(client *c) {
if (server.cluster_enabled == 0) {
addReplyError(c, "This instance has cluster support disabled");
Expand Down
2 changes: 2 additions & 0 deletions src/cluster_slot_stats.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#include "server.h"
#include "cluster.h"
#include "script.h"
#include "cluster_legacy.h"

void clusterSlotStatReset(int slot);
void clusterSlotStatResetAll(void);
void clusterSlotStatsAddCpuDuration(client *c, ustime_t duration);
void clusterSlotStatsInvalidateSlotIfApplicable(scriptRunCtx *ctx);
1 change: 1 addition & 0 deletions src/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -3381,6 +3381,7 @@ void configHelpCommand(client *c) {

void configResetStatCommand(client *c) {
resetServerStats();
resetClusterStats();
resetCommandTableStats(server.commands);
resetErrorTableStats();
addReply(c, shared.ok);
Expand Down
2 changes: 2 additions & 0 deletions src/script.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "server.h"
#include "script.h"
#include "cluster.h"
#include "cluster_slot_stats.h"

scriptFlag scripts_flags_def[] = {
{.flag = SCRIPT_FLAG_NO_WRITES, .str = "no-writes"},
Expand Down Expand Up @@ -583,6 +584,7 @@ void scriptCall(scriptRunCtx *run_ctx, sds *err) {
}
call(c, call_flags);
serverAssert((c->flags & CLIENT_BLOCKED) == 0);
clusterSlotStatsInvalidateSlotIfApplicable(run_ctx);
return;

error:
Expand Down
1 change: 0 additions & 1 deletion src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -2500,7 +2500,6 @@ void resetServerStats(void) {
memset(server.duration_stats, 0, sizeof(durationStats) * EL_DURATION_TYPE_NUM);
server.el_cmd_cnt_max = 0;
lazyfreeResetStats();
clusterSlotStatResetAll();
}

/* Make the thread killable at any time, so that kill threads functions
Expand Down
54 changes: 14 additions & 40 deletions tests/unit/cluster/slot-stats.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,14 @@ proc wait_for_replica_key_exists {key key_count} {
# Test cases for CLUSTER SLOT-STATS cpu-usec metric correctness.
# -----------------------------------------------------------------------------

start_cluster 1 0 {tags {external:skip cluster}} {
start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {

# Define shared variables.
set key "FOO"
set key_slot [R 0 cluster keyslot $key]
set key_secondary "FOO2"
set key_secondary_slot [R 0 cluster keyslot $key_secondary]
set metrics_to_assert [list cpu-usec]
R 0 CONFIG SET cluster-slot-stats-enabled yes

test "CLUSTER SLOT-STATS cpu-usec reset upon CONFIG RESETSTAT." {
R 0 SET $key VALUE
Expand Down Expand Up @@ -264,11 +263,10 @@ start_cluster 1 0 {tags {external:skip cluster}} {
# Execute transaction, and assert that all nested command times have been accumulated.
$r1 EXEC
set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
set get_usec [get_cmdstat_usec set r]
set set_usec [get_cmdstat_usec get r]
set exec_usec [get_cmdstat_usec exec r]
set expected_slot_stats [
dict create $key_slot [
dict create cpu-usec [expr $get_usec + $set_usec]
dict create cpu-usec $exec_usec
]
]
assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
Expand All @@ -277,15 +275,15 @@ start_cluster 1 0 {tags {external:skip cluster}} {
R 0 FLUSHALL

test "CLUSTER SLOT-STATS cpu-usec for lua-scripts, without cross-slot keys." {
r eval [format "redis.call('set', '%s', 'bar'); redis.call('get', '%s')" $key $key] 0
r eval [format "#!lua
redis.call('set', '%s', 'bar'); redis.call('get', '%s')" $key $key] 0

set get_usec [get_cmdstat_usec set r]
set set_usec [get_cmdstat_usec get r]
set eval_usec [get_cmdstat_usec eval r]
set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]

set expected_slot_stats [
dict create $key_slot [
dict create cpu-usec [expr $get_usec + $set_usec]
dict create cpu-usec $eval_usec
]
]
assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
Expand All @@ -298,20 +296,9 @@ start_cluster 1 0 {tags {external:skip cluster}} {
redis.call('set', '%s', 'bar'); redis.call('get', '%s');
" $key $key_secondary] 0

set set_usec [get_cmdstat_usec set r]
set get_usec [get_cmdstat_usec get r]
# For cross-slot, we do not accumulate at all.
set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]

set expected_slot_stats [
dict create \
$key_slot [
dict create cpu-usec $set_usec
] \
$key_secondary_slot [
dict create cpu-usec $get_usec
]
]
assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
assert_empty_slot_stats $slot_stats $metrics_to_assert
}
R 0 CONFIG RESETSTAT
R 0 FLUSHALL
Expand All @@ -325,13 +312,12 @@ start_cluster 1 0 {tags {external:skip cluster}} {
r function load replace $function_str
r fcall f1 0

set set_usec [get_cmdstat_usec set r]
set get_usec [get_cmdstat_usec get r]
set fcall_usec [get_cmdstat_usec fcall r]
set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]

set expected_slot_stats [
dict create $key_slot [
dict create cpu-usec [expr $get_usec + $set_usec]
dict create cpu-usec $fcall_usec
]
]
assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
Expand All @@ -349,20 +335,9 @@ start_cluster 1 0 {tags {external:skip cluster}} {
r function load replace $function_str
r fcall f1 0

set set_usec [get_cmdstat_usec set r]
set get_usec [get_cmdstat_usec get r]
# For cross-slot, we do not accumulate at all.
set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]

set expected_slot_stats [
dict create \
$key_slot [
dict create cpu-usec $set_usec
] \
$key_secondary_slot [
dict create cpu-usec $get_usec
]
]
assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
assert_empty_slot_stats $slot_stats $metrics_to_assert
}
R 0 CONFIG RESETSTAT
R 0 FLUSHALL
Expand Down Expand Up @@ -529,12 +504,11 @@ start_cluster 1 0 {tags {external:skip cluster}} {
# Test cases for CLUSTER SLOT-STATS replication.
# -----------------------------------------------------------------------------

start_cluster 1 1 {tags {external:skip cluster}} {
start_cluster 1 1 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {

# Define shared variables.
set key "FOO"
set key_slot [R 0 CLUSTER KEYSLOT $key]
R 0 CONFIG SET cluster-slot-stats-enabled yes

# For replication, only those metrics that are deterministic upon replication are asserted.
# * key-count is asserted, as both the primary and its replica must hold the same number of keys.
Expand Down
10 changes: 10 additions & 0 deletions valkey.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1771,6 +1771,16 @@ aof-timestamp-enabled no
#
# cluster-preferred-endpoint-type ip

# Clusters can be configured to track per-slot resource statistics,
# which are further accessible by the CLUSTER SLOT-STATS command.
#
# By default, the 'cluster-slot-stats-enabled' is disabled, and only 'key-count' is captured.
# By enabling the 'cluster-slot-stats-enabled' config, the cluster will begin to capture advanced statistics.
# These statistics can be leveraged to assess general slot usage trends, identify hot / cold slots,
# migrate slots for a balanced cluster workload, and / or re-write application logic to better utilize slots.
#
# cluster-slot-stats-enabled no

# In order to setup your cluster make sure to read the documentation
# available at https://valkey.io web site.

Expand Down

0 comments on commit 99ed2cb

Please sign in to comment.