Skip to content

Commit

Permalink
SENTINEL FAILOVER COORDINATED actually does a leader election
Browse files Browse the repository at this point in the history
Doing a proper leader election makes it possible to increase the time
available for the actual FAILOVER command, as other Sentinels will not
attempt to initiate another failover during the failover timeout.

Interestingly, Sentinel does not take the reported primary up/down
status into account when counting replies to the leader election. Thus,
Sentinel will simply proceed with the failover once we reach the
quorum, even if we are not in the O_DOWN state.

Signed-off-by: Simon Baatz <[email protected]>
  • Loading branch information
gmbnomis committed Nov 12, 2024
1 parent 9b2d9c6 commit bde2e68
Showing 1 changed file with 29 additions and 12 deletions.
41 changes: 29 additions & 12 deletions src/sentinel.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ static mstime_t sentinel_default_failover_timeout = 60 * 3 * 1000;
#define SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION (1 << 0)
#define SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION (1 << 1)

/* sentinelAskPrimaryStateToOtherSentinels flags */
#define SENTINEL_ASK_FORCED (1 << 0)

/* The link to a sentinelValkeyInstance. When we have the same set of Sentinels
* monitoring many primaries, we have different instances representing the
* same Sentinels, one per primary, and we need to share the hiredis connections
Expand Down Expand Up @@ -416,6 +419,7 @@ int sentinelSendPing(sentinelValkeyInstance *ri);
int sentinelForceHelloUpdateForPrimary(sentinelValkeyInstance *primary);
sentinelValkeyInstance *getSentinelValkeyInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid);
void sentinelSimFailureCrash(void);
void sentinelAskPrimaryStateToOtherSentinels(sentinelValkeyInstance *primary, int flags);

/* ========================= Dictionary types =============================== */

Expand Down Expand Up @@ -3884,7 +3888,7 @@ void sentinelCommand(client *c) {
return;
if (c->argc == 4) {
if (!strcasecmp(c->argv[3]->ptr, "coordinated")) {
coordinated = SRI_COORD_FAILOVER;
coordinated = 1;
} else {
addReplyError(c, "Unknown failover option specified");
return;
Expand All @@ -3899,8 +3903,19 @@ void sentinelCommand(client *c) {
return;
}
serverLog(LL_NOTICE, "Executing user requested FAILOVER of '%s'", ri->name);
ri->s_down_since_time = mstime();
ri->flags |= SRI_S_DOWN;
sentinelStartFailover(ri);
ri->flags |= SRI_FORCE_FAILOVER | coordinated;
if (coordinated) {
ri->flags |= SRI_COORD_FAILOVER;
/* Initiate a leader election, The SENTINEL_FAILOVER_STATE_WAIT_START
state will wait until we are elected. */
sentinelAskPrimaryStateToOtherSentinels(ri, SENTINEL_ASK_FORCED);
} else {
/* SRI_FORCE_FAILOVER will cause the SENTINEL_FAILOVER_STATE_WAIT_START
state to regard us as leader (without election). */
ri->flags |= SRI_FORCE_FAILOVER;
}
addReply(c, shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr, "pending-scripts")) {
/* SENTINEL PENDING-SCRIPTS */
Expand Down Expand Up @@ -4513,7 +4528,6 @@ void sentinelReceiveIsPrimaryDownReply(redisAsyncContext *c, void *reply, void *
* SENTINEL IS-PRIMARY-DOWN-BY-ADDR requests to other sentinels
* in order to get the replies that allow to reach the quorum
* needed to mark the primary in ODOWN state and trigger a failover. */
#define SENTINEL_ASK_FORCED (1 << 0)
void sentinelAskPrimaryStateToOtherSentinels(sentinelValkeyInstance *primary, int flags) {
dictIterator *di;
dictEntry *de;
Expand Down Expand Up @@ -5067,25 +5081,28 @@ void sentinelFailoverSelectReplica(sentinelValkeyInstance *ri) {

void sentinelFailoverSendFailover(sentinelValkeyInstance *ri) {
int retval;
mstime_t time_passed = mstime() - ri->failover_state_change_time;

/* If we don't have enough time left (1 second) for the FAILOVER command
* timeout, then abort the failover. */
if (ri->failover_timeout - time_passed < 1000) {
sentinelEvent(LL_WARNING, "-failover-abort-master-timeout", ri, "%@");
sentinelAbortFailover(ri);
return;
}
/* We can't send the command to the master if it is now
* disconnected. Retry again and again with this state until the timeout
* is reached, then abort the failover. */
mstime_t time_passed = mstime() - ri->failover_state_change_time;
* disconnected. Retry again and again with this state (until the timeout
* is reached and we abort the failover.) */
if (ri->link->disconnected) {
if (time_passed > ri->failover_timeout) {
sentinelEvent(LL_WARNING, "-failover-abort-master-timeout", ri, "%@");
sentinelAbortFailover(ri);
}
return;
}

/* Send FAILOVER command to switch the role of the master and the
/* Send FAILOVER command to switch the role of the primary and the
* promoted replica. We actually register a generic callback for this
* command as we don't really care about the reply. We check if it worked
* indirectly observing if INFO returns a different role (master instead of
* slave). */
retval = sentinelFailoverTo(ri, ri->promoted_replica->addr, ri->down_after_period);
retval = sentinelFailoverTo(ri, ri->promoted_replica->addr, ri->failover_timeout - time_passed);
if (retval != C_OK) return;
sentinelEvent(LL_NOTICE, "+failover-state-wait-promotion",
ri->promoted_replica, "%@");
Expand Down

0 comments on commit bde2e68

Please sign in to comment.