forked from valkey-io/valkey
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make cluster meet reliable under link failures (valkey-io#461)
When there is a link failure while an ongoing MEET request is being sent, the sending node stops sending any more MEETs and starts sending PINGs. Since every node responds to PINGs from unknown nodes with a PONG, the receiving node never adds the sending node. But the sending node adds the receiving node when it sees a PONG. This can lead to asymmetry in cluster membership. This change makes the sender keep sending MEETs until it sees a PONG, avoiding the asymmetry. --------- Signed-off-by: Sankar <[email protected]> Signed-off-by: Ping Xie <[email protected]>
- Loading branch information
Showing
5 changed files
with
189 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# Verifies that a node keeps re-sending MEET while the PONG replies to its
# MEET are being dropped, and that the two nodes converge once the drop
# filter is removed.

# Make sure the test infra won't use SELECT.
set old_singledb $::singledb
set ::singledb 1

tags {tls:skip external:skip cluster} {
    set base_conf [list cluster-enabled yes]
    start_multiple_servers 2 [list overrides $base_conf] {
        test "Cluster nodes are reachable" {
            for {set id 0} {$id < [llength $::servers]} {incr id} {
                # Every node should be reachable.
                wait_for_condition 1000 50 {
                    ([catch {R $id ping} ping_reply] == 0) &&
                    ($ping_reply eq {PONG})
                } else {
                    catch {R $id ping} err
                    fail "Node #$id keeps replying '$err' to PING."
                }
            }
        }

        test "Before slots allocation, all nodes report cluster failure" {
            wait_for_cluster_state fail
        }

        # Packet type values handed to DEBUG DROP-CLUSTER-PACKET-FILTER below.
        # NONE is the value used to undo the packet drop at the end of the test.
        set CLUSTER_PACKET_TYPE_PONG 1
        set CLUSTER_PACKET_TYPE_NONE -1

        test "Cluster nodes haven't met each other" {
            assert {[llength [get_cluster_nodes 1]] == 1}
            assert {[llength [get_cluster_nodes 0]] == 1}
        }

        test "Allocate slots" {
            cluster_allocate_slots 2 0;# primaries replicas
        }

        test "Multiple MEETs from Node 1 to Node 0 should work" {
            # Make 1 drop the PONG responses to MEET
            R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_PONG
            # It is important to close the connection on drop, otherwise a
            # subsequent MEET won't be sent
            R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1

            R 1 CLUSTER MEET 127.0.0.1 [srv 0 port]

            # Wait for at least a few MEETs to be sent so that we are sure that
            # 1 is dropping the response to MEET.
            wait_for_condition 1000 50 {
                [CI 0 cluster_stats_messages_meet_received] > 1 &&
                [CI 1 cluster_state] eq {fail} && [CI 0 cluster_state] eq {ok}
            } else {
                fail "Cluster node 1 never sent multiple MEETs to 0"
            }

            # 0 will be connected to 1, but 1 won't see that 0 is connected
            assert {[llength [get_cluster_nodes 1 connected]] == 1}
            assert {[llength [get_cluster_nodes 0 connected]] == 2}

            # Drop incoming and outgoing links from/to 1
            R 0 DEBUG CLUSTERLINK KILL ALL [R 1 CLUSTER MYID]

            # Wait for 0 to know about 1 again after 1 sends a MEET
            wait_for_condition 1000 50 {
                [llength [get_cluster_nodes 0 connected]] == 2
            } else {
                # This wait is about node 0 seeing node 1 as connected again,
                # so report that rather than the MEET-count message used above.
                fail "Cluster node 0 never saw node 1 as connected again after its links were killed"
            }

            # Undo packet drop
            R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
            R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 0

            # Both nodes 0 and 1 will turn to cluster state ok, and every MEET
            # sent by 1 must have been counted as received by 0.
            wait_for_condition 1000 50 {
                [CI 1 cluster_state] eq {ok} && [CI 0 cluster_state] eq {ok} &&
                [CI 1 cluster_stats_messages_meet_sent] == [CI 0 cluster_stats_messages_meet_received]
            } else {
                # Report the MEET counters too: the condition can fail on them
                # even when both cluster states are already ok.
                fail "1 cluster_state:[CI 1 cluster_state], 0 cluster_state: [CI 0 cluster_state], 1 meet_sent: [CI 1 cluster_stats_messages_meet_sent], 0 meet_received: [CI 0 cluster_stats_messages_meet_received]"
            }
        }
    } ;# stop servers
} ;# tags

set ::singledb $old_singledb
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Verifies that CLUSTER MEET is retried until it succeeds when the target
# node drops the initial MEET packets.

# Make sure the test infra won't use SELECT.
set old_singledb $::singledb
set ::singledb 1

tags {tls:skip external:skip cluster} {
    set base_conf [list cluster-enabled yes]
    start_multiple_servers 2 [list overrides $base_conf] {
        test "Cluster nodes are reachable" {
            for {set id 0} {$id < [llength $::servers]} {incr id} {
                # Every node should be reachable.
                wait_for_condition 1000 50 {
                    ([catch {R $id ping} ping_reply] == 0) &&
                    ($ping_reply eq {PONG})
                } else {
                    catch {R $id ping} err
                    fail "Node #$id keeps replying '$err' to PING."
                }
            }
        }

        test "Before slots allocation, all nodes report cluster failure" {
            wait_for_cluster_state fail
        }

        # Packet type values handed to DEBUG DROP-CLUSTER-PACKET-FILTER below.
        # NONE is the value used to stop dropping packets later in the test.
        set CLUSTER_PACKET_TYPE_MEET 2
        set CLUSTER_PACKET_TYPE_NONE -1

        test "Cluster nodes haven't met each other" {
            assert {[llength [get_cluster_nodes 1]] == 1}
            assert {[llength [get_cluster_nodes 0]] == 1}
        }

        test "Allocate slots" {
            cluster_allocate_slots 2 0;# primaries replicas
        }

        test "MEET is reliable when target drops the initial MEETs" {
            # Make 0 drop the initial MEET messages due to link failure
            R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET
            R 0 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1

            R 1 CLUSTER MEET 127.0.0.1 [srv 0 port]

            # Wait for at least a few MEETs to be sent so that we are sure that
            # 0 is dropping them.
            wait_for_condition 1000 50 {
                [CI 0 cluster_stats_messages_meet_received] >= 3
            } else {
                fail "Cluster node 1 never sent multiple MEETs to 0"
            }

            # Make sure the nodes still don't know about each other
            assert {[llength [get_cluster_nodes 1 connected]] == 1}
            assert {[llength [get_cluster_nodes 0 connected]] == 1}

            # Undo packet drop. The close-on-drop flag is reset as well so the
            # node is left in its original debug configuration.
            R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
            R 0 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 0

            # If the MEET is reliable, both nodes 0 and 1 will turn to cluster
            # state ok, and at least one more MEET must have been received by 0
            # after the filter was removed.
            wait_for_condition 1000 50 {
                [CI 1 cluster_state] eq {ok} && [CI 0 cluster_state] eq {ok} &&
                [CI 0 cluster_stats_messages_meet_received] >= 4 &&
                [CI 1 cluster_stats_messages_meet_sent] == [CI 0 cluster_stats_messages_meet_received]
            } else {
                # Report the MEET counters too: the condition can fail on them
                # even when both cluster states are already ok.
                fail "1 cluster_state:[CI 1 cluster_state], 0 cluster_state: [CI 0 cluster_state], 1 meet_sent: [CI 1 cluster_stats_messages_meet_sent], 0 meet_received: [CI 0 cluster_stats_messages_meet_received]"
            }
        }
    } ;# stop servers
} ;# tags

set ::singledb $old_singledb
|