Skip to content

Commit

Permalink
Merge pull request hashicorp#4528 from hashicorp/autopilot-fixes
Browse files Browse the repository at this point in the history
Fix inconsistency caused by the autopilot StatsFetcher
  • Loading branch information
kyhavlov authored Aug 15, 2018
2 parents 771e0ba + 4b35d87 commit fa8990c
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 2 deletions.
11 changes: 9 additions & 2 deletions agent/consul/autopilot/autopilot.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,12 +204,19 @@ func (a *Autopilot) pruneDeadServers() error {
}
if server != nil {
// todo(kyhavlov): change this to index by UUID
if _, ok := staleRaftServers[server.Addr.String()]; ok {
s, found := staleRaftServers[server.Addr.String()]
if found {
delete(staleRaftServers, server.Addr.String())
}

if member.Status == serf.StatusFailed {
failed = append(failed, member.Name)
// If the node is a nonvoter, we can remove it immediately.
if found && s.Suffrage == raft.Nonvoter {
a.logger.Printf("[INFO] autopilot: Attempting removal of failed server node %q", member.Name)
go serfLAN.RemoveFailedNode(member.Name)
} else {
failed = append(failed, member.Name)
}
}
}
}
Expand Down
24 changes: 24 additions & 0 deletions agent/consul/autopilot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,30 @@ func testCleanupDeadServer(t *testing.T, raftVersion int) {
}
}

// TestAutopilot_CleanupDeadNonvoter starts two servers, joins the second to
// the first, shuts the second down as soon as both appear in Raft, and then
// waits until the Raft configuration contains only the first server again.
func TestAutopilot_CleanupDeadNonvoter(t *testing.T) {
	seedDir, seed := testServer(t)
	defer os.RemoveAll(seedDir)
	defer seed.Shutdown()

	joinerDir, joiner := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(joinerDir)
	defer joiner.Shutdown()

	testrpc.WaitForLeader(t, seed.RPC, "dc1")

	// Retry checks for the two Raft memberships this test expects to observe.
	bothInRaft := func(r *retry.R) {
		r.Check(wantRaft([]*Server{seed, joiner}))
	}
	onlySeedInRaft := func(r *retry.R) {
		r.Check(wantRaft([]*Server{seed}))
	}

	// Join the second server and stop it right after it shows up in Raft, so
	// that it goes down before it has had a chance to be promoted to a voter.
	joinLAN(t, joiner, seed)
	retry.Run(t, bothInRaft)
	joiner.Shutdown()

	// The stopped server should eventually disappear from the Raft peers.
	retry.Run(t, onlySeedInRaft)
}

func TestAutopilot_CleanupDeadServerPeriodic(t *testing.T) {
t.Parallel()
dir1, s1 := testServerWithConfig(t, func(c *Config) {
Expand Down
8 changes: 8 additions & 0 deletions agent/consul/stats_fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ func (f *StatsFetcher) Fetch(ctx context.Context, members []serf.Member) map[str
// canceled.
replies := make(map[string]*autopilot.ServerStats)
for _, workItem := range work {
// Drain the reply first if there is one.
select {
case reply := <-workItem.replyCh:
replies[workItem.server.ID] = reply
continue
default:
}

select {
case reply := <-workItem.replyCh:
replies[workItem.server.ID] = reply
Expand Down

0 comments on commit fa8990c

Please sign in to comment.