Skip to content

Commit

Permalink
Merge pull request #659 from gocardless/hmac/fix-hanging-sentinels
Browse files Browse the repository at this point in the history
Fix hanging sentinels
  • Loading branch information
sgotti authored Jul 20, 2019
2 parents ed435bd + beadd2a commit 54df13a
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
6 changes: 6 additions & 0 deletions cmd/sentinel/cmd/sentinel.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ func (s *Sentinel) electionLoop(ctx context.Context) {
case err := <-errCh:
if err != nil {
log.Errorw("election loop error", zap.Error(err))

// It's important to Stop() any on-going elections, as most stores will block
// until all previous elections have completed. If we continue without stopping,
// we run the risk of preventing any subsequent elections from successfully
// electing a leader.
s.election.Stop()
}
goto end
case <-ctx.Done():
Expand Down
5 changes: 0 additions & 5 deletions internal/store/etcdv3.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,12 @@ func (e *etcdv3Election) campaign() {
e.electedCh <- false
s, err := concurrency.NewSession(e.c, concurrency.WithTTL(int(e.ttl.Seconds())), concurrency.WithContext(e.ctx))
if err != nil {
e.running = false
e.errCh <- err
return
}

etcdElection := concurrency.NewElection(s, e.path)
if err = etcdElection.Campaign(e.ctx, e.candidateUID); err != nil {
e.running = false
e.errCh <- err
return
}
Expand All @@ -215,11 +213,8 @@ func (e *etcdv3Election) campaign() {

select {
case <-e.ctx.Done():
e.running = false
etcdElection.Resign(context.TODO())
return
case <-s.Done():
etcdElection.Resign(context.TODO())
e.electedCh <- false
}
}
Expand Down
5 changes: 5 additions & 0 deletions internal/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ type Election interface {
// TODO(sgotti) this mimics the current docker/leadership API and the etcdv3
// implementations adapt to it. In future it could be replaced with a better
// api like the current one implemented by etcdclientv3/concurrency.
//
// WARNING: If the election error channel receives any error, it is vital that
// the consuming code calls election.Stop(). Failure to do so can cause
// subsequent elections to hang indefinitely across all participants of an
// election.
RunForElection() (<-chan bool, <-chan error)
Leader() (string, error)
Stop()
Expand Down

0 comments on commit 54df13a

Please sign in to comment.