diff --git a/snow/engine/avalanche/bootstrap/bootstrapper.go b/snow/engine/avalanche/bootstrap/bootstrapper.go index 2de048c49009..86b07a12df83 100644 --- a/snow/engine/avalanche/bootstrap/bootstrapper.go +++ b/snow/engine/avalanche/bootstrap/bootstrapper.go @@ -283,6 +283,10 @@ func (*bootstrapper) Gossip(context.Context) error { func (b *bootstrapper) Shutdown(ctx context.Context) error { b.Ctx.Log.Info("shutting down bootstrapper") + + b.Ctx.Lock.Lock() + defer b.Ctx.Lock.Unlock() + return b.VM.Shutdown(ctx) } diff --git a/snow/engine/snowman/bootstrap/bootstrapper.go b/snow/engine/snowman/bootstrap/bootstrapper.go index 8f8336022b25..b19ff3c19dca 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper.go +++ b/snow/engine/snowman/bootstrap/bootstrapper.go @@ -295,6 +295,10 @@ func (*bootstrapper) Gossip(context.Context) error { func (b *bootstrapper) Shutdown(ctx context.Context) error { b.Ctx.Log.Info("shutting down bootstrapper") + + b.Ctx.Lock.Lock() + defer b.Ctx.Lock.Unlock() + return b.VM.Shutdown(ctx) } diff --git a/snow/engine/snowman/syncer/state_syncer.go b/snow/engine/snowman/syncer/state_syncer.go index 264b5a2b8d96..79933cd995b4 100644 --- a/snow/engine/snowman/syncer/state_syncer.go +++ b/snow/engine/snowman/syncer/state_syncer.go @@ -590,6 +590,10 @@ func (*stateSyncer) Gossip(context.Context) error { func (ss *stateSyncer) Shutdown(ctx context.Context) error { ss.Config.Ctx.Log.Info("shutting down state syncer") + + ss.Ctx.Lock.Lock() + defer ss.Ctx.Lock.Unlock() + return ss.VM.Shutdown(ctx) } diff --git a/snow/engine/snowman/transitive.go b/snow/engine/snowman/transitive.go index d61e36128006..69c890743269 100644 --- a/snow/engine/snowman/transitive.go +++ b/snow/engine/snowman/transitive.go @@ -367,6 +367,10 @@ func (*Transitive) Halt(context.Context) {} func (t *Transitive) Shutdown(ctx context.Context) error { t.Ctx.Log.Info("shutting down consensus engine") + + t.Ctx.Lock.Lock() + defer t.Ctx.Lock.Unlock() + return t.VM.Shutdown(ctx) } diff --git a/snow/networking/handler/handler.go b/snow/networking/handler/handler.go index e908d76a57b5..e23876eb1abf 100644 --- a/snow/networking/handler/handler.go +++ b/snow/networking/handler/handler.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "sync" + "sync/atomic" "time" "github.com/prometheus/client_golang/prometheus" @@ -116,7 +117,7 @@ type handler struct { startClosingTime time.Time totalClosingTime time.Duration closingChan chan struct{} - numDispatchersClosed int + numDispatchersClosed atomic.Uint32 // Closed when this handler and [engine] are done shutting down closed chan struct{} @@ -220,22 +221,24 @@ func (h *handler) selectStartingGear(ctx context.Context) (common.Engine, error) func (h *handler) Start(ctx context.Context, recoverPanic bool) { h.ctx.Lock.Lock() - defer h.ctx.Lock.Unlock() - gear, err := h.selectStartingGear(ctx) if err != nil { + h.ctx.Lock.Unlock() + h.ctx.Log.Error("chain failed to select starting gear", zap.Error(err), ) - h.shutdown(ctx) + h.shutdown(ctx, h.clock.Time()) return } - if err := gear.Start(ctx, 0); err != nil { + err = gear.Start(ctx, 0) + h.ctx.Lock.Unlock() + if err != nil { h.ctx.Log.Error("chain failed to start", zap.Error(err), ) - h.shutdown(ctx) + h.shutdown(ctx, h.clock.Time()) return } @@ -326,7 +329,7 @@ func (h *handler) Stop(ctx context.Context) { state := h.ctx.State.Get() bootstrapper, ok := h.engineManager.Get(state.Type).Get(snow.Bootstrapping) if !ok { - h.ctx.Log.Error("bootstrapping engine doesn't exists", + h.ctx.Log.Error("bootstrapping engine doesn't exist", zap.Stringer("type", state.Type), ) return @@ -998,35 +1001,27 @@ func (h *handler) popUnexpiredMsg( } } +// Invariant: if closeDispatcher is called, Stop has already been called. func (h *handler) closeDispatcher(ctx context.Context) { - h.ctx.Lock.Lock() - defer h.ctx.Lock.Unlock() - - h.numDispatchersClosed++ - if h.numDispatchersClosed < numDispatchersToClose { + if h.numDispatchersClosed.Add(1) < numDispatchersToClose { return } - h.shutdown(ctx) + h.shutdown(ctx, h.startClosingTime) } -// Note: shutdown is only called after all message dispatchers have exited. -func (h *handler) shutdown(ctx context.Context) { +// Note: shutdown is only called after all message dispatchers have exited or if +// no message dispatchers ever started. +func (h *handler) shutdown(ctx context.Context, startClosingTime time.Time) { defer func() { if h.onStopped != nil { go h.onStopped() } - h.totalClosingTime = h.clock.Time().Sub(h.startClosingTime) + h.totalClosingTime = h.clock.Time().Sub(startClosingTime) close(h.closed) }() - // shutdown may be called during Start, so we populate the start closing - // time here in case Stop was never called. - if h.startClosingTime.IsZero() { - h.startClosingTime = h.clock.Time() - } - state := h.ctx.State.Get() engine, ok := h.engineManager.Get(state.Type).Get(state.State) if !ok {