From 4d9bfdd533cea2b2a35bdfd7fb8df82a7c6d8b40 Mon Sep 17 00:00:00 2001 From: marun Date: Thu, 21 Mar 2024 16:42:00 +0100 Subject: [PATCH 1/3] `tmpnet`: Reuse dynamically-allocated API port across restarts (#2857) --- tests/fixture/tmpnet/network.go | 17 +++++++++++++++++ tests/fixture/tmpnet/node.go | 29 ++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/tests/fixture/tmpnet/network.go b/tests/fixture/tmpnet/network.go index ceff7bcff982..3ca8318efa85 100644 --- a/tests/fixture/tmpnet/network.go +++ b/tests/fixture/tmpnet/network.go @@ -441,6 +441,23 @@ func (n *Network) Restart(ctx context.Context, w io.Writer) error { return err } for _, node := range n.Nodes { + // Ensure the node reuses the same API port across restarts to ensure + // consistent labeling of metrics. Otherwise prometheus's automatic + // addition of the `instance` label (host:port) results in + // segmentation of results for a given node every time the port + // changes on restart. This segmentation causes graphs on the grafana + // dashboards to display multiple series per graph for a given node, + // one for each port that the node used. + // + // There is a non-zero chance of the port being allocatted to a + // different process and the node subsequently being unable to start, + // but the alternative is having to update the grafana dashboards + // query-by-query to ensure that node metrics ignore the instance + // label. + if err := node.SaveAPIPort(); err != nil { + return err + } + if err := node.Stop(ctx); err != nil { return fmt.Errorf("failed to stop node %s: %w", node.NodeID, err) } diff --git a/tests/fixture/tmpnet/node.go b/tests/fixture/tmpnet/node.go index 10f80371cbf4..2d20ce098587 100644 --- a/tests/fixture/tmpnet/node.go +++ b/tests/fixture/tmpnet/node.go @@ -9,6 +9,7 @@ import ( "errors" "fmt" "io" + "net" "net/http" "os" "path/filepath" @@ -214,13 +215,14 @@ func (n *Node) Stop(ctx context.Context) error { // Sets networking configuration for the node. // Convenience method for setting networking flags. func (n *Node) SetNetworkingConfig(bootstrapIDs []string, bootstrapIPs []string) { - var ( - // Use dynamic port allocation. - httpPort uint16 = 0 - stakingPort uint16 = 0 - ) - n.Flags[config.HTTPPortKey] = httpPort - n.Flags[config.StakingPortKey] = stakingPort + if _, ok := n.Flags[config.HTTPPortKey]; !ok { + // Default to dynamic port allocation + n.Flags[config.HTTPPortKey] = 0 + } + if _, ok := n.Flags[config.StakingPortKey]; !ok { + // Default to dynamic port allocation + n.Flags[config.StakingPortKey] = 0 + } n.Flags[config.BootstrapIDsKey] = strings.Join(bootstrapIDs, ",") n.Flags[config.BootstrapIPsKey] = strings.Join(bootstrapIPs, ",") } @@ -348,3 +350,16 @@ func (n *Node) EnsureNodeID() error { return nil } + +// Saves the currently allocated API port to the node's configuration +// for use across restarts. Reusing the port ensures consistent +// labeling of metrics. +func (n *Node) SaveAPIPort() error { + hostPort := strings.TrimPrefix(n.URI, "http://") + _, port, err := net.SplitHostPort(hostPort) + if err != nil { + return err + } + n.Flags[config.HTTPPortKey] = port + return nil +} From 14cdc04ea44a6b01fe1a308714e0a2e4e24b0213 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Thu, 21 Mar 2024 11:43:13 -0400 Subject: [PATCH 2/3] Remove useless bootstrapping metric (#2858) --- .../avalanche/bootstrap/bootstrapper.go | 4 ---- snow/engine/avalanche/bootstrap/metrics.go | 16 ++----------- snow/engine/avalanche/bootstrap/tx_job.go | 15 +++++------- snow/engine/avalanche/bootstrap/vertex_job.go | 24 +++++++++---------- snow/engine/snowman/bootstrap/block_job.go | 17 ++++++------- snow/engine/snowman/bootstrap/bootstrapper.go | 2 -- .../snowman/bootstrap/bootstrapper_test.go | 1 - snow/engine/snowman/bootstrap/metrics.go | 10 ++------ 8 files changed, 28 insertions(+), 61 deletions(-) diff --git a/snow/engine/avalanche/bootstrap/bootstrapper.go b/snow/engine/avalanche/bootstrap/bootstrapper.go index cd530d1cb1f8..d3028bc3bfce 100644 --- a/snow/engine/avalanche/bootstrap/bootstrapper.go +++ b/snow/engine/avalanche/bootstrap/bootstrapper.go @@ -325,7 +325,6 @@ func (b *bootstrapper) Start(ctx context.Context, startReqID uint32) error { if err := b.VtxBlocked.SetParser(ctx, &vtxParser{ log: b.Ctx.Log, numAccepted: b.numAcceptedVts, - numDropped: b.numDroppedVts, manager: b.Manager, }); err != nil { return err @@ -334,7 +333,6 @@ func (b *bootstrapper) Start(ctx context.Context, startReqID uint32) error { if err := b.TxBlocked.SetParser(&txParser{ log: b.Ctx.Log, numAccepted: b.numAcceptedTxs, - numDropped: b.numDroppedTxs, vm: b.VM, }); err != nil { return err @@ -475,7 +473,6 @@ func (b *bootstrapper) process(ctx context.Context, vtxs ...avalanche.Vertex) er pushed, err := b.VtxBlocked.Push(ctx, &vertexJob{ log: b.Ctx.Log, numAccepted: b.numAcceptedVts, - numDropped: b.numDroppedVts, vtx: vtx, }) if err != nil { @@ -497,7 +494,6 @@ func (b *bootstrapper) process(ctx context.Context, vtxs ...avalanche.Vertex) er pushed, err := b.TxBlocked.Push(ctx, &txJob{ log: b.Ctx.Log, numAccepted: b.numAcceptedTxs, - numDropped: b.numDroppedTxs, tx: tx, }) if err != nil { diff --git a/snow/engine/avalanche/bootstrap/metrics.go b/snow/engine/avalanche/bootstrap/metrics.go index cc357f25901f..5ad1b3713647 100644 --- a/snow/engine/avalanche/bootstrap/metrics.go +++ b/snow/engine/avalanche/bootstrap/metrics.go @@ -10,8 +10,8 @@ import ( ) type metrics struct { - numFetchedVts, numDroppedVts, numAcceptedVts, - numFetchedTxs, numDroppedTxs, numAcceptedTxs prometheus.Counter + numFetchedVts, numAcceptedVts, + numFetchedTxs, numAcceptedTxs prometheus.Counter } func (m *metrics) Initialize( @@ -23,11 +23,6 @@ func (m *metrics) Initialize( Name: "fetched_vts", Help: "Number of vertices fetched during bootstrapping", }) - m.numDroppedVts = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "dropped_vts", - Help: "Number of vertices dropped during bootstrapping", - }) m.numAcceptedVts = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Name: "accepted_vts", @@ -39,11 +34,6 @@ func (m *metrics) Initialize( Name: "fetched_txs", Help: "Number of transactions fetched during bootstrapping", }) - m.numDroppedTxs = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "dropped_txs", - Help: "Number of transactions dropped during bootstrapping", - }) m.numAcceptedTxs = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Name: "accepted_txs", @@ -52,10 +42,8 @@ func (m *metrics) Initialize( return utils.Err( registerer.Register(m.numFetchedVts), - registerer.Register(m.numDroppedVts), registerer.Register(m.numAcceptedVts), registerer.Register(m.numFetchedTxs), - registerer.Register(m.numDroppedTxs), registerer.Register(m.numAcceptedTxs), ) } diff --git a/snow/engine/avalanche/bootstrap/tx_job.go b/snow/engine/avalanche/bootstrap/tx_job.go index 5a2ff3d98f64..17a2dcb53127 100644 --- a/snow/engine/avalanche/bootstrap/tx_job.go +++ b/snow/engine/avalanche/bootstrap/tx_job.go @@ -23,9 +23,9 @@ import ( var errMissingTxDependenciesOnAccept = errors.New("attempting to accept a transaction with missing dependencies") type txParser struct { - log logging.Logger - numAccepted, numDropped prometheus.Counter - vm vertex.LinearizableVM + log logging.Logger + numAccepted prometheus.Counter + vm vertex.LinearizableVM } func (p *txParser) Parse(ctx context.Context, txBytes []byte) (queue.Job, error) { @@ -36,15 +36,14 @@ func (p *txParser) Parse(ctx context.Context, txBytes []byte) (queue.Job, error) return &txJob{ log: p.log, numAccepted: p.numAccepted, - numDropped: p.numDropped, tx: tx, }, nil } type txJob struct { - log logging.Logger - numAccepted, numDropped prometheus.Counter - tx snowstorm.Tx + log logging.Logger + numAccepted prometheus.Counter + tx snowstorm.Tx } func (t *txJob) ID() ids.ID { @@ -67,14 +66,12 @@ func (t *txJob) Execute(ctx context.Context) error { return err } if hasMissingDeps { - t.numDropped.Inc() return errMissingTxDependenciesOnAccept } status := t.tx.Status() switch status { case choices.Unknown, choices.Rejected: - t.numDropped.Inc() return fmt.Errorf("attempting to execute transaction with status %s", status) case choices.Processing: txID := t.tx.ID() diff --git a/snow/engine/avalanche/bootstrap/vertex_job.go b/snow/engine/avalanche/bootstrap/vertex_job.go index 8860b61d816a..21c0b93ed937 100644 --- a/snow/engine/avalanche/bootstrap/vertex_job.go +++ b/snow/engine/avalanche/bootstrap/vertex_job.go @@ -20,12 +20,15 @@ import ( "github.com/ava-labs/avalanchego/utils/set" ) -var errMissingVtxDependenciesOnAccept = errors.New("attempting to execute blocked vertex") +var ( + errMissingVtxDependenciesOnAccept = errors.New("attempting to execute blocked vertex") + errTxNotAcceptedInVtxOnAccept = errors.New("attempting to execute vertex with non-accepted transaction") +) type vtxParser struct { - log logging.Logger - numAccepted, numDropped prometheus.Counter - manager vertex.Manager + log logging.Logger + numAccepted prometheus.Counter + manager vertex.Manager } func (p *vtxParser) Parse(ctx context.Context, vtxBytes []byte) (queue.Job, error) { @@ -36,15 +39,14 @@ func (p *vtxParser) Parse(ctx context.Context, vtxBytes []byte) (queue.Job, erro return &vertexJob{ log: p.log, numAccepted: p.numAccepted, - numDropped: p.numDropped, vtx: vtx, }, nil } type vertexJob struct { - log logging.Logger - numAccepted, numDropped prometheus.Counter - vtx avalanche.Vertex + log logging.Logger + numAccepted prometheus.Counter + vtx avalanche.Vertex } func (v *vertexJob) ID() ids.ID { @@ -85,7 +87,6 @@ func (v *vertexJob) Execute(ctx context.Context) error { return err } if hasMissingDependencies { - v.numDropped.Inc() return errMissingVtxDependenciesOnAccept } txs, err := v.vtx.Txs(ctx) @@ -94,15 +95,12 @@ func (v *vertexJob) Execute(ctx context.Context) error { } for _, tx := range txs { if tx.Status() != choices.Accepted { - v.numDropped.Inc() - v.log.Warn("attempting to execute vertex with non-accepted transactions") - return nil + return errTxNotAcceptedInVtxOnAccept } } status := v.vtx.Status() switch status { case choices.Unknown, choices.Rejected: - v.numDropped.Inc() return fmt.Errorf("attempting to execute vertex with status %s", status) case choices.Processing: v.numAccepted.Inc() diff --git a/snow/engine/snowman/bootstrap/block_job.go b/snow/engine/snowman/bootstrap/block_job.go index a9496316f1fb..403327006f85 100644 --- a/snow/engine/snowman/bootstrap/block_job.go +++ b/snow/engine/snowman/bootstrap/block_job.go @@ -23,9 +23,9 @@ import ( var errMissingDependenciesOnAccept = errors.New("attempting to accept a block with missing dependencies") type parser struct { - log logging.Logger - numAccepted, numDropped prometheus.Counter - vm block.ChainVM + log logging.Logger + numAccepted prometheus.Counter + vm block.ChainVM } func (p *parser) Parse(ctx context.Context, blkBytes []byte) (queue.Job, error) { @@ -36,17 +36,16 @@ func (p *parser) Parse(ctx context.Context, blkBytes []byte) (queue.Job, error) return &blockJob{ log: p.log, numAccepted: p.numAccepted, - numDropped: p.numDropped, blk: blk, vm: p.vm, }, nil } type blockJob struct { - log logging.Logger - numAccepted, numDropped prometheus.Counter - blk snowman.Block - vm block.Getter + log logging.Logger + numAccepted prometheus.Counter + blk snowman.Block + vm block.Getter } func (b *blockJob) ID() ids.ID { @@ -76,13 +75,11 @@ func (b *blockJob) Execute(ctx context.Context) error { return err } if hasMissingDeps { - b.numDropped.Inc() return errMissingDependenciesOnAccept } status := b.blk.Status() switch status { case choices.Unknown, choices.Rejected: - b.numDropped.Inc() return fmt.Errorf("attempting to execute block with status %s", status) case choices.Processing: blkID := b.blk.ID() diff --git a/snow/engine/snowman/bootstrap/bootstrapper.go b/snow/engine/snowman/bootstrap/bootstrapper.go index 29754a24d734..1579fa5bf091 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper.go +++ b/snow/engine/snowman/bootstrap/bootstrapper.go @@ -170,7 +170,6 @@ func (b *Bootstrapper) Start(ctx context.Context, startReqID uint32) error { b.parser = &parser{ log: b.Ctx.Log, numAccepted: b.numAccepted, - numDropped: b.numDropped, vm: b.VM, } if err := b.Blocked.SetParser(ctx, b.parser); err != nil { @@ -616,7 +615,6 @@ func (b *Bootstrapper) process(ctx context.Context, blk snowman.Block, processin pushed, err := b.Blocked.Push(ctx, &blockJob{ log: b.Ctx.Log, numAccepted: b.numAccepted, - numDropped: b.numDropped, blk: blk, vm: b.VM, }) diff --git a/snow/engine/snowman/bootstrap/bootstrapper_test.go b/snow/engine/snowman/bootstrap/bootstrapper_test.go index d5cb9cc763fc..846b082ce8f0 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper_test.go +++ b/snow/engine/snowman/bootstrap/bootstrapper_test.go @@ -1386,7 +1386,6 @@ func TestBootstrapNoParseOnNew(t *testing.T) { pushed, err := blocker.Push(context.Background(), &blockJob{ log: logging.NoLog{}, numAccepted: prometheus.NewCounter(prometheus.CounterOpts{}), - numDropped: prometheus.NewCounter(prometheus.CounterOpts{}), blk: blk1, vm: vm, }) diff --git a/snow/engine/snowman/bootstrap/metrics.go b/snow/engine/snowman/bootstrap/metrics.go index f6ad90d16419..aea46d2a93e8 100644 --- a/snow/engine/snowman/bootstrap/metrics.go +++ b/snow/engine/snowman/bootstrap/metrics.go @@ -10,8 +10,8 @@ import ( ) type metrics struct { - numFetched, numDropped, numAccepted prometheus.Counter - fetchETA prometheus.Gauge + numFetched, numAccepted prometheus.Counter + fetchETA prometheus.Gauge } func newMetrics(namespace string, registerer prometheus.Registerer) (*metrics, error) { @@ -21,11 +21,6 @@ func newMetrics(namespace string, registerer prometheus.Registerer) (*metrics, e Name: "fetched", Help: "Number of blocks fetched during bootstrapping", }), - numDropped: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "dropped", - Help: "Number of blocks dropped during bootstrapping", - }), numAccepted: prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Name: "accepted", @@ -40,7 +35,6 @@ func newMetrics(namespace string, registerer prometheus.Registerer) (*metrics, e err := utils.Err( registerer.Register(m.numFetched), - registerer.Register(m.numDropped), registerer.Register(m.numAccepted), registerer.Register(m.fetchETA), ) From ffdb67f315e5edbb40666909b308b0d5346e2ee8 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Thu, 21 Mar 2024 15:39:56 -0400 Subject: [PATCH 3/3] Remove duplicate log (#2860) --- snow/engine/snowman/bootstrap/bootstrapper.go | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/snow/engine/snowman/bootstrap/bootstrapper.go b/snow/engine/snowman/bootstrap/bootstrapper.go index 1579fa5bf091..ebbbd6f13ff7 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper.go +++ b/snow/engine/snowman/bootstrap/bootstrapper.go @@ -307,16 +307,6 @@ func (b *Bootstrapper) sendBootstrappingMessagesOrFinish(ctx context.Context) er return b.startBootstrapping(ctx) } - if !b.restarted { - b.Ctx.Log.Info("bootstrapping started syncing", - zap.Int("numAccepted", numAccepted), - ) - } else { - b.Ctx.Log.Debug("bootstrapping started syncing", - zap.Int("numAccepted", numAccepted), - ) - } - return b.startSyncing(ctx, accepted) } @@ -392,9 +382,14 @@ func (b *Bootstrapper) startSyncing(ctx context.Context, acceptedContainerIDs [] // Append the list of accepted container IDs to pendingContainerIDs to ensure // we iterate over every container that must be traversed. pendingContainerIDs = append(pendingContainerIDs, acceptedContainerIDs...) - b.Ctx.Log.Debug("starting bootstrapping", - zap.Int("numPendingBlocks", len(pendingContainerIDs)), + + log := b.Ctx.Log.Info + if b.restarted { + log = b.Ctx.Log.Debug + } + log("starting to fetch blocks", zap.Int("numAcceptedBlocks", len(acceptedContainerIDs)), + zap.Int("numMissingBlocks", len(pendingContainerIDs)), ) toProcess := make([]snowman.Block, 0, len(pendingContainerIDs))