From 5c923d64e014b77634af07fbe58eafe82e8fdee9 Mon Sep 17 00:00:00 2001 From: Jorres Tarasov Date: Sat, 8 Mar 2025 00:58:35 +0100 Subject: [PATCH 1/2] fix: account for --delay-between-restarts when requesting nodes --- pkg/rolling/options.go | 10 ++++-- pkg/rolling/rolling.go | 2 +- tests/maintenance_test.go | 5 +-- tests/mock/cms-nodes.go | 64 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 74 insertions(+), 7 deletions(-) diff --git a/pkg/rolling/options.go b/pkg/rolling/options.go index bc868bd..4bf830d 100644 --- a/pkg/rolling/options.go +++ b/pkg/rolling/options.go @@ -2,6 +2,7 @@ package rolling import ( "fmt" + "math" "time" "github.com/spf13/pflag" @@ -101,6 +102,11 @@ ydbops will try to figure out if you broke this rule by comparing before\after o `Delay between two consecutive restarts. E.g. '60s', '2m'. The number of simultaneous restarts is limited by 'nodes-inflight'.`) } -func (o *RestartOptions) GetRestartDuration() *durationpb.Duration { - return durationpb.New(time.Second * time.Duration(o.RestartDuration) * time.Duration(o.RestartRetryNumber)) +func (o *RestartOptions) GetRestartDuration(nNodes int) *durationpb.Duration { + singleBatchRestartTime := time.Second * time.Duration(o.RestartDuration) * time.Duration(o.RestartRetryNumber) + singleBatchWithWait := singleBatchRestartTime + o.DelayBetweenRestarts + maximumTotalBatches := int(math.Ceil(float64(nNodes) / float64(o.NodesInflight))) + + finalDuration := time.Duration(maximumTotalBatches) * singleBatchWithWait + return durationpb.New(finalDuration) } diff --git a/pkg/rolling/rolling.go b/pkg/rolling/rolling.go index 4606e64..a1c1e0f 100644 --- a/pkg/rolling/rolling.go +++ b/pkg/rolling/rolling.go @@ -167,7 +167,7 @@ func (r *Rolling) DoRestart() error { taskParams := cms.MaintenanceTaskParams{ TaskUID: r.state.restartTaskUID, AvailabilityMode: r.opts.GetAvailabilityMode(), - Duration: r.opts.GetRestartDuration(), + Duration: r.opts.GetRestartDuration(len(nodesToRestart)), ScopeType: cms.NodeScope, Nodes: nodesToRestart, } diff --git a/tests/maintenance_test.go b/tests/maintenance_test.go index ae7b41a..4bf140b 100644 --- a/tests/maintenance_test.go +++ b/tests/maintenance_test.go @@ -3,6 +3,7 @@ package tests import ( "fmt" "path/filepath" + "time" . "github.com/onsi/ginkgo/v2" "github.com/ydb-platform/ydb-go-genproto/draft/protos/Ydb_Maintenance" @@ -49,7 +50,7 @@ var _ = Describe("Test Maintenance", func() { Description: "Rolling restart maintenance task", AvailabilityMode: Ydb_Maintenance.AvailabilityMode_AVAILABILITY_MODE_STRONG, }, - ActionGroups: mock.MakeActionGroupsFromHostFQDNs("ydb-1.ydb.tech", "ydb-2.ydb.tech"), + ActionGroups: mock.MakeActionGroupsFromHostFQDNsFixedDuration(time.Second * 180, "ydb-1.ydb.tech", "ydb-2.ydb.tech"), }, }, expectedOutputRegexps: []string{ @@ -237,7 +238,7 @@ var _ = Describe("Test Maintenance", func() { Description: "Rolling restart maintenance task", AvailabilityMode: Ydb_Maintenance.AvailabilityMode_AVAILABILITY_MODE_STRONG, }, - ActionGroups: mock.MakeActionGroupsFromNodeIds(1, 2), + ActionGroups: mock.MakeActionGroupsFromNodesIdsFixedDuration(time.Second * 180, 1, 2), }, }, expectedOutputRegexps: []string{ diff --git a/tests/mock/cms-nodes.go b/tests/mock/cms-nodes.go index d74c8c3..1d27287 100644 --- a/tests/mock/cms-nodes.go +++ b/tests/mock/cms-nodes.go @@ -36,6 +36,41 @@ func makeNode(nodeID uint32) *Ydb_Maintenance.Node { } } +func determineRestartDuration(nNodes int) time.Duration { + defaultRestartDuration := time.Second * 60 + singleBatchRestartTime := defaultRestartDuration * 3 + + singleBatchWithWait := singleBatchRestartTime + 1 * time.Second + maximumTotalBatches := nNodes + + return time.Duration(maximumTotalBatches) * singleBatchWithWait +} + +func MakeActionGroupsFromNodesIdsFixedDuration(duration time.Duration, nodeIDs ...uint32) []*Ydb_Maintenance.ActionGroup { + result := []*Ydb_Maintenance.ActionGroup{} + for _, nodeID := range nodeIDs { + result = append(result, + &Ydb_Maintenance.ActionGroup{ + Actions: []*Ydb_Maintenance.Action{ + { + Action: &Ydb_Maintenance.Action_LockAction{ + LockAction: &Ydb_Maintenance.LockAction{ + Scope: &Ydb_Maintenance.ActionScope{ + Scope: &Ydb_Maintenance.ActionScope_NodeId{ + NodeId: nodeID, + }, + }, + Duration: durationpb.New(duration), + }, + }, + }, + }, + }, + ) + } + return result +} + func MakeActionGroupsFromNodeIds(nodeIDs ...uint32) []*Ydb_Maintenance.ActionGroup { result := []*Ydb_Maintenance.ActionGroup{} for _, nodeID := range nodeIDs { @@ -50,7 +85,32 @@ func MakeActionGroupsFromNodeIds(nodeIDs ...uint32) []*Ydb_Maintenance.ActionGro NodeId: nodeID, }, }, - Duration: durationpb.New(180 * time.Second), + Duration: durationpb.New(determineRestartDuration(len(nodeIDs))), + }, + }, + }, + }, + }, + ) + } + return result +} + +func MakeActionGroupsFromHostFQDNsFixedDuration(duration time.Duration, hostFQDNs ...string) []*Ydb_Maintenance.ActionGroup { + result := []*Ydb_Maintenance.ActionGroup{} + for _, hostFQDN := range hostFQDNs { + result = append(result, + &Ydb_Maintenance.ActionGroup{ + Actions: []*Ydb_Maintenance.Action{ + { + Action: &Ydb_Maintenance.Action_LockAction{ + LockAction: &Ydb_Maintenance.LockAction{ + Scope: &Ydb_Maintenance.ActionScope{ + Scope: &Ydb_Maintenance.ActionScope_Host{ + Host: hostFQDN, + }, + }, + Duration: durationpb.New(duration), }, }, }, @@ -75,7 +135,7 @@ func MakeActionGroupsFromHostFQDNs(hostFQDNs ...string) []*Ydb_Maintenance.Actio Host: hostFQDN, }, }, - Duration: durationpb.New(180 * time.Second), + Duration: durationpb.New(determineRestartDuration(len(hostFQDNs))), }, }, }, From 689ef3bbb889bd562004d3efbfdd11302d54c814 Mon Sep 17 00:00:00 2001 From: Jorres Tarasov Date: Sat, 8 Mar 2025 00:58:58 +0100 Subject: [PATCH 2/2] fix: lint --- tests/mock/cms-nodes.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mock/cms-nodes.go b/tests/mock/cms-nodes.go index 1d27287..bf10c91 100644 --- a/tests/mock/cms-nodes.go +++ b/tests/mock/cms-nodes.go @@ -40,7 +40,7 @@ func determineRestartDuration(nNodes int) time.Duration { defaultRestartDuration := time.Second * 60 singleBatchRestartTime := defaultRestartDuration * 3 - singleBatchWithWait := singleBatchRestartTime + 1 * time.Second + singleBatchWithWait := singleBatchRestartTime + 1*time.Second maximumTotalBatches := nNodes return time.Duration(maximumTotalBatches) * singleBatchWithWait