From 9fb460bf04b09e0dc35edc16bfb455a199a5cd8a Mon Sep 17 00:00:00 2001 From: Yao Xiao <87789492+yao-xiao-github@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:37:39 -0800 Subject: [PATCH] [release-7.3] Pause perpetual storage wiggle when TSS count target is met. (#11824) * TSS pause * Add condition --- fdbclient/ServerKnobs.cpp | 1 + fdbclient/include/fdbclient/ServerKnobs.h | 1 + fdbserver/DDTeamCollection.actor.cpp | 29 +++++++++++++++-------- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index e00be3308d3..eac1b2fde86 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -286,6 +286,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( PERPETUAL_WIGGLE_MIN_BYTES_BALANCE_RATIO, 0.85 ); init( PW_MAX_SS_LESSTHAN_MIN_BYTES_BALANCE_RATIO, 8 ); init( PERPETUAL_WIGGLE_DISABLE_REMOVER, true ); + init( PERPETUAL_WIGGLE_PAUSE_AFTER_TSS_TARGET_MET, false ); if (isSimulated)PERPETUAL_WIGGLE_PAUSE_AFTER_TSS_TARGET_MET = deterministicRandom()->coinflip(); init( LOG_ON_COMPLETION_DELAY, DD_QUEUE_LOGGING_INTERVAL ); init( BEST_TEAM_MAX_TEAM_TRIES, 10 ); init( BEST_TEAM_OPTION_COUNT, 4 ); diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h index ad6e574f25b..2d04959a34c 100644 --- a/fdbclient/include/fdbclient/ServerKnobs.h +++ b/fdbclient/include/fdbclient/ServerKnobs.h @@ -269,6 +269,7 @@ class ServerKnobs : public KnobsImpl { // balanced/filledup before starting the next wiggle. double PERPETUAL_WIGGLE_DELAY; // The max interval between the last wiggle finish and the next wiggle start bool PERPETUAL_WIGGLE_DISABLE_REMOVER; // Whether the start of perpetual wiggle replace team remover + bool PERPETUAL_WIGGLE_PAUSE_AFTER_TSS_TARGET_MET; double LOG_ON_COMPLETION_DELAY; int BEST_TEAM_MAX_TEAM_TRIES; int BEST_TEAM_OPTION_COUNT; diff --git a/fdbserver/DDTeamCollection.actor.cpp b/fdbserver/DDTeamCollection.actor.cpp index 89854947e6d..062f40aaf38 100644 --- a/fdbserver/DDTeamCollection.actor.cpp +++ b/fdbserver/DDTeamCollection.actor.cpp @@ -2234,19 +2234,28 @@ class DDTeamCollectionImpl { takeRest = self->server_info.size() <= self->configuration.storageTeamSize || self->machine_info.size() < self->configuration.storageTeamSize || imbalance; + if (SERVER_KNOBS->PERPETUAL_WIGGLE_PAUSE_AFTER_TSS_TARGET_MET && + self->configuration.storageMigrationType == StorageMigrationType::DEFAULT) { + takeRest = takeRest || (self->getTargetTSSInDC() > 0 && self->reachTSSPairTarget()); + } + // log the extra delay and change the wiggler state if (takeRest) { self->storageWiggler->setWiggleState(StorageWiggler::PAUSE); - if (self->configuration.storageMigrationType == StorageMigrationType::GRADUAL) { - TraceEvent(SevWarn, "PerpetualStorageWiggleSleep", self->distributorId) - .suppressFor(SERVER_KNOBS->PERPETUAL_WIGGLE_DELAY * 4) - .detail("ImbalanceFactor", - SERVER_KNOBS->PW_MAX_SS_LESSTHAN_MIN_BYTES_BALANCE_RATIO ? numSSToBeLoadBytesBalanced - : ratio) - .detail("ServerSize", self->server_info.size()) - .detail("MachineSize", self->machine_info.size()) - .detail("StorageTeamSize", self->configuration.storageTeamSize); - } + Severity sev = + self->configuration.storageMigrationType == StorageMigrationType::GRADUAL ? SevWarn : SevInfo; + TraceEvent(sev, "PerpetualStorageWiggleSleep", self->distributorId) + .suppressFor(SERVER_KNOBS->PERPETUAL_WIGGLE_DELAY * 4) + .detail("Primary", self->primary) + .detail("ImbalanceFactor", + SERVER_KNOBS->PW_MAX_SS_LESSTHAN_MIN_BYTES_BALANCE_RATIO ? numSSToBeLoadBytesBalanced + : ratio) + .detail("ServerSize", self->server_info.size()) + .detail("MachineSize", self->machine_info.size()) + .detail("StorageTeamSize", self->configuration.storageTeamSize) + .detail("TargetTSSInDC", self->getTargetTSSInDC()) + .detail("ReachTSSPairTarget", self->reachTSSPairTarget()) + .detail("MigrationType", self->configuration.storageMigrationType.toString()); } } return Void();