Skip to content

Commit

Permalink
SERVER-64750 Implement a quick-find shortcut to skip running isSelf w…
Browse files Browse the repository at this point in the history
…hen we have a valid selfIndex
  • Loading branch information
vessy-mongodb authored and Evergreen Agent committed Sep 2, 2022
1 parent 2ba8bfb commit d4d4a26
Show file tree
Hide file tree
Showing 7 changed files with 291 additions and 25 deletions.
70 changes: 53 additions & 17 deletions src/mongo/db/repl/repl_set_config_checks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,30 +39,16 @@
#include "mongo/db/repl/replication_coordinator_external_state.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
#include "mongo/util/str.h"

#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kReplication

namespace mongo {
namespace repl {

namespace {

/**
* Checks if the node with the given config index is electable, returning a useful
* status message if not.
*/
Status checkElectable(const ReplSetConfig& newConfig, int configIndex) {
const MemberConfig& myConfig = newConfig.getMemberAt(configIndex);
if (!myConfig.isElectable()) {
return Status(ErrorCodes::NodeNotElectable,
str::stream() << "This node, " << myConfig.getHostAndPort().toString()
<< ", with _id " << myConfig.getId()
<< " is not electable under the new configuration with "
<< newConfig.getConfigVersionAndTerm().toString()
<< " for replica set " << newConfig.getReplSetName());
}
return Status::OK();
}

/**
* Checks that the priorities of all the arbiters in the configuration are 0. If they were 1,
* they should have been set to 0 in MemberConfig::initialize(). Otherwise, they are illegal.
Expand Down Expand Up @@ -302,6 +288,23 @@ Status validateOldAndNewConfigsCompatible(const ReplSetConfig& oldConfig,
}
} // namespace

/**
* Checks if the node with the given config index is electable, returning a useful
* status message if not.
*/
Status checkElectable(const ReplSetConfig& newConfig, int configIndex) {
const MemberConfig& myConfig = newConfig.getMemberAt(configIndex);
if (!myConfig.isElectable()) {
return Status(ErrorCodes::NodeNotElectable,
str::stream() << "This node, " << myConfig.getHostAndPort().toString()
<< ", with _id " << myConfig.getId()
<< " is not electable under the new configuration with "
<< newConfig.getConfigVersionAndTerm().toString()
<< " for replica set " << newConfig.getReplSetName());
}
return Status::OK();
}

bool sameConfigContents(const ReplSetConfig& oldConfig, const ReplSetConfig& newConfig) {
auto oldBSON = oldConfig.toBSON();
auto newBSON = newConfig.toBSON();
Expand Down Expand Up @@ -373,6 +376,30 @@ StatusWith<int> findSelfInConfigIfElectable(ReplicationCoordinatorExternalState*
return result;
}

int findOwnHostInConfigQuick(const ReplSetConfig& newConfig, HostAndPort host) {
if (host.empty()) {
return -1;
}

int firstMatchIndex = -1;
int currIndex = 0;

for (ReplSetConfig::MemberIterator iter = newConfig.membersBegin();
iter != newConfig.membersEnd();
++iter) {

if (iter->getHostAndPort() == host) {
firstMatchIndex = currIndex;
invariant(firstMatchIndex >= 0);
break;
}

currIndex++;
}

return firstMatchIndex;
}

StatusWith<int> validateConfigForStartUp(ReplicationCoordinatorExternalState* externalState,
const ReplSetConfig& newConfig,
ServiceContext* ctx) {
Expand Down Expand Up @@ -478,6 +505,7 @@ Status validateConfigForReconfig(const ReplSetConfig& oldConfig,
StatusWith<int> validateConfigForHeartbeatReconfig(
ReplicationCoordinatorExternalState* externalState,
const ReplSetConfig& newConfig,
HostAndPort ownHost,
ServiceContext* ctx) {
Status status = newConfig.validateAllowingSplitHorizonIP();
if (!status.isOK()) {
Expand All @@ -491,6 +519,14 @@ StatusWith<int> validateConfigForHeartbeatReconfig(
"set a cluster-wide default writeConcern.",
!newConfig.containsCustomizedGetLastErrorDefaults());

auto quickIndex = findOwnHostInConfigQuick(newConfig, ownHost);
if (quickIndex >= 0) {
LOGV2(6475001,
"Was able to quickly find new index in config. Skipping full isSelf checks",
"index"_attr = quickIndex);
return quickIndex;
}

return findSelfInConfig(externalState, newConfig, ctx);
}

Expand Down
13 changes: 13 additions & 0 deletions src/mongo/db/repl/repl_set_config_checks.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#pragma once

#include "mongo/base/status_with.h"
#include "mongo/util/net/hostandport.h"

namespace mongo {

Expand All @@ -40,6 +41,11 @@ namespace repl {
class ReplicationCoordinatorExternalState;
class ReplSetConfig;

/**
* Checks if the member given by the config index is electable in the new config.
*/
Status checkElectable(const ReplSetConfig& newConfig, int configIndex);

/**
* Checks if two configs are the same in content, ignoring 'version' and 'term' fields.
*/
Expand All @@ -65,6 +71,12 @@ StatusWith<int> findSelfInConfigIfElectable(ReplicationCoordinatorExternalState*
const ReplSetConfig& newConfig,
ServiceContext* ctx);

/**
* Does a quick pass to see whether a host exists in the new config. Not as precise as
* findSelfInConfig.
*/
int findOwnHostInConfigQuick(const ReplSetConfig& newConfig, HostAndPort host);

/**
* Validates that "newConfig" is a legal configuration that the current
* node can accept from its local storage during startup.
Expand Down Expand Up @@ -115,6 +127,7 @@ Status validateConfigForReconfig(const ReplSetConfig& oldConfig,
StatusWith<int> validateConfigForHeartbeatReconfig(
ReplicationCoordinatorExternalState* externalState,
const ReplSetConfig& newConfig,
HostAndPort ownHost,
ServiceContext* ctx);
} // namespace repl
} // namespace mongo
33 changes: 30 additions & 3 deletions src/mongo/db/repl/repl_set_config_checks_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ TEST_F(ServiceContextTest, ValidateConfigForHeartbeatReconfig_NewConfigInvalid)
presentOnceExternalState.addSelf(HostAndPort("h2"));
ASSERT_EQUALS(ErrorCodes::BadValue,
validateConfigForHeartbeatReconfig(
&presentOnceExternalState, newConfig, getServiceContext())
&presentOnceExternalState, newConfig, HostAndPort(), getServiceContext())
.getStatus());
}

Expand All @@ -894,7 +894,7 @@ TEST_F(ServiceContextTest, ValidateConfigForHeartbeatReconfig_NewConfigValid) {
ReplicationCoordinatorExternalStateMock presentOnceExternalState;
presentOnceExternalState.addSelf(HostAndPort("h2"));
ASSERT_OK(validateConfigForHeartbeatReconfig(
&presentOnceExternalState, newConfig, getServiceContext())
&presentOnceExternalState, newConfig, HostAndPort(), getServiceContext())
.getStatus());
}

Expand All @@ -917,7 +917,7 @@ DEATH_TEST_REGEX_F(ServiceContextTest,
ReplicationCoordinatorExternalStateMock presentOnceExternalState;
presentOnceExternalState.addSelf(HostAndPort("h2"));
ASSERT_THROWS_CODE(validateConfigForHeartbeatReconfig(
&presentOnceExternalState, newConfig, getServiceContext())
&presentOnceExternalState, newConfig, HostAndPort(), getServiceContext())
.getStatus(),
AssertionException,
5624103);
Expand Down Expand Up @@ -1342,6 +1342,33 @@ TEST_F(ServiceContextTest, FindSelfInConfigFastAndSlow) {
}
}

TEST_F(ServiceContextTest, FindOwnHostInConfigQuick) {
ReplSetConfig newConfig;
newConfig = ReplSetConfig::parse(BSON("_id"
<< "rs0"
<< "version" << 2 << "protocolVersion" << 1 << "members"
<< BSON_ARRAY(BSON("_id" << 1 << "host"
<< "h1:1234")
<< BSON("_id" << 2 << "host"
<< "h2:1234")
<< BSON("_id" << 3 << "host"
<< "h3:1234")
<< BSON("_id" << 4 << "host"
<< "h2:1234"))));

// Does not exist.
ASSERT_EQUALS(-1, findOwnHostInConfigQuick(newConfig, HostAndPort("non-existent")));

// First in config, not duplicated.
ASSERT_EQUALS(0, findOwnHostInConfigQuick(newConfig, HostAndPort("h1:1234")));

// Not first in config but also not duplicated.
ASSERT_EQUALS(2, findOwnHostInConfigQuick(newConfig, HostAndPort("h3:1234")));

// First match in a tie.
ASSERT_EQUALS(1, findOwnHostInConfigQuick(newConfig, HostAndPort("h2:1234")));
}

} // namespace
} // namespace repl
} // namespace mongo
28 changes: 25 additions & 3 deletions src/mongo/db/repl/replication_coordinator_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3177,7 +3177,6 @@ int ReplicationCoordinatorImpl::getMyId() const {

HostAndPort ReplicationCoordinatorImpl::getMyHostAndPort() const {
stdx::unique_lock<Latch> lk(_mutex);

if (_selfIndex == -1) {
return HostAndPort();
}
Expand Down Expand Up @@ -3868,8 +3867,31 @@ Status ReplicationCoordinatorImpl::_doReplSetReconfig(OperationContext* opCtx,

// Make sure we can find ourselves in the config. If the config contents have not changed, then
// we bypass the check for finding ourselves in the config, since we know it should already be
// satisfied.
if (!sameConfigContents(oldConfig, newConfig)) {
// satisfied. There is also one further optimization here: if we have a valid _selfIndex, we can
// do a quick and cheap pass first to see if host and port exist in the new config. This is safe
// as we are not allowed to have the same HostAndPort in the config twice. Matching HostandPort
// implies matching isSelf, and it is actually preferrable to avoid checking the latter as it is
// succeptible to transient DNS errors.
auto quickIndex =
_selfIndex >= 0 ? findOwnHostInConfigQuick(newConfig, getMyHostAndPort()) : -1;
if (quickIndex >= 0) {
if (!force) {
auto electableStatus = checkElectable(newConfig, quickIndex);
if (!electableStatus.isOK()) {
LOGV2_ERROR(6475002,
"Not electable in new config and force=false, rejecting",
"error"_attr = electableStatus,
"newConfig"_attr = newConfigObj);
return electableStatus;
}
}
LOGV2(6475000,
"Was able to quickly find new index in config. Skipping full checks.",
"index"_attr = quickIndex,
"force"_attr = force);
myIndex = quickIndex;
} else {
// Either our HostAndPort changed in the config or we didn't have a _selfIndex.
if (skipSafetyChecks) {
LOGV2_ERROR(5986700,
"Configuration changed substantially in a config change that should have "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
}
}
return validateConfigForHeartbeatReconfig(
_externalState.get(), newConfig, cc().getServiceContext());
_externalState.get(), newConfig, getMyHostAndPort(), cc().getServiceContext());
}();

if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1275,6 +1275,33 @@ TEST_F(ReplCoordHBV1ReconfigTest, ScheduleImmediateHeartbeatToSpeedUpConfigCommi
}
}

TEST_F(ReplCoordHBV1ReconfigTest, FindOwnHostForHeartbeatReconfigQuick) {
// Prepare a config which is newer than the installed config
ReplSetConfig newConfig = [&]() {
auto mutableConfig =
makeRSConfigWithVersionAndTerm(initConfigVersion + 1, initConfigTerm + 1).getMutable();
ReplSetConfigSettings settings;
settings.setHeartbeatIntervalMillis(10000);
mutableConfig.setSettings(settings);
return ReplSetConfig(std::move(mutableConfig));
}();

// Receive the newer config from the primary
receiveHeartbeatFrom(newConfig, 1, HostAndPort("h2", 1));
{
InNetworkGuard guard(getNet());
auto noi = getNet()->getNextReadyRequest();
auto response = makePrimaryHeartbeatResponseFrom(newConfig);
getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(response));

startCapturingLogMessages();
getNet()->runReadyNetworkOperations();
stopCapturingLogMessages();
ASSERT_EQUALS(
1, countTextFormatLogLinesContaining("Was able to quickly find new index in config"));
}
}

TEST_F(ReplCoordHBV1Test, RejectHeartbeatReconfigDuringElection) {
auto severityGuard = unittest::MinimumLoggedSeverityGuard{
logv2::LogComponent::kReplicationHeartbeats, logv2::LogSeverity::Debug(1)};
Expand Down
Loading

0 comments on commit d4d4a26

Please sign in to comment.