diff --git a/jstests/sharding/abort_resharding_test.js b/jstests/sharding/abort_resharding_test.js
new file mode 100644
index 00000000000..d2a3ca8647a
--- /dev/null
+++ b/jstests/sharding/abort_resharding_test.js
@@ -0,0 +1,96 @@
+/**
+ * Aborts an in-progress resharding operation while the config server primary changes.
+ *
+ * 'foo.test' is sharded on {_id: 1} with one chunk on each of two shards. Both shard primaries are
+ * made to fail _shardsvrAbortReshardCollection with HostUnreachable, and the config server node
+ * that is stepped up during the test has two failpoints enabled:
+ *   - pauseBeforeSendingAbortToShards: pauses the resharding coordinator's abort flow right before
+ *     it calls _tellAllParticipantsToAbort.
+ *   - pauseAfterSettingIsUp: pauses ShardRegistry::_lookup right after the block that stores true
+ *     into _isUp.
+ *
+ * The test passes when reshardCollection fails with ReshardCollectionAborted and both client
+ * threads finish.
+ */
+(function() {
+'use strict';
+
+load('jstests/libs/fail_point_util.js');
+
+const st = new ShardingTest({shards: 2});
+const shard0Name = st.shard0.shardName;
+const shard1Name = st.shard1.shardName;
+
+// Assert every setup command so that a broken fixture fails here rather than surfacing later as an
+// unrelated resharding error.
+assert.commandWorked(st.s.adminCommand({enableSharding: 'foo', primaryShard: shard0Name}));
+assert.commandWorked(st.s.adminCommand({shardCollection: 'foo.test', key: {_id: 1}}));
+assert.commandWorked(st.s.adminCommand({split: 'foo.test', middle: {_id: 0}}));
+assert.commandWorked(st.s.adminCommand({moveChunk: 'foo.test', find: {_id: 0}, to: shard1Name}));
+
+// createIndexes requires the key pattern under 'key'; a spec of {a: 1, name: "a_1"} is rejected.
+assert.commandWorked(
+    st.s.getDB('foo').runCommand({createIndexes: 'test', indexes: [{key: {a: 1}, name: "a_1"}]}));
+assert.commandWorked(st.s.getCollection('foo.test').insert({_id: -1, a: -1}));
+assert.commandWorked(st.s.getCollection('foo.test').insert({_id: 1, a: 1}));
+
+// Make both shard primaries fail _shardsvrAbortReshardCollection with HostUnreachable.
+const failAbortOn = (primary) => configureFailPoint(primary, "failCommand", {
+    failInternalCommands: true,
+    errorCode: ErrorCodes.HostUnreachable,
+    failCommands: ["_shardsvrAbortReshardCollection"],
+});
+const fp = failAbortOn(st.rs0.getPrimary());
+const fp2 = failAbortOn(st.rs1.getPrimary());
+
+// Failpoint on the current config server primary; the abort is only issued once it is hit.
+const rsFp =
+    configureFailPoint(st.configRS.getPrimary(), "reshardingPauseCoordinatorBeforeBlockingWrites");
+
+// Failpoints on the config server node that is stepped up below.
+const configSecondary = st.configRS.getSecondary();
+const secReshardingFp = configureFailPoint(configSecondary, 'pauseBeforeSendingAbortToShards');
+const secRefreshFp = configureFailPoint(configSecondary, 'pauseAfterSettingIsUp');
+
+// Each thread opens its own mongos connection from the host string it is handed.
+const reshardThread = new Thread((mongosConnString) => {
+    const mongos = new Mongo(mongosConnString);
+    assert.commandFailedWithCode(mongos.adminCommand({reshardCollection: 'foo.test', key: {a: 1}}),
+                                 ErrorCodes.ReshardCollectionAborted);
+}, st.s.host);
+
+// The abortReshardCollection response itself is not asserted; the test only needs the command to
+// return. NOTE(review): confirm whether the response should be checked as well.
+const abortThread = new Thread((mongosConnString) => {
+    const mongos = new Mongo(mongosConnString);
+    mongos.adminCommand({abortReshardCollection: 'foo.test'});
+}, st.s.host);
+
+// Start resharding and issue the abort only after the coordinator has reached rsFp.
+reshardThread.start();
+rsFp.wait();
+abortThread.start();
+rsFp.off();
+
+// Make the node carrying the two pause failpoints the config server primary.
+st.configRS.stepUp(configSecondary);
+
+// Wait until the stepped-up node has hit both failpoints.
+secRefreshFp.wait();
+secReshardingFp.wait();
+
+// Release the abort flow first while ShardRegistry::_lookup stays paused.
+secReshardingFp.off();
+// Wait for a second while the config server sends the command to the shard.
+sleep(1000);
+secRefreshFp.off();
+
+// Stop failing _shardsvrAbortReshardCollection on the shards.
+fp.off();
+fp2.off();
+
+reshardThread.join();
+abortThread.join();
+
+st.stop();
+})();
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp index 68ad17e3b62..e0e623574e2 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp @@ -97,6 +97,7 @@ MONGO_FAIL_POINT_DEFINE(reshardingPauseCoordinatorBeforeStartingErrorFlow); MONGO_FAIL_POINT_DEFINE(reshardingPauseCoordinatorBeforePersistingStateTransition); MONGO_FAIL_POINT_DEFINE(pauseBeforeTellDonorToRefresh); MONGO_FAIL_POINT_DEFINE(pauseBeforeInsertCoordinatorDoc); +MONGO_FAIL_POINT_DEFINE(pauseBeforeSendingAbortToShards); const std::string kReshardingCoordinatorActiveIndexName = "ReshardingCoordinatorActiveIndex"; const Backoff kExponentialBackoff(Seconds(1), Milliseconds::max()); @@ -1549,6 +1550,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::_onAbortCoordinatorAndParti .then([this] { return _waitForMajority(_ctHolder->getStepdownToken()); }) .thenRunOn(**executor) .then([this, executor, status] { + pauseBeforeSendingAbortToShards.pauseWhileSet(); _tellAllParticipantsToAbort(executor, status == ErrorCodes::ReshardCollectionAborted); diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp index d8211ec4c82..dd6b3c82418 100644 --- a/src/mongo/s/client/shard_registry.cpp +++ b/src/mongo/s/client/shard_registry.cpp @@ -56,6 +56,7 @@ namespace mongo { namespace { +MONGO_FAIL_POINT_DEFINE(pauseAfterSettingIsUp); const Seconds 
kRefreshPeriod(30); const Backoff kExponentialBackoff(Seconds(1), Milliseconds::max()); @@ -192,6 +193,7 @@ ShardRegistry::Cache::LookupResult ShardRegistry::_lookup(OperationContext* opCt if (fetchedFromConfigServers) { _isUp.store(true); } + pauseAfterSettingIsUp.pauseWhileSet(); Time returnTime{returnTopologyTime, rsmIncrementForConnStrings, returnForceReloadIncrement}; LOGV2_DEBUG(4620251,