diff --git a/jstests/sharding/inconsistent_indexes_fail_migrations.js b/jstests/sharding/inconsistent_indexes_fail_migrations.js
new file mode 100644
index 00000000000..fa2280f3243
--- /dev/null
+++ b/jstests/sharding/inconsistent_indexes_fail_migrations.js
@@ -0,0 +1,74 @@
+/**
+ * Generates a scenario where a migration can leave inconsistent indexes when it runs with a
+ * concurrent dropIndexes.
+ */
+
+import {configureFailPoint} from "jstests/libs/fail_point_util.js";
+
+const st = new ShardingTest({shards: 2, config: 1});
+
+assert.commandWorked(
+    st.s.adminCommand({enableSharding: 'test9_fsmdb0', primaryShard: st.shard1.shardName}));
+assert.commandWorked(st.s.getDB('test9_fsmdb0').runCommand({
+    createIndexes: 'fsmcoll0',
+    indexes: [{key: {value: 1}, name: 'value_1'}]
+}));
+assert.commandWorked(st.s.adminCommand(
+    {shardCollection: 'test9_fsmdb0.fsmcoll0', key: {_id: 'hashed'}, numInitialChunks: 2}));
+
+// Add data for the chunk that will be migrated.
+assert.commandWorked(
+    st.s.getCollection('test9_fsmdb0.fsmcoll0').insert({_id: "1", value: 0}));  // Goes to shard1.
+
+// Start dropping the value_1 index, but hang both shards before the drop executes.
+const dropIndexRS0Fp = configureFailPoint(st.rs0.getPrimary(), 'hangDropIndexes');
+const dropIndexRS1Fp = configureFailPoint(st.rs1.getPrimary(), 'hangDropIndexes');
+const dropIndexThread = new Thread((mongosConnString) => {
+    let mongos = new Mongo(mongosConnString);
+    assert.commandWorked(
+        mongos.getDB('test9_fsmdb0').runCommand({dropIndexes: 'fsmcoll0', index: 'value_1'}));
+}, st.s.host);
+dropIndexThread.start();
+dropIndexRS1Fp.wait();
+dropIndexRS0Fp.wait();
+
+// Hang the migration on the destination before it clones the indexes.
+const migrationDestinationFp1 = configureFailPoint(st.rs0.getPrimary(), 'migrateThreadHangAtStep1');
+const migrationThread = new Thread((mongosConnString, key, shardName) => {
+    let mongos = new Mongo(mongosConnString);
+    assert.commandFailed(
+        mongos.adminCommand({moveChunk: 'test9_fsmdb0.fsmcoll0', find: {_id: key}, to: shardName}));
+}, st.s.host, "1", st.shard0.shardName);
+migrationThread.start();
+migrationDestinationFp1.wait();
+
+// Allow the index drop to execute on the destination shard.
+dropIndexRS0Fp.off();
+
+// Wait until the index drop has finished on the destination.
+sleep(5000);
+
+// Let the destination clone the indexes before the index is dropped on the source shard.
+const migrationDestinationFp2 = configureFailPoint(st.rs0.getPrimary(), 'migrateThreadHangAtStep2');
+migrationDestinationFp1.off();
+migrationDestinationFp2.wait();
+
+// Release the dropIndexes on the source shard, dropping the index there and aborting the
+// migration.
+dropIndexRS1Fp.off();
+// Wait until the index drop has finished on the source.
+sleep(5000);
+migrationDestinationFp2.off();
+
+// Allow the threads to finish.
+dropIndexThread.join();
+migrationThread.join();
+
+// End result: the shards have inconsistent indexes, which now prevents migrations from 'shard0'
+// to 'shard1' and makes this test fail. In practice this means that shard0 will never finish
+// draining, so removeShard can never complete without manual intervention. If shard0 were a
+// config shard, the transition to a dedicated config server would likewise never succeed, because
+// the shard never drains and transitionToDedicatedConfigServer uses the removeShard machinery.
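+// (Sketch, not required for the repro: log each shard's view of the indexes to make the
+// divergence visible. After the race, the shards disagree on whether 'value_1' exists.
+// getIndexes() runs against the direct shard connections, bypassing mongos.)
+jsTest.log('shard0 indexes: ' +
+           tojson(st.rs0.getPrimary().getCollection('test9_fsmdb0.fsmcoll0').getIndexes()));
+jsTest.log('shard1 indexes: ' +
+           tojson(st.rs1.getPrimary().getCollection('test9_fsmdb0.fsmcoll0').getIndexes()));
+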
+assert.commandWorked(st.s.adminCommand(
+    {moveChunk: 'test9_fsmdb0.fsmcoll0', find: {_id: "2"}, to: st.shard1.shardName}));
+
+st.stop();
diff --git a/src/mongo/db/commands/drop_indexes_cmd.cpp b/src/mongo/db/commands/drop_indexes_cmd.cpp
index 1986d576f24..7a77a4b5bd4 100644
--- a/src/mongo/db/commands/drop_indexes_cmd.cpp
+++ b/src/mongo/db/commands/drop_indexes_cmd.cpp
@@ -86,6 +86,7 @@ namespace mongo {
 namespace {
 
 MONGO_FAIL_POINT_DEFINE(reIndexCrashAfterDrop);
+MONGO_FAIL_POINT_DEFINE(hangDropIndexes);
 
 class CmdDropIndexes : public DropIndexesCmdVersion1Gen<CmdDropIndexes> {
 public:
@@ -125,6 +126,7 @@ public:
                             ActionType::dropIndex));
     }
     Reply typedRun(OperationContext* opCtx) final {
+        hangDropIndexes.pauseWhileSet();
         // If the request namespace refers to a time-series collection, transform the user
         // time-series index request to one on the underlying bucket.
         auto isCommandOnTimeseriesBucketNamespace =
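
A side note on the repro above: the two `sleep(5000)` calls make it timing-dependent. A more deterministic variant could poll the shard directly until the index is actually gone. This is only a sketch; `awaitIndexDropped` is a hypothetical helper, not part of the patch:

```js
// Hypothetical helper (not in the patch): block until the named index has been
// dropped on the given node, instead of sleeping for a fixed interval.
function awaitIndexDropped(node, ns, indexName) {
    assert.soon(() => {
        // getIndexes() runs against the direct shard connection, bypassing mongos.
        return !node.getCollection(ns).getIndexes().some(ix => ix.name === indexName);
    }, "index '" + indexName + "' was never dropped on " + node.host);
}

// Possible usage after releasing each hangDropIndexes failpoint, e.g.:
//   dropIndexRS0Fp.off();
//   awaitIndexDropped(st.rs0.getPrimary(), 'test9_fsmdb0.fsmcoll0', 'value_1');
```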