From 9a07001cbee5fac70000610fc1da930dfb2e9071 Mon Sep 17 00:00:00 2001
From: Jordi Serra Torrens
Date: Fri, 8 Apr 2022 11:46:49 +0000
Subject: [PATCH] Repro BF-24832

---
 jstests/sharding/bf-24832-repro.js          | 62 +++++++++++++++++++
 src/mongo/db/s/migration_source_manager.cpp |  4 ++
 .../db/s/shardsvr_move_range_command.cpp    |  5 ++
 3 files changed, 71 insertions(+)
 create mode 100644 jstests/sharding/bf-24832-repro.js

diff --git a/jstests/sharding/bf-24832-repro.js b/jstests/sharding/bf-24832-repro.js
new file mode 100644
index 00000000000..8565e141870
--- /dev/null
+++ b/jstests/sharding/bf-24832-repro.js
@@ -0,0 +1,62 @@
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load('jstests/libs/parallel_shell_helpers.js');
+
+const st = new ShardingTest({shards: 2, rs: {nodes: 2}});
+
+const dbName = "test";
+const collName = "foo";
+const ns = dbName + "." + collName;
+
+assert.commandWorked(
+    st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
+
+let db = st.s.getDB(dbName);
+let coll = db[collName];
+
+assert.commandWorked(db.adminCommand({shardCollection: ns, key: {x: 1}}));
+
+// Send a moveChunk command. Make it hang before the MSM has even been instantiated.
+const firstPrimary = st.rs0.getPrimary();
+let failpoint = configureFailPoint(firstPrimary, "moveChunkCmdHangBeforeMarkOpCtxAsInterruptible");
+let awaitMigrationShell =
+    startParallelShell(funWithArgs((ns, toShard) => {
+                           db.adminCommand({moveChunk: ns, find: {x: 0}, to: toShard});
+                       }, ns, st.shard1.shardName), st.s.port);
+failpoint.wait();
+
+// Step up a new donor primary.
+const rs0Secondary = st.rs0.getSecondary();
+assert.commandWorked(st.rs0.getPrimary().adminCommand({replSetStepDown: 60, force: true}));
+st.rs0.awaitNodesAgreeOnPrimary({expectedPrimaryNode: rs0Secondary});
+
+// Let the first moveChunk proceed until it has checked that there is no migration pending recovery.
+let failpointHangAfterDrainMigrationsPendingRecoveryOnFirstPrimary =
+    configureFailPoint(firstPrimary, "hangAfterDrainMigrationsPendingRecovery");
+failpoint.off();
+failpointHangAfterDrainMigrationsPendingRecoveryOnFirstPrimary.wait();
+
+// Send a moveChunk to the new primary. Let it run until the MSM persists the recovery document.
+let failpoint2 = configureFailPoint(rs0Secondary, "moveChunkHangAtStep3");
+let awaitSecondMigrationShell = startParallelShell(
+    funWithArgs((ns, toShard) => {
+        assert.commandWorked(db.adminCommand({moveChunk: ns, find: {x: 0}, to: toShard}));
+    }, ns, st.shard1.shardName), st.s.port);
+failpoint2.wait();
+
+// Wait for the former primary to replicate the recovery document written by the new primary.
+st.rs0.awaitReplication();
+
+// Let the first moveChunk continue. That node (the old primary) has replicated the recovery
+// document inserted by the migration started on the new primary, so it will hit the invariant
+// that expects no documents in config.migrationCoordinators!
+
+failpointHangAfterDrainMigrationsPendingRecoveryOnFirstPrimary.off();
+sleep(120 * 1000);
+
+awaitMigrationShell();
+awaitSecondMigrationShell();
+
+st.stop();
+})();
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index 147e29284ef..c9281eb5a2b 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -119,6 +119,8 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeLeavingCriticalSection);
 MONGO_FAIL_POINT_DEFINE(migrationCommitNetworkError);
 MONGO_FAIL_POINT_DEFINE(hangBeforePostMigrationCommitRefresh);
 
+MONGO_FAIL_POINT_DEFINE(hangAfterDrainMigrationsPendingRecovery);
+
 }  // namespace
 
 MigrationSourceManager* MigrationSourceManager::get(CollectionShardingRuntime* csr,
@@ -179,6 +181,8 @@ MigrationSourceManager::MigrationSourceManager(OperationContext* opCtx,
     {
         migrationutil::drainMigrationsPendingRecovery(opCtx);
 
+        hangAfterDrainMigrationsPendingRecovery.pauseWhileSet();
+
         // Since the moveChunk command is holding the ActiveMigrationRegistry and we just drained
         // all migrations pending recovery, now there cannot be any document in
         // config.migrationCoordinators.
diff --git a/src/mongo/db/s/shardsvr_move_range_command.cpp b/src/mongo/db/s/shardsvr_move_range_command.cpp
index ad92264d0cd..f2c3dded83d 100644
--- a/src/mongo/db/s/shardsvr_move_range_command.cpp
+++ b/src/mongo/db/s/shardsvr_move_range_command.cpp
@@ -42,10 +42,13 @@
 #include "mongo/s/grid.h"
 #include "mongo/s/request_types/move_chunk_request.h"
 #include "mongo/s/request_types/move_range_request_gen.h"
+#include "mongo/util/fail_point.h"
 
 namespace mongo {
 namespace {
 
+MONGO_FAIL_POINT_DEFINE(moveChunkCmdHangBeforeMarkOpCtxAsInterruptible);
+
 const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority,
                                                 // Note: Even though we're setting UNSET here,
                                                 // kMajority implies JOURNAL if journaling is
@@ -140,6 +143,8 @@ public:
             auto uniqueOpCtx = Client::getCurrent()->makeOperationContext();
             auto opCtx = uniqueOpCtx.get();
 
+            moveChunkCmdHangBeforeMarkOpCtxAsInterruptible.pauseWhileSet();
+
             {
                 // Ensure that opCtx will get interrupted in the event of a
                 // stepdown. This is to ensure that the MigrationSourceManager
-- 
2.17.1