From 800d15a94d5b3f90b5e467f80378408e76d9ee62 Mon Sep 17 00:00:00 2001
From: Jordi Serra Torrens
Date: Mon, 16 May 2022 13:20:03 +0000
Subject: [PATCH] Repro BF-25230

---
 jstests/sharding/repro-bf-25230.js            | 73 +++++++++++++++++++
 .../db/s/migration_destination_manager.cpp    |  3 +
 2 files changed, 76 insertions(+)
 create mode 100644 jstests/sharding/repro-bf-25230.js

diff --git a/jstests/sharding/repro-bf-25230.js b/jstests/sharding/repro-bf-25230.js
new file mode 100644
index 00000000000..caacfee63dc
--- /dev/null
+++ b/jstests/sharding/repro-bf-25230.js
@@ -0,0 +1,73 @@
+/**
+ * Repro for BF-25230. Pauses a chunk migration on the recipient just before it deletes its
+ * recovery document, pauses a transaction that inserts on both sides of the split point at the
+ * 'hangBeforeWritingDecision' fail point, then steps down the recipient primary and checks that
+ * a new recipient primary is elected and that both operations complete.
+ */
+(function() {
+"use strict";
+
+load('jstests/libs/parallel_shell_helpers.js');
+load("jstests/libs/fail_point_util.js");
+
+const st = new ShardingTest({shards: {rs0: {nodes: 1}, rs1: {nodes: 2}}});
+
+const dbName = "test";
+const collName = "foo";
+const ns = dbName + "." + collName;
+
+assert.commandWorked(
+    st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
+assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}}));
+assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: 0}}));
+
+// Start a migration. Make it hang on the recipient side after it has released the recipient
+// critical section but before it has deleted the recovery document.
+const hangBeforeDeleteRecoveryDocFp =
+    configureFailPoint(st.rs1.getPrimary(), 'migrationRecipientHangBeforeDeleteRecoveryDoc');
+
+// The function passed to funWithArgs runs in a separate shell started against the mongos, so
+// 'db' in its body is that shell's own global rather than a variable declared in this file.
+const awaitMigration = startParallelShell(
+    funWithArgs(function(ns, toShard) {
+        assert.commandWorked(db.adminCommand({moveChunk: ns, find: {x: 0}, to: toShard}));
+    }, ns, st.shard1.shardName), st.s.port);
+
+hangBeforeDeleteRecoveryDocFp.wait();
+
+// Set up a prepared txn
+const hangBeforeWritingDecisionFp =
+    configureFailPoint(st.rs1.getPrimary(), 'hangBeforeWritingDecision');
+const awaitTxn = startParallelShell(
+    funWithArgs(function(dbName, collName) {
+        const session = db.getMongo().startSession();
+        const sessionDb = session.getDatabase(dbName);
+        session.startTransaction();
+        assert.commandWorked(sessionDb[collName].insert({x: 1}));
+        assert.commandWorked(sessionDb[collName].insert({x: -1}));
+        session.commitTransaction();
+    }, dbName, collName), st.s.port);
+
+hangBeforeWritingDecisionFp.wait();
+jsTest.log("--XXXX-- Prepared txn");
+
+// Stepdown the recipient
+assert.commandWorked(st.rs1.getPrimary().adminCommand({replSetStepDown: 60, force: true}));
+// NOTE(review): fixed one-second pause before the fail points are released. The reason is not
+// stated here; presumably it lets the stepdown be observed while both operations are still
+// blocked -- confirm.
+sleep(1 * 1000);
+hangBeforeDeleteRecoveryDocFp.off();
+hangBeforeWritingDecisionFp.off();
+
+// Check that a new recipient primary managed to stepup
+jsTest.log("--XXXX-- Waiting for new recipient primary to stepup");
+st.rs1.getPrimary();
+jsTest.log("--XXXX-- There's a new recipient primary node");
+
+awaitTxn();
+jsTest.log("--XXXX-- Committed txn");
+
+awaitMigration();
+st.stop();
+})();
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 5d6976a3676..f8063e03ddc 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -303,6 +303,7 @@ MONGO_FAIL_POINT_DEFINE(failMigrationOnRecipient);
 MONGO_FAIL_POINT_DEFINE(failMigrationReceivedOutOfRangeOperation);
 MONGO_FAIL_POINT_DEFINE(migrationRecipientFailPostCommitRefresh);
+MONGO_FAIL_POINT_DEFINE(migrationRecipientHangBeforeDeleteRecoveryDoc);
 
 }  // namespace
 
 MigrationDestinationManager::MigrationDestinationManager() = default;
@@ -1851,6 +1852,8 @@ void MigrationDestinationManager::awaitCriticalSectionReleaseSignalAndCompleteMi
     // persistence to be majority committed.
     CatalogCacheLoader::get(opCtx).waitForCollectionFlush(opCtx, _nss);
 
+    migrationRecipientHangBeforeDeleteRecoveryDoc.pauseWhileSet(opCtx);
+
     // Delete the recovery document
     migrationutil::deleteMigrationRecipientRecoveryDocument(opCtx, *_migrationId);
 }
-- 
2.17.1