From 536870ce9d135f90c2d8cd0b189b7a48dcd731f3 Mon Sep 17 00:00:00 2001
From: Jordi Serra Torrens
Date: Fri, 26 Aug 2022 14:35:28 +0000
Subject: [PATCH] Repro SERVER-69181

---
 jstests/sharding/repro-server-69181.js      | 59 +++++++++++++++++++++
 src/mongo/db/s/move_primary_coordinator.cpp |  5 +-
 2 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 jstests/sharding/repro-server-69181.js

diff --git a/jstests/sharding/repro-server-69181.js b/jstests/sharding/repro-server-69181.js
new file mode 100644
index 00000000000..1f603faae22
--- /dev/null
+++ b/jstests/sharding/repro-server-69181.js
@@ -0,0 +1,59 @@
+(function() {
+'use strict';
+
+load('jstests/libs/fail_point_util.js');  // For configureFailPoint
+load('jstests/libs/parallel_shell_helpers.js');
+
+const dbName = 'test';
+const collName = 'foo';
+const ns = dbName + '.' + collName;
+
+const st = new ShardingTest({shards: 2});
+
+const db = st.s.getDB(dbName);
+const unshardedColl = db['foo'];
+
+// Create the database, with shard0 as db-primary.
+assert.commandWorked(
+    st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
+
+let hangBeforeCleanStaleData = configureFailPoint(st.shard0, 'hangBeforeCleanStaleData');
+
+// Create an unsharded collection on the db.
+assert.commandWorked(unshardedColl.insert({x: 0}));
+
+// Start movePrimary.
+const awaitMovePrimary = startParallelShell(
+    funWithArgs(function(dbName, toShardName) {
+        assert.commandWorked(db.adminCommand({movePrimary: dbName, to: toShardName}));
+    }, dbName, st.shard1.shardName), st.s.port);
+
+// Wait for movePrimary to commit, and block before proceeding with cleaning the unsharded
+// collections on the source shard.
+hangBeforeCleanStaleData.wait();
+
+// Drop the database from the new primary (that is, shard1).
+assert.commandWorked(db.dropDatabase());
+
+// Now recreate the db on shard0 as db-primary.
+assert.commandWorked(
+    st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
+
+// Do some writes on an unsharded collection on that db. Note, this collection needs to have the
+// same name as one that existed before movePrimary.
+assert.commandWorked(unshardedColl.insert({x: 1}));
+assert.commandWorked(unshardedColl.insert({x: 2}));
+assert.commandWorked(unshardedColl.insert({x: 3}));
+assert.eq(3, unshardedColl.find().itcount());
+
+// Let the movePrimary coordinator proceed with cleaning "stale" data.
+hangBeforeCleanStaleData.off();
+
+// Wait for the cleanup procedure to finish.
+awaitMovePrimary();
+
+// Check whether we lost documents.
+assert.eq(3, unshardedColl.find().itcount());  // This will fail! Writes were lost.
+
+st.stop();
+})();
diff --git a/src/mongo/db/s/move_primary_coordinator.cpp b/src/mongo/db/s/move_primary_coordinator.cpp
index a19a5ebb286..e552628fae6 100644
--- a/src/mongo/db/s/move_primary_coordinator.cpp
+++ b/src/mongo/db/s/move_primary_coordinator.cpp
@@ -40,12 +40,14 @@
 #include "mongo/logv2/log.h"
 #include "mongo/s/client/shard_registry.h"
 #include "mongo/s/grid.h"
+#include "mongo/util/fail_point.h"
 
 #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
 
-
 namespace mongo {
 
+MONGO_FAIL_POINT_DEFINE(hangBeforeCleanStaleData);
+
 void MovePrimaryCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
     stdx::lock_guard lk{_docMutex};
     cmdInfoBuilder->append("request", BSON(_doc.kToShardIdFieldName << _doc.getToShardId()));
@@ -109,6 +111,7 @@ ExecutorFuture<void> MovePrimaryCoordinator::_runImpl(
             uassertStatusOK(movePrimarySourceManager.clone(opCtx));
             uassertStatusOK(movePrimarySourceManager.enterCriticalSection(opCtx));
             uassertStatusOK(movePrimarySourceManager.commitOnConfig(opCtx));
+            hangBeforeCleanStaleData.pauseWhileSet();
             uassertStatusOK(movePrimarySourceManager.cleanStaleData(opCtx));
         })
         .onError([this, anchor = shared_from_this()](const Status& status) {
--
2.17.1