From bc85a804edfb36fab88ac6088ee92a9613dced45 Mon Sep 17 00:00:00 2001 From: Jordi Serra Torrens Date: Mon, 26 Feb 2024 13:15:53 +0000 Subject: [PATCH] Repro resharding and txn --- jstests/sharding/repro.js | 60 +++++++++++++++++++ .../resharding_coordinator_service.cpp | 7 +++ 2 files changed, 67 insertions(+) create mode 100644 jstests/sharding/repro.js diff --git a/jstests/sharding/repro.js b/jstests/sharding/repro.js new file mode 100644 index 00000000000..a9bdac31879 --- /dev/null +++ b/jstests/sharding/repro.js @@ -0,0 +1,60 @@ +import {configureFailPoint} from "jstests/libs/fail_point_util.js"; + +(function() { +'use strict'; +const dbName = 'test'; +const collName = 'foo'; +const ns = dbName + '.' + collName; + +const st = new ShardingTest({mongos: 1, shards: 2}); +const db = st.s.getDB(dbName); +const coll = db[collName]; + +assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}})) +assert.commandWorked(st.splitAt(ns, {x: 0})); +assert.commandWorked(st.moveChunk(ns, {x: -1}, st.shard0.shardName)); +assert.commandWorked(st.moveChunk(ns, {x: 1}, st.shard1.shardName)); + +assert.commandWorked(coll.insertMany([{x: -1, y: 0}, {x: 1, y: 0}])); + +// Set fp to block resharding after commit on configsvr but before commit on shards. +const fp = configureFailPoint(st.configRS.getPrimary(), + "reshardingPauseBeforeTellingParticipantsToCommit"); + +// On parallel shell, start resharding +const joinResharding = startParallelShell(() => { + assert.commandWorked(db.adminCommand({reshardCollection: 'test.foo', key: {y: 1}})); +}, st.s.port); + +// Await configsvr to have done its part of the commit. +fp.wait(); + +const session = st.s.startSession(); +const sessionDB = session.getDatabase(dbName); +const sessionColl = sessionDB.getCollection(collName); +const sessionColl2 = sessionDB.getCollection('otherColl'); + +// Make sure the session knows of a clusterTime inclusive of the resharding operation up to the +// commit on the configsvr. 
+assert.commandWorked(sessionColl2.insert({a: 1})); + +// Start txn. +session.startTransaction({readConcern: {level: 'snapshot'}}); +assert.eq(1, sessionColl2.find().itcount()); + +// Unset fp and wait for resharding to finish. +fp.off(); +joinResharding(); + +// Make sure the router is aware of the new (post-resharding) routing table for test.foo +assert.eq(2, coll.find({y: 0}).itcount()); + +// Query / write to coll. Check results. +const docs = sessionColl.find({y: 0}).toArray(); +session.commitTransaction(); + +jsTest.log("--DEBUG-- docs: " + tojson(docs)); +assert.eq(2, docs.length); // BUG: Fails here! The query only sees one doc. + +st.stop(); +})(); diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp index 2aa1954cc52..ab05f6082f6 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp @@ -140,6 +140,7 @@ MONGO_FAIL_POINT_DEFINE(reshardingPauseCoordinatorBeforeInitializing); MONGO_FAIL_POINT_DEFINE(reshardingPauseCoordinatorBeforeCloning); MONGO_FAIL_POINT_DEFINE(reshardingPauseCoordinatorBeforeBlockingWrites); MONGO_FAIL_POINT_DEFINE(reshardingPauseCoordinatorBeforeDecisionPersisted); +MONGO_FAIL_POINT_DEFINE(reshardingPauseBeforeTellingParticipantsToCommit); MONGO_FAIL_POINT_DEFINE(reshardingPauseCoordinatorBeforeRemovingStateDoc); MONGO_FAIL_POINT_DEFINE(reshardingPauseCoordinatorBeforeCompletion); MONGO_FAIL_POINT_DEFINE(reshardingPauseCoordinatorBeforeStartingErrorFlow); @@ -2760,6 +2761,12 @@ void ReshardingCoordinator::_tellAllDonorsToRefresh( void ReshardingCoordinator::_tellAllParticipantsToCommit( const NamespaceString& nss, const std::shared_ptr& executor) { + { + auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc()); + reshardingPauseBeforeTellingParticipantsToCommit.pauseWhileSetAndNotCanceled( + opCtx.get(), _ctHolder->getAbortToken()); + } + auto opts = 
createShardsvrCommitReshardCollectionOptions( nss, _coordinatorDoc.getReshardingUUID(), **executor, _ctHolder->getStepdownToken(), {}); opts->cmd.setDbName(DatabaseName::kAdmin); -- 2.34.1