diff --git a/jstests/sharding/repro.js b/jstests/sharding/repro.js
new file mode 100644
index 00000000000..73a05744492
--- /dev/null
+++ b/jstests/sharding/repro.js
@@ -0,0 +1,106 @@
+(function() {
+
+load('jstests/libs/write_concern_util.js');
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallel_shell_helpers.js");
+
+var st = new ShardingTest({
+    config: 1,
+    shards: {
+        rs0: {
+            nodes: [
+                {rsConfig: {priority: 1, tags: {nodeToSelectForReads: 'yes'}}},
+            ],
+        },
+        rs1: {
+            nodes: [
+                {rsConfig: {priority: 2}},
+                {rsConfig: {priority: 1}},
+                {rsConfig: {priority: 1, tags: {nodeToSelectForReads: 'yes'}}},
+            ],
+        },
+    }
+});
+st.enableSharding('D', st.shard0.shardName);
+st.shardCollection('D.C', {K: 1});
+
+const D = st.getDB('D');
+
+assert.commandWorked(D.C.insert([
+    {K: 0},
+    {K: 100},
+    {K: 200},
+]));
+
+// Make both shards own chunks of the collection.
+st.moveRange('D.C', {K: 500}, {K: 600}, st.shard1.shardName);
+
+function checkSecondaryResults(host, lsid) {
+    jsTest.log(`Checking secondary results using lsid of ${tojson(lsid)}`);
+    var conn = new Mongo(host);
+    conn.setReadPref('secondaryPreferred', [{nodeToSelectForReads: 'yes'}]);
+    var result = assert.commandWorked(conn.getCollection('D.C').runReadCommand('find', {lsid}));
+    jsTest.log(`Result: ${tojson(result)}`);
+
+    // Check that a single batch was returned. TODO: This is just a sanity check; as written, the
+    // test wouldn't exhaust the cursor if it happened to have more batches.
+    assert.eq(0, result.cursor.id);
+
+    // Check that 3 documents were returned.
+    assert.eq(3, result.cursor.firstBatch.length);
+}
+
+const lsid = ({id: UUID()});
+
+// Ensure the secondary node knows about the collection (otherwise getCollectionDescription will
+// throw if the collection's state is UNKNOWN).
+checkSecondaryResults(st.s0.host, lsid);
+
+const stoppedSecondary = st.rs1.getSecondaries()[1];
+stopServerReplication(stoppedSecondary);
+
+st.moveRange('D.C', {K: 100}, {K: 200}, st.shard1.shardName);
+
+// Ensure the router knows the latest collection version.
+assert.eq(3, D.C.find({}).toArray().length);
+
+// Await replication on the configsvr and shard0 to make sure their nodes have replicated the
+// effects of the migration.
+st.configRS.awaitReplication();
+st.rs0.awaitReplication();
+
+// Start a find that will target the secondary. Make it hang after establishing the storage engine
+// snapshot but before checking the shard version. Note that the router has sent the post-migration
+// shard version, but the storage engine snapshot corresponds to a pre-migration time.
+const hangBeforeAutoGetShardVersionCheckFp =
+    configureFailPoint(stoppedSecondary, 'hangBeforeAutoGetShardVersionCheck', {lsid: lsid.id});
+var awaitFind =
+    startParallelShell(funWithArgs(checkSecondaryResults, st.s0.host, lsid), st.s0.port);
+
+jsTest.log('Command dispatched, waiting for failpoint on ' + stoppedSecondary);
+hangBeforeAutoGetShardVersionCheckFp.wait();
+jsTest.log('Failpoint reached, resuming replication on ' + stoppedSecondary);
+
+// Restart replication on the secondary.
+restartServerReplication(stoppedSecondary);
+jsTest.log('About to await replication');
+st.rs1.awaitReplication();
+jsTest.log('Awaited replication');
+
+// Do another find that targets the secondary so that it refreshes its metadata (i.e. installs the
+// post-migration metadata). Otherwise, the query in the parallel shell would find that the
+// metadata is unknown and retry (advancing the snapshot).
+const lsid2 = ({id: UUID()});
+checkSecondaryResults(st.s0.host, lsid2);
+
+// Let the parallel shell find continue. Despite the find having acquired a pre-migration storage
+// engine snapshot, the shard version check will pass (the router sent the post-migration SV, and
+// the shard has the post-migration SV installed too).
+hangBeforeAutoGetShardVersionCheckFp.off();
+
+// The assertion in checkSecondaryResults will fail: the query returns just 2 documents instead of
+// the expected 3.
+
+awaitFind();
+st.stop();
+})();
diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp
index 3e9dde0821b..d21fc6f84cf 100644
--- a/src/mongo/db/db_raii.cpp
+++ b/src/mongo/db/db_raii.cpp
@@ -1442,6 +1442,9 @@ AutoGetCollectionForReadCommandBase::
               _autoCollForRead.getNss().dbName()),
           options._deadline,
           options._secondaryNssOrUUIDs) {
+    if (_autoCollForRead.getNss() == NamespaceString("D.C")) {
+        logd("AutoGetCollectionForReadCommandBase {} {}", opCtx->getLogicalSessionId(), nsOrUUID);
+    }
     hangBeforeAutoGetShardVersionCheck.executeIf(
         [&](auto&) { hangBeforeAutoGetShardVersionCheck.pauseWhileSet(opCtx); },
         [&](const BSONObj& data) {
diff --git a/src/mongo/shell/utils_sh.js b/src/mongo/shell/utils_sh.js
index 84280578475..06ce698b840 100644
--- a/src/mongo/shell/utils_sh.js
+++ b/src/mongo/shell/utils_sh.js
@@ -185,6 +185,12 @@ sh.moveChunk = function(fullName, find, to) {
     return sh._adminCommand({moveChunk: fullName, find: find, to: to});
 };
 
+sh.moveRange = function(fullName, min, max, to) {
+    sh._checkFullName(fullName);
+    return sh._adminCommand(
+        {moveRange: fullName, min: min, max: max, toShard: to, _waitForDelete: true});
+};
+
 sh.setBalancerState = function(isOn) {
     assert(typeof (isOn) == "boolean", "Must pass boolean to setBalancerState");
     if (isOn) {
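
Usage sketch for the sh.moveRange helper added above, assuming a mongo shell connected to a mongos with 'D.C' sharded on {K: 1} as in the repro; the shard name 'repro-rs1' is purely illustrative:

    // Move the [100, 200) range of D.C to the named shard, waiting for the range deleter
    // (_waitForDelete: true), mirroring the existing sh.moveChunk calling convention.
    sh.moveRange('D.C', {K: 100}, {K: 200}, 'repro-rs1');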