From 446b052a6241a5d12369355a94952f6c59f964c8 Mon Sep 17 00:00:00 2001
From: Jordi Serra Torrens
Date: Mon, 8 Jul 2024 16:35:18 +0000
Subject: [PATCH] Repro SERVER-92189

---
// Review notes. Everything between the "---" line and the first "diff --git"
// line is free-form text that is skipped when the patch is applied.
//
// Purpose: adds jstests/sharding/repro-92189.js, a reproduction script for
// SERVER-92189, and changes the two hangDuringBatchRemove fail point call
// sites in write_ops_exec.cpp so that they pass the namespace as an extra
// trailing argument.
//
// Test flow (jstests/sharding/repro-92189.js):
//   1. Create a ShardingTest with shards: 2, shard test.foo on {a: "hashed"}
//      and insert five documents ({a: 0} .. {a: 4}).
//   2. Enable hangDuringBatchRemove on the st.rs0 primary with fail point data
//      {nss: "test.foo"}.
//   3. From a parallel shell started on st.s.port, run a bulkWrite with two
//      delete ops that both target nsInfo index 0: a multi-delete with an
//      empty filter and a single delete with filter {b: -10}. The shell
//      asserts that the command worked.
//   4. Wait until the fail point is hit, then sleep for one second.
//   5. Enable reshardingPauseDonorAfterBlockingReads on the st.rs0 primary and,
//      from a second parallel shell, run reshardCollection on test.foo with
//      the new key {b: 1} and an explicit shardDistribution split at {b: 0}
//      between shard0 and shard1.
//   6. Once the resharding fail point is hit, turn hangDuringBatchRemove off
//      and sleep for five seconds; per the test's own log message this lets
//      the multi-write continue and hit the resharding critical section.
//   7. Turn the resharding fail point off, join the resharding shell, join the
//      bulkWrite shell, and stop the cluster.
//
// Server change (src/mongo/db/ops/write_ops_exec.cpp): in performDelete and in
// performSingleDeleteOp the waitWhileFailPointEnabled(...) call is re-wrapped
// one argument per line and gains a trailing namespace argument (nss and ns
// respectively). The log statements inside the lambdas are unchanged.
//
// NOTE(review): presumably the extra namespace argument is what allows the
// {nss: ...} fail point data used by the test to restrict the hang to a single
// namespace -- confirm against CurOpFailpointHelpers::waitWhileFailPointEnabled.
// NOTE(review): the script synchronises with fixed sleeps (1s and 5s) after
// the fail point waits, so the interleaving it aims for is timing-dependent.
// NOTE(review): this copy of the patch had its line breaks and indentation
// collapsed; leading whitespace below (in particular on the C++ context
// lines) is reconstructed and may need `git apply --ignore-whitespace`.
 jstests/sharding/repro-92189.js     | 58 +++++++++++++++++++++++++++++
 src/mongo/db/ops/write_ops_exec.cpp | 16 ++++++--
 2 files changed, 70 insertions(+), 4 deletions(-)
 create mode 100644 jstests/sharding/repro-92189.js

diff --git a/jstests/sharding/repro-92189.js b/jstests/sharding/repro-92189.js
new file mode 100644
index 00000000000..25532ef5bc9
--- /dev/null
+++ b/jstests/sharding/repro-92189.js
@@ -0,0 +1,58 @@
+import {configureFailPoint} from "jstests/libs/fail_point_util.js";
+import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js";
+
+let st = ShardingTest({shards: 2});
+
+const dbName = "test";
+const collName = "foo";
+const coll = st.s.getDB(dbName)[collName];
+
+assert.commandWorked(st.s.adminCommand({shardCollection: coll.getFullName(), key: {a: "hashed"}}));
+assert.commandWorked(coll.insertMany([{a: 0}, {a: 1}, {a: 2}, {a: 3}, {a: 4}]));
+
+let fp =
+    configureFailPoint(st.rs0.getPrimary(), "hangDuringBatchRemove", {nss: coll.getFullName()});
+
+const awaitBulkWrite = startParallelShell(() => {
+    assert.commandWorked(db.adminCommand({
+        bulkWrite: 1,
+        ops: [{delete: 0, filter: {}, multi: true}, {delete: 0, filter: {b: -10}, multi: false}],
+        nsInfo: [{ns: "test.foo"}, {ns: "test.foo"}],
+    }));
+}, st.s.port);
+
+jsTest.log("--DEBUG-- Waiting for fp");
+fp.wait();
+jsTest.log("--DEBUG-- Hit fp");
+sleep(1 * 1000);
+
+let fpBlockReshardingDonorInCriticalSection =
+    configureFailPoint(st.rs0.getPrimary(), "reshardingPauseDonorAfterBlockingReads");
+
+const awaitResharding =
+    startParallelShell(funWithArgs(function(shard0, shard1) {
+        assert.commandWorked(db.adminCommand({
+            reshardCollection: "test.foo",
+            key: {b: 1},
+            shardDistribution: [
+                {shard: shard0, min: {b: MinKey}, max: {b: 0}},
+                {shard: shard1, min: {b: 0}, max: {b: MaxKey}}
+            ]
+        }));
+    }, st.shard0.shardName, st.shard1.shardName), st.s.port);
+
+jsTest.log("--DEBUG-- Waiting for resharding CS fp");
+fpBlockReshardingDonorInCriticalSection.wait();
+jsTest.log("--DEBUG-- Hit resharding CS fp");
+
+jsTest.log("--JORDI-- Letting multi-write continue and hit the resharding CS.");
+fp.off();
+sleep(5 * 1000);
+
+fpBlockReshardingDonorInCriticalSection.off();
+awaitResharding();
+jsTest.log("--JORDI-- Resharding finished");
+
+awaitBulkWrite();
+
+st.stop();
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index 08057301d50..219df95bd57 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -982,11 +982,15 @@ long long performDelete(OperationContext* opCtx,
                               !opCtx->isRetryableWrite());

     CurOpFailpointHelpers::waitWhileFailPointEnabled(
-        &hangDuringBatchRemove, opCtx, "hangDuringBatchRemove", []() {
+        &hangDuringBatchRemove,
+        opCtx,
+        "hangDuringBatchRemove",
+        []() {
             LOGV2(7280401,
                   "Batch remove - hangDuringBatchRemove fail point enabled. Blocking until fail "
                   "point is disabled");
-        });
+        },
+        nss);
     if (MONGO_unlikely(failAllRemoves.shouldFail())) {
         uasserted(ErrorCodes::InternalError, "failAllRemoves failpoint active!");
     }
@@ -1849,11 +1853,15 @@ static SingleWriteResult performSingleDeleteOp(OperationContext* opCtx,
     request.setHint(op.getHint());

     CurOpFailpointHelpers::waitWhileFailPointEnabled(
-        &hangDuringBatchRemove, opCtx, "hangDuringBatchRemove", []() {
+        &hangDuringBatchRemove,
+        opCtx,
+        "hangDuringBatchRemove",
+        []() {
             LOGV2(20891,
                   "Batch remove - hangDuringBatchRemove fail point enabled. Blocking until fail "
                   "point is disabled");
-        });
+        },
+        ns);
     if (MONGO_unlikely(failAllRemoves.shouldFail())) {
         uasserted(ErrorCodes::InternalError, "failAllRemoves failpoint active!");
     }
--
2.34.1