diff --git a/jstests/sharding/repro.js b/jstests/sharding/repro.js index e69de29bb2d..44d810189f3 100644 --- a/jstests/sharding/repro.js +++ b/jstests/sharding/repro.js @@ -0,0 +1,67 @@ +const st = new ShardingTest({ + shards: 2, + rs: {setParameter: {logComponentVerbosity: tojson({sharding: 4})}}, + mongosOptions: {setParameter: {logComponentVerbosity: tojson({sharding: 4})}} +}); + +const configDB = st.s.getDB('config'); +const shard0 = st.shard0.shardName; +const shard1 = st.shard1.shardName; + +const dbName = 'testDB'; +const collName = 'shardedColl'; +const nss = dbName + '.' + collName; +const db = st.s.getDB(dbName); + +// Setup a sharded collection with chunks {[min, 5), [5, max)}, each hosted by a different shard. +assert.commandWorked(st.s.adminCommand({enableSharding: dbName, primaryShard: shard0})); +assert.commandWorked(st.s.adminCommand({shardCollection: nss, key: {sk: 1}})); + +assert.commandWorked(st.s.adminCommand({split: nss, middle: {sk: 5}})); +assert.commandWorked( + st.s.adminCommand({moveChunk: nss, find: {sk: 5}, to: shard1, _waitForDelete: true})); + +// Perform a sequence of 3 bulk writes to test insert/update/delete operation: +// - Each bulk will generate a batch write on each shard. +// - Each Batch write targeting shard0 is expected to fail at least once, but all bulk requests have +// to eventually succeed and persist the correct content. + +/// +jsTest.log("Testing bulk insert"); +let bulk = db[collName].initializeUnorderedBulkOp(); +for (let i = 0; i <= 5; i++) { + bulk.insert({sk: i, counter: 1, fieldWithoutIndex: i}); +} + +// No duplicate key errors, no missing docs. +assert.commandWorked(bulk.execute()); +assert.eq( + 6, db[collName].count(), `Failed bulk insert: ${tojsononeline(db[collName].find().toArray())}`); + +/// +jsTest.log("Testing bulk updates"); +bulk = db[collName].initializeUnorderedBulkOp(); +for (var i = 0; i <= 5; i++) { + bulk.find({sk: i}).update({$inc: {counter: 1}}); +} + +// No repeated or missing write ops. +assert.commandWorked(bulk.execute()); +assert.eq(6, + db[collName].count({counter: 2}), + `Failed bulk update: ${tojsononeline(db[collName].find().toArray())}`); + +/// +jsTest.log("Testing bulk deletes"); + +bulk = db[collName].initializeUnorderedBulkOp(); +for (var i = 0; i <= 5; i++) { + bulk.find({fieldWithoutIndex: i}).remove(); +} + +// Collection must be empty. +assert.commandWorked(bulk.execute()); +assert.eq( + 0, db[collName].count(), `Failed bulk delete: ${tojsononeline(db[collName].find().toArray())}`); + +st.stop(); diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp index 59e757b29e1..7a8fa157ef8 100644 --- a/src/mongo/db/ops/write_ops_exec.cpp +++ b/src/mongo/db/ops/write_ops_exec.cpp @@ -626,7 +626,7 @@ bool insertBatchAndHandleErrors(OperationContext* opCtx, auto txnParticipant = TransactionParticipant::get(opCtx); auto inTxn = txnParticipant && opCtx->inMultiDocumentTransaction(); - bool shouldProceedWithBatchInsert = true; + bool shouldProceedWithBatchInsert = false; // FORCE one-at-a-time op insertion. try { acquireCollection(); diff --git a/src/mongo/db/s/sharding_write_router.cpp b/src/mongo/db/s/sharding_write_router.cpp index d66f6dc9158..efb6aa18ef7 100644 --- a/src/mongo/db/s/sharding_write_router.cpp +++ b/src/mongo/db/s/sharding_write_router.cpp @@ -40,6 +40,7 @@ #include "mongo/s/chunk.h" #include "mongo/s/grid.h" #include "mongo/s/resharding/type_collection_fields_gen.h" +#include "mongo/s/shard_cannot_refresh_due_to_locks_held_exception.h" #include "mongo/util/assert_util.h" namespace mongo { @@ -54,6 +55,18 @@ ShardingWriteRouter::ShardingWriteRouter(OperationContext* opCtx, const Namespac return; } + // Raise periodically ShardCannotRefreshDueToLocksHeldInfo to simplify repro conditions. + // (The exception is usually thrown in case nss is a collection under resharding and its + // catalog cache cannot be immediately accessed). + if (nss == NamespaceString::createNamespaceString_forTest("testDB", "shardedColl")) { + static int count = 0; + ++count; + if (count % 4 == 0) { + logd("Injecting ShardCannotRefreshDueToLocksHeldInfo exception! {}", count); + uasserted(Status{ShardCannotRefreshDueToLocksHeldInfo(nss), "Injected error"}); + } + } + _reshardingKeyPattern = _collDesc->getReshardingKeyIfShouldForwardOps(); if (_reshardingKeyPattern) { _ownershipFilter = diff --git a/src/mongo/s/write_ops/batch_write_exec.cpp b/src/mongo/s/write_ops/batch_write_exec.cpp index cec4a485088..26db46c7a91 100644 --- a/src/mongo/s/write_ops/batch_write_exec.cpp +++ b/src/mongo/s/write_ops/batch_write_exec.cpp @@ -93,7 +93,7 @@ void noteStaleShardResponses(OperationContext* opCtx, NSTargeter* targeter) { for (const auto& error : staleErrors) { LOGV2_DEBUG(22902, - 4, + 0, "Noting stale config response", "shardId"_attr = error.endpoint.shardName, "status"_attr = error.error.getStatus()); @@ -206,6 +206,11 @@ bool processResponseFromRemote(OperationContext* opCtx, trackedErrors.startTracking(ErrorCodes::StaleDbVersion); trackedErrors.startTracking(ErrorCodes::TenantMigrationAborted); + if (targeter.getNS() == + NamespaceString::createNamespaceString_forTest("testDB", "shardedColl")) { + logd("In batch_write_exec.cpp response: {} from {}", batchedCommandResponse, shardInfo); + } + LOGV2_DEBUG(22907, 4, "Write results received", @@ -610,6 +615,9 @@ void BatchWriteExec::executeBatch(OperationContext* opCtx, size_t nextOpIndex = 0; while (!batchOp.isFinished() && !abortBatch) { + if (nss == NamespaceString::createNamespaceString_forTest("testDB", "shardedColl")) { + logd("Running executeBatch round {} / {} / {}", rounds, numCompletedOps, nextOpIndex); + } // // Get child batches to send using the targeter // diff --git a/src/mongo/s/write_ops/batch_write_op.cpp b/src/mongo/s/write_ops/batch_write_op.cpp index 2d44ed6d8d5..93bb3086bd2 100644 --- a/src/mongo/s/write_ops/batch_write_op.cpp +++ b/src/mongo/s/write_ops/batch_write_op.cpp @@ -720,6 +720,7 @@ void BatchWriteOp::noteBatchResponse(const TargetedWriteBatch& targetedBatch, // Handle batch and per-item errors if (response.isErrDetailsSet()) { // Per-item errors were set + logd("Per-item errors set. {}", response); itemErrors.insert( itemErrors.begin(), response.getErrDetails().begin(), response.getErrDetails().end());