[SERVER-54127] Retryable update by _id may execute more than once if intervening write changes document's owning shard Created: 28/Jan/21  Updated: 12/Dec/23

Status: Backlog
Project: Core Server
Component/s: Sharding
Affects Version/s: None
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Max Hirschhorn Assignee: Backlog - Cluster Scalability
Resolution: Unresolved Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Issue Links:
Related
is related to SERVER-54019 Session migration from moveChunk can ... Closed
Assigned Teams:
Cluster Scalability
Operating System: ALL
Steps To Reproduce:

python buildscripts/resmoke.py run --suite=sharding repro_retryable_update_by_id_multiple_execution.js

repro_retryable_update_by_id_multiple_execution.js

(function() {
"use strict";

load("jstests/sharding/libs/create_sharded_collection_util.js");

// Reproduction: a retryable update by _id is sent once, the targeted document's shard key is then
// changed inside a transaction, and the very same retryable update is sent again. The closing
// assertion expects the counter to have been incremented exactly once.
const cluster = new ShardingTest({mongos: 1, config: 1, shards: 2, rs: {nodes: 1}});

const testDB = cluster.s.getDB("test");
const coll = testDB.getCollection("mycoll");

// Shard on {x: 1}: the two chunks below x = 10 are placed on shard0 and the two chunks from
// x = 10 upwards are placed on shard1.
const shard0Name = cluster.shard0.shardName;
const shard1Name = cluster.shard1.shardName;
const chunkLayout = [
    {min: {x: MinKey}, max: {x: 0}, shard: shard0Name},
    {min: {x: 0}, max: {x: 10}, shard: shard0Name},
    {min: {x: 10}, max: {x: 20}, shard: shard1Name},
    {min: {x: 20}, max: {x: MaxKey}, shard: shard1Name},
];
CreateShardedCollectionUtil.shardCollectionWithChunks(coll, {x: 1}, chunkLayout);

// The single document starts at x = 5, i.e. inside the [0, 10) chunk placed on shard0.
assert.commandWorked(coll.insert({_id: 0, x: 5, counter: 0}));

// Opens a session through mongos with causal consistency and automatic retryable writes both
// switched off, and returns it together with a handle on the test collection bound to it.
const openSessionCollection = () => {
    const session = cluster.s.startSession({causalConsistency: false, retryWrites: false});
    const sessionColl = session.getDatabase(testDB.getName()).getCollection(coll.getName());
    return {session, sessionColl};
};

const {sessionColl: retryableWriteColl} = openSessionCollection();
const {session: txnSession, sessionColl: txnColl} = openSessionCollection();

// Updates by _id are broadcasted to all shards which own chunks for the collection. Session
// information from the retryable write which touched the document isn't migrated when the
// document's shard key value is updated. This allows the new owning shard to execute the statement
// a second time.
//
// The command carries an explicit txnNumber so that sending the identical command twice on the
// same session counts as a retry of the same write. Only _id 0 was inserted above; no document
// with _id 10000 was inserted by this script.
const incrementCounterById = (id) => ({q: {_id: id}, u: {$inc: {counter: 1}}});
const retryableUpdate = {
    updates: [incrementCounterById(0), incrementCounterById(10000)],
    txnNumber: NumberLong(0),
};

// First execution: exactly one document is matched and modified.
const {n, nModified} = retryableWriteColl.runCommand("update", retryableUpdate);
assert.eq({n, nModified}, {n: 1, nModified: 1});

// In a separate session, change the document's shard key from x = 5 to x = 25 inside a
// transaction. x = 25 falls in the [20, MaxKey) chunk, which was placed on shard1.
txnSession.startTransaction();
assert.commandWorked(txnColl.update({x: 5}, {$set: {x: 25}}));
assert.commandWorked(txnSession.commitTransaction_forTesting());

// Retry: resend the identical command on the original session and log what comes back.
const retryResponse = retryableWriteColl.runCommand("update", retryableUpdate);
print(`secondRes: ${tojsononeline(retryResponse)}`);

// A correctly de-duplicated retry leaves the counter at 1.
const expectedDoc = {_id: 0, x: 25, counter: 1};
assert.eq(coll.findOne({_id: 0}), expectedDoc);

cluster.stop();
})();

Participants:

 Description   

Updates by _id are broadcast to all shards which own chunks for the collection when the collection is not sharded by _id. Session information from any retryable writes which touched the document isn't migrated when the document's shard key value is updated and the document moves to a different shard. This allows the new owning shard to execute those statements a second time.

[js_test:repro_retryable_update_by_id_multiple_execution] 2021-01-28T23:10:11.492+0000 secondRes: {  "nModified" : 2,  "n" : 2,  "ok" : 1,  "$clusterTime" : {  "clusterTime" : Timestamp(1611875411, 85),  "signature" : {  "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),  "keyId" : NumberLong(0) } },  "operationTime" : Timestamp(1611875411, 85) }
[js_test:repro_retryable_update_by_id_multiple_execution] 2021-01-28T23:10:11.492+0000 s20023| {"t":{"$date":"2021-01-28T23:10:11.440+00:00"},"s":"I",  "c":"COMMAND",  "id":51803,   "ctx":"conn6","msg":"Slow query","attr":{"type":"command","ns":"test.mycoll","appName":"MongoDB Shell","command":{"find":"mycoll","filter":{"_id":0.0},"limit":1.0,"singleBatch":true,"lsid":{"id":{"$uuid":"b5e17cb0-ce96-436b-9523-2875959096a1"}},"$clusterTime":{"clusterTime":{"$timestamp":{"t":1611875411,"i":85}},"signature":{"hash":{"$binary":{"base64":"AAAAAAAAAAAAAAAAAAAAAAAAAAA=","subType":"0"}},"keyId":0}},"$db":"test"},"nShards":2,"cursorExhausted":true,"numYields":0,"nreturned":1,"reslen":274,"remote":"127.0.0.1:45432","protocol":"op_msg","durationMillis":0}}
[js_test:repro_retryable_update_by_id_multiple_execution] 2021-01-28T23:10:11.492+0000 uncaught exception: Error: [{ "_id" : 0, "x" : 25, "counter" : 2 }] != [{ "_id" : 0, "x" : 25, "counter" : 1 }] are not equal :


Generated at Thu Feb 08 05:32:44 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.