|
I applied the debugging.patch on top of commit 9810e6d6986a3dc5bc1b5c57a524155bdba45452 and ran the union_with_read_preference.js test with the "network.asio" log level set to 4 in order to trace both the requests and the responses. Comparing the two between a passing and a failing run made me realize that specifying --readMode=legacy to the mongo shell disables logical sessions. Note that the server technically supports causal consistency outside of logical sessions (since it is just a read concern option), but normal drivers won't ever expose it to our users that way. The rule for the default read concern level when a command is run on a secondary is based on whether the operation is in a logical session (though I'll admit I couldn't find where in the C++ that actually happens).
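For context, here is a minimal shell sketch of the kind of causally consistent read the test relies on when sessions are enabled (the database and collection names are illustrative, not taken from the test; I haven't verified this is byte-for-byte what the shell sends):

// Illustrative sketch only: in a causally consistent session the shell forwards the
// operationTime of the insert as readConcern.afterClusterTime on the subsequent
// secondary read. Per the above, --readMode=legacy appears to turn this off along
// with logical sessions, so neither the lsid nor afterClusterTime gets attached.
const session = db.getMongo().startSession({causalConsistency: true});
const sessionColl = session.getDatabase("test").getCollection("example");
assert.commandWorked(sessionColl.insert({_id: 1}, {writeConcern: {w: "majority"}}));
// This find carries an lsid and readConcern: {afterClusterTime: <insert's operationTime>}.
assert.eq(sessionColl.find().readPref("secondary").itcount(), 1);
session.endSession();
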
The only difference in the logs seems to be around when we insert the documents into that collection: in the passing version the insert triggers a refresh of the collection shard version on the shards, whereas in the failing version it doesn't.
The find command on the union_with_read_preference_db.second_union_target collection is triggering the collection metadata refresh.
[js_test:union_with_read_preference] 2020-04-13T20:34:36.351-0400 s20028| {"t":{"$date":"2020-04-13T20:34:36.350-04:00"},"s":"D4","c":"ASIO", "id":22596, "ctx":"conn10","msg":"startCommand","attr":{"request":"RemoteCommand 114 -- target:[hanamizu:20022] db:union_with_read_preference_db cmd:{ find: \"second_union_target\", readConcern: { afterClusterTime: Timestamp(1586824476, 48) }, runtimeConstants: { localNow: new Date(1586824476350), clusterTime: Timestamp(1586824476, 48) }, shardVersion: [ Timestamp(3, 0), ObjectId('5e95051b676fb0420f047bd3') ], lsid: { id: UUID(\"8e26cb19-e907-44d7-8ede-dc919acfdfa1\"), uid: BinData(0, E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855) } }"}}
|
...
|
[js_test:union_with_read_preference] 2020-04-13T20:34:36.374-0400 s20028| {"t":{"$date":"2020-04-13T20:34:36.374-04:00"},"s":"D2","c":"ASIO", "id":22597, "ctx":"conn10","msg":"Request finished with response","attr":{"requestId":114,"response":"{ operationTime: Timestamp(1586824476, 49), ok: 0.0, errmsg: \"epoch mismatch detected for union_with_read_preference_db.second_union_target\", code: 13388, codeName: \"StaleConfig\", ns: \"union_with_read_preference_db.second_union_target\", vReceived: Timestamp(3, 0), vReceivedEpoch: ObjectId('5e95051b676fb0420f047bd3'), vWanted: Timestamp(0, 0), vWantedEpoch: ObjectId('000000000000000000000000'), shardId: \"union_with_read_pref-rs0\", $gleStats: { lastOpTime: Timestamp(0, 0), electionId: ObjectId('000000000000000000000000') }, lastCommittedOpTime: Timestamp(1586824476, 47), $configServerState: { opTime: { ts: Timestamp(1586824476, 40), t: 1 } }, $clusterTime: { clusterTime: Timestamp(1586824476, 49), signature: { hash: BinData(0, 0000000000000000000000000000000000000000), keyId: 0 } } }"}}
|
[js_test:union_with_read_preference] 2020-04-13T20:34:36.374-0400 s20028| {"t":{"$date":"2020-04-13T20:34:36.374-04:00"},"s":"D1","c":"QUERY", "id":22840, "ctx":"conn10","msg":"Received error status for query {query_Short} on attempt {retries} of {kMaxRetries}: {ex}","attr":{"query_Short":"ns: union_with_read_preference_db.second_union_target query: {} sort: {} projection: {}","retries":1,"kMaxRetries":10,"ex":"StaleConfig{ ns: \"union_with_read_preference_db.second_union_target\", vReceived: Timestamp(3, 1), vReceivedEpoch: ObjectId('5e95051b676fb0420f047bd3'), vWanted: Timestamp(0, 0), vWantedEpoch: ObjectId('000000000000000000000000'), shardId: \"union_with_read_pref-rs1\" }: epoch mismatch detected for union_with_read_preference_db.second_union_target"}}
|
[js_test:union_with_read_preference] 2020-04-13T20:34:36.374-0400 s20028| {"t":{"$date":"2020-04-13T20:34:36.374-04:00"},"s":"D1","c":"SH_REFR", "id":24106, "ctx":"conn10","msg":"Refreshing cached collection {namespace} with version {currentCollectionVersion}","attr":{"namespace":"union_with_read_preference_db.second_union_target","currentCollectionVersion":{"0":{"$timestamp":{"t":3,"i":1}},"1":{"$oid":"5e95051b676fb0420f047bd3"}}}}
|
...
|
[js_test:union_with_read_preference] 2020-04-13T20:34:36.374-0400 d20022| {"t":{"$date":"2020-04-13T20:34:36.374-04:00"},"s":"I", "c":"SH_REFR", "id":24104, "ctx":"ShardServerCatalogCacheLoader-0","msg":"Refreshed cached collection","attr":{"namespace":"union_with_read_preference_db.second_union_target","newVersion":{"0":{"$timestamp":{"t":3,"i":1}},"1":{"$oid":"5e95051b676fb0420f047bd3"}},"oldVersion":"","durationMillis":6}}
|
[js_test:union_with_read_preference] 2020-04-13T20:34:36.378-0400 s20028| {"t":{"$date":"2020-04-13T20:34:36.377-04:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn10","msg":"Slow query","attr":{"type":"command","ns":"union_with_read_preference_db.second_union_target","appName":"MongoDB Shell","command":{"find":"second_union_target","filter":{},"lsid":{"id":{"$uuid":"8e26cb19-e907-44d7-8ede-dc919acfdfa1"}},"$clusterTime":{"clusterTime":{"$timestamp":{"t":1586824476,"i":48}},"signature":{"hash":{"$binary":{"base64":"AAAAAAAAAAAAAAAAAAAAAAAAAAA=","subType":"0"}},"keyId":0}},"readConcern":{"afterClusterTime":{"$timestamp":{"t":1586824476,"i":48}}},"$readPreference":{"mode":"secondary"},"$db":"union_with_read_preference_db"},"nShards":2,"cursorExhausted":true,"numYields":0,"nreturned":2,"reslen":333,"readConcern":{"afterClusterTime":{"$timestamp":{"t":1586824476,"i":48}},"provenance":"clientSupplied"},"protocol":"op_msg","durationMillis":26}}
|
If the goal of that query is to cause the secondaries to refresh, then explicitly specifying a "local" read concern on the find command would do the trick:
diff --git a/jstests/sharding/union_with_read_preference.js b/jstests/sharding/union_with_read_preference.js
index d13266e133..b0d78302a3 100644
--- a/jstests/sharding/union_with_read_preference.js
+++ b/jstests/sharding/union_with_read_preference.js
@@ -108,7 +108,11 @@ const secondTargetColl = mongosDB.second_union_target;
 st.shardColl(secondTargetColl, {_id: 1}, {_id: 0}, {_id: -1});
 assert.commandWorked(
     secondTargetColl.insert([{_id: -1, coll: 2}, {_id: 1, coll: 2}], {writeConcern: {w: "majority"}}));
-assert.eq(secondTargetColl.find().readPref("secondary").itcount(), 2);
+assert.eq(new DBCommandCursor(mongosDB, assert.commandWorked(mongosDB.runCommand({
+              query: {find: secondTargetColl.getName(), readConcern: {level: "local"}},
+              $readPreference: {mode: "secondary"}
+          }))).itcount(),
+          2);
 unionWithComment = 'complex union against secondary';
 let runAgg = () => mongosColl
                        .aggregate(
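
If the shell's cursor helpers compose the way I expect here, a less verbose equivalent might be the following (just a sketch; I haven't verified it produces the exact same command):

// Possible simpler form, assuming DBQuery's readConcern() and readPref() helpers attach
// readConcern: {level: "local"} and $readPreference: {mode: "secondary"} to the find.
assert.eq(secondTargetColl.find().readConcern("local").readPref("secondary").itcount(), 2);
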
I've been trying to write a test for this for a while now and I've been hitting some really strange behavior. Here's a list of the weird things I've seen.
- In the linked test union_with_read_preference.js, it looks like I can consistently reproduce a case where the innermost $unionWith (against 'secondTargetColl', AKA 'second_union_target') does not find the document {_id: -1}. Looking at the logs, it seems the shard versioning protocol is breaking down. Here we can see that the innermost pipeline was sent with an "UNSHARDED" shard version (which is incorrect), but it completed successfully and returned 1 document (the one with {_id: 1}):
[js_test:union_with_read_preference] 2020-03-05T10:24:06.208-0500 d30024| {"t":{"$date":"2020-03-05T10:24:06.208-0500"},"s":"I", "c":"-", "id":51803,"ctx":"conn37","msg":"slow query","attr":{"type":"command","ns":"union_with_read_preference.second_union_target","appName":"MongoDB Shell","command":{"aggregate":"second_union_target","pipeline":[],"fromMongos":false,"collation":{"locale":"simple"},"cursor":{"batchSize":101},"runtimeConstants":{"localNow":{"$date":"2020-03-05T10:24:06.098-0500"},"clusterTime":{"$timestamp":{"t":1583421846,"i":30}}},"readConcern":{"level":"majority","afterClusterTime":{"$timestamp":{"t":1583421846,"i":30}},"provenance":"clientSupplied"},"writeConcern":{"w":1,"wtimeout":0,"provenance":"implicitDefault"},"shardVersion":[{"$timestamp":{"t":0,"i":0}},{"$oid":"000000000000000000000000"}],"databaseVersion":{"uuid":{"$uuid":"5ab2b608-53a2-49ca-8bb8-483931638606"},"lastMod":1},"comment":"complex union against secondary","lsid":{"id":{"$uuid":"c5bc416c-764c-40fd-99b8-96387f35c9b8"},"uid":{"$binary":{"base64":"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=","subType":"0"}}},"$readPreference":{"mode":"secondary"},"$clusterTime":{"clusterTime":{"$timestamp":{"t":1583421846,"i":31}},"signature":{"hash":{"$binary":{"base64":"r9fkUy4pkQnK8w98NcWj4Sw9LOI=","subType":"0"}},"keyId":6800745014277177366}},"$client":{"application":{"name":"MongoDB Shell"},"driver":{"name":"MongoDB Internal Client","version":"4.3.4"},"os":{"type":"Linux","name":"Ubuntu","architecture":"x86_64","version":"18.04"},"mongos":{"host":"franklinia:30028","client":"127.0.0.1:32946","version":"4.3.4"}},"$configServerState":{"opTime":{"ts":{"$timestamp":{"t":1583421846,"i":22}},"t":1}},"$db":"union_with_read_preference"},"planSummary":"COLLSCAN","keysExamined":0,"docsExamined":1,"cursorExhausted":true,"numYields":0,"nreturned":1,"reslen":431,"locks":{"ReplicationStateTransition":{"acquireCount":{"w":1}},"Global":{"acquireCount":{"r":1}},"Database":{"acquireCount":{"r":1}},"Collection":{"acquireCount":{"r":1}},"Mutex":{"acquireCount":{"r":2}}},"readConcern":{"level":"majority","afterClusterTime":{"$timestamp":{"t":1583421846,"i":30}},"provenance":"clientSupplied"},"writeConcern":{"w":1,"wtimeout":0,"provenance":"implicitDefault"},"storage":{},"protocol":"op_msg","durationMillis":0}}
|
Adding some logging in 'CollectionShardingRuntime::_getMetadataWithVersionCheckAt', I can see that the recipient node does not have any knowledge of this collection:
[js_test:union_with_read_preference] 2020-03-05T10:24:06.208-0500 d30024| {"t":{"$date":"2020-03-05T10:24:06.207-0500"},"s":"I", "c":"SHARDING","id":4661200,"ctx":"conn37","msg":"Received shard version {version}","attr":{"version":{"0":{"$timestamp":{"t":0,"i":0}},"1":{"$oid":"000000000000000000000000"}}}}
|
[js_test:union_with_read_preference] 2020-03-05T10:24:06.208-0500 d30024| {"t":{"$date":"2020-03-05T10:24:06.207-0500"},"s":"I", "c":"SHARDING","id":4661201,"ctx":"conn37","msg":"Do I have a wanted shard version? {yes_or_no}","attr":{"yes_or_no":false}}
|
This means the two shards actually agree that the collection is unsharded (even though it is not). This would all make sense if the secondary simply had not yet replicated the sharding catalog information, but it did receive the writes which came after the shardCollection command - and the command also carried a readConcern with afterClusterTime due to the causally consistent session, so I don't think that should be happening here.
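One extra check that might help here (my own debugging idea, not something the test does) is to read the shard's persisted routing cache directly on the secondary to see whether the metadata ever replicated:

// Debugging sketch: the config.cache.* collections are the shard's on-disk routing
// cache that the filtering metadata gets refreshed from, so if this entry is missing
// on the secondary, the "UNSHARDED" answer is at least self-consistent.
const shardSecondary = st.rs0.getSecondary();  // or st.rs1, whichever answered UNSHARDED
shardSecondary.setSlaveOk();
printjson(shardSecondary.getDB("config")
              .getCollection("cache.collections")
              .findOne({_id: "union_with_read_preference.second_union_target"}));
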
- When I try to reproduce this issue with just a stale mongos (in read_preference_stale_mongos.js), everything seems to work fine and I cannot observe a similar problem (a rough sketch of the shape of that test follows the logs below). In those logs it looks like a replica set monitor may be helping somehow. In this log snapshot, s30029 is the second mongos, which did not shard the collection, and d30023 is a secondary node. We can see that this time the secondary learns/knows the collection is sharded:
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.996-0500 [jsTest] ----
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.996-0500 [jsTest] CHARLIE shardCollection done
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.996-0500 [jsTest] ----
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.996-0500
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.997-0500 s30029| {"t":{"$date":"2020-03-04T17:46:28.997-0500"},"s":"D1","c":"NETWORK", "id":4333211,"ctx":"conn10","msg":"RSM {setName} getHosts: {readPref} -> {result}","attr":{"readPref":"{ mode: \"primary\" }","setName":"read_pref_stale_mongos-rs1","result":"franklinia:30023; "}}
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.998-0500 d30023| {"t":{"$date":"2020-03-04T17:46:28.997-0500"},"s":"I", "c":"NETWORK", "id":22943,"ctx":"listener","msg":"connection accepted from {session_remote} #{session_id} ({connectionCount}{word} now open)","attr":{"session_remote":"127.0.0.1:45860","session_id":48,"connectionCount":20,"word":" connections"}}
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.998-0500 d30023| {"t":{"$date":"2020-03-04T17:46:28.998-0500"},"s":"I", "c":"NETWORK", "id":51800,"ctx":"conn48","msg":"received client metadata from {remote} {client}: {doc}","attr":{"remote":"127.0.0.1:45860","client":"conn48","doc":{"driver":{"name":"NetworkInterfaceTL","version":"4.3.4"},"os":{"type":"Linux","name":"Ubuntu","architecture":"x86_64","version":"18.04"}}}}
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.998-0500 d30023| {"t":{"$date":"2020-03-04T17:46:28.998-0500"},"s":"I", "c":"-", "id":51803,"ctx":"conn48","msg":"slow query","attr":{"type":"command","ns":"admin.$cmd","command":{"isMaster":1,"client":{"driver":{"name":"NetworkInterfaceTL","version":"4.3.4"},"os":{"type":"Linux","name":"Ubuntu","architecture":"x86_64","version":"18.04"}},"hostInfo":"franklinia:30029","compression":["snappy","zstd","zlib"],"internalClient":{"minWireVersion":9,"maxWireVersion":9},"hangUpOnStepDown":false,"saslSupportedMechs":"local.__system","$db":"admin"},"numYields":0,"reslen":1028,"locks":{},"protocol":"op_query","durationMillis":0}}
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.998-0500 d30023| {"t":{"$date":"2020-03-04T17:46:28.998-0500"},"s":"I", "c":"SHARDING","id":4661200,"ctx":"conn48","msg":"Received shard version {version}","attr":{"version":{"0":{"$timestamp":{"t":0,"i":0}},"1":{"$oid":"000000000000000000000000"}}}}
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.998-0500 d30023| {"t":{"$date":"2020-03-04T17:46:28.998-0500"},"s":"I", "c":"SHARDING","id":4661201,"ctx":"conn48","msg":"Do I have a wanted shard version? {yes_or_no}","attr":{"yes_or_no":true}}
|
[js_test:read_preference_stale_mongos] 2020-03-04T17:46:28.998-0500 d30023| {"t":{"$date":"2020-03-04T17:46:28.998-0500"},"s":"I", "c":"SHARDING","id":4661202,"ctx":"conn48","msg":"Wanted shard version? {wanted}","attr":{"wanted":{"0":{"$timestamp":{"t":3,"i":1}},"1":{"$oid":"5e602fc47e032f943bc14cec"}}}}
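
For reference, the shape of that stale-mongos attempt is roughly the following (a from-memory sketch rather than the actual contents of read_preference_stale_mongos.js; the db/collection names are made up):

// Rough sketch only: shard and populate the collection through one mongos, then run
// the secondary read through a second mongos that has never seen the collection.
const st = new ShardingTest({mongos: 2, shards: 2, rs: {nodes: 2}});
const freshMongos = st.s0;
const staleMongos = st.s1;
const coll = freshMongos.getDB("read_pref_stale_mongos").coll;
st.shardColl(coll, {_id: 1}, {_id: 0}, {_id: -1});
jsTestLog("CHARLIE shardCollection done");
assert.commandWorked(coll.insert([{_id: -1}, {_id: 1}], {writeConcern: {w: "majority"}}));
// Unlike the $unionWith case above, this read through the stale mongos finds both documents.
assert.eq(staleMongos.getDB("read_pref_stale_mongos").coll.find().readPref("secondary").itcount(), 2);
st.stop();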