-
Type: Bug
-
Resolution: Fixed
-
Priority: Major - P3
-
Affects Version/s: 4.1.6
-
Component/s: Replication, Sharding
-
None
-
Fully Compatible
-
ALL
-
v4.0
-
Sharding 2019-02-25, Sharding 2019-03-11
For update and delete operations, a change stream relies upon the kObject2FieldName and kObjectFieldName fields of the oplog entry, respectively, to provide the documentKey for that event; that is, an object containing the value of each field of the shard key, plus the _id field if it is not already present as part of the shard key.
On the 4.0 branch, this is working as expected. On current master, it appears that in at least some scenarios, the update and remove oplog entries incorrectly omit the shard key fields and provide only the _id.
// Collection 'test.shardkeymissingtest' is sharded on {shardKey: 1}
mongos> db.getSiblingDB("config").collections.find({_id: "test.shardkeymissingtest"})
{ "_id" : "test.shardkeymissingtest", "lastmodEpoch" : ObjectId("5c619cea471abef89beca4df"), "lastmod" : ISODate("1970-02-19T17:02:47.296Z"), "dropped" : false, "key" : { "shardKey" : 1 }, "unique" : false, "uuid" : UUID("7db17aa6-327f-49a8-b89a-ee90b47f20da") }
// Insert documents on each shard
mongos> db.shardkeymissingtest.insert({shardKey: 25, x:1})
WriteResult({ "nInserted" : 1 })
mongos> db.shardkeymissingtest.insert({shardKey: 55, x:1})
WriteResult({ "nInserted" : 1 })
...
// Start watching the collection
mongos> let csCursor = db.shardkeymissingtest.watch()
// Issue a multi-update
mongos> db.shardkeymissingtest.update({}, {$set: {updated: true}}, {multi: true})
WriteResult({ "nMatched" : 3, "nUpserted" : 0, "nModified" : 3 })
// Obtain the change stream results. Note that the 'documentKey' field does NOT include the 'shardKey' field, only _id
mongos> csCursor
{ "_id" : { "_data" : "825C61A95E000000012B022C0100296E5A10047DB17AA6327F49A8B89AEE90B47F20DA463C5F6964003C6162634431000004" }, "operationType" : "update", "clusterTime" : Timestamp(1549904222, 1), "ns" : { "db" : "test", "coll" : "shardkeymissingtest" }, "documentKey" : { "_id" : "abcD1" }, "updateDescription" : { "updatedFields" : { "updated" : true }, "removedFields" : [ ] } }
{ "_id" : { "_data" : "825C61A95E000000022B022C0100296E5A10047DB17AA6327F49A8B89AEE90B47F20DA46645F696400645C61A92CFC5E304063E4E4C20004" }, "operationType" : "update", "clusterTime" : Timestamp(1549904222, 2), "ns" : { "db" : "test", "coll" : "shardkeymissingtest" }, "documentKey" : { "_id" : ObjectId("5c61a92cfc5e304063e4e4c2") }, "updateDescription" : { "updatedFields" : { "updated" : true }, "removedFields" : [ ] } }
mongos> db.shardkeymissingtest.getShardDistribution()
// Remove all the documents.
mongos> db.shardkeymissingtest.remove({})
WriteResult({ "nRemoved" : 3 })
// The documentKey is again incomplete in each of the resulting change stream events.
mongos> it
{ "_id" : { "_data" : "825C61AC77000000012B022C0100296E5A10047DB17AA6327F49A8B89AEE90B47F20DA463C5F6964003C6162634431000004" }, "operationType" : "delete", "clusterTime" : Timestamp(1549905015, 1), "ns" : { "db" : "test", "coll" : "shardkeymissingtest" }, "documentKey" : { "_id" : "abcD1" } }
{ "_id" : { "_data" : "825C61AC77000000012B022C0100296E5A10047DB17AA6327F49A8B89AEE90B47F20DA46645F696400645C61A930FC5E304063E4E4C30004" }, "operationType" : "delete", "clusterTime" : Timestamp(1549905015, 1), "ns" : { "db" : "test", "coll" : "shardkeymissingtest" }, "documentKey" : { "_id" : ObjectId("5c61a930fc5e304063e4e4c3") } }
This behaviour persists even when the shards are force-refreshed via _flushRoutingTableCacheUpdates, and even in cases where internally examining the ScopedCollectionMetadata confirms that the mongod is aware of the shard key for this collection.