Type: Bug
Resolution: Works as Designed
Priority: Major - P3
Affects Version/s: None
Component/s: Sharding
Labels: None
Sprint: Sharding EMEA 2021-12-13, Sharding EMEA 2021-12-27, Sharding EMEA 2022-01-10
The goal of this ticket is to investigate why secondary nodes are filtering oplog entries in the belief that they target orphaned documents. This shouldn't happen and probably means that the filtering information on the shard is not up to date.
We managed to reproduce it with the test below, after removing the skips we have on secondaries and adding an error whenever something is filtered on a secondary.
(function() {
'use strict';

var st = new ShardingTest({shards: 2, rs: {nodes: 2}});

assert.commandWorked(st.s.adminCommand({enablesharding: "test"}));
assert.commandWorked(st.s.adminCommand({
    setDefaultRWConcern: 1,
    defaultReadConcern: {level: "available"},
    defaultWriteConcern: {w: 1},
    writeConcern: {w: "majority"}
}));
st.ensurePrimaryShard('test', st.shard0.shardName);

st.rs0.add({'shardsvr': ""});
try {
    st.rs0.reInitiate();
} catch (e) {
    print(e);
}
st.rs0.awaitReplication();
st.rs0.waitForState(st.rs0.getSecondaries(), ReplSetTest.State.SECONDARY, 180 * 1000);

assert.commandWorked(st.s0.adminCommand({shardcollection: "test.foo", key: {x: 1}}));
assert.commandWorked(st.s0.adminCommand({split: "test.foo", middle: {x: 50}}));

var other = st.config.shards.findOne({_id: {$ne: st.shard0.shardName}});
assert.commandWorked(st.getDB('admin').runCommand({
    moveChunk: "test.foo",
    find: {x: 10},
    to: other._id,
    _secondaryThrottle: true,
    writeConcern: {w: 2},
    _waitForDelete: true
}));
st.rs0.awaitReplication();

var m = new Mongo(st.s.name);
var ts = m.getDB("test").foo;
m.setSecondaryOk();
printjson(ts.find().batchSize(5).explain());
// THIS TRIGGERS THE PROBLEM!!
const coll = st.s.getCollection("test.foo");
assert.commandWorked(coll.insert({primaryOnly: true, x: 60}));
print("DEBUG-9 ---#1---");
assert.commandWorked(coll.remove({primaryOnly: true, x: 60}, {writeConcern: {w: 3}}));
print("DEBUG-9 ---#2---");

st.stop();
})();
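To chase down whether the filtering information really is stale, it can help to dump what every node of the donor shard believes right after the moveChunk. The sketch below is not part of the original repro; it assumes the donor shard is st.rs0 and the namespace is "test.foo", and it simply reads the persisted range deletion tasks (config.rangeDeletions) and the getShardVersion metadata from each node so the primary's view can be compared against the secondaries'.

// Hypothetical helper, not from the original ticket: dump filtering-related state for
// every node of a shard's replica set so primary and secondary views can be compared.
function dumpFilteringState(rs, ns) {
    rs.nodes.forEach(function(node) {
        node.setSecondaryOk();  // allow reads against secondary nodes
        print("=== filtering state on " + node.host + " ===");
        // Persisted range deletion tasks for orphaned ranges on this shard.
        printjson(node.getDB("config").rangeDeletions.find().toArray());
        // Collection metadata as seen by this node; on secondaries this may come back as an
        // error document or report UNKNOWN metadata depending on the version, so just print it.
        printjson(node.getDB("admin").runCommand({getShardVersion: ns, fullMetadata: true}));
    });
}

// Example usage inside the repro, right after the moveChunk and awaitReplication:
// dumpFilteringState(st.rs0, "test.foo");

Comparing the output taken before the explain() read with the output taken before the remove with {w: 3} should show whether the secondary is acting on missing or stale filtering metadata, which is the hypothesis stated above.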