Secondary nodes filter out legit write operations believing that they were on orphans

XMLWordPrintableJSON

    • Type: Bug
    • Resolution: Works as Designed
    • Priority: Major - P3
    • None
    • Affects Version/s: None
    • Component/s: Sharding
    • None
    • Sharding EMEA 2021-12-13, Sharding EMEA 2021-12-27, Sharding EMEA 2022-01-10
    • None
    • None
    • None
    • None
    • None
    • None
    • None

      The goal of this ticket is to investigate why secondary nodes filter out oplog entries in the belief that those entries affect orphaned documents. This should not happen, and it probably means that the filtering information on the shard is not up to date.

      We managed to reproduce it with the test below, after removing the skips that we have on secondaries and adding an error that is raised whenever something is filtered on a secondary.

      // Reproduction script for the mongo shell test harness (relies on the
      // harness-provided globals ShardingTest, ReplSetTest, Mongo, assert, print
      // and printjson). It sets up a sharded collection, moves one chunk away from
      // shard0, runs an explain through a secondaryOk connection, and then inserts
      // and removes a document that belongs to the chunk that was NOT moved.
      (function() {
      'use strict';

      // Cluster with 2 shards; each shard is configured as a replica set with 2 nodes.
      const st = new ShardingTest({
          shards: 2,
          rs: {nodes: 2}
      });

      assert.commandWorked(st.s.adminCommand({enablesharding: "test"}));
      // Set the cluster-wide defaults: read concern "available" and write concern
      // {w: 1}. The command itself is issued with a majority write concern.
      assert.commandWorked(st.s.adminCommand({
          setDefaultRWConcern: 1,
          defaultReadConcern: {level: "available"},
          defaultWriteConcern: {w: 1},
          writeConcern: {w: "majority"}
      }));
      st.ensurePrimaryShard('test', st.shard0.shardName);

      // Add one more node to shard0's replica set and re-initiate the set.
      st.rs0.add({'shardsvr': ""});
      try {
          st.rs0.reInitiate();
      } catch (e) {
          // Deliberately best-effort: a failure here is only logged and the script
          // goes on to wait for the secondaries below.
          print(e);
      }
      st.rs0.awaitReplication();
      // Wait for shard0's secondaries to reach SECONDARY state
      // (timeout argument 180 * 1000 -- presumably milliseconds; confirm against ReplSetTest).
      st.rs0.waitForState(st.rs0.getSecondaries(), ReplSetTest.State.SECONDARY, 180 * 1000);

      // Shard test.foo on {x: 1} and split it at x = 50, giving two chunks.
      assert.commandWorked(st.s0.adminCommand({shardcollection: "test.foo", key: {x: 1}}));
      assert.commandWorked(st.s0.adminCommand({split: "test.foo", middle: {x: 50}}));

      // Move the chunk containing {x: 10} to the shard that is not shard0, waiting
      // for the range deletion on the donor (_waitForDelete).
      const other = st.config.shards.findOne({_id: {$ne: st.shard0.shardName}});
      assert.commandWorked(st.getDB('admin').runCommand({
          moveChunk: "test.foo",
          find: {x: 10},
          to: other._id,
          _secondaryThrottle: true,
          writeConcern: {w: 2},
          _waitForDelete: true
      }));
      st.rs0.awaitReplication();

      // Separate mongos connection with secondaryOk enabled; the explain below is
      // issued through it.
      const m = new Mongo(st.s.name);
      const ts = m.getDB("test").foo;
      m.setSecondaryOk();
      printjson(ts.find().batchSize(5).explain()); // THIS TRIGGERS THE PROBLEM!!

      // x = 60 lies on the other side of the split point from the moved chunk
      // ({x: 10}), so this document is not in the range that was migrated.
      const coll = st.s.getCollection("test.foo");
      assert.commandWorked(coll.insert({primaryOnly: true, x: 60}));

      print("DEBUG-9 ---#1---");
      // w: 3 requires acknowledgement from three nodes -- presumably the two
      // original shard0 nodes plus the one added above, so secondaries must apply
      // this delete.
      assert.commandWorked(coll.remove({primaryOnly: true, x: 60}, {writeConcern: {w: 3}}));
      print("DEBUG-9 ---#2---");

      st.stop();
      })();
      

            Assignee:
            Antonio Fuschetto
            Reporter:
            Sergi Mateo Bellido
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Created:
              Updated:
              Resolved: