Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-61878

Secondary nodes filter out legit write operations believing that they were on orphans

    XMLWordPrintableJSON

Details

    • Icon: Bug Bug
    • Resolution: Works as Designed
    • Icon: Major - P3 Major - P3
    • None
    • None
    • Sharding
    • None
    • Sharding EMEA 2021-12-13, Sharding EMEA 2021-12-27, Sharding EMEA 2022-01-10

    Description

      The goal of this ticket is to investigate why secondary nodes are filtering out oplog entries in the belief that they apply to orphaned documents. This shouldn't happen, and it probably means that the filtering information on the shard is not up to date.

      We managed to reproduce it with the test below, after removing the skips that we have on secondaries and adding an error that is raised whenever something is filtered on a secondary.

      // Reproduction script for the issue described in this ticket: a write on a
      // document that is not an orphan ends up being filtered on secondary nodes.
      // Outline: build a 2-shard cluster, grow shard0's replica set by one node,
      // shard and split "test.foo", move one chunk to the other shard, run an
      // explain through mongos with secondary reads allowed, then insert and remove
      // a document while waiting for the removal to be acknowledged by 3 nodes.
      (function() {
      'use strict';
      // Two shards; the `rs` option asks for 2 nodes per shard replica set.
      var st = new ShardingTest({
          shards: 2,
          rs: { nodes: 2 }
      });
      assert.commandWorked(st.s.adminCommand({enablesharding: "test"}));
      // Set the cluster-wide defaults: read concern "available", write concern w:1.
      // The command itself is issued with a "majority" write concern.
      assert.commandWorked(st.s.adminCommand({
          setDefaultRWConcern: 1,
          defaultReadConcern: {level: "available"},
          defaultWriteConcern: {w: 1},
          writeConcern: {w: "majority"}
      }));
      st.ensurePrimaryShard('test', st.shard0.shardName);
      // Add one more node to shard0's replica set and re-initiate it.
      // NOTE(review): this presumably brings rs0 to 3 nodes, which is what the
      // {w: 3} write concern on the remove below relies on -- confirm.
      st.rs0.add({'shardsvr': ""});
      try {
          st.rs0.reInitiate();
      } catch (e) {
          // Best effort: a failure of reInitiate() is only printed, not rethrown.
          print(e);
      }
      // Wait until the set has replicated and its secondaries report SECONDARY
      // state (timeout argument is 180 * 1000, presumably milliseconds).
      st.rs0.awaitReplication();
      st.rs0.waitForState(st.rs0.getSecondaries(), ReplSetTest.State.SECONDARY, 180 * 1000);
      // Shard test.foo on {x: 1} and split it at x = 50.
      assert.commandWorked(st.s0.adminCommand({shardcollection: "test.foo", key: {x: 1}}));
      assert.commandWorked(st.s0.adminCommand({split: "test.foo", middle: {x: 50}}));
      // Look up the shard that is not shard0; it is the migration destination.
      var other = st.config.shards.findOne({_id: {$ne: st.shard0.shardName}});
      // Move the chunk containing x = 10 to the other shard and wait for the
      // range deletion on the donor (_waitForDelete) before returning.
      assert.commandWorked(st.getDB('admin').runCommand({
          moveChunk: "test.foo",
          find: {x: 10},
          to: other._id,
          _secondaryThrottle: true,
          writeConcern: {w: 2},
          _waitForDelete: true
      }));
      st.rs0.awaitReplication();
      // Open a separate connection to mongos that allows reads from secondaries.
      var m = new Mongo(st.s.name);
      var ts = m.getDB("test").foo;
      m.setSecondaryOk();
      printjson(ts.find().batchSize(5).explain()); // THIS TRIGGERS THE PROBLEM!!
      // Insert and then remove a document with x = 60. Given the split at 50 and
      // the migration of the chunk containing x = 10, this key should belong to the
      // chunk that was not moved -- i.e. it is not expected to be an orphan.
      const coll = st.s.getCollection("test.foo");
      assert.commandWorked(coll.insert({primaryOnly: true, x: 60}));
      print("DEBUG-9 ---#1---");
      // The remove must be acknowledged by 3 nodes, so it has to be applied on
      // the secondaries as well.
      assert.commandWorked(coll.remove({primaryOnly: true, x: 60}, {writeConcern: {w: 3}}));
      print("DEBUG-9 ---#2---");
      st.stop();
      })();
      

      Attachments

        Activity

          People

            antonio.fuschetto@mongodb.com Antonio Fuschetto
            sergi.mateo-bellido@mongodb.com Sergi Mateo Bellido
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: