Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-48518

Rollback via refetch (EMRC = false) can make readers to see the rolled back data even after the rollback node catches up to primary.

    • Fully Compatible
    • ALL
    • v4.4, v4.2, v4.0, v3.6
    • Hide
      (function() {
      "use strict";
      
      load("jstests/core/txns/libs/prepare_helpers.js");
      load("jstests/replsets/libs/rollback_test.js");
      
      const dbName = jsTest.name();
      const collName = "coll";
      
      const rst = new ReplSetTest(
          {name: collName, nodes: 3, useBridge: true, nodeOptions: {enableMajorityReadConcern: "false"}});
      
      rst.startSet();
      const config = rst.getReplSetConfig();
      config.members[2].priority = 0;
      config.settings = {
          chainingAllowed: false
      };
      rst.initiateWithHighElectionTimeout(config);
      
      const rollbackTest = new RollbackTest(collName, rst);
      const primary = rollbackTest.getPrimary();
      const secondary = rollbackTest.getSecondary();
      const tiebreaker = rst.getSecondaries()[1];
      let testDB = primary.getDB(dbName);
      const testColl = testDB.getCollection(collName);
      
      
      jsTestLog("Do a document write");
      assert.commandWorked(testColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}}));
      
      rollbackTest.transitionToRollbackOperations();
      assert.commandWorked(testColl.insert({_id: 1}, {"writeConcern": {"w": 1}}));
      
      jsTestLog(`Isolating the primary ${primary.host} from tiebreaker`);
      primary.disconnect([tiebreaker]);
      
      jsTestLog(`Reconnecting the secondary ${secondary.host} to the tiebreaker`);
      secondary.reconnect([tiebreaker]);
      
      jsTestLog(`stepup the secondary ${secondary.host} as the new primary`);
      assert.soonNoExcept(() => {
                  const res = secondary.adminCommand({replSetStepUp: 1});
                  return res.ok;
              }, `failed to step up node ${secondary.host}`, ReplSetTest.kDefaultTimeoutMS);
      const newPrimary = secondary;
      checkLog.containsJson(newPrimary, 21331);	
      
      assert.commandWorked(testDB["anotherCollection"].insert({_id: 0}, {"writeConcern": {"w": 1}}));	
      assert.commandWorked(testColl.remove({_id: 1}));
      assert.commandWorked(testColl.remove({_id: 0}));
      
      jsTestLog(`Reconnecting the stale primary ${primary.host} so it'll go into rollback`);
      primary.reconnect([newPrimary]);
      
      const stalePrimary = primary;
      checkLog.containsJson(stalePrimary, 21103);	
      checkLog.containsJson(stalePrimary, 21682);	
      
      jsTestLog(`Wait for the stale primary ${primary.host} to transition into secondary from rollback`);
      rst.waitForState(stalePrimary, ReplSetTest.State.SECONDARY);
      
      let itCount = testColl.find().itcount();
      let fastCount = testColl.count();
      
      // FastCount shows 1, itCount shows 3 - [ { "_id" : 0 }, { "_id" : 1 }, { "_id" : 0 } ]
      // Expected result of itCount is 1 =  [ { "_id" : 0 }]
      jsTestLog("Check collection count");
      assert.eq(itCount, fastCount, "test collection, fastCount: " + fastCount + ", itCount: " + itCount +  ", coll data : " + tojson(testColl.find().toArray()));
      
      rollbackTest.stop();
      }());
      
      Show
      (function() { "use strict"; load("jstests/core/txns/libs/prepare_helpers.js"); load("jstests/replsets/libs/rollback_test.js"); const dbName = jsTest.name(); const collName = "coll"; const rst = new ReplSetTest( {name: collName, nodes: 3, useBridge: true, nodeOptions: {enableMajorityReadConcern: "false"}}); rst.startSet(); const config = rst.getReplSetConfig(); config.members[2].priority = 0; config.settings = { chainingAllowed: false }; rst.initiateWithHighElectionTimeout(config); const rollbackTest = new RollbackTest(collName, rst); const primary = rollbackTest.getPrimary(); const secondary = rollbackTest.getSecondary(); const tiebreaker = rst.getSecondaries()[1]; let testDB = primary.getDB(dbName); const testColl = testDB.getCollection(collName); jsTestLog("Do a document write"); assert.commandWorked(testColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}})); rollbackTest.transitionToRollbackOperations(); assert.commandWorked(testColl.insert({_id: 1}, {"writeConcern": {"w": 1}})); jsTestLog(`Isolating the primary ${primary.host} from tiebreaker`); primary.disconnect([tiebreaker]); jsTestLog(`Reconnecting the secondary ${secondary.host} to the tiebreaker`); secondary.reconnect([tiebreaker]); jsTestLog(`stepup the secondary ${secondary.host} as the new primary`); assert.soonNoExcept(() => { const res = secondary.adminCommand({replSetStepUp: 1}); return res.ok; }, `failed to step up node ${secondary.host}`, ReplSetTest.kDefaultTimeoutMS); const newPrimary = secondary; checkLog.containsJson(newPrimary, 21331); assert.commandWorked(testDB["anotherCollection"].insert({_id: 0}, {"writeConcern": {"w": 1}})); assert.commandWorked(testColl.remove({_id: 1})); assert.commandWorked(testColl.remove({_id: 0})); jsTestLog(`Reconnecting the stale primary ${primary.host} so it'll go into rollback`); primary.reconnect([newPrimary]); const stalePrimary = primary; checkLog.containsJson(stalePrimary, 21103); checkLog.containsJson(stalePrimary, 21682); jsTestLog(`Wait for the stale primary ${primary.host} to transition into secondary from rollback`); rst.waitForState(stalePrimary, ReplSetTest.State.SECONDARY); let itCount = testColl.find().itcount(); let fastCount = testColl.count(); // FastCount shows 1, itCount shows 3 - [ { "_id" : 0 }, { "_id" : 1 }, { "_id" : 0 } ] // Expected result of itCount is 1 = [ { "_id" : 0 }] jsTestLog("Check collection count"); assert.eq(itCount, fastCount, "test collection, fastCount: " + fastCount + ", itCount: " + itCount + ", coll data : " + tojson(testColl.find().toArray())); rollbackTest.stop(); }());
    • Repl 2020-06-29, Repl 2020-07-13, Repl 2020-08-10
    • 96

      Below problems can be seen only for EMRC=false environment.

      Problem 1: Find command on secondaries can list the rollbacked records. 
      Assume, rollback node is Node A and current primary is node B. Common point is TS(10). And, Node B’s (current primary) lastAppliedOpTime  is TS(13).
      1. Also, assume the following oplog entries in Node A will be rolled back,

      • Insert  {_id:1} to collection foo at TS(11) .
      • Delete {_id:1} from  collection foo at TS(15).
        Since {_id:1} is not present in node B, the rollback via refetch algorithm effectively doesn't do anything for {_id:1} document on node A.

      2. Before rollback, the node A’s lastApplied opTime is TS(15). When the rollback completes, the lastApplied opTime of node A is set to TS(10) = common point.
      3. Node A after transitioning to secondary from rollback state, it catches up to primary and so node A's lastApplied opTime will be forwarded to TS(13).

      Now, if a user tries runs a find command on collection ‘foo’ on node A (secondary), the read source of that find command will be set to kNoOverlap (default read source for secondaries). As a result readTimestamp for that find command will be set as TS(13) = min(lastAppliedopTime, allDurable). Because of which find command would be able to list the rollbacked data record {id:1} .

      Problem 2: Find command on secondaries can also list records with duplicated _id. 
      Assume, rollback node is Node A and current primary is node B. Common point is TS(10). And, Node B’s (current primary) lastAppliedOpTime  is TS(13).
      1. Lets's say at TS(5), the user previously inserted a document {_id:1} collection foo with RecordId=1.
      2. Now, assume the below oplog entry in Node A will be rolled back,

      • Delete {_id:1} from  collection foo at TS(15).
        To rollback the delete operation, the rollback via refetch algorithm will refetch the document {_id:1} document from node B and insert the {_id:1} with RecordId=2 into node A and it will be a * non-timestamped write* (which is equivalent to TS(0)).

      3. Before rollback, the node A’s lastApplied opTime is TS(15). When the rollback completes, the lastApplied opTime of node A is set to TS(10) = common point.
      4. Node A after transitioning to secondary from rollback state, it catches up to primary and so node A's lastApplied opTime will be forwarded to TS(13).

      Now, if a user tries runs a find command on collection ‘foo’ on node A (secondary), the read source will be  kNoOverlap and read timestamp will be TS(13) = min(lastAppliedopTime, allDurable). Because of which find command would be able to list *two {_id:1} documents, one being RecordId=1 (whose TS(5)) and other being non-timestamped write RecordId=2 written during rollback.

      I have put the repro steps to demonstrate the above 2 issues. Also, to be noted, both the above issues causes cursor count (itcount) and fast count mismatch( which was the symptom seen in the build failures).

      Root cause:
      The main problem for EMRC= false is that  it's not safe to read on secondaries at a timestamp <  the local node's top of oplog's timestamp prior to entering rollback. 

            Assignee:
            jesse@mongodb.com A. Jesse Jiryu Davis
            Reporter:
            suganthi.mani@mongodb.com Suganthi Mani
            Votes:
            0 Vote for this issue
            Watchers:
            10 Start watching this issue

              Created:
              Updated:
              Resolved: