Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-48518

Rollback via refetch (EMRC = false) can make readers to see the rolled back data even after the rollback node catches up to primary.

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Major - P3
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 4.2.10, 4.4.1, 4.7.0, 4.0.21
    • Component/s: Replication
    • Labels:
      None
    • Backwards Compatibility:
      Fully Compatible
    • Operating System:
      ALL
    • Backport Requested:
      v4.4, v4.2, v4.0, v3.6
    • Steps To Reproduce:
      Hide

      (function() {
      "use strict";
       
      load("jstests/core/txns/libs/prepare_helpers.js");
      load("jstests/replsets/libs/rollback_test.js");
       
      const dbName = jsTest.name();
      const collName = "coll";
       
      const rst = new ReplSetTest(
          {name: collName, nodes: 3, useBridge: true, nodeOptions: {enableMajorityReadConcern: "false"}});
       
      rst.startSet();
      const config = rst.getReplSetConfig();
      config.members[2].priority = 0;
      config.settings = {
          chainingAllowed: false
      };
      rst.initiateWithHighElectionTimeout(config);
       
      const rollbackTest = new RollbackTest(collName, rst);
      const primary = rollbackTest.getPrimary();
      const secondary = rollbackTest.getSecondary();
      const tiebreaker = rst.getSecondaries()[1];
      let testDB = primary.getDB(dbName);
      const testColl = testDB.getCollection(collName);
       
       
      jsTestLog("Do a document write");
      assert.commandWorked(testColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}}));
       
      rollbackTest.transitionToRollbackOperations();
      assert.commandWorked(testColl.insert({_id: 1}, {"writeConcern": {"w": 1}}));
       
      jsTestLog(`Isolating the primary ${primary.host} from tiebreaker`);
      primary.disconnect([tiebreaker]);
       
      jsTestLog(`Reconnecting the secondary ${secondary.host} to the tiebreaker`);
      secondary.reconnect([tiebreaker]);
       
      jsTestLog(`stepup the secondary ${secondary.host} as the new primary`);
      assert.soonNoExcept(() => {
                  const res = secondary.adminCommand({replSetStepUp: 1});
                  return res.ok;
              }, `failed to step up node ${secondary.host}`, ReplSetTest.kDefaultTimeoutMS);
      const newPrimary = secondary;
      checkLog.containsJson(newPrimary, 21331);	
       
      assert.commandWorked(testDB["anotherCollection"].insert({_id: 0}, {"writeConcern": {"w": 1}}));	
      assert.commandWorked(testColl.remove({_id: 1}));
      assert.commandWorked(testColl.remove({_id: 0}));
       
      jsTestLog(`Reconnecting the stale primary ${primary.host} so it'll go into rollback`);
      primary.reconnect([newPrimary]);
       
      const stalePrimary = primary;
      checkLog.containsJson(stalePrimary, 21103);	
      checkLog.containsJson(stalePrimary, 21682);	
       
      jsTestLog(`Wait for the stale primary ${primary.host} to transition into secondary from rollback`);
      rst.waitForState(stalePrimary, ReplSetTest.State.SECONDARY);
       
      let itCount = testColl.find().itcount();
      let fastCount = testColl.count();
       
      // FastCount shows 1, itCount shows 3 - [ { "_id" : 0 }, { "_id" : 1 }, { "_id" : 0 } ]
      // Expected result of itCount is 1 =  [ { "_id" : 0 }]
      jsTestLog("Check collection count");
      assert.eq(itCount, fastCount, "test collection, fastCount: " + fastCount + ", itCount: " + itCount +  ", coll data : " + tojson(testColl.find().toArray()));
       
      rollbackTest.stop();
      }());
      

      Show
      (function() { "use strict";   load("jstests/core/txns/libs/prepare_helpers.js"); load("jstests/replsets/libs/rollback_test.js");   const dbName = jsTest.name(); const collName = "coll";   const rst = new ReplSetTest( {name: collName, nodes: 3, useBridge: true, nodeOptions: {enableMajorityReadConcern: "false"}});   rst.startSet(); const config = rst.getReplSetConfig(); config.members[2].priority = 0; config.settings = { chainingAllowed: false }; rst.initiateWithHighElectionTimeout(config);   const rollbackTest = new RollbackTest(collName, rst); const primary = rollbackTest.getPrimary(); const secondary = rollbackTest.getSecondary(); const tiebreaker = rst.getSecondaries()[1]; let testDB = primary.getDB(dbName); const testColl = testDB.getCollection(collName);     jsTestLog("Do a document write"); assert.commandWorked(testColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}}));   rollbackTest.transitionToRollbackOperations(); assert.commandWorked(testColl.insert({_id: 1}, {"writeConcern": {"w": 1}}));   jsTestLog(`Isolating the primary ${primary.host} from tiebreaker`); primary.disconnect([tiebreaker]);   jsTestLog(`Reconnecting the secondary ${secondary.host} to the tiebreaker`); secondary.reconnect([tiebreaker]);   jsTestLog(`stepup the secondary ${secondary.host} as the new primary`); assert.soonNoExcept(() => { const res = secondary.adminCommand({replSetStepUp: 1}); return res.ok; }, `failed to step up node ${secondary.host}`, ReplSetTest.kDefaultTimeoutMS); const newPrimary = secondary; checkLog.containsJson(newPrimary, 21331);   assert.commandWorked(testDB["anotherCollection"].insert({_id: 0}, {"writeConcern": {"w": 1}})); assert.commandWorked(testColl.remove({_id: 1})); assert.commandWorked(testColl.remove({_id: 0}));   jsTestLog(`Reconnecting the stale primary ${primary.host} so it'll go into rollback`); primary.reconnect([newPrimary]);   const stalePrimary = primary; checkLog.containsJson(stalePrimary, 21103); checkLog.containsJson(stalePrimary, 21682);   jsTestLog(`Wait for the stale primary ${primary.host} to transition into secondary from rollback`); rst.waitForState(stalePrimary, ReplSetTest.State.SECONDARY);   let itCount = testColl.find().itcount(); let fastCount = testColl.count();   // FastCount shows 1, itCount shows 3 - [ { "_id" : 0 }, { "_id" : 1 }, { "_id" : 0 } ] // Expected result of itCount is 1 = [ { "_id" : 0 }] jsTestLog("Check collection count"); assert.eq(itCount, fastCount, "test collection, fastCount: " + fastCount + ", itCount: " + itCount + ", coll data : " + tojson(testColl.find().toArray()));   rollbackTest.stop(); }());
    • Sprint:
      Repl 2020-06-29, Repl 2020-07-13, Repl 2020-08-10
    • Linked BF Score:
      96

      Description

      Below problems can be seen only for EMRC=false environment.

      Problem 1: Find command on secondaries can list the rollbacked records. 
      Assume, rollback node is Node A and current primary is node B. Common point is TS(10). And, Node B’s (current primary) lastAppliedOpTime  is TS(13).
      1. Also, assume the following oplog entries in Node A will be rolled back,

      • Insert  {_id:1} to collection foo at TS(11) .
      • Delete {_id:1} from  collection foo at TS(15).
        Since {_id:1} is not present in node B, the rollback via refetch algorithm effectively doesn't do anything for {_id:1} document on node A.

      2. Before rollback, the node A’s lastApplied opTime is TS(15). When the rollback completes, the lastApplied opTime of node A is set to TS(10) = common point.
      3. Node A after transitioning to secondary from rollback state, it catches up to primary and so node A's lastApplied opTime will be forwarded to TS(13).

      Now, if a user tries runs a find command on collection ‘foo’ on node A (secondary), the read source of that find command will be set to kNoOverlap (default read source for secondaries). As a result readTimestamp for that find command will be set as TS(13) = min(lastAppliedopTime, allDurable). Because of which find command would be able to list the rollbacked data record {id:1} .

      Problem 2: Find command on secondaries can also list records with duplicated _id. 
      Assume, rollback node is Node A and current primary is node B. Common point is TS(10). And, Node B’s (current primary) lastAppliedOpTime  is TS(13).
      1. Lets's say at TS(5), the user previously inserted a document {_id:1} collection foo with RecordId=1.
      2. Now, assume the below oplog entry in Node A will be rolled back,

      • Delete {_id:1} from  collection foo at TS(15).
        To rollback the delete operation, the rollback via refetch algorithm will refetch the document {_id:1} document from node B and insert the {_id:1} with RecordId=2 into node A and it will be a * non-timestamped write* (which is equivalent to TS(0)).

      3. Before rollback, the node A’s lastApplied opTime is TS(15). When the rollback completes, the lastApplied opTime of node A is set to TS(10) = common point.
      4. Node A after transitioning to secondary from rollback state, it catches up to primary and so node A's lastApplied opTime will be forwarded to TS(13).

      Now, if a user tries runs a find command on collection ‘foo’ on node A (secondary), the read source will be  kNoOverlap and read timestamp will be TS(13) = min(lastAppliedopTime, allDurable). Because of which find command would be able to list *two {_id:1} documents, one being RecordId=1 (whose TS(5)) and other being non-timestamped write RecordId=2 written during rollback.

      I have put the repro steps to demonstrate the above 2 issues. Also, to be noted, both the above issues causes cursor count (itcount) and fast count mismatch( which was the symptom seen in the build failures).

      Root cause:
      The main problem for EMRC= false is that  it's not safe to read on secondaries at a timestamp <  the local node's top of oplog's timestamp prior to entering rollback. 

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              jesse A. Jesse Jiryu Davis
              Reporter:
              suganthi.mani Suganthi Mani
              Participants:
              Votes:
              0 Vote for this issue
              Watchers:
              10 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved: