Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-60160

Initial syncing node can crash due to BSONObjectTooLarge exception thrown while replaying the oplog entries.

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Major - P3
    • Resolution: Won't Fix
    • Affects Version/s: None
    • Fix Version/s: None
    • Component/s: None
    • Labels:
      None
    • Operating System:
      ALL
    • Steps To Reproduce:
      Hide

       
      // Reproduction script: drives a replica set through an initial sync while the same
      // document is being updated on the sync source, so the syncing node replays those
      // updates on top of whatever state the collection cloner copied.
      //
      // Sequence:
      //   1. Start a 3-member set and insert one document with a ~6 MB string field.
      //   2. Stop the secondary and turn on the 'hangBeforeListDatabases' failpoint on the primary.
      //   3. Restart the secondary with startClean and wait for it to reach STARTUP_2.
      //   4. While the failpoint is on, apply three updates to the document on the primary.
      //   5. Turn the failpoint off and wait for the secondary to reach SECONDARY.
      //
      // NOTE(review): per the comment further down, the expected outcome is a
      // BSONObjectTooLarge error on the syncing node, in which case the final waitForState
      // would not succeed — confirm against an actual run.
      (function() {
      "use strict";
       
      load("jstests/libs/curop_helpers.js");  // for waitForCurOpByFailPoint().
      // NOTE(review): no helper from fail_point_util.js appears to be called directly in
      // this script (failpoints are configured via raw configureFailPoint commands) — confirm
      // whether this load is still needed.
      load("jstests/libs/fail_point_util.js");
       
      // testName is only used to derive dbName.
      const testName = "testDB";
      const dbName = testName;
      const collName = "testcoll";
       
      // Start a 3 node replica set to avoid primary step down after secondary restart.
      // Members: one default node, one priority-0 node and one arbiter; chaining is disabled.
      const rst = new ReplSetTest(
          {nodes: [{}, {rsConfig: {priority: 0}}, {arbiter: true}], settings: {chainingAllowed: false}});
      rst.startSet();
      rst.initiate();
       
      var primary = rst.getPrimary();
      var primaryDB = primary.getDB(dbName);
      var primaryAdmin = primary.getDB("admin");
      var primaryColl = primaryDB[collName];
      var secondary = rst.getSecondary();
       
      // Write a document of 6 MB.
      // new Array(n).join('x') yields a string of n - 1 'x' characters, i.e. just under 6 MiB.
      const largeArray = new Array(6 * 1024 * 1024).join('x');
      assert.commandWorked(primaryColl.insert({_id: 1, a: largeArray}));
       
      jsTestLog("Stopping secondary.");
      rst.stop(secondary);
       
      // The failpoint is enabled before the secondary restarts.
      // NOTE(review): presumably this makes the sync source block in listDatabases so the
      // updates below are written before cloning proceeds — confirm the failpoint's semantics.
      jsTestLog("Enabling failpoint 'hangBeforeListDatabases' on primary (sync source).");
      assert.commandWorked(
          primary.adminCommand({configureFailPoint: "hangBeforeListDatabases", mode: "alwaysOn"}));
       
      // Restart the secondary with startClean and numInitialSyncAttempts set to 1.
      // NOTE(review): presumably startClean forces a fresh initial sync and the single
      // attempt means a failed sync is not retried — confirm.
      jsTestLog("Starting secondary.");
      let secondaryStartupParams = {};
      secondaryStartupParams['numInitialSyncAttempts'] = 1;
      secondary = rst.start(secondary, {startClean: true, setParameter: secondaryStartupParams});
      rst.waitForState(secondary, ReplSetTest.State.STARTUP_2);
       
      // NOTE(review): the pattern below is built from '^' + "" and is therefore /^/, which
      // matches any string. It looks like the intended namespace text was lost when this
      // script was pasted — confirm what the second argument was meant to match.
      jsTestLog("Waiting for primary to reach failPoint 'hangBeforeListDatabases'");
      waitForCurOpByFailPoint(primaryAdmin,
                              new RegExp('^' +
                                         ""),
                              "hangBeforeListDatabases");
       
      // Now perform following updates on the document. So, both collection cloner and initial sync
      // oplog applier would try to apply those updates and lead to BSONObjectTooLarge exception error.
      // After these three updates the document on the primary holds fields `a` and `c`
      // (field `b` is set and then unset).
      // NOTE(review): presumably the failure arises because the cloned document already
      // contains `a` and `c`, and replaying `$set: {b: ...}` on it adds a third ~6 MB field,
      // pushing it past the BSON document size limit — confirm.
      assert.commandWorked(primaryColl.update({_id: 1}, {$set: {b: largeArray}}));
      assert.commandWorked(primaryColl.update({_id: 1}, {$unset: {b: 1}}));
      assert.commandWorked(primaryColl.update({_id: 1}, {$set: {c: largeArray}}));
       
      jsTestLog("Allowing initial sync to continue.");
      assert.commandWorked(
          primaryAdmin.adminCommand({configureFailPoint: 'hangBeforeListDatabases', mode: 'off'}));
       
      // Reaching SECONDARY state is treated as initial sync having completed.
      jsTestLog("Waiting for initial sync to complete.");
      rst.waitForState(secondary, ReplSetTest.State.SECONDARY);
       
      rst.stopSet();
      })();
      

      Show
        (function() { "use strict";   load("jstests/libs/curop_helpers.js"); // for waitForCurOpByFailPoint(). load("jstests/libs/fail_point_util.js");   const testName = "testDB"; const dbName = testName; const collName = "testcoll";   // Start a 3 node replica set to avoid primary step down after secondary restart. const rst = new ReplSetTest( {nodes: [{}, {rsConfig: {priority: 0}}, {arbiter: true}], settings: {chainingAllowed: false}}); rst.startSet(); rst.initiate();   var primary = rst.getPrimary(); var primaryDB = primary.getDB(dbName); var primaryAdmin = primary.getDB("admin"); var primaryColl = primaryDB[collName]; var secondary = rst.getSecondary();   // Write a document of 6 MB. const largeArray = new Array(6 * 1024 * 1024).join('x'); assert.commandWorked(primaryColl.insert({_id: 1, a: largeArray}));   jsTestLog("Stopping secondary."); rst.stop(secondary);   jsTestLog("Enabling failpoint 'hangBeforeListDatabases' on primary (sync source)."); assert.commandWorked( primary.adminCommand({configureFailPoint: "hangBeforeListDatabases", mode: "alwaysOn"}));   jsTestLog("Starting secondary."); let secondaryStartupParams = {}; secondaryStartupParams['numInitialSyncAttempts'] = 1; secondary = rst.start(secondary, {startClean: true, setParameter: secondaryStartupParams}); rst.waitForState(secondary, ReplSetTest.State.STARTUP_2);   jsTestLog("Waiting for primary to reach failPoint 'hangBeforeListDatabases'"); waitForCurOpByFailPoint(primaryAdmin, new RegExp('^' + ""), "hangBeforeListDatabases");   // Now perform following updates on the document. So, both collection cloner and initial sync // oplog appplier would try to apply those updates and lead to BSONObjectTooLarge exception error. 
assert.commandWorked(primaryColl.update({_id: 1}, {$set: {b: largeArray}})); assert.commandWorked(primaryColl.update({_id: 1}, {$unset: {b: 1}})); assert.commandWorked(primaryColl.update({_id: 1}, {$set: {c: largeArray}}));   jsTestLog("Allowing initial sync to continue."); assert.commandWorked( primaryAdmin.adminCommand({configureFailPoint: 'hangBeforeListDatabases', mode: 'off'}));   jsTestLog("Waiting for initial sync to complete."); rst.waitForState(secondary, ReplSetTest.State.SECONDARY);   rst.stopSet(); })();

      Description

      This is a bug in logical initial sync. Because the cloner doesn't do a snapshot read on the sync source when cloning data, the initial syncing node replays oplog entries on inconsistent data. This can lead to the same kind of idempotency issue that exists when applying the operations from a transaction after the data already reflects the transaction. We have OplogApplication::Mode::kInitialSync to absorb these kinds of errors and silently ignore them. I think we missed handling the scenario described in the "Steps To Reproduce" section.

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              backlog-server-repl Backlog - Replication Team
              Reporter:
              suganthi.mani Suganthi Mani
              Participants:
              Votes:
              0 Vote for this issue
              Watchers:
              5 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved: