Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-44507

Hybrid index build is able to commit (acquire stronger mode locks) for a collection that contains prepared documents. (4.2 only)

    • Type: Icon: Bug Bug
    • Resolution: Fixed
    • Priority: Icon: Major - P3 Major - P3
    • 4.2.6
    • Affects Version/s: 4.2.1
    • Component/s: Storage
    • None
    • Fully Compatible
    • ALL
    • Hide
      /*
       * Test shows that an index build on a collection (which is prepared by transaction but not yet
       * committed) is able to proceed with second drain phase (that acquires collection lock in S mode)
       * and tries to do the finial index build commit. The result is a server crash since hybrid index
       * build missed applying entries from the side write table.
       */
      load("jstests/libs/check_log.js");
      load("jstests/replsets/rslib.js");
      load("jstests/core/txns/libs/prepare_helpers.js");
      
      (function() {
      
      "use strict";
      
      const rst = new ReplSetTest({nodes: 2});
      rst.startSet();
      rst.initiate();
      
      const dbName = jsTestName();
      const collName = "coll";
      
      const primary = rst.getPrimary();
      const primaryDB = primary.getDB(dbName);
      const primaryColl = primaryDB[collName];
      const collNss = primaryColl.getFullName();
      
      jsTestLog("Do document writes");
      for (var i = 0; i < 4; i++) {
          assert.commandWorked(primaryColl.insert({_id: i, y:i}, {"writeConcern": {"w": 1}}));
      }
      rst.awaitReplication();
      
      const secondary = rst.getSecondary();
      const secondaryAdmin = secondary.getDB("admin");
      
      // Make the hybrid build collection scan to yield often.
      assert.commandWorked(
          secondary.adminCommand({setParameter: 1, internalQueryExecYieldIterations: 2}));
      
      jsTestLog("Enable setYieldAllLocksHang fail point");
      assert.commandWorked(secondaryAdmin.runCommand(
          {configureFailPoint: "setYieldAllLocksHang", data: {namespace: collNss}, mode: "alwaysOn"}));
      
      TestData.dbName = dbName;
      TestData.collName = collName;
      
      const indexThread = startParallelShell(() => {
          jsTestLog("Create index");
          const primaryDB = db.getSiblingDB(TestData.dbName);
          assert.commandWorked(primaryDB[TestData.collName].createIndex({"y": 1}));
      }, primary.port);
      
      // Wait until index build (collection scan phase) on secondary yields.
      jsTestLog("Wait for the hybrid index build on secondary to hang");
      assert.soon(
          function() {
              const result = secondaryAdmin.currentOp({"command.createIndexes": collName});
              assert.commandWorked(result);
              if (result.inprog.length === 1 && result.inprog[0].numYields > 0) {
                  return true;
              }
      
              return false;
          },
          function() {
              return "Failed to find operation in currentOp() output: " +
                  tojson(secondaryAdmin.currentOp());
          },
          30 * 1000);
      
      jsTestLog("Step up the current secondary");
      assert.commandWorked(secondary.adminCommand({"replSetStepUp": 1}));
      waitForState(secondary, ReplSetTest.State.PRIMARY);
      waitForState(primary, ReplSetTest.State.SECONDARY);
      const newPrimary = rst.getPrimary();
      
      // Make sure the secondary was able to step up successfully.
      assert.eq(newPrimary, secondary)
      
      jsTestLog("Start a txn");
      const session = newPrimary.startSession();
      const sessionDB = session.getDatabase(dbName);
      const sessionColl = sessionDB.getCollection(collName);
      session.startTransaction();
      assert.commandWorked(sessionColl.insert({_id: 20}));
      
      jsTestLog("Prepare txn");
      const prepareTimestamp = PrepareHelpers.prepareTransaction(session);
      
      // Make the hybrid index build to hang after first drain
      assert.commandWorked(newPrimary.adminCommand(
          {configureFailPoint: "hangAfterIndexBuildFirstDrain", mode: "alwaysOn"}));
      
      // This will make the hybrid build previously started on secondary (now primary) to resume.
      jsTestLog("Disable setYieldAllLocksHang fail point");
      assert.commandWorked(
          newPrimary.adminCommand({configureFailPoint: "setYieldAllLocksHang", mode: "off"}));
      
      // Wait until the first drain completed.
      checkLog.contains(newPrimary, "Hanging after index build first drain");
      
      assert.commandWorked(
          newPrimary.adminCommand({configureFailPoint: "hangAfterIndexBuildFirstDrain", mode: "off"}));
      
      jsTestLog("Step down the new primary");
      assert.commandWorked(newPrimary.adminCommand({"replSetStepDown": 60 * 60, "force": true}));
      
      // SERVER CRASH happens here since it missed applying few entries from the side write table.
      rst.stopSet();
      })();
      
      Show
      /* * Test shows that an index build on a collection (which is prepared by transaction but not yet * committed) is able to proceed with second drain phase (that acquires collection lock in S mode) * and tries to do the finial index build commit. The result is a server crash since hybrid index * build missed applying entries from the side write table. */ load("jstests/libs/check_log.js"); load("jstests/replsets/rslib.js"); load("jstests/core/txns/libs/prepare_helpers.js"); (function() { "use strict"; const rst = new ReplSetTest({nodes: 2}); rst.startSet(); rst.initiate(); const dbName = jsTestName(); const collName = "coll"; const primary = rst.getPrimary(); const primaryDB = primary.getDB(dbName); const primaryColl = primaryDB[collName]; const collNss = primaryColl.getFullName(); jsTestLog("Do document writes"); for (var i = 0; i < 4; i++) { assert.commandWorked(primaryColl.insert({_id: i, y:i}, {"writeConcern": {"w": 1}})); } rst.awaitReplication(); const secondary = rst.getSecondary(); const secondaryAdmin = secondary.getDB("admin"); // Make the hybrid build collection scan to yield often. assert.commandWorked( secondary.adminCommand({setParameter: 1, internalQueryExecYieldIterations: 2})); jsTestLog("Enable setYieldAllLocksHang fail point"); assert.commandWorked(secondaryAdmin.runCommand( {configureFailPoint: "setYieldAllLocksHang", data: {namespace: collNss}, mode: "alwaysOn"})); TestData.dbName = dbName; TestData.collName = collName; const indexThread = startParallelShell(() => { jsTestLog("Create index"); const primaryDB = db.getSiblingDB(TestData.dbName); assert.commandWorked(primaryDB[TestData.collName].createIndex({"y": 1})); }, primary.port); // Wait until index build (collection scan phase) on secondary yields. jsTestLog("Wait for the hybrid index build on secondary to hang"); assert.soon( function() { const result = secondaryAdmin.currentOp({"command.createIndexes": collName}); assert.commandWorked(result); if (result.inprog.length === 1 && result.inprog[0].numYields > 0) { return true; } return false; }, function() { return "Failed to find operation in currentOp() output: " + tojson(secondaryAdmin.currentOp()); }, 30 * 1000); jsTestLog("Step up the current secondary"); assert.commandWorked(secondary.adminCommand({"replSetStepUp": 1})); waitForState(secondary, ReplSetTest.State.PRIMARY); waitForState(primary, ReplSetTest.State.SECONDARY); const newPrimary = rst.getPrimary(); // Make sure the secondary was able to step up successfully. assert.eq(newPrimary, secondary) jsTestLog("Start a txn"); const session = newPrimary.startSession(); const sessionDB = session.getDatabase(dbName); const sessionColl = sessionDB.getCollection(collName); session.startTransaction(); assert.commandWorked(sessionColl.insert({_id: 20})); jsTestLog("Prepare txn"); const prepareTimestamp = PrepareHelpers.prepareTransaction(session); // Make the hybrid index build to hang after first drain assert.commandWorked(newPrimary.adminCommand( {configureFailPoint: "hangAfterIndexBuildFirstDrain", mode: "alwaysOn"})); // This will make the hybrid build previously started on secondary (now primary) to resume. jsTestLog("Disable setYieldAllLocksHang fail point"); assert.commandWorked( newPrimary.adminCommand({configureFailPoint: "setYieldAllLocksHang", mode: "off"})); // Wait until the first drain completed. checkLog.contains(newPrimary, "Hanging after index build first drain"); assert.commandWorked( newPrimary.adminCommand({configureFailPoint: "hangAfterIndexBuildFirstDrain", mode: "off"})); jsTestLog("Step down the new primary"); assert.commandWorked(newPrimary.adminCommand({"replSetStepDown": 60 * 60, "force": true})); // SERVER CRASH happens here since it missed applying few entries from the side write table. rst.stopSet(); })();
    • Execution Team 2019-12-02, Execution Team 2020-02-24, Execution Team 2020-03-09, Execution Team 2020-03-23, Execution Team 2020-04-06, Execution Team 2020-04-20

      Since hybrid index build collection scan phase can yield, it can violate the locking contracts of prepared transactions. Consider a below sequence.

      1) Hybrid index build gets started on secondary for a collection A. And, collection phase of hybrid index build on secondary gets yielded. (Makes all the mongoDB and document level locks to get released - abandons the snapshot).
      2) Secondary node steps up and becomes the new primary.
      3) Transaction gets stated and inserts some documents in collection A. (This step acquires the collection lock in mode IX)
      4) Transaction gets prepared. (After which, it holds the mongoDB locks (database, collection, global) except RSTL lock).
      5) Now, the node steps down. This will make any prepared transactions to yield mongoDB locks. (As on secondaries, prepared transactions does not hold locks.)
      6) Index build started at step #1, gets resumed and able to continue with drain phase (acquires collection lock in S mode) and commit phase( acquires collection lock in X mode) when the documents in collection A is in the prepared state. As a result, while committing the index build, server crash occurs due to this sanity check made for the side write table. (Since the entries in the side write table is prepared but not yet committed, the entry isn't be visible outside the transaction. As a result, second drain phase couldn't apply entries from the side write table and _numApplied was set to 0).

      Even, if we make the collection scan phase of index build to not yield, we would still can face the same problem mentioned above. Since, after SERVER-41462, hybrid index builder started on secondaries does not hold RSTL lock while performing index build (collection scan, drain and commit phase). SERVER-41462 was implemented mainly to avoid a 3 way deadlock between hybrid index builder, prepared transactions and step down. But, then, because of releasing RSTL lock, hybrid index builder started on secondaries won't be blocking the state transitions like step up and step down. This means even if the index build gets resumed after step 4, index build (second drain phase) gets blocked on the prepared transaction due to collection lock conflict. But, once step down occurs, second drain phase can now gets unblocked and able to acquire collection lock in S mode and proceed with the index building.

      Basically we are trying to do a following sequence that is not valid

      <Index Build START>  <Prepare Txn>   <Index Build Commit>  <CommitTxn> 
      

      P.S: The above sequence is prevented during secondary oplog application by this.

      Notes:
      Valid Sequences

      1) <Index Build START>  <Index Build Commit>  <Prepare Txn>   <CommitTxn> [ Applies to PRIMARY, SECONDARY]
      2) <Prepare Txn>   <CommitTxn> <Index Build START>  <Index Build Commit> [ Applies to PRIMARY, SECONDARY]
      3) <Index Build START>  <Prepare Txn>   <CommitTxn>   <Index Build Commit>  [ Applies only to PRIMARY]
      

            Assignee:
            milkie@mongodb.com Eric Milkie
            Reporter:
            suganthi.mani@mongodb.com Suganthi Mani
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: