Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-44722

3 way deadlock can happen between hybrid index build, prepared transactions and stepdown thread on primary that runs index build via coordinator.

    • Type: Icon: Bug Bug
    • Resolution: Duplicate
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: None
    • Component/s: Storage
    • None
    • ALL
    • Hide
      /**
       * Repro script: hybrid index build run through the index build coordinator,
       * a prepared transaction, and a forced stepdown, all on a single-node replica set.
       *
       * Sequence:
       *   1. Start createIndex in a parallel shell and pause it with the
       *      hangAfterIndexBuildDumpsInsertsFromBulk fail point.
       *   2. Insert a document inside a transaction and prepare that transaction.
       *   3. Turn the fail point off and force a stepdown from a second parallel shell.
       *   4. Join both shells; createIndex is expected to fail with
       *      InterruptedDueToReplStateChange.
       *   5. Let the node be re-elected, then abort the prepared transaction.
       */
      load("jstests/libs/check_log.js");
      load("jstests/replsets/rslib.js");
      load("jstests/core/txns/libs/prepare_helpers.js");

      (function() {

      "use strict";

      const dbName = "test";
      const collName = "coll";

      const rst = new ReplSetTest({nodes: 1});
      rst.startSet();
      rst.initiate();

      const primary = rst.getPrimary();
      const primaryDB = primary.getDB(dbName);
      const primaryColl = primaryDB[collName];

      // The parallel shells below read the namespace from TestData.
      TestData.dbName = dbName;
      TestData.collName = collName;

      jsTestLog("Do a document write");
      assert.commandWorked(primaryColl.insert({_id: 1, x: 1}, {"writeConcern": {"w": 1}}));

      // Clear the log so the checkLog.contains() call below only matches a message
      // written after this point.
      assert.commandWorked(primary.adminCommand({clearLog: 'global'}));

      // Enable fail point which makes hybrid index build to hang.
      assert.commandWorked(primary.adminCommand(
          {configureFailPoint: "hangAfterIndexBuildDumpsInsertsFromBulk", mode: "alwaysOn"}));

      const indexThread = startParallelShell(() => {
          jsTestLog("Create index");
          const primaryDB = db.getSiblingDB(TestData.dbName);
          assert.commandFailedWithCode(primaryDB[TestData.collName].createIndex({"x": 1}),
                                       ErrorCodes.InterruptedDueToReplStateChange);
      }, primary.port);

      // Wait for hangAfterIndexBuildDumpsInsertsFromBulk fail point to reach.
      checkLog.contains(primary, "Hanging after dumping inserts from bulk builder");

      jsTestLog("Start txn");
      const session = primary.startSession();
      const sessionDB = session.getDatabase(dbName);
      const sessionColl = sessionDB.getCollection(collName);
      session.startTransaction();
      // Plain insert: the second argument of insert() is an options document, so the
      // update-style {$set: ...} that used to be passed here had no meaning.
      assert.commandWorked(sessionColl.insert({x: 1}));

      jsTestLog("Prepare txn");
      // The returned prepare timestamp is not needed; the transaction is aborted below.
      PrepareHelpers.prepareTransaction(session);

      // Release the index build only after the transaction is prepared.
      assert.commandWorked(primary.adminCommand(
          {configureFailPoint: "hangAfterIndexBuildDumpsInsertsFromBulk", mode: "off"}));

      const stepDownThread = startParallelShell(() => {
          jsTestLog("Make primary to step down");
          assert.commandWorked(db.adminCommand({"replSetStepDown": 60 * 60, "force": true}));
      }, primary.port);

      // Wait for threads to join.
      indexThread();
      stepDownThread();

      waitForState(primary, ReplSetTest.State.SECONDARY);
      // Allow the primary to be re-elected, and wait for it.
      assert.commandWorked(primary.adminCommand({replSetFreeze: 0}));
      rst.getPrimary();

      jsTestLog("Abort txn");
      assert.commandWorked(session.abortTransaction_forTesting());

      rst.stopSet();
      })();
      
      Show
      // Repro script with its line structure restored. In the collapsed form each
      // "//" comment swallowed every statement that followed it on the same line.
      load("jstests/libs/check_log.js");
      load("jstests/replsets/rslib.js");
      load("jstests/core/txns/libs/prepare_helpers.js");

      (function() {

      "use strict";

      const dbName = "test";
      const collName = "coll";

      const rst = new ReplSetTest({nodes: 1});
      rst.startSet();
      rst.initiate();

      const primary = rst.getPrimary();
      const primaryDB = primary.getDB(dbName);
      const primaryColl = primaryDB[collName];

      // The parallel shells below read the namespace from TestData.
      TestData.dbName = dbName;
      TestData.collName = collName;

      jsTestLog("Do a document write");
      assert.commandWorked(primaryColl.insert({_id: 1, x: 1}, {"writeConcern": {"w": 1}}));

      // Clear the log.
      assert.commandWorked(primary.adminCommand({clearLog: 'global'}));

      // Enable fail point which makes hybrid index build to hang.
      assert.commandWorked(primary.adminCommand(
          {configureFailPoint: "hangAfterIndexBuildDumpsInsertsFromBulk", mode: "alwaysOn"}));

      const indexThread = startParallelShell(() => {
          jsTestLog("Create index");
          const primaryDB = db.getSiblingDB(TestData.dbName);
          assert.commandFailedWithCode(primaryDB[TestData.collName].createIndex({"x": 1}),
                                       ErrorCodes.InterruptedDueToReplStateChange);
      }, primary.port);

      // Wait for hangAfterIndexBuildDumpsInsertsFromBulk fail point to reach.
      checkLog.contains(primary, "Hanging after dumping inserts from bulk builder");

      jsTestLog("Start txn");
      const session = primary.startSession();
      const sessionDB = session.getDatabase(dbName);
      const sessionColl = sessionDB.getCollection(collName);
      session.startTransaction();
      // Plain insert: the second argument of insert() is an options document, so the
      // update-style {$set: ...} that used to be passed here had no meaning.
      assert.commandWorked(sessionColl.insert({x: 1}));

      jsTestLog("Prepare txn");
      PrepareHelpers.prepareTransaction(session);

      assert.commandWorked(primary.adminCommand(
          {configureFailPoint: "hangAfterIndexBuildDumpsInsertsFromBulk", mode: "off"}));

      const stepDownThread = startParallelShell(() => {
          jsTestLog("Make primary to step down");
          assert.commandWorked(db.adminCommand({"replSetStepDown": 60 * 60, "force": true}));
      }, primary.port);

      // Wait for threads to join.
      indexThread();
      stepDownThread();

      waitForState(primary, ReplSetTest.State.SECONDARY);
      // Allow the primary to be re-elected, and wait for it.
      assert.commandWorked(primary.adminCommand({replSetFreeze: 0}));
      rst.getPrimary();

      jsTestLog("Abort txn");
      assert.commandWorked(session.abortTransaction_forTesting());

      rst.stopSet();
      })();
    • Execution Team 2020-05-04

      _buildIndex() is the method that performs the collection scan, drain, and commit phases of the index build. The drain and commit phases take stronger-mode locks (the collection lock in S and X mode, respectively). On the master branch, we always run the _buildIndex() method through the index build coordinator. This means _buildIndex() runs on a spawned thread (an internal/system operation), and such threads are not currently killable by the state transition thread (the step down thread). This can result in a 3-way deadlock where:

      1) IndexBuildsCoordinatorMongod-X (internal thread) is blocked on a prepare conflict while holding the RSTL in IX mode.
      2) Step down enqueues the RSTL lock in X mode and is blocked behind the IndexBuildsCoordinatorMongod-X thread.
      3) The CommitTransaction cmd is waiting to acquire the RSTL lock in IX mode but is blocked behind the step down thread.

      Note that the step down thread marks the main thread (the user connection thread that performs the "createIndexes" cmd) as killed because the main thread previously acquired the RSTL in IX mode. Usually, when the main thread gets interrupted by a state transition, it kills the spawned IndexBuildsCoordinatorMongod-X thread, but NOT via the opCtx channel. So there is no way for the internal thread (i.e. IndexBuildsCoordinatorMongod-X) that is waiting for the lock to be interrupted.

      It seems that even on MongoDB 4.2 we will hit the 3-way deadlock if we set the server startup parameter enableIndexBuildsCoordinatorForCreateIndexesCommand to true. This is because, when "enableIndexBuildsCoordinatorForCreateIndexesCommand" is false, we run the drain and commit index build phases on the main thread (the user connection thread that performs the "createIndexes" cmd), which is always interruptible by the step down thread.

      Notes: We acquire the collection lock in a stronger mode in order to commit/abort (X) and to drain the side table writes (S). As a result, this can lead to deadlocks involving prepared transactions, stepdown, and the IndexBuildsCoordinator.

            Assignee:
            louis.williams@mongodb.com Louis Williams
            Reporter:
            suganthi.mani@mongodb.com Suganthi Mani
            Votes:
            0 Vote for this issue
            Watchers:
            11 Start watching this issue

              Created:
              Updated:
              Resolved: