Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-32066

Inserting document to drop pending collection using UUID with applyOps can cause primary to dassert

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Major - P3
    • Resolution: Gone away
    • Affects Version/s: None
    • Fix Version/s: None
    • Component/s: Replication, Storage
    • Labels:
    • Operating System:
      ALL
    • Steps To Reproduce:
      Hide

       
      load("jstests/libs/check_log.js");  // For 'checkLog'.
      /**
       * Turns on the "rsSyncApplyStop" fail point on the given node, asserts that
       * the command succeeded, and then checks the node's log for the message
       * announcing that the fail point is enabled.
       *
       * @param node - connection object exposing adminCommand(); the script passes
       *     the replica set's secondary.
       */
      function pauseOplogApplication(node) {
          const enableFailPoint = {configureFailPoint: "rsSyncApplyStop", mode: "alwaysOn"};
          const response = node.adminCommand(enableFailPoint);
          assert.commandWorked(response);

          // NOTE(review): checkLog.contains presumably blocks until the message
          // shows up in the log -- confirm in jstests/libs/check_log.js.
          checkLog.contains(node, "rsSyncApplyStop fail point enabled");
      }
       
      /**
       * Turns off the "rsSyncApplyStop" fail point on the given node and asserts
       * that the command succeeded.
       *
       * @param node - connection object exposing adminCommand(); the script passes
       *     the replica set's secondary.
       */
      function resumeOplogApplication(node) {
          const disableFailPoint = {configureFailPoint: "rsSyncApplyStop", mode: "off"};
          const response = node.adminCommand(disableFailPoint);
          assert.commandWorked(response);
      }
       
       
      // Repro driver: start a two-node replica set, leave a drop of "test.coll"
      // uncommitted, then use applyOps to insert into the dropped collection by
      // its old UUID.
      let replTest = new ReplSetTest({name: "applyOpsTest", nodes: 2});
       
      replTest.startSet();
      replTest.initiate();
      replTest.awaitReplication();
       
      // Pause oplog application on the secondary so the collection drop issued
      // below doesn't commit before the applyOps insert runs.
      pauseOplogApplication(replTest.getSecondary());
       
      // Get connections and collection.
      let primary = replTest.getPrimary();
      let pdb = primary.getDB("test");
       
      // Create collection (implicitly, via the first insert).
      pdb["coll"].insert({x:1});
       
      // Remember the UUID of the original collection.
      // NOTE(review): takes the first entry of getCollectionInfos(); presumably
      // "coll" is the only collection in "test" at this point -- confirm.
      let uuid = pdb.getCollectionInfos()[0].info.uuid;
       
      // Drop and re-create collection. The namespace "test.coll" is reused, while
      // 'uuid' still holds the UUID captured before the drop.
      pdb["coll"].drop();
      pdb["coll"].insert({x:1});
       
      // A single insert op that names the namespace "test.coll" but carries the
      // UUID captured before the drop.
      let ops = [{
          "op": "i",
          "ns": "test.coll",
          "ui": uuid,
          "o": {"_id": 0}
      }];
       
      // Per the ticket description, this applyOps call is what hits the invariant
      // in src/mongo/db/repl/oplog.cpp on the primary.
      jsTestLog("Doing 'applyOps' command.");
      assert.commandWorked(pdb.adminCommand({applyOps: ops, allowAtomic: false}));
       
      // Let the secondary resume applying oplog entries.
      // NOTE(review): the script never calls replTest.stopSet(); confirm whether
      // teardown is intentionally omitted.
      resumeOplogApplication(replTest.getSecondary());
      

      Show
        load( "jstests/libs/check_log.js" ); // For 'checkLog'. function pauseOplogApplication(node) { assert.commandWorked(node.adminCommand( {configureFailPoint: "rsSyncApplyStop" , mode: "alwaysOn" })); checkLog.contains(node, "rsSyncApplyStop fail point enabled" ); }   function resumeOplogApplication(node) { assert.commandWorked( node.adminCommand({configureFailPoint: "rsSyncApplyStop" , mode: "off" })); }     let replTest = new ReplSetTest({name: "applyOpsTest" , nodes: 2});   replTest.startSet(); replTest.initiate(); replTest.awaitReplication();   // Pause oplog application so collection drop doesn't commit. pauseOplogApplication(replTest.getSecondary());   // Get connections and collection. let primary = replTest.getPrimary(); let pdb = primary.getDB( "test" );   // Create collection. pdb[ "coll" ].insert({x:1});   let uuid = pdb.getCollectionInfos()[0].info.uuid;   // Drop and re-create collection. pdb[ "coll" ].drop(); pdb[ "coll" ].insert({x:1});   let ops = [{ "op" : "i" , "ns" : "test.coll" , "ui" : uuid, "o" : { "_id" : 0} }];   jsTestLog( "Doing 'applyOps' command." ); assert.commandWorked(pdb.adminCommand({applyOps: ops, allowAtomic: false }));   resumeOplogApplication(replTest.getSecondary());
    • Sprint:
      Execution Team 2019-09-23

      Description

      Consider the following sequence of collection operations that occur on a primary:

      create "test.coll", UUID=0
      drop   "test.coll", UUID=0
      create "test.coll", UUID=1
      insert "test.coll"  UUID=0, {x:1}
      

      If we use the applyOps command to insert directly into the collection with namespace "test.coll" and UUID=0 before the drop of that collection has committed on the primary, we hit this dassert:

      [js_test:apply_ops_repro] 2017-11-27T11:45:16.568-0500 d50410| 2017-11-27T11:45:16.568-0500 F -        [conn1] Invariant failure opCtx->lockState()->isCollectionLockedForMode( requestNss.ns(), supportsDocLocking() ? MODE_IX : MODE_X) src/mongo/db/repl/oplog.cpp 1040
      

      The problem seems to partially stem from the fact that we obtain locks by namespace, rather than UUID, before applying an operation through applyOps.

      One solution may be to disallow inserts on collection UUIDs that are in the drop-pending state. See the repro script under "Steps To Reproduce". Note that it is possible to reproduce the issue even without pausing oplog application on the secondary, since it is very likely that the offending insert operation will be executed before the previous collection drop has committed.

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              evgeni.dobranov Evgeni Dobranov
              Reporter:
              william.schultz William Schultz (Inactive)
              Participants:
              Votes:
              0 Vote for this issue
              Watchers:
              11 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved: