Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-36007

Attempting to check out an already checked out session leads to self-deadlock

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Major - P3
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 4.1.2
    • Component/s: Sharding
    • Labels:
      None
    • Backwards Compatibility:
      Fully Compatible
    • Operating System:
      ALL
    • Steps To Reproduce:
      Hide

      python buildscripts/resmoke.py --suites=no_server already_checked_out_session.js
      

      already_checked_out_session.js

      // Reproduction script: starts a single-node replica set, fsyncLock's the primary, and then
      // issues two inserts that share the same logical session id (lsid) but use different
      // txnNumbers (0 and 1) from two separate threads. After fsyncUnlock, both inserts are
      // expected to succeed.
      (function() {
          "use strict";
       
          load("jstests/libs/parallelTester.js");
       
          const rst = new ReplSetTest({nodes: 1});
          rst.startSet();
          rst.initiate();
       
          const primary = rst.getPrimary();
          const db = primary.getDB("test");
       
          // Thread body: opens its own connection to 'host' and inserts {_id: txnNumber} into
          // "test.mycoll" using the given session id and transaction number. 'lsid' arrives as a
          // string and is turned back into a value with eval(). Any exception is caught and
          // reported as {ok: 0, error, stack} so that the caller can check the outcome with
          // assert.commandWorked() after joining the thread.
          function doInsertWithSession(host, lsid, txnNumber) {
              try {
                  const conn = new Mongo(host);
                  const db = conn.getDB("test");
                  assert.commandWorked(db.runCommand({
                      insert: "mycoll",
                      documents: [{_id: txnNumber}],
                      lsid: {id: eval(lsid)},
                      txnNumber: NumberLong(txnNumber),
                  }));
                  return {ok: 1};
              } catch (e) {
                  return {ok: 0, error: e.toString(), stack: e.stack};
              }
          }
       
          // Declared outside the try block so the threads can be joined after the finally block.
          let thread1;
          let thread2;
       
          // Lock the server so that thread1's insert is expected to stall waiting for a lock (this
          // is verified below via currentOp).
          assert.commandWorked(db.fsyncLock());
          try {
              // JavaScript objects backed by C++ objects (e.g. BSON values) do not serialize correctly
              // when passed through the ScopedThread constructor. To work around this behavior, we
              // instead pass a stringified form of the JavaScript object through the ScopedThread
              // constructor and use eval() to rehydrate it.
              const lsid = UUID();
              thread1 = new ScopedThread(doInsertWithSession, primary.host, tojson(lsid), 0);
              thread1.start();
       
              // Wait until exactly one insert on "mycoll" is reported as waiting for a lock, i.e.
              // thread1's insert has reached the server and is blocked.
              assert.soon(
                  () => {
                      const ops = db.currentOp({"command.insert": "mycoll", waitingForLock: true});
                      return ops.inprog.length === 1;
                  },
                  () => {
                      return "insert operation was never found to be waiting for a lock: " +
                          tojson(db.currentOp());
                  });
       
              // Start a second insert on the same session (same lsid) with the next txnNumber while
              // the first insert is still blocked.
              thread2 = new ScopedThread(doInsertWithSession, primary.host, tojson(lsid), 1);
              thread2.start();
       
              // XXX: Wait a little bit for thread2 to have sent its insert command to the server or we
              // otherwise won't trigger the bug.
              sleep(5000);
          } finally {
              // We run the fsyncUnlock command in a finally block to avoid leaving the server fsyncLock'd
              // if the test were to fail.
              assert.commandWorked(db.fsyncUnlock());
          }
       
          thread1.join();
          thread2.join();
       
          // Each thread returns {ok: 1} on success or {ok: 0, ...} on failure, so
          // assert.commandWorked() fails the test if either insert threw.
          assert.commandWorked(thread1.returnData());
          assert.commandWorked(thread2.returnData());
       
          rst.stopSet();
      })();
      

      Show
      python buildscripts/resmoke.py --suites=no_server already_checked_out_session.js already_checked_out_session.js ( function () { "use strict" ;   load( "jstests/libs/parallelTester.js" );   const rst = new ReplSetTest({nodes: 1}); rst.startSet(); rst.initiate();   const primary = rst.getPrimary(); const db = primary.getDB( "test" );   function doInsertWithSession(host, lsid, txnNumber) { try { const conn = new Mongo(host); const db = conn.getDB( "test" ); assert.commandWorked(db.runCommand({ insert: "mycoll" , documents: [{_id: txnNumber}], lsid: {id: eval(lsid)}, txnNumber: NumberLong(txnNumber), })); return {ok: 1}; } catch (e) { return {ok: 0, error: e.toString(), stack: e.stack}; } }   let thread1; let thread2;   assert.commandWorked(db.fsyncLock()); try { // JavaScript objects backed by C++ objects (e.g. BSON values) do not serialize correctly // when passed through the ScopedThread constructor. To work around this behavior, we // instead pass a stringified form of the JavaScript object through the ScopedThread // constructor and use eval() to rehydrate it. const lsid = UUID(); thread1 = new ScopedThread(doInsertWithSession, primary.host, tojson(lsid), 0); thread1.start();   assert.soon( () => { const ops = db.currentOp({ "command.insert" : "mycoll" , waitingForLock: true }); return ops.inprog.length === 1; }, () => { return "insert operation was never found to be waiting for a lock: " + tojson(db.currentOp()); });   thread2 = new ScopedThread(doInsertWithSession, primary.host, tojson(lsid), 1); thread2.start();   // XXX: Wait a little bit for thread2 to have sent its insert command to the server or we // otherwise won't trigger the bug. sleep(5000); } finally { // We run the fsyncUnlock command in a finally block to avoid leaving the server fsyncLock'd // if the test were to fail. 
assert.commandWorked(db.fsyncUnlock()); }   thread1.join(); thread2.join();   assert.commandWorked(thread1.returnData()); assert.commandWorked(thread2.returnData());   rst.stopSet(); })();
    • Sprint:
      Repl 2018-07-30, Repl 2018-08-13
    • Linked BF Score:
      58

      Description

      The changes from commit 1447252, made as part of SERVER-35173, cause us to acquire the Client::_lock before calling SessionCatalog::checkOutSession(). However, if the predicate

      pred = [&sri]() { return !sri->checkedOut; }

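      isn't immediately satisfied (i.e. the session is already checked out), then the wait in opCtx->waitForConditionOrInterrupt() (see the second snippet below) will attempt to acquire the Client::_lock a second time while the thread is already holding it, and the thread deadlocks with itself.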

      auto& checkedOutSession = operationSessionDecoration(opCtx);
      if (!checkedOutSession) {
          auto sessionTransactionTable = SessionCatalog::get(opCtx);
          // We acquire a Client lock here to guard the construction of this session so that
          // references to this session are safe to use while the lock is held.
          stdx::lock_guard<Client> lk(*opCtx->getClient());
          checkedOutSession.emplace(sessionTransactionTable->checkOutSession(opCtx));
      

      https://github.com/mongodb/mongo/blob/026f69dbf4f98e91b499bde5cb4ce73c332e9549/src/mongo/db/session_catalog.cpp#L257-L260

      // Wait until the session is no longer checked out
      opCtx->waitForConditionOrInterrupt(
          sri->availableCondVar, ul, [&sri]() { return !sri->checkedOut; });
      

      https://github.com/mongodb/mongo/blob/026f69dbf4f98e91b499bde5cb4ce73c332e9549/src/mongo/db/session_catalog.cpp#L132-L134

      Note: The changes from SERVER-35173 exist only on the master branch and this issue therefore does not affect 4.0 or earlier branches.

        Attachments

          Issue Links

            Activity

              People

              • Votes:
                0 Vote for this issue
                Watchers:
                8 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: