Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-36007

Attempting to check out an already checked out session leads to self-deadlock

    • Type: Bug
    • Resolution: Fixed
    • Priority: Major - P3
    • 4.1.2
    • Affects Version/s: None
    • Component/s: Sharding
    • Labels:
      None
    • Fully Compatible
    • ALL
    • Hide
      python buildscripts/resmoke.py --suites=no_server already_checked_out_session.js
      
      already_checked_out_session.js
      (function() {
          "use strict";
      
          load("jstests/libs/parallelTester.js");
      
          // Bring up a single-node replica set to run the test against.
          const rst = new ReplSetTest({nodes: 1});
          rst.startSet();
          rst.initiate();
      
          // 'db' is the main thread's handle on the primary's "test" database. It is used for the
          // fsyncLock/fsyncUnlock and currentOp commands below; the inserts themselves run on their
          // own connections inside doInsertWithSession().
          const primary = rst.getPrimary();
          const db = primary.getDB("test");
      
          /**
           * Opens a fresh connection to 'host' and inserts the document {_id: txnNumber} into
           * "test.mycoll" using the given logical session id and transaction number.
           *
           * 'lsid' is the tojson() string form of the session UUID; it is turned back into a value
           * with eval(). The outcome is always returned as a plain object rather than thrown:
           * {ok: 1} on success, or {ok: 0, error, stack} describing the exception that was caught.
           */
          function doInsertWithSession(host, lsid, txnNumber) {
              let outcome;
              try {
                  const testDB = new Mongo(host).getDB("test");
                  const insertCmd = {
                      insert: "mycoll",
                      documents: [{_id: txnNumber}],
                      lsid: {id: eval(lsid)},
                      txnNumber: NumberLong(txnNumber),
                  };
                  assert.commandWorked(testDB.runCommand(insertCmd));
                  outcome = {ok: 1};
              } catch (err) {
                  outcome = {ok: 0, error: err.toString(), stack: err.stack};
              }
              return outcome;
          }
      
          // Handles for the two client threads. They are declared outside the try block so that they
          // can still be joined after the server has been unlocked.
          let thread1;
          let thread2;
      
          // fsyncLock the server so that the first insert gets stuck waiting for a lock
          // (presumably while it still has its session checked out -- that is the state this
          // repro needs).
          assert.commandWorked(db.fsyncLock());
          try {
              // JavaScript objects backed by C++ objects (e.g. BSON values) do not serialize correctly
              // when passed through the ScopedThread constructor. To work around this behavior, we
              // instead pass a stringified form of the JavaScript object through the ScopedThread
              // constructor and use eval() to rehydrate it.
              const lsid = UUID();
              // First insert on the session, using txnNumber 0.
              thread1 = new ScopedThread(doInsertWithSession, primary.host, tojson(lsid), 0);
              thread1.start();
      
              // Do not proceed until currentOp reports exactly one insert on "mycoll" that is waiting
              // for a lock, i.e. thread1's insert has reached the server and is blocked.
              assert.soon(
                  () => {
                      const ops = db.currentOp({"command.insert": "mycoll", waitingForLock: true});
                      return ops.inprog.length === 1;
                  },
                  () => {
                      return "insert operation was never found to be waiting for a lock: " +
                          tojson(db.currentOp());
                  });
      
              // Second insert on the *same* session id, using txnNumber 1, issued while the first
              // insert is still blocked.
              thread2 = new ScopedThread(doInsertWithSession, primary.host, tojson(lsid), 1);
              thread2.start();
      
              // XXX: Wait a little bit for thread2 to have sent its insert command to the server or we
              // otherwise won't trigger the bug.
              sleep(5000);
          } finally {
              // We run the fsyncUnlock command in a finally block to avoid leaving the server fsyncLock'd
              // if the test were to fail.
              assert.commandWorked(db.fsyncUnlock());
          }
      
          // With the server unlocked, both inserts are expected to run to completion. If the
          // deadlock described in this ticket occurs, presumably one of these joins never returns.
          thread1.join();
          thread2.join();
      
          // doInsertWithSession() reports failure as {ok: 0, ...} instead of throwing, so each
          // thread's result has to be checked explicitly.
          assert.commandWorked(thread1.returnData());
          assert.commandWorked(thread2.returnData());
      
          // Shut down the replica set started at the top of the test.
          rst.stopSet();
      })();
      
      Show
      python buildscripts/resmoke.py --suites=no_server already_checked_out_session.js already_checked_out_session.js ( function () { "use strict" ; load( "jstests/libs/parallelTester.js" ); const rst = new ReplSetTest({nodes: 1}); rst.startSet(); rst.initiate(); const primary = rst.getPrimary(); const db = primary.getDB( "test" ); function doInsertWithSession(host, lsid, txnNumber) { try { const conn = new Mongo(host); const db = conn.getDB( "test" ); assert.commandWorked(db.runCommand({ insert: "mycoll" , documents: [{_id: txnNumber}], lsid: {id: eval(lsid)}, txnNumber: NumberLong(txnNumber), })); return {ok: 1}; } catch (e) { return {ok: 0, error: e.toString(), stack: e.stack}; } } let thread1; let thread2; assert.commandWorked(db.fsyncLock()); try { // JavaScript objects backed by C++ objects (e.g. BSON values) do not serialize correctly // when passed through the ScopedThread constructor. To work around this behavior, we // instead pass a stringified form of the JavaScript object through the ScopedThread // constructor and use eval() to rehydrate it. const lsid = UUID(); thread1 = new ScopedThread(doInsertWithSession, primary.host, tojson(lsid), 0); thread1.start(); assert.soon( () => { const ops = db.currentOp({ "command.insert" : "mycoll" , waitingForLock: true }); return ops.inprog.length === 1; }, () => { return "insert operation was never found to be waiting for a lock: " + tojson(db.currentOp()); }); thread2 = new ScopedThread(doInsertWithSession, primary.host, tojson(lsid), 1); thread2.start(); // XXX: Wait a little bit for thread2 to have sent its insert command to the server or we // otherwise won't trigger the bug. sleep(5000); } finally { // We run the fsyncUnlock command in a finally block to avoid leaving the server fsyncLock'd // if the test were to fail. assert.commandWorked(db.fsyncUnlock()); } thread1.join(); thread2.join(); assert.commandWorked(thread1.returnData()); assert.commandWorked(thread2.returnData()); rst.stopSet(); })();
    • Repl 2018-07-30, Repl 2018-08-13
    • 58

      The changes from commit 1447252 as part of SERVER-35173 made it so we acquire the Client::_lock before calling SessionCatalog::checkOutSession(). However, if the wait predicate

      pred = [&sri]() { return !sri->checkedOut; }

      isn't immediately satisfied, then we'll attempt to acquire the Client::_lock a second time while already holding it.

      auto& checkedOutSession = operationSessionDecoration(opCtx);
      if (!checkedOutSession) {
          auto sessionTransactionTable = SessionCatalog::get(opCtx);
          // We acquire a Client lock here to guard the construction of this session so that
          // references to this session are safe to use while the lock is held.
          stdx::lock_guard<Client> lk(*opCtx->getClient());
          checkedOutSession.emplace(sessionTransactionTable->checkOutSession(opCtx));
      

      https://github.com/mongodb/mongo/blob/026f69dbf4f98e91b499bde5cb4ce73c332e9549/src/mongo/db/session_catalog.cpp#L257-L260

      // Wait until the session is no longer checked out
      opCtx->waitForConditionOrInterrupt(
          sri->availableCondVar, ul, [&sri]() { return !sri->checkedOut; });
      

      https://github.com/mongodb/mongo/blob/026f69dbf4f98e91b499bde5cb4ce73c332e9549/src/mongo/db/session_catalog.cpp#L132-L134

      Note: The changes from SERVER-35173 exist only on the master branch and this issue therefore does not affect 4.0 or earlier branches.

            Assignee:
            jinny.byun Jinny Byun
            Reporter:
            max.hirschhorn@mongodb.com Max Hirschhorn
            Votes:
            0 Vote for this issue
            Watchers:
            8 Start watching this issue

              Created:
              Updated:
              Resolved: