diff --git a/jstests/replsets/refresh_session_prepare_crash.js b/jstests/replsets/refresh_session_prepare_crash.js new file mode 100644 index 0000000000..cc8b071e37 --- /dev/null +++ b/jstests/replsets/refresh_session_prepare_crash.js @@ -0,0 +1,90 @@ +/** + * Tests session checkout on a stepped-down node ahead of applying a prepare for the same session. + * + * @tags: [ + * requires_fcv_46, + * requires_majority_read_concern, + * ] + */ +(function() { +"use strict"; + +load("jstests/core/txns/libs/prepare_helpers.js"); +load("jstests/replsets/rslib.js"); // For reconnect() +load("jstests/libs/parallel_shell_helpers.js"); +load("jstests/libs/fail_point_util.js"); + +const replTest = new ReplSetTest({nodes: 2}); +replTest.startSet({setParameter: {logComponentVerbosity: tojson({storage: 3})}}); +replTest.initiate(); + +const dbName = "test"; +const collName = "coll"; +const primary = replTest.getPrimary(); +const newPrimary = replTest.getSecondary(); + +const testDB = primary.getDB(dbName); +testDB.dropDatabase(); +assert.commandWorked(testDB.runCommand({create: collName, writeConcern: {w: "majority"}})); + +const session = primary.startSession({causalConsistency: false}); +const sessionID = session.getSessionId(); +jsTestLog("DLZ started a new session " + tojson(sessionID)); + +let failPoint = configureFailPoint(primary, "hangBeforeSessionCheckOut"); + +const txnFunc = function(sessionID) { + load("jstests/core/txns/libs/prepare_helpers.js"); + jsTestLog("DLZ Starting a new transaction on session " + tojson(sessionID)); + const session = PrepareHelpers.createSessionWithGivenId(db.getMongo(), sessionID); + const sessionDB = session.getDatabase("test"); + const sessionColl = sessionDB.getCollection("coll"); + session.startTransaction({writeConcern: {w: "majority"}}); + assert.commandWorked(sessionDB.runCommand({find: "test", readConcern: {level: "snapshot"}})); + assert.commandWorked(session.commitTransaction_forTesting()); +}; +const waitForTxnShell = startParallelShell(funWithArgs(txnFunc, sessionID), 
primary.port); +failPoint.wait(); + +jsTestLog("DLZ stepping up " + newPrimary); +replTest.stepUp(newPrimary); +assert.eq(replTest.getPrimary(), newPrimary, "Primary didn't change."); +jsTestLog("DLZ stepped up " + newPrimary); + +const prepareTxnFunc = function(sessionID) { + load("jstests/core/txns/libs/prepare_helpers.js"); + jsTestLog("DLZ Starting a new transaction on session " + tojson(sessionID)); + const newPrimaryDB = db.getMongo().getDB("test"); + jsTestLog( + "DLZ Run a transaction with txnNumber 10 and the same session ID on the new primary."); + assert.commandWorked(newPrimaryDB.runCommand({ + insert: "coll", + documents: [{c: 1}], + lsid: sessionID, + txnNumber: NumberLong(10), + startTransaction: true, + autocommit: false + })); + let res = assert.commandWorked(newPrimaryDB.adminCommand({ + prepareTransaction: 1, + lsid: sessionID, + txnNumber: NumberLong(10), + autocommit: false, + writeConcern: {w: "majority"} + })); + jsTestLog("DLZ prepared on new primary with prepareTimestamp " + tojson(res.prepareTimestamp)); +}; +let applyFailPoint = configureFailPoint(primary, "hangBeforeSessionCheckOutForApplyPrepare"); +const waitForPrepareTxnShell = + startParallelShell(funWithArgs(prepareTxnFunc, sessionID), newPrimary.port); +applyFailPoint.wait(); +failPoint.off(); +sleep(10000); +applyFailPoint.off(); + +waitForPrepareTxnShell(); + +waitForTxnShell(); + +replTest.stopSet(); +})(); diff --git a/src/mongo/db/kill_sessions_local.cpp b/src/mongo/db/kill_sessions_local.cpp index d1570e2f2e..3005ac41ce 100644 --- a/src/mongo/db/kill_sessions_local.cpp +++ b/src/mongo/db/kill_sessions_local.cpp @@ -232,6 +232,9 @@ void invalidateSessionsForStepdown(OperationContext* opCtx) { [](OperationContext* killerOpCtx, const SessionToKill& session) { auto txnParticipant = TransactionParticipant::get(session); if (!txnParticipant.transactionIsPrepared()) { + LOGV2(82099, + "DLZ invalidating txnParticipant", + "sessionID"_attr = session.getSessionId()); 
txnParticipant.invalidate(killerOpCtx); } }, diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 2b19a51c91..fa82da8d57 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -1022,7 +1022,7 @@ Status applyOperation_inlock(OperationContext* opCtx, // Get the single oplog entry to be applied or the first oplog entry of grouped inserts. auto op = opOrGroupedInserts.getOp(); LOGV2_DEBUG(21254, - 3, + 0, "applying op (or grouped inserts): {op}, oplog application mode: " "{oplogApplicationMode}", "Applying op (or grouped inserts)", diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index 67fb840de6..ae2e5f7da1 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -59,6 +59,8 @@ MONGO_FAIL_POINT_DEFINE(skipReconstructPreparedTransactions); // conflict error. MONGO_FAIL_POINT_DEFINE(applyPrepareTxnOpsFailsWithWriteConflict); +MONGO_FAIL_POINT_DEFINE(hangBeforeSessionCheckOutForApplyPrepare); + // Apply the oplog entries for a prepare or a prepared commit during recovery/initial sync. Status _applyOperationsForTransaction(OperationContext* opCtx, const std::vector& ops, @@ -417,6 +419,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx, // The write on transaction table may be applied concurrently, so refreshing state // from disk may read that write, causing starting a new transaction on an existing // txnNumber. Thus, we start a new transaction without refreshing state from disk. 
+ hangBeforeSessionCheckOutForApplyPrepare.pauseWhileSet(); MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx); auto txnParticipant = TransactionParticipant::get(opCtx); diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp index ab449e2b9a..0e837c2b1f 100644 --- a/src/mongo/db/service_entry_point_common.cpp +++ b/src/mongo/db/service_entry_point_common.cpp @@ -113,6 +113,7 @@ MONGO_FAIL_POINT_DEFINE(sleepMillisAfterCommandExecutionBegins); MONGO_FAIL_POINT_DEFINE(waitAfterNewStatementBlocksBehindPrepare); MONGO_FAIL_POINT_DEFINE(waitAfterCommandFinishesExecution); MONGO_FAIL_POINT_DEFINE(failWithErrorCodeInRunCommand); +MONGO_FAIL_POINT_DEFINE(hangBeforeSessionCheckOut); // Tracks the number of times a legacy unacknowledged write failed due to // not primary error resulted in network disconnection. @@ -832,6 +833,7 @@ Future InvokeCommand::SessionCheckoutPath::_checkOutSession() { // This constructor will check out the session. It handles the appropriate state management // for both multi-statement transactions and retryable writes. Currently, only requests with // a transaction number will check out the session. 
+ hangBeforeSessionCheckOut.pauseWhileSet(); _sessionTxnState = std::make_unique(opCtx); _txnParticipant.emplace(TransactionParticipant::get(opCtx)); diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index 24b925280c..d95f2cd460 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -2180,12 +2180,17 @@ void TransactionParticipant::Participant::refreshFromStorageIfNeeded(OperationCo invariant(!opCtx->getClient()->isInDirectClient()); invariant(!opCtx->lockState()->isLocked()); - if (p().isValid) + LOGV2(82000, "DLZ calling refreshFromStorageIfNeeded", "isValid"_attr = p().isValid); + if (p().isValid) { + LOGV2(82001, "DLZ refreshFromStorageIfNeeded p().isValid"); return; + } + LOGV2(82002, "DLZ calling fetchActiveTransactionHistory"); auto activeTxnHistory = fetchActiveTransactionHistory(opCtx, _sessionId()); const auto& lastTxnRecord = activeTxnHistory.lastTxnRecord; if (lastTxnRecord) { + LOGV2(82003, "DLZ got lastTxnRecord", "lastTxnRecord"_attr = lastTxnRecord->toBSON()); stdx::lock_guard lg(*opCtx->getClient()); o(lg).activeTxnNumber = lastTxnRecord->getTxnNum(); o(lg).lastWriteOpTime = lastTxnRecord->getLastWriteOpTime(); @@ -2216,6 +2221,8 @@ void TransactionParticipant::Participant::refreshFromStorageIfNeeded(OperationCo MONGO_UNREACHABLE; } } + } else { + LOGV2(82004, "DLZ no lastTxnRecord"); } p().isValid = true;