From: Preeti Murthy
Date: Mon, 7 Oct 2024 10:23:23 +0530
Subject: [PATCH] Add JS test to simulate a case where majority secondaries
 enter rollback state during a failover where old primary is frozen
 temporarily.

---
 jstests/replsets/split_brain.js                  | 83 +++++++++++++++++++
 .../db/repl/replication_coordinator_impl.cpp     | 12 +++
 2 files changed, 95 insertions(+)
 create mode 100644 jstests/replsets/split_brain.js

diff --git a/jstests/replsets/split_brain.js b/jstests/replsets/split_brain.js
new file mode 100644
index 00000000000..2356124166a
--- /dev/null
+++ b/jstests/replsets/split_brain.js
@@ -0,0 +1,83 @@
+/**
+ * Test that majority secondaries go into rollback state when a failover happens
+ * and the old primary is frozen.
+ * This however does not cause data loss and hence is not a problem. But this renders
+ * the cluster unavailable for several minutes.
+ *
+ * This test requires we add sleeps in some parts of the server code to help disconnect and reconnect nodes.
+ * Not meant to be merged but for raising a JIRA with mongo to show this behavior.
+ * https://jira.mongodb.org/browse/SERVER-95560
+ */
+(function() {
+"use strict";
+
+load("jstests/core/txns/libs/prepare_helpers.js");
+load("jstests/libs/storage_helpers.js");
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/write_concern_util.js"); // for stopReplicationOnSecondaries.
+load("jstests/aggregation/extras/utils.js");
+load("jstests/libs/parallel_shell_helpers.js");
+
+const replTest = new ReplSetTest({nodes: 3, useBridge: true, settings: {electionTimeoutMillis: 30000,
+catchUpTimeoutMillis: 10000, chainingAllowed: false}});
+const nodes = replTest.startSet();
+replTest.initiate();
+replTest.awaitNodesAgreeOnPrimary();
+
+const primary = replTest.getPrimary();
+jsTestLog('1: Primary is ' + primary);
+const secondaryToDisconnect = replTest.getSecondary();
+const secondaryToRollback = replTest.getSecondaries()[1];
+
+// Insert one document in a collection.
+const testDB = primary.getDB("test");
+const mongos = new Mongo(primary.host);
+mongos.getDB("test").runCommand({
+    insert: "test-collection",
+    documents: [{a:0, b: "first-insert"}],
+});
+jsTestLog("Wrote document");
+
+jsTestLog("2: Create a network partition between primary and one secondary to trigger an election");
+primary.disconnect(secondaryToDisconnect);
+
+jsTestLog("3: Wait for election to succeed ");
+checkLog.contains(secondaryToDisconnect, "Election succeeded, assuming primary role");
+
+// This helps simulate a case where stuck writes to the old primary progress during
+// the catch up phase and the new primary (secondaryToDisconnect) misses seeing these writes.
+// There is a sleep in the source code which gives us time to disconnect.
+secondaryToDisconnect.disconnect(secondaryToRollback);
+
+jsTestLog("5: Perform writes during catch up");
+mongos.getDB("test").runCommand({
+    insert: "test-collection2",
+    documents: [{a:0, b: "first-insert"}],
+});
+
+checkLog.contains(secondaryToDisconnect, "Exited primary catch-up mode");
+
+secondaryToRollback.reconnect(secondaryToDisconnect);
+
+checkLog.contains(secondaryToDisconnect, "Transition to primary complete; database writes are now permitted");
+
+
+jsTestLog("7: Reconnect old and new primary");
+primary.reconnect(secondaryToDisconnect);
+
+replTest.waitForState(secondaryToDisconnect, ReplSetTest.State.PRIMARY);
+
+jsTestLog("Waiting on secondaries to enter rollback");
+checkLog.contains(secondaryToRollback, "Starting rollback due to fetcher error");
+
+// Old primary (now secondary) enters rollback.
+checkLog.contains(primary, "Starting rollback due to fetcher error");
+
+jsTestLog("Waiting on secondary to exit rollback");
+replTest.waitForState(secondaryToRollback, ReplSetTest.State.SECONDARY);
+
+jsTestLog("8: Waiting on old primary to step down");
+replTest.waitForState(primary, ReplSetTest.State.SECONDARY);
+replTest.stopSet();
+})();
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 2a5126b1be7..62fa58f7638 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -4840,6 +4840,10 @@ void ReplicationCoordinatorImpl::_onFollowerModeStateChange() {
 
 void ReplicationCoordinatorImpl::CatchupState::start_inlock() {
     LOGV2(21359, "Entering primary catch-up mode");
+    // https://jira.mongodb.org/browse/SERVER-95560
+    // TOREMOVE: Added for jstests/replsets/split_brain.js to delay the primary that is
+    // stepping up from seeing writes making progress during the catch up phase.
+    sleepFor(Milliseconds{12000});
 
     // Reset the number of catchup operations performed before starting catchup.
     _numCatchUpOps = 0;
@@ -4897,6 +4901,10 @@ void ReplicationCoordinatorImpl::CatchupState::abort_inlock(PrimaryCatchUpConclu
         .incrementNumCatchUpsConcludedForReason(reason);
 
     LOGV2(21363, "Exited primary catch-up mode");
+    // https://jira.mongodb.org/browse/SERVER-95560
+    // TOREMOVE: Added for jstests/replsets/split_brain.js to buy us time to reconnect the stepping up
+    // primary to the rest of the cluster. Else it will relinquish primary seeing no quorum.
+    sleepFor(Milliseconds{12000});
     // Clean up its own members.
     if (_timeoutCbh) {
         _repl->_replExecutor->cancel(_timeoutCbh);
@@ -6138,6 +6146,10 @@ EventHandle ReplicationCoordinatorImpl::_updateTerm_inlock(
         _pendingTermUpdateDuringStepDown = term;
     }
     if (_topCoord->prepareForUnconditionalStepDown()) {
+        // https://jira.mongodb.org/browse/SERVER-95560
+        // TOREMOVE: Added for jstests/replsets/split_brain.js so that the primary stepping
+        // down does not see the new term in time to prevent writes going through.
+        sleepFor(Milliseconds{40000});
         LOGV2(21402,
               "stepping down from primary, because a new term has begun: {term}",
               "Stepping down from primary, because a new term has begun",
-- 
2.46.2