Core Server / SERVER-47852

Two primaries can satisfy write concern "majority" after data erased on a node

    • Type: Bug
    • Resolution: Works as Designed
    • Priority: Major - P3
    • Affects Version/s: None
    • Component/s: Replication
    • Labels: None
      /*
       * This test demonstrates two primaries existing in the same replica set, with both
       * primaries able to satisfy majority write concern.
       *
       * The test simulates the scenario below.
       * Note: 'P' refers to a primary, 'S' to a secondary.
       * 1) [P, S0, S1, S2] // Start a 4-node replica set.
       * 2) Partition A: [P] Partition B: [S0->P, S1, S2] // Create network partitions A & B.
       * 3) Partition A: [P] Partition B: [P, S1, S2, S3] // Add a new node S3 to partition B.
       * 4) Partition A: [P, S2] Partition B: [P, S1, S3] // Restart/resync S2 and move it back to the partition A pool.
       * 5) Partition A: [P, S2, S4] Partition B: [P, S1, S3] // Add a new node S4 to partition A.
       */
      load('jstests/replsets/rslib.js');
      (function() {
      'use strict';
      
      // Start a 4 node replica set.
      // [P, S0, S1, S2]
      const rst = new ReplSetTest({
          nodes: [{}, {}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0}}],
          nodeOptions: {setParameter: {enableAutomaticReconfig: false}},
          useBridge: true
      });
      
      // Disable chaining and prevent automatic elections from being triggered by the liveness timeout.
      var config = rst.getReplSetConfig();
      config.settings = config.settings || {};
      config.settings["chainingAllowed"] = false;
      config.settings["electionTimeoutMillis"] = ReplSetTest.kForeverMillis;
      
      rst.startSet();
      rst.initiate(config);
      
      const dbName = jsTest.name();
      const collName = "coll";
      
      let primary1 = rst.getPrimary();
      const primaryDB = primary1.getDB(dbName);
      const primaryColl = primaryDB[collName];
      const secondaries = rst.getSecondaries();
      
      jsTestLog("Do a document write");
      assert.commandWorked(primaryColl.insert({_id: 1, x: 1}, {"writeConcern": {"w": 4}}));
      rst.awaitReplication();
      
      // Create a network partition so that we end up in this state: [P] [S0, S1, S2].
      jsTestLog("Disconnect primary1 from all secondaries");
      primary1.disconnect([secondaries[0], secondaries[1], secondaries[2]]);
      
      jsTestLog("Make secondary0 to be become primary");
      assert.commandWorked(secondaries[0].adminCommand({"replSetStepUp": 1}));
      
      // Now our network topology will be [P] [S0->P, S1, S2].
      jsTestLog("Wait for secondary0 to become master");
      checkLog.contains(secondaries[0], "Transition to primary complete");
      let primary2 = secondaries[0];
      
      jsTestLog("Adding a new voting node to the replica set");
      const node5 = rst.add({
          rsConfig: {priority: 0, votes: 1},
          setParameter: {
              'numInitialSyncAttempts': 1,
              'enableAutomaticReconfig': false,
          }
      });
      
      // Simulate this network topology [P] [P, S1, S2, S3].
      node5.disconnect([primary1]);
      
      // Run a reconfig command on primary2 to add node5.
      var config = rst.getReplSetConfigFromNode(1);
      var newConfig = rst.getReplSetConfig();
      config.members = newConfig.members;
      config.version += 1;
      assert.adminCommandWorkedAllowingNetworkError(
          primary2, {replSetReconfig: config, maxTimeMS: ReplSetTest.kDefaultTimeoutMS});
      
      // Make sure the new write is able to propagate to the newly added node.
      jsTestLog("Do a document write on the primary2");
      assert.commandWorked(
          primary2.getDB(dbName)[collName].insert({_id: 2, x: 2}, {"writeConcern": {"w": 4}}));
      
      // Now make sure we get into this state: [P, S2] [P, S1, S3].
      jsTestLog("Disconnect Secondary2 from primary2 and reconnect to primary1");
      secondaries[2].disconnect([secondaries[0], secondaries[1], node5]);
      secondaries[2].reconnect([primary1]);
      
      jsTestLog("Kill and restart Secondary2");
      rst.stop(3, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, {forRestart: true});
      jsTestLog("Restarting the node.");
      var restartNode = rst.start(3, {startClean: true}, true);
      
      jsTestLog("wait for secondary state");
      waitForState(restartNode, ReplSetTest.State.SECONDARY);
      
      jsTestLog("Adding a new voting node to the replica set");
      const node6 = rst.add({
          rsConfig: {priority: 0, votes: 1},
          setParameter: {
              'numInitialSyncAttempts': 1,
              'enableAutomaticReconfig': false,
          }
      });
      
      // Simulate this network topology [P, S2, S4] [P, S1, S3].
      node6.disconnect([secondaries[0], secondaries[1], node5]);
      
      // Run a reconfig command on primary1 to add node6.
      config = rst.getReplSetConfigFromNode(0);
      newConfig = rst.getReplSetConfig();
      // Only reset members.
      config.members[4] = newConfig.members[5];
      config.version += 1;
      assert.adminCommandWorkedAllowingNetworkError(
          primary1, {replSetReconfig: config, maxTimeMS: ReplSetTest.kDefaultTimeoutMS});
      
      jsTestLog(
          "Do some document writes to verify we have 2 primaries and both satisfy write concern majority");
      assert.commandWorked(primary1.getDB(dbName)[collName].insert({_id: 3, x: "primary1 Doc"},
                                                                   {"writeConcern": {"w": "majority"}}));
      assert.commandWorked(primary1.getDB(dbName)[collName].insert({_id: 4, x: "primary1 Doc"},
                                                                   {"writeConcern": {"w": 3}}));
      assert.commandWorked(primary1.getDB(dbName)[collName].insert({_id: 5, x: "primary1 Doc"},
                                                                   {"writeConcern": {"w": "majority"}}));
      assert.commandWorked(primary2.getDB(dbName)[collName].insert({_id: 6, x: "primary2 Doc"},
                                                                   {"writeConcern": {"w": "majority"}}));
      
      jsTestLog("Verify our primary1 can be get re-elected.");
      assert.commandWorked(primary1.adminCommand({"replSetStepDown": 1000, "force": true}));
      assert.commandWorked(primary1.adminCommand({replSetFreeze: 0}));
      assert.commandWorked(primary1.adminCommand({"replSetStepUp": 1}));
      
      jsTestLog("Test completed");
      rst.stopSet();
      }());
      

      While working on the initial sync semantics upgrade/downgrade work, I found a scenario that can lead to two primaries in a replica set, with both primaries able to satisfy write concern "majority". It appears to be a safe reconfig bug.
      Below is the scenario. Assume 'P' indicates a primary, 'S' indicates a secondary, and all the nodes involved in the scenario are voters (votes: 1).
      1) Start a 4-node replica set A, B, C, D ==> [A(P), B(S), C(S), D(S)]; write/elect quorum = 3.
      2) Create network partitions X & Y. Partition X: [A(P)] Partition Y: [B(S), C(S), D(S)].
      3) Step up node B. Partition X: [A(P)] Partition Y: [B(P), C(S), D(S)].
      4) Add a new node E to partition Y using a reconfig command. Partition X: [A(P)] Partition Y: [B(P), C(S), D(S), E(S)]; the write/elect quorum is still 3.
      5) Now move node D to the partition X pool and make it restart and resync from node A. Partition X: [A(P), D(S)] Partition Y: [B(P), C(S), E(S)].
      6) Now add a new node F to partition X using a reconfig command. Partition X: [A(P), D(S), F(S)] Partition Y: [B(P), C(S), E(S)].

      • To be noted, the prerequisites for a reconfig command are that
        the current config C_i must be majority committed and all entries committed in the previous config C_(i-1) must also be committed in the current config C_i. Since node A's current config C_i is [A, B, C, D] (commit quorum = 3), which is majority committed, and all entries committed in the previous config C_(i-1) are also committed (plus the quorum-check step: node A is also able to contact a majority of the nodes in the new config, namely A, D, F), node A was able to run the reconfig command successfully, updating and persisting the new config document, i.e., from [A, B, C, D] to [A, B, C, D, F]; its write/elect quorum is still 3. A sketch of this reconfig is shown below.
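
      For illustration only, here is a minimal sketch of the reconfig that node A accepts in step 6, assuming a mongo shell connected to node A and a hypothetical host name/_id for node F; it mirrors what the attached jstest does programmatically.

      // Run in a mongo shell connected to node A (partition X); its current config is [A, B, C, D].
      const cfg = rs.conf();
      // Hypothetical _id and host for the new node F.
      cfg.members.push({_id: 5, host: "nodeF.example.net:27017", priority: 0, votes: 1});
      cfg.version += 1;
      // The reconfig quorum check only needs a majority of the *new* config; A, D and F are
      // reachable from A, so the command succeeds even though B, C and E are partitioned away.
      assert.commandWorked(db.adminCommand({replSetReconfig: cfg}));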

      So, at the end of this, partition X thinks its config is [A, B, C, D, F] and partition Y thinks it is [A, B, C, D, E], with A being the primary in partition X and B being the primary in partition Y. The divergence can be observed directly, as sketched below.
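
      A minimal sketch (mongo shell, hypothetical connection strings) of how the divergent state could be observed at this point: both nodes report themselves as primary, yet they disagree on the member list.

      // Hypothetical host names for node A (partition X) and node B (partition Y).
      const connA = new Mongo("nodeA.example.net:27017");
      const connB = new Mongo("nodeB.example.net:27017");
      // Both nodes claim to be primary of the same replica set.
      assert(connA.adminCommand({isMaster: 1}).ismaster);
      assert(connB.adminCommand({isMaster: 1}).ismaster);
      // ...yet their config documents have diverged.
      printjson(connA.adminCommand({replSetGetConfig: 1}).config.members.map(function(m) { return m.host; }));  // [A, B, C, D, F]
      printjson(connB.adminCommand({replSetGetConfig: 1}).config.members.map(function(m) { return m.host; }));  // [A, B, C, D, E]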

      Note: This problem can also be reproduced with initial sync semantics on. I have attached the jstest to demonstrate the problem.

            Assignee:
            backlog-server-repl [DO NOT USE] Backlog - Replication Team
            Reporter:
            suganthi.mani@mongodb.com Suganthi Mani
            Votes:
            0
            Watchers:
            11

              Created:
              Updated:
              Resolved: