[SERVER-77137] A cluster can join nodes of another cluster Created: 15/May/23  Updated: 01/Feb/24  Resolved: 01/Feb/24

Status: Closed
Project: Core Server
Component/s: Replication
Affects Version/s: 4.2.8
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: mingming tang Assignee: Xuerui Fa
Resolution: Works as Designed Votes: 0
Labels: Bug
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Issue Links:
Related
is related to DOCS-16624 [Server] Specify that a node in a rep... External Review
Assigned Teams:
Replication
Operating System: ALL
Steps To Reproduce:

I have two clusters that have the same replSet name and keyFile, for example:

CLUSTER_A:

ydmongo:PRIMARY> rs.status()
{
        "set" : "ydmongo",
        ...
        "members" : [
                {
                        "_id" : 0,
                        "name" : "192.168.111.10:21000",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        ...
                },
                {
                        "_id" : 1,
                        "name" : "192.168.111.10:22000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                },
                {
                        "_id" : 2,
                        "name" : "192.168.111.10:23000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                }
        ],
        ...
}

CLUSTER_B:

ydmongo:PRIMARY> rs.status()
{
        "set" : "ydmongo",
        ...
        "members" : [
                {
                        "_id" : 0,
                        "name" : "192.168.111.10:11000",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        ...
                },
                {
                        "_id" : 1,
                        "name" : "192.168.111.10:12000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                },
                {
                        "_id" : 2,
                        "name" : "192.168.111.10:13000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                }
        ],
        ...
}
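
Both sets were presumably initiated independently with the same set name (a minimal sketch of the initiation, assuming default options; the actual commands and data paths are not part of this report):

// Run once against a member of the first set, e.g. 192.168.111.10:11000 (sketch)
rs.initiate({
        _id: "ydmongo",
        members: [
                { _id: 0, host: "192.168.111.10:11000" },
                { _id: 1, host: "192.168.111.10:12000" },
                { _id: 2, host: "192.168.111.10:13000" }
        ]
})
// The second set is initiated the same way against 192.168.111.10:21000-23000.
// Each rs.initiate() generates its own replicaSetId, so the two deployments stay
// distinct internally even though they share the set name and keyFile.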

Now, I add the node in CLUSTER_B to CLUSTER_A, and it returns success

ydmongo:PRIMARY> rs.add({host:"192.168.111.10:22000"})
{
        "ok" : 1,
        "$clusterTime" : {
                "clusterTime" : Timestamp(1684195464, 1),
                "signature" : {
                        "hash" : BinData(0,"6aQiJetFIK4SAwtd9a+Hx+LxvGk="),
                        "keyId" : NumberLong("7233562191683649540")
                }
        },
        "operationTime" : Timestamp(1684195464, 1)
}
ydmongo:PRIMARY> rs.status()
{
        "set" : "ydmongo",
        ...
        "members" : [
                {
                        "_id" : 0,
                        "name" : "192.168.111.10:11000",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        ...
                },
                {
                        "_id" : 1,
                        "name" : "192.168.111.10:12000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                },
                {
                        "_id" : 2,
                        "name" : "192.168.111.10:13000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                },
                {
                        "_id" : 3,
                        "name" : "192.168.111.10:22000",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        ...
                }
        ],
        ...
}
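
To pick out just the failing member from this status output, it can also be filtered in the shell (a small convenience sketch, not part of the original reproduction):

ydmongo:PRIMARY> rs.status().members.filter(function(m) { return m.health === 0; }).map(function(m) { return m.name + " -> " + m.stateStr; })
[ "192.168.111.10:22000 -> (not reachable/healthy)" ]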

The related log output is:

CLUSTER_A

2023-05-16T08:04:24.730+0800 I  REPL     [conn28] replSetReconfig admin command received from client; new config: { _id: "ydmongo", version: 4, protocolVersion: 1, writeConcernMajorityJournalDefault: true, members: [ { _id: 0, host: "192.168.111.10:11000", arbiterOnly: false, buildIndexes: true, hidden: false, priority: 1.0, tags: {}, slaveDelay: 0, votes: 1 }, { _id: 1, host: "192.168.111.10:12000", arbiterOnly: false, buildIndexes: true, hidden: false, priority: 1.0, tags: {}, slaveDelay: 0, votes: 1 }, { _id: 2, host: "192.168.111.10:13000", arbiterOnly: false, buildIndexes: true, hidden: false, priority: 1.0, tags: {}, slaveDelay: 0, votes: 1 }, { host: "192.168.111.10:22000", _id: 3.0 } ], settings: { chainingAllowed: true, heartbeatIntervalMillis: 2000, heartbeatTimeoutSecs: 10, electionTimeoutMillis: 10000, catchUpTimeoutMillis: -1, catchUpTakeoverDelayMillis: 30000, getLastErrorModes: {}, getLastErrorDefaults: { w: 1, wtimeout: 0 }, replicaSetId: ObjectId('6462c67d1c82d8f677e4e92d') } }
2023-05-16T08:04:24.865+0800 I  REPL     [conn28] Scheduling remote command request for reconfig quorum check: RemoteCommand 481 -- target:192.168.111.10:22000 db:admin cmd:{ replSetHeartbeat: "ydmongo", configVersion: 4, hbv: 1, from: "192.168.111.10:11000", fromId: 0, term: 1 }
2023-05-16T08:04:24.865+0800 I  CONNPOOL [Replication] Connecting to 192.168.111.10:22000
2023-05-16T08:04:24.866+0800 I  REPL     [conn28] New replica set config in use: { _id: "ydmongo", version: 4, protocolVersion: 1, writeConcernMajorityJournalDefault: true, members: [ { _id: 0, host: "192.168.111.10:11000", arbiterOnly: false, buildIndexes: true, hidden: false, priority: 1.0, tags: {}, slaveDelay: 0, votes: 1 }, { _id: 1, host: "192.168.111.10:12000", arbiterOnly: false, buildIndexes: true, hidden: false, priority: 1.0, tags: {}, slaveDelay: 0, votes: 1 }, { _id: 2, host: "192.168.111.10:13000", arbiterOnly: false, buildIndexes: true, hidden: false, priority: 1.0, tags: {}, slaveDelay: 0, votes: 1 }, { _id: 3, host: "192.168.111.10:22000", arbiterOnly: false, buildIndexes: true, hidden: false, priority: 1.0, tags: {}, slaveDelay: 0, votes: 1 } ], settings: { chainingAllowed: true, heartbeatIntervalMillis: 2000, heartbeatTimeoutSecs: 10, electionTimeoutMillis: 10000, catchUpTimeoutMillis: -1, catchUpTakeoverDelayMillis: 30000, getLastErrorModes: {}, getLastErrorDefaults: { w: 1, wtimeout: 0 }, replicaSetId: ObjectId('6462c67d1c82d8f677e4e92d') } }
2023-05-16T08:04:24.953+0800 I  REPL_HB  [replexec-7] Heartbeat to 192.168.111.10:22000 failed after 2 retries, response status: InvalidReplicaSetConfig: replica set IDs do not match, ours: 6462c67d1c82d8f677e4e92d; remote node's: 6462c6d5b1e1bdd62920f68d
2023-05-16T08:04:24.953+0800 I  REPL     [replexec-7] Member 192.168.111.10:22000 is now in state RS_DOWN - replica set IDs do not match, ours: 6462c67d1c82d8f677e4e92d; remote node's: 6462c6d5b1e1bdd62920f68d
2023-05-16T08:04:26.954+0800 I  REPL_HB  [replexec-5] Heartbeat to 192.168.111.10:22000 failed after 2 retries, response status: InvalidReplicaSetConfig: replica set IDs do not match, ours: 6462c67d1c82d8f677e4e92d; remote node's: 6462c6d5b1e1bdd62920f68d
2023-05-16T08:04:28.955+0800 I  REPL_HB  [replexec-2] Heartbeat to 192.168.111.10:22000 failed after 2 retries, response status: InvalidReplicaSetConfig: replica set IDs do not match, ours: 6462c67d1c82d8f677e4e92d; remote node's: 6462c6d5b1e1bdd62920f68d
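
The mismatching replica set IDs in these heartbeat errors can also be checked directly on each primary, since rs.initiate() stamps every deployment with its own replicaSetId (a minimal check; the ObjectId values below are copied from the log above):

// On the primary that ran rs.add() (192.168.111.10:11000)
ydmongo:PRIMARY> rs.conf().settings.replicaSetId
ObjectId("6462c67d1c82d8f677e4e92d")
// On the other deployment's primary (192.168.111.10:21000), whose members share the "remote node's" ID
ydmongo:PRIMARY> rs.conf().settings.replicaSetId
ObjectId("6462c6d5b1e1bdd62920f68d")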

CLUSTER_B

2023-05-16T08:04:23.949+0800 I  ACCESS   [conn23] Successfully authenticated as principal __system on local from client 192.168.111.10:36978
2023-05-16T08:04:23.949+0800 I  ACCESS   [conn22] Successfully authenticated as principal __system on local from client 192.168.111.10:36976
2023-05-16T08:04:23.995+0800 I  ACCESS   [conn22] Successfully authenticated as principal __system on local from client 192.168.111.10:36976
2023-05-16T08:04:24.048+0800 I  ACCESS   [conn22] Successfully authenticated as principal __system on local from client 192.168.111.10:36976
2023-05-16T08:04:24.092+0800 I  ACCESS   [conn22] Successfully authenticated as principal __system on local from client 192.168.111.10:36976
2023-05-16T08:04:28.280+0800 I  ACCESS   [conn25] Successfully authenticated as principal __system on local from client 192.168.111.10:36986
2023-05-16T08:04:28.281+0800 I  ACCESS   [conn26] Successfully authenticated as principal __system on local from client 192.168.111.10:36988
2023-05-16T08:04:28.330+0800 I  ACCESS   [conn25] Successfully authenticated as principal __system on local from client 192.168.111.10:36986
2023-05-16T08:04:28.378+0800 I  ACCESS   [conn25] Successfully authenticated as principal __system on local from client 192.168.111.10:36986
2023-05-16T08:04:28.426+0800 I  ACCESS   [conn25] Successfully authenticated as principal __system on local from client 192.168.111.10:36986
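
On this side, only the __system authentications of the incoming heartbeats are logged; the foreign reconfig does not change this set's own configuration, which can be verified on its primary (a minimal check; the value matches the setVersion reported by db.isMaster() below):

ydmongo:PRIMARY> rs.conf().version
3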

Meanwhile, the result of db.isMaster() is

CLUSTER_A:

ydmongo:PRIMARY> db.isMaster()
{
        "hosts" : [
                "192.168.111.10:11000",
                "192.168.111.10:12000",
                "192.168.111.10:13000",
                "192.168.111.10:22000"
        ],
        "setName" : "ydmongo",
        "setVersion" : 4,
        "ismaster" : true,
        "secondary" : false,
        "primary" : "192.168.111.10:11000",
        "me" : "192.168.111.10:11000",
        ...
}
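
Note that the "hosts" array above is derived from the replica set configuration, not from current member health, so the unreachable member is listed as well. This can be seen by comparing against the configured members on the same primary (a small sketch):

ydmongo:PRIMARY> rs.conf().members.map(function(m) { return m.host; })
[
        "192.168.111.10:11000",
        "192.168.111.10:12000",
        "192.168.111.10:13000",
        "192.168.111.10:22000"
]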

CLUSTER_B

ydmongo:PRIMARY> db.isMaster()
{
        "hosts" : [
                "192.168.111.10:21000",
                "192.168.111.10:22000",
                "192.168.111.10:23000"
        ],
        "setName" : "ydmongo",
        "setVersion" : 3,
        "ismaster" : true,
        "secondary" : false,
        "primary" : "192.168.111.10:21000",
        "me" : "192.168.111.10:21000",
        ...
}

Now I'm going to add the primary node of CLUSTER_B to CLUSTER_A

ydmongo:PRIMARY> rs.add({host:"192.168.111.10:21000"})
{
        "ok" : 1,
        "$clusterTime" : {
                "clusterTime" : Timestamp(1684195779, 2),
                "signature" : {
                        "hash" : BinData(0,"vT+HNT9PPsdYcxzNPNCpPF1WlLE="),
                        "keyId" : NumberLong("7233562191683649540")
                }
        },
        "operationTime" : Timestamp(1684195779, 2)
}
ydmongo:PRIMARY> rs.status()
{
        "set" : "ydmongo",
        ...
        "members" : [
                {
                        "_id" : 0,
                        "name" : "192.168.111.10:11000",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        ...
                },
                {
                        "_id" : 1,
                        "name" : "192.168.111.10:12000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                },
                {
                        "_id" : 2,
                        "name" : "192.168.111.10:13000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                },
                {
                        "_id" : 3,
                        "name" : "192.168.111.10:22000",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        ...
                },
                {
                        "_id" : 4,
                        "name" : "192.168.111.10:21000",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        ...
                }
        ],
        ...
}
ydmongo:PRIMARY> db.isMaster()
{
        "hosts" : [
                "192.168.111.10:11000",
                "192.168.111.10:12000",
                "192.168.111.10:13000",
                "192.168.111.10:22000",
                "192.168.111.10:21000"
        ],
        "setName" : "ydmongo",
        "setVersion" : 5,
        "ismaster" : true,
        "secondary" : false,
        "primary" : "192.168.111.10:11000",
        "me" : "192.168.111.10:11000",
        ...
}

According to the official documentation, drivers discover the replica set topology via db.isMaster():
https://www.mongodb.com/docs/v4.2/reference/command/isMaster/
As a result, connections intended for CLUSTER_A may be diverted to CLUSTER_B, as in the following driver log:

[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:05,555]|@|[org.mongodb.driver.cluster]|@|[]|@|Monitor thread successfully connected to server with description ServerDescription{address=10.100.3.220:28020, type=REPLICA_SET_SECONDARY, state=CONNECTED, ok=true, version=ServerVersion{versionList=[4, 2, 8]}, minWireVersion=0, maxWireVersion=8, maxDocumentSize=16777216, logicalSessionTimeoutMinutes=30, roundTripTimeNanos=1166127, setName='ydmongo', canonicalAddress=10.100.3.220:28020, hosts=[10.100.3.220:28020, 10.223.209.56:28022, 10.100.3.221:28021, 10.100.3.222:28022], passives=[], arbiters=[], primary='10.100.3.221:28021', tagSet=TagSet{[]}, electionId=null, setVersion=4, lastWriteDate=Fri May 12 15:24:05 CST 2023, lastUpdateTimeNanos=66032399775199055}|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:06,979]|@|[org.mongodb.driver.cluster]|@|[]|@|Monitor thread successfully connected to server with description ServerDescription{address=10.100.3.222:28022, type=REPLICA_SET_SECONDARY, state=CONNECTED, ok=true, version=ServerVersion{versionList=[4, 2, 8]}, minWireVersion=0, maxWireVersion=8, maxDocumentSize=16777216, logicalSessionTimeoutMinutes=30, roundTripTimeNanos=948119, setName='ydmongo', canonicalAddress=10.100.3.222:28022, hosts=[10.100.3.220:28020, 10.223.209.56:28022, 10.100.3.221:28021, 10.100.3.222:28022], passives=[], arbiters=[], primary='10.100.3.221:28021', tagSet=TagSet{[]}, electionId=null, setVersion=4, lastWriteDate=Fri May 12 15:24:06 CST 2023, lastUpdateTimeNanos=66032401200132431}|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,374]|@|[org.mongodb.driver.cluster]|@|[]|@|Monitor thread successfully connected to server with description ServerDescription{address=10.100.3.221:28021, type=REPLICA_SET_PRIMARY, state=CONNECTED, ok=true, version=ServerVersion{versionList=[4, 2, 8]}, minWireVersion=0, maxWireVersion=8, maxDocumentSize=16777216, logicalSessionTimeoutMinutes=30, roundTripTimeNanos=959352, setName='ydmongo', canonicalAddress=10.100.3.221:28021, hosts=[10.100.3.220:28020, 10.223.209.56:28022, 10.100.3.221:28021, 10.100.3.222:28022], passives=[], arbiters=[], primary='10.100.3.221:28021', tagSet=TagSet{[]}, electionId=7fffffff0000000000000054, setVersion=4, lastWriteDate=Fri May 12 15:24:06 CST 2023, lastUpdateTimeNanos=66032401594541213}|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,374]|@|[org.mongodb.driver.cluster]|@|[]|@|Adding discovered server 10.223.209.56:28022 to client view of cluster|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,375]|@|[org.mongodb.driver.cluster]|@|[]|@|Setting max set version to 4 from replica set primary 10.100.3.221:28021|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,379]|@|[org.mongodb.driver.connection]|@|[]|@|Opened connection [connectionId{localValue:10, serverValue:162}] to 10.223.209.56:28022|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,381]|@|[org.mongodb.driver.cluster]|@|[]|@|Monitor thread successfully connected to server with description ServerDescription{address=10.223.209.56:28022, type=REPLICA_SET_PRIMARY, state=CONNECTED, ok=true, version=ServerVersion{versionList=[4, 2, 8]}, minWireVersion=0, maxWireVersion=8, maxDocumentSize=16777216, logicalSessionTimeoutMinutes=30, roundTripTimeNanos=920667, setName='ydmongo', canonicalAddress=10.223.209.56:28022, hosts=[10.223.209.56:28022, 10.223.209.54:28020, 10.223.209.55:28021], passives=[], arbiters=[], primary='10.223.209.56:28022', tagSet=TagSet{[]}, electionId=7fffffff0000000000000004, setVersion=1, lastWriteDate=Fri May 12 15:24:06 CST 2023, lastUpdateTimeNanos=66032401601568290}|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,381]|@|[org.mongodb.driver.cluster]|@|[]|@|Adding discovered server 10.223.209.54:28020 to client view of cluster|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,381]|@|[org.mongodb.driver.cluster]|@|[]|@|Adding discovered server 10.223.209.55:28021 to client view of cluster|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,381]|@|[org.mongodb.driver.cluster]|@|[]|@|Server 10.100.3.220:28020 is no longer a member of the replica set.  Removing from client view of cluster.|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,382]|@|[org.mongodb.driver.cluster]|@|[]|@|Server 10.100.3.221:28021 is no longer a member of the replica set.  Removing from client view of cluster.|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,384]|@|[org.mongodb.driver.connection]|@|[]|@|Closed connection [connectionId{localValue:5, serverValue:1843726}] to 10.100.3.221:28021 because the pool has been closed.|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,384]|@|[org.mongodb.driver.connection]|@|[]|@|Closed connection [connectionId{localValue:7, serverValue:1850541}] to 10.100.3.221:28021 because the pool has been closed.|@|
[INFO ]|@|[]|@|[]|@|[2023-05-12 15:24:07,384]|@|[org.mongodb.driver.connection]|@|[]|@|Closed connection [connectionId{localValue:4, serverValue:1842210}] to 10.100.3.221:28021 because the pool has been closed.|@|

We believe this behavior is abnormal.
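
For completeness, the foreign members can be removed from CLUSTER_A's configuration again (a minimal cleanup sketch, run on the CLUSTER_A primary); this only removes the stray entries and does not address the fact that rs.add() accepted them in the first place:

ydmongo:PRIMARY> rs.remove("192.168.111.10:22000")
ydmongo:PRIMARY> rs.remove("192.168.111.10:21000")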

We hope to receive your reply soon. Thank you.

Sprint: Repl 2024-02-05
Participants:

 Description   

I have two clusters that have the same replSet name and keyFile.

 

When I added a node from CLUSTER_B to CLUSTER_A, the command returned success, and the CLUSTER_B node appeared in the db.isMaster() host list reported by the CLUSTER_A nodes!



 Comments   
Comment by Chris Kelly [ 11/Jul/23 ]

Hi mingming,

Thanks for your report, and your patience on the reply. At a glance, this looks like undefined behavior. You are getting this error after you add the node from the other replica set:

InvalidReplicaSetConfig: replica set IDs do not match, ours: 6462c67d1c82d8f677e4e92d; remote node's: 6462c6d5b1e1bdd62920f68d

To be clear, we do not support a node being a member of two replica sets at the same time. If this is a feature you would like to see, you can help get it on the radar by submitting a feature request to the MongoDB Feedback Engine to advocate for this change.

It sounds like you are suggesting that we should not display (not reachable/healthy) nodes like this in isMaster() or hello, because of the potential to mislead drivers into connecting to a different replica set if a node from Cluster A is in Cluster B's configuration (and you then connect to Cluster B). I will pass this to the relevant team to comment on this behavior.

As an aside, MongoDB 4.2 reached end of life in April 2023 and is no longer supported - I would recommend upgrading to 4.4 or newer.

Thank you!

Comment by mingming tang [ 15/May/23 ]

Sorry, I mixed up the cluster roles at the beginning, which may cause some misunderstanding. Let me correct it here.

 

CLUSTER_A:

ydmongo:PRIMARY> rs.status()
{
        "set" : "ydmongo",
        ...
        "members" : [
                {
                        "_id" : 0,
                        "name" : "192.168.111.10:11000",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        ...
                },
                {
                        "_id" : 1,
                        "name" : "192.168.111.10:12000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                },
                {
                        "_id" : 2,
                        "name" : "192.168.111.10:13000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                }
        ],
        ...
}

CLUSTER_B:

ydmongo:PRIMARY> rs.status()
{
        "set" : "ydmongo",
        ...
        "members" : [
                {
                        "_id" : 0,
                        "name" : "192.168.111.10:21000",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        ...
                },
                {
                        "_id" : 1,
                        "name" : "192.168.111.10:22000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                },
                {
                        "_id" : 2,
                        "name" : "192.168.111.10:23000",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        ...
                }
        ],
        ...
}
