Type: Bug
Resolution: Fixed
Priority: Major - P3
Affects Version/s: 3.3.2
Component/s: None
Environment: Node.js 10.6.3 on OSX
The script below fails ~90% of the time with driver 3.3.2 against a replica set that has one member down.
'use strict';

const mongodb = require('mongodb');

run().catch(error => console.log('Caught', error));

async function run() {
  const uri = 'mongodb://localhost:27017,localhost:27018,localhost:27019/test?replicaSet=rs';
  const client = await mongodb.MongoClient.connect(uri, {
    useNewUrlParser: true,
    useUnifiedTopology: true
  });
  console.log('Connected');

  await client.db().collection('test').findOne({});
  console.log('done');
}
The output looks like this:
$ node gh-8209-2.js
Caught { Error: connect ECONNREFUSED 127.0.0.1:27019
    at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1106:14)
  name: 'MongoNetworkError',
  errorLabels: [ 'TransientTransactionError' ],
  [Symbol(mongoErrorContextSymbol)]: {} }
^C
$
Here's my `rs.status()`:
{ "set" : "rs", "date" : ISODate("2019-10-07T20:02:58.119Z"), "myState" : 1, "term" : NumberLong(1), "syncingTo" : "", "syncSourceHost" : "", "syncSourceId" : -1, "heartbeatIntervalMillis" : NumberLong(2000), "optimes" : { "lastCommittedOpTime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "readConcernMajorityOpTime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "appliedOpTime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "durableOpTime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) } }, "lastStableCheckpointTimestamp" : Timestamp(1570478542, 1), "members" : [ { "_id" : 1, "name" : "localhost:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 51, "optime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "optimeDate" : ISODate("2019-10-07T20:02:52Z"), "syncingTo" : "", "syncSourceHost" : "", "syncSourceId" : -1, "infoMessage" : "could not find member to sync from", "electionTime" : Timestamp(1570478540, 1), "electionDate" : ISODate("2019-10-07T20:02:20Z"), "configVersion" : 1, "self" : true, "lastHeartbeatMessage" : "" }, { "_id" : 2, "name" : "localhost:27018", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 47, "optime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "optimeDurable" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "optimeDate" : ISODate("2019-10-07T20:02:52Z"), "optimeDurableDate" : ISODate("2019-10-07T20:02:52Z"), "lastHeartbeat" : ISODate("2019-10-07T20:02:56.458Z"), "lastHeartbeatRecv" : ISODate("2019-10-07T20:02:57.459Z"), "pingMs" : NumberLong(0), "lastHeartbeatMessage" : "", "syncingTo" : "localhost:27017", "syncSourceHost" : "localhost:27017", "syncSourceId" : 1, "infoMessage" : "", "configVersion" : 1 }, { "_id" : 3, "name" : "localhost:27019", "health" : 0, "state" : 8, "stateStr" : "(not reachable/healthy)", "uptime" : 0, "optime" : { "ts" : Timestamp(0, 0), "t" : NumberLong(-1) }, "optimeDurable" : { "ts" : Timestamp(0, 0), "t" : NumberLong(-1) }, "optimeDate" : ISODate("1970-01-01T00:00:00Z"), "optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat" : ISODate("2019-10-07T20:02:56.462Z"), "lastHeartbeatRecv" : ISODate("2019-10-07T20:02:53.455Z"), "pingMs" : NumberLong(0), "lastHeartbeatMessage" : "Error connecting to localhost:27019 (127.0.0.1:27019) :: caused by :: Connection refused", "syncingTo" : "", "syncSourceHost" : "", "syncSourceId" : -1, "infoMessage" : "", "configVersion" : -1 } ], "ok" : 1, "operationTime" : Timestamp(1570478572, 1), "$clusterTime" : { "clusterTime" : Timestamp(1570478572, 1), "signature" : { "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), "keyId" : NumberLong(0) } } }
I tracked this issue as far as these lines of code: https://github.com/mongodb/node-mongodb-native/blob/f5f9892a576fd52d13154776e90c5b977c2764e1/lib/core/sdam/topology.js#L839-L841. At that point the driver thinks the replica set state is 'ReplicaSetNoPrimary' and gives up. My best guess is that somewhere the driver isn't waiting for all of the replica set members to report back; it gives up as soon as it sees that one of them is down.
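For context, the SDAM server selection spec expects a driver to block and retry selection until `serverSelectionTimeoutMS` (default 30 seconds) elapses, rather than failing on the first pass while the topology is still 'ReplicaSetNoPrimary'. Below is a minimal sketch of that wait-and-retry loop. `topology.selectable()` is a hypothetical stand-in for the driver's internal "find a server matching the read preference" logic, not a real driver API; 'topologyDescriptionChanged' is, however, a real SDAM monitoring event name.

// Hypothetical sketch of spec-style server selection (not the driver's
// actual internals): keep retrying until serverSelectionTimeoutMS instead
// of failing as soon as one member is unreachable.
async function selectServer(topology, serverSelectionTimeoutMS = 30000) {
  const deadline = Date.now() + serverSelectionTimeoutMS;

  for (;;) {
    // Stand-in for "find a suitable server in the current topology
    // description" (e.g. the primary, if one is known).
    const server = topology.selectable();
    if (server) return server;

    const remaining = deadline - Date.now();
    if (remaining <= 0) {
      throw new Error('Server selection timed out');
    }

    // Block until the next heartbeat updates the topology description (or
    // the deadline passes), then re-evaluate rather than giving up because
    // one member is down.
    await new Promise(resolve => {
      const timer = setTimeout(resolve, remaining);
      topology.once('topologyDescriptionChanged', () => {
        clearTimeout(timer);
        resolve();
      });
    });
  }
}

If the linked code is instead treating 'ReplicaSetNoPrimary' as terminal, the outcome would depend on which member's heartbeat lands first, which would be consistent with the script failing ~90% of the time rather than always.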
Originally reported in https://github.com/Automattic/mongoose/issues/8209.