Uploaded image for project: 'Node.js Driver'
  1. Node.js Driver
  2. NODE-2231

useUnifiedTopology prevents connecting to replica set if secondary is down

    • Type: Icon: Bug Bug
    • Resolution: Fixed
    • Priority: Icon: Major - P3 Major - P3
    • 3.3.3
    • Affects Version/s: 3.3.2
    • Component/s: None
    • Environment:
      Node.js 10.6.3 on OSX

      The below script fails ~90% of the time given driver 3.3.2 and a replica set with one member down.

      'use strict';
        
      const mongodb = require('mongodb');
      
      run().catch(error => console.log('Caught', error));
      
      async function run() {
        const uri = 'mongodb://localhost:27017,localhost:27018,localhost:27019/test?replicaSet=rs';
        const client = await mongodb.MongoClient.connect(uri, { useNewUrlParser: true, useUnifiedTopology: true });
        console.log('Connected');
        
        await client.db().collection('test').findOne({});
        console.log('done');
      }
      

      The output looks like this:

      $ node gh-8209-2.js 
      Caught { Error: connect ECONNREFUSED 127.0.0.1:27019
          at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1106:14)
        name: 'MongoNetworkError',
        errorLabels: [ 'TransientTransactionError' ],
        [Symbol(mongoErrorContextSymbol)]: {} }
      ^C
      $ 
      

      Here's my `rs.status()`:

      {
      	"set" : "rs",
      	"date" : ISODate("2019-10-07T20:02:58.119Z"),
      	"myState" : 1,
      	"term" : NumberLong(1),
      	"syncingTo" : "",
      	"syncSourceHost" : "",
      	"syncSourceId" : -1,
      	"heartbeatIntervalMillis" : NumberLong(2000),
      	"optimes" : {
      		"lastCommittedOpTime" : {
      			"ts" : Timestamp(1570478572, 1),
      			"t" : NumberLong(1)
      		},
      		"readConcernMajorityOpTime" : {
      			"ts" : Timestamp(1570478572, 1),
      			"t" : NumberLong(1)
      		},
      		"appliedOpTime" : {
      			"ts" : Timestamp(1570478572, 1),
      			"t" : NumberLong(1)
      		},
      		"durableOpTime" : {
      			"ts" : Timestamp(1570478572, 1),
      			"t" : NumberLong(1)
      		}
      	},
      	"lastStableCheckpointTimestamp" : Timestamp(1570478542, 1),
      	"members" : [
      		{
      			"_id" : 1,
      			"name" : "localhost:27017",
      			"health" : 1,
      			"state" : 1,
      			"stateStr" : "PRIMARY",
      			"uptime" : 51,
      			"optime" : {
      				"ts" : Timestamp(1570478572, 1),
      				"t" : NumberLong(1)
      			},
      			"optimeDate" : ISODate("2019-10-07T20:02:52Z"),
      			"syncingTo" : "",
      			"syncSourceHost" : "",
      			"syncSourceId" : -1,
      			"infoMessage" : "could not find member to sync from",
      			"electionTime" : Timestamp(1570478540, 1),
      			"electionDate" : ISODate("2019-10-07T20:02:20Z"),
      			"configVersion" : 1,
      			"self" : true,
      			"lastHeartbeatMessage" : ""
      		},
      		{
      			"_id" : 2,
      			"name" : "localhost:27018",
      			"health" : 1,
      			"state" : 2,
      			"stateStr" : "SECONDARY",
      			"uptime" : 47,
      			"optime" : {
      				"ts" : Timestamp(1570478572, 1),
      				"t" : NumberLong(1)
      			},
      			"optimeDurable" : {
      				"ts" : Timestamp(1570478572, 1),
      				"t" : NumberLong(1)
      			},
      			"optimeDate" : ISODate("2019-10-07T20:02:52Z"),
      			"optimeDurableDate" : ISODate("2019-10-07T20:02:52Z"),
      			"lastHeartbeat" : ISODate("2019-10-07T20:02:56.458Z"),
      			"lastHeartbeatRecv" : ISODate("2019-10-07T20:02:57.459Z"),
      			"pingMs" : NumberLong(0),
      			"lastHeartbeatMessage" : "",
      			"syncingTo" : "localhost:27017",
      			"syncSourceHost" : "localhost:27017",
      			"syncSourceId" : 1,
      			"infoMessage" : "",
      			"configVersion" : 1
      		},
      		{
      			"_id" : 3,
      			"name" : "localhost:27019",
      			"health" : 0,
      			"state" : 8,
      			"stateStr" : "(not reachable/healthy)",
      			"uptime" : 0,
      			"optime" : {
      				"ts" : Timestamp(0, 0),
      				"t" : NumberLong(-1)
      			},
      			"optimeDurable" : {
      				"ts" : Timestamp(0, 0),
      				"t" : NumberLong(-1)
      			},
      			"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
      			"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
      			"lastHeartbeat" : ISODate("2019-10-07T20:02:56.462Z"),
      			"lastHeartbeatRecv" : ISODate("2019-10-07T20:02:53.455Z"),
      			"pingMs" : NumberLong(0),
      			"lastHeartbeatMessage" : "Error connecting to localhost:27019 (127.0.0.1:27019) :: caused by :: Connection refused",
      			"syncingTo" : "",
      			"syncSourceHost" : "",
      			"syncSourceId" : -1,
      			"infoMessage" : "",
      			"configVersion" : -1
      		}
      	],
      	"ok" : 1,
      	"operationTime" : Timestamp(1570478572, 1),
      	"$clusterTime" : {
      		"clusterTime" : Timestamp(1570478572, 1),
      		"signature" : {
      			"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
      			"keyId" : NumberLong(0)
      		}
      	}
      }
      

      I tracked this issue as far as these lines of code: https://github.com/mongodb/node-mongodb-native/blob/f5f9892a576fd52d13154776e90c5b977c2764e1/lib/core/sdam/topology.js#L839-L841 . It looks like at that stage, the driver thinks the replica set state is 'ReplicaSetNoPrimary' and gives up. My best guess is that somewhere the driver isn't waiting for all the replica set nodes to report back, it just gives up because one of them is down.

      Originally reported in https://github.com/Automattic/mongoose/issues/8209.

            Assignee:
            matt.broadstone@mongodb.com Matt Broadstone
            Reporter:
            val@karpov.io Valeri Karpov
            Votes:
            3 Vote for this issue
            Watchers:
            4 Start watching this issue

              Created:
              Updated:
              Resolved: