Type: Bug
Resolution: Fixed
Priority: Major - P3
Affects Version/s: 3.3.2
Component/s: None
Environment: Node.js 10.6.3 on OSX
The script below fails ~90% of the time with driver 3.3.2 against a replica set that has one member down.
'use strict';

const mongodb = require('mongodb');

run().catch(error => console.log('Caught', error));

async function run() {
  const uri = 'mongodb://localhost:27017,localhost:27018,localhost:27019/test?replicaSet=rs';
  const client = await mongodb.MongoClient.connect(uri, {
    useNewUrlParser: true,
    useUnifiedTopology: true
  });
  console.log('Connected');

  await client.db().collection('test').findOne({});
  console.log('done');
}
The output looks like this:
$ node gh-8209-2.js
Caught { Error: connect ECONNREFUSED 127.0.0.1:27019
    at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1106:14)
  name: 'MongoNetworkError',
  errorLabels: [ 'TransientTransactionError' ],
  [Symbol(mongoErrorContextSymbol)]: {} }
^C
$
Here's my `rs.status()`:
{ "set" : "rs", "date" : ISODate("2019-10-07T20:02:58.119Z"), "myState" : 1, "term" : NumberLong(1), "syncingTo" : "", "syncSourceHost" : "", "syncSourceId" : -1, "heartbeatIntervalMillis" : NumberLong(2000), "optimes" : { "lastCommittedOpTime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "readConcernMajorityOpTime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "appliedOpTime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "durableOpTime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) } }, "lastStableCheckpointTimestamp" : Timestamp(1570478542, 1), "members" : [ { "_id" : 1, "name" : "localhost:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 51, "optime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "optimeDate" : ISODate("2019-10-07T20:02:52Z"), "syncingTo" : "", "syncSourceHost" : "", "syncSourceId" : -1, "infoMessage" : "could not find member to sync from", "electionTime" : Timestamp(1570478540, 1), "electionDate" : ISODate("2019-10-07T20:02:20Z"), "configVersion" : 1, "self" : true, "lastHeartbeatMessage" : "" }, { "_id" : 2, "name" : "localhost:27018", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 47, "optime" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "optimeDurable" : { "ts" : Timestamp(1570478572, 1), "t" : NumberLong(1) }, "optimeDate" : ISODate("2019-10-07T20:02:52Z"), "optimeDurableDate" : ISODate("2019-10-07T20:02:52Z"), "lastHeartbeat" : ISODate("2019-10-07T20:02:56.458Z"), "lastHeartbeatRecv" : ISODate("2019-10-07T20:02:57.459Z"), "pingMs" : NumberLong(0), "lastHeartbeatMessage" : "", "syncingTo" : "localhost:27017", "syncSourceHost" : "localhost:27017", "syncSourceId" : 1, "infoMessage" : "", "configVersion" : 1 }, { "_id" : 3, "name" : "localhost:27019", "health" : 0, "state" : 8, "stateStr" : "(not reachable/healthy)", "uptime" : 0, "optime" : { "ts" : Timestamp(0, 0), "t" : NumberLong(-1) }, "optimeDurable" : { "ts" : Timestamp(0, 0), "t" : NumberLong(-1) }, "optimeDate" : ISODate("1970-01-01T00:00:00Z"), "optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat" : ISODate("2019-10-07T20:02:56.462Z"), "lastHeartbeatRecv" : ISODate("2019-10-07T20:02:53.455Z"), "pingMs" : NumberLong(0), "lastHeartbeatMessage" : "Error connecting to localhost:27019 (127.0.0.1:27019) :: caused by :: Connection refused", "syncingTo" : "", "syncSourceHost" : "", "syncSourceId" : -1, "infoMessage" : "", "configVersion" : -1 } ], "ok" : 1, "operationTime" : Timestamp(1570478572, 1), "$clusterTime" : { "clusterTime" : Timestamp(1570478572, 1), "signature" : { "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), "keyId" : NumberLong(0) } } }
I tracked this issue as far as these lines of code: https://github.com/mongodb/node-mongodb-native/blob/f5f9892a576fd52d13154776e90c5b977c2764e1/lib/core/sdam/topology.js#L839-L841. At that point the driver thinks the replica set state is 'ReplicaSetNoPrimary' and gives up. My best guess is that somewhere the driver isn't waiting for all of the replica set members to report back; it gives up as soon as it sees that one of them is down.
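For context, the SDAM server selection spec expects a driver to block and retry selection until `serverSelectionTimeoutMS` (default 30 seconds) elapses, rather than failing on the first pass while the topology is still 'ReplicaSetNoPrimary'. Below is a minimal sketch of that wait-and-retry loop. `topology.selectable()` is a hypothetical stand-in for the driver's internal "find a server matching the read preference" logic, not a real driver API; 'topologyDescriptionChanged' is, however, a real SDAM monitoring event name.

// Hypothetical sketch of spec-style server selection (not the driver's
// actual internals): keep retrying until serverSelectionTimeoutMS instead
// of failing as soon as one member is unreachable.
async function selectServer(topology, serverSelectionTimeoutMS = 30000) {
  const deadline = Date.now() + serverSelectionTimeoutMS;

  for (;;) {
    // Stand-in for "find a suitable server in the current topology
    // description" (e.g. the primary, if one is known).
    const server = topology.selectable();
    if (server) return server;

    const remaining = deadline - Date.now();
    if (remaining <= 0) {
      throw new Error('Server selection timed out');
    }

    // Block until the next heartbeat updates the topology description (or
    // the deadline passes), then re-evaluate rather than giving up because
    // one member is down.
    await new Promise(resolve => {
      const timer = setTimeout(resolve, remaining);
      topology.once('topologyDescriptionChanged', () => {
        clearTimeout(timer);
        resolve();
      });
    });
  }
}

If the linked code is instead treating 'ReplicaSetNoPrimary' as terminal, the outcome would depend on which member's heartbeat lands first, which would be consistent with the script failing ~90% of the time rather than always.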
Originally reported in https://github.com/Automattic/mongoose/issues/8209.