Uploaded image for project: 'Node.js Driver'
  1. Node.js Driver
  2. NODE-2466

Driver does not seem to properly recover from a long disconnection with primary server

    • Type: Icon: Bug Bug
    • Resolution: Fixed
    • Priority: Icon: Major - P3 Major - P3
    • 3.5.4
    • Affects Version/s: 3.5.3
    • Component/s: None
    • Labels:

      Tested driver version: 3.5.3

      Steps to reproduce:

      • You'll need two machines (on the same network) to reproduce this; one called client, one called server. toxiproxy needs to be installed on client.
      • On server, run a replicat set with 3 nodes (port 8500, 8501, 8502). Ensure that the node listening on 8500 is the primary one.
      • On client, run toxiproxy to open three local ports, and redirect traffic to server (in this example, 10.10.164.148 is the IP address of server):
        toxiproxy-cli create mongo_proxied_0 -l 127.0.0.1:8500 -u 10.10.164.148:8500 
        toxiproxy-cli create mongo_proxied_1 -l 127.0.0.1:8501 -u 10.10.164.148:8501
        toxiproxy-cli create mongo_proxied_2 -l 127.0.0.1:8502 -u 10.10.164.148:8502
        
      • On client, edit /etc/hosts to make sure that any request sent with server's hostname are actually sent to localhost (otherwise the application running on client will connect directly to server, instead of going through toxiproxy)
      • On client, run the following nodejs script:
        const mongodb = require("mongodb");
        const MongoClient = mongodb.MongoClient;
        const net = require("net");
        
        
        async function wait(t) {
        	return new Promise((resolve) => setTimeout(resolve, t));
        }
        
        function log() {
        	console.log.apply(console, [new Date(), ...arguments]);
        }
        const URL = "mongodb://127.0.0.1:8500,127.0.0.1:8501,127.0.0.1:8502/testDb?readPreference=primaryPreferred&slaveOk=true&replicaSet=rs0&heartbeatFrequencyMS=10000&serverSelectionTimeoutMS=30000";
        let client;
        async function getClient() {
        	if (!client) {
        		log("mongo - Connecting to " + URL);
        		client = await MongoClient.connect(URL, { useUnifiedTopology: true });
        		log("mongo - Connected");
        	}
        
        	return client;
        }
        
        async function loop() {
        	while(true) {
        		try {
        			log("mongo - Connecting");
        			const client = await getClient();
        			const db = client.db();
        			const collection = db.collection("testCollection");
        			await collection.insertOne({name: Date.now()});
        			log("mongo - Write done");
        			await wait(1000);
        			log("mongo - Done waiting");
        		} catch (err) {
        			log("mongo - Error:", err);
        		}
        	}
        }
        
        function loop_connect() {
        	const client = new net.Socket();
        	const port = 8500;
        	log("network - connecting to 127.0.0.1 on port", port);
        	client.on("error", (err) => {
        		log("network - Failed:", err.message);
        		client.destroy();
        	});
        	client.connect(port, "127.0.0.1", () => {
        		log("network - Connected!");
        		client.destroy();
        	});
        }
        
        loop();
        setInterval(loop_connect, 1000);
        
        • Every second, this script will try to insert a document. The resulting logs are prefixed with "mongo"
        • In parallel, every second, this script will try to open a socket to 127.0.0.1:8500. The resulting logs are prefixed with "network"
      • On client, assuming toxiproxy server runs on port 8474, run the following script to cut the connexion between our nodejs client and the primary node during 21 seconds:
        curl http://localhost:8474/proxies/mongo_proxied_0 -d '{"enabled":false}'
        sleep 21
        curl http://localhost:8474/proxies/mongo_proxied_0 -d '{"enabled":true}'
        

      What happens:

      • Thanks to the "network" logs (see some logs I captured below), we can see that our nodejs client is indeed unable to connect to localhost:8500 during 21 seconds (from 14:12:39 to 14:13:00)
      • In "mongo" logs, we can see that:
        • 30 seconds after the disconnection, at 14:13:09, the following error is logged: Error: MongoServerSelectionError: connect ECONNREFUSED 127.0.0.1:8500. I guess this is due to the fact that the previous heartbeat failed; this seems okay to me.
        • However, right after, we try to send another request to mongo (at 14:13:09), and it fails 30 seconds later (at 14:13:39) with the following error message: Error: MongoServerSelectionError: Server selection timed out after 30000 ms. This seems like a bug to me: the primary node was up and reachable during all that time, as confirmed by the "network" logs. But the driver was unable to reconnect and execute the request during this timeframe.
      2020-02-24T14:12:36.862Z mongo - Connecting
      2020-02-24T14:12:36.865Z mongo - Connecting to mongodb://127.0.0.1:8500,127.0.0.1:8501,127.0.0.1:8502/testDb?readPreference=primaryPreferred&slaveOk=true&replicaSet=rs0&heartbeatFrequencyMS=10000&serverSelectionTimeoutMS=30000
      2020-02-24T14:12:37.141Z mongo - Connected
      2020-02-24T14:12:37.308Z mongo - Write done
      2020-02-24T14:12:37.883Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:37.886Z network - Connected!
      2020-02-24T14:12:38.308Z mongo - Done waiting
      2020-02-24T14:12:38.308Z mongo - Connecting
      2020-02-24T14:12:38.349Z mongo - Write done
      2020-02-24T14:12:38.884Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:38.886Z network - Connected!
      2020-02-24T14:12:39.349Z mongo - Done waiting
      2020-02-24T14:12:39.349Z mongo - Connecting
      2020-02-24T14:12:39.886Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:39.887Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:40.904Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:40.905Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:41.911Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:41.912Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:42.912Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:42.913Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:43.913Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:43.915Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:44.913Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:44.914Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:45.915Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:45.916Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:46.918Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:46.920Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:47.917Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:47.919Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:48.918Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:48.919Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:49.918Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:49.919Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:50.918Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:50.919Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:51.919Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:51.920Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:52.919Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:52.919Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:53.919Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:53.920Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:54.918Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:54.919Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:55.919Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:55.921Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:56.919Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:56.921Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:57.924Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:57.925Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:58.925Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:58.926Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:12:59.929Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:12:59.930Z network - Failed: connect ECONNREFUSED 127.0.0.1:8500
      2020-02-24T14:13:00.931Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:00.933Z network - Connected!
      2020-02-24T14:13:01.938Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:01.939Z network - Connected!
      2020-02-24T14:13:02.938Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:02.939Z network - Connected!
      2020-02-24T14:13:03.939Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:03.940Z network - Connected!
      2020-02-24T14:13:04.939Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:04.940Z network - Connected!
      2020-02-24T14:13:05.939Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:05.940Z network - Connected!
      2020-02-24T14:13:06.940Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:06.940Z network - Connected!
      2020-02-24T14:13:07.942Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:07.943Z network - Connected!
      2020-02-24T14:13:08.942Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:08.943Z network - Connected!
      2020-02-24T14:13:09.350Z mongo - Error: MongoServerSelectionError: connect ECONNREFUSED 127.0.0.1:8500
          at Timeout._onTimeout (/home/mfred/tmp/long-mongo-3.5/node_modules/mongodb/lib/core/sdam/topology.js:428:30)
          at listOnTimeout (internal/timers.js:531:17)
          at processTimers (internal/timers.js:475:7) {
        name: 'MongoServerSelectionError',
        reason: TopologyDescription {
          type: 'ReplicaSetNoPrimary',
          setName: 'mgrplset',
          maxSetVersion: 1,
          maxElectionId: null,
          servers: Map {
            'vgerndvud2202.gemalto.com:8500' => [ServerDescription],
            'vgerndvud2202.gemalto.com:8501' => [ServerDescription],
            'vgerndvud2202.gemalto.com:8502' => [ServerDescription]
          },
          stale: false,
          compatible: true,
          compatibilityError: null,
          logicalSessionTimeoutMinutes: 30,
          heartbeatFrequencyMS: 10000,
          localThresholdMS: 15,
          commonWireVersion: 7
        },
        [Symbol(mongoErrorContextSymbol)]: {}
      }
      2020-02-24T14:13:09.352Z mongo - Connecting
      2020-02-24T14:13:09.948Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:09.950Z network - Connected!
      2020-02-24T14:13:10.949Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:10.951Z network - Connected!
      2020-02-24T14:13:11.950Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:11.951Z network - Connected!
      2020-02-24T14:13:12.951Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:12.952Z network - Connected!
      2020-02-24T14:13:13.954Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:13.955Z network - Connected!
      2020-02-24T14:13:14.956Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:14.957Z network - Connected!
      2020-02-24T14:13:15.958Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:15.960Z network - Connected!
      2020-02-24T14:13:16.957Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:16.958Z network - Connected!
      2020-02-24T14:13:17.959Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:17.962Z network - Connected!
      2020-02-24T14:13:18.959Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:18.960Z network - Connected!
      2020-02-24T14:13:19.960Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:19.961Z network - Connected!
      2020-02-24T14:13:20.961Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:20.961Z network - Connected!
      2020-02-24T14:13:21.962Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:21.963Z network - Connected!
      2020-02-24T14:13:22.963Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:22.964Z network - Connected!
      2020-02-24T14:13:23.967Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:23.968Z network - Connected!
      2020-02-24T14:13:24.969Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:24.970Z network - Connected!
      2020-02-24T14:13:25.970Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:25.970Z network - Connected!
      2020-02-24T14:13:26.970Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:26.971Z network - Connected!
      2020-02-24T14:13:27.971Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:27.974Z network - Connected!
      2020-02-24T14:13:28.972Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:28.974Z network - Connected!
      2020-02-24T14:13:29.974Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:29.975Z network - Connected!
      2020-02-24T14:13:30.975Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:30.976Z network - Connected!
      2020-02-24T14:13:31.977Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:31.979Z network - Connected!
      2020-02-24T14:13:32.986Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:32.987Z network - Connected!
      2020-02-24T14:13:33.988Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:33.990Z network - Connected!
      2020-02-24T14:13:34.989Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:34.990Z network - Connected!
      2020-02-24T14:13:35.996Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:35.998Z network - Connected!
      2020-02-24T14:13:37.002Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:37.003Z network - Connected!
      2020-02-24T14:13:38.004Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:38.006Z network - Connected!
      2020-02-24T14:13:39.004Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:39.006Z network - Connected!
      2020-02-24T14:13:39.356Z mongo - Error: MongoServerSelectionError: Server selection timed out after 30000 ms
          at Timeout._onTimeout (/home/mfred/tmp/long-mongo-3.5/node_modules/mongodb/lib/core/sdam/topology.js:428:30)
          at listOnTimeout (internal/timers.js:531:17)
          at processTimers (internal/timers.js:475:7) {
        name: 'MongoServerSelectionError',
        reason: TopologyDescription {
          type: 'ReplicaSetWithPrimary',
          setName: 'mgrplset',
          maxSetVersion: 1,
          maxElectionId: null,
          servers: Map {
            'vgerndvud2202.gemalto.com:8500' => [ServerDescription],
            'vgerndvud2202.gemalto.com:8501' => [ServerDescription],
            'vgerndvud2202.gemalto.com:8502' => [ServerDescription]
          },
          stale: false,
          compatible: true,
          compatibilityError: null,
          logicalSessionTimeoutMinutes: 30,
          heartbeatFrequencyMS: 10000,
          localThresholdMS: 15,
          commonWireVersion: 7
        },
        [Symbol(mongoErrorContextSymbol)]: {}
      }
      2020-02-24T14:13:39.357Z mongo - Connecting
      2020-02-24T14:13:39.515Z mongo - Write done
      2020-02-24T14:13:40.005Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:40.005Z network - Connected!
      2020-02-24T14:13:40.532Z mongo - Done waiting
      2020-02-24T14:13:40.532Z mongo - Connecting
      2020-02-24T14:13:40.576Z mongo - Write done
      2020-02-24T14:13:41.005Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:41.006Z network - Connected!
      2020-02-24T14:13:41.580Z mongo - Done waiting
      2020-02-24T14:13:41.580Z mongo - Connecting
      2020-02-24T14:13:41.624Z mongo - Write done
      2020-02-24T14:13:42.005Z network - connecting to 127.0.0.1 on port 8500
      2020-02-24T14:13:42.006Z network - Connected!
      

            Assignee:
            matt.broadstone@mongodb.com Matt Broadstone
            Reporter:
            frederic.maillasson@gemalto.com Frederic Maillasson
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

              Created:
              Updated:
              Resolved: