Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-61479

Increase the number of retries to connect to a replica set following a stepdown

    XMLWordPrintable

Details

    • Improvement
    • Status: Closed
    • Major - P3
    • Resolution: Fixed
    • None
    • 5.2.0, 5.1.2, 5.0.6, 4.4.11
    • None
    • None
    • Fully Compatible
    • v5.1, v5.0, v4.4
    • Execution Team 2021-11-29
    • 24

    Description

      A build failed on a slow machine when the test timed out trying to reconnect to a replica set:

       
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] Recreating replica set from config {
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 	"_id" : "backup_restore_stop_start",
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 	"version" : 3,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 	"term" : 1,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 	"protocolVersion" : NumberLong(1),
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 	"writeConcernMajorityJournalDefault" : true,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 	"members" : [
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 		{
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"_id" : 0,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"host" : "macos-1014-137.macstadium.build.10gen.cc:20270",
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"arbiterOnly" : false,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"buildIndexes" : true,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"hidden" : false,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"priority" : 1,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"tags" : {
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0]
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			},
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"slaveDelay" : NumberLong(0),
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"votes" : 1
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 		},
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 		{
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"_id" : 1,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"host" : "macos-1014-137.macstadium.build.10gen.cc:20271",
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"arbiterOnly" : false,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"buildIndexes" : true,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"hidden" : false,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"priority" : 1,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0] 			"tags" : {
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.121-0400 sh79139| [WaitForReplication:job0]
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			},
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"slaveDelay" : NumberLong(0),
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"votes" : 1
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		},
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		{
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"_id" : 2,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"host" : "macos-1014-137.macstadium.build.10gen.cc:20272",
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"arbiterOnly" : false,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"buildIndexes" : true,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"hidden" : false,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"priority" : 1,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"tags" : {
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0]
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			},
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"slaveDelay" : NumberLong(0),
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"votes" : 1
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		}
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 	],
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 	"settings" : {
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		"chainingAllowed" : true,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		"heartbeatIntervalMillis" : 2000,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		"heartbeatTimeoutSecs" : 10,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		"electionTimeoutMillis" : 60000,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		"catchUpTimeoutMillis" : -1,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		"catchUpTakeoverDelayMillis" : 30000,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		"getLastErrorModes" : {
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0]
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		},
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		"getLastErrorDefaults" : {
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"w" : 1,
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 			"wtimeout" : 0
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		},
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 		"replicaSetId" : ObjectId("616e79d72ca9e8fc78add28b")
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 	}
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] }
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] Network error occurred and the call will be retried: {
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 	"error" : "Error: couldn't connect to server macos-1014-137.macstadium.build.10gen.cc:20271, connection attempt failed: SocketException: Error connecting to macos-1014-137.macstadium.build.10gen.cc:20271 (207.254.77.156:20271) :: caused by :: Connection refused",
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] 	"stack" : "_constructFromExistingSeedNode/self.nodes<@src/mongo/shell/replsettest.js:3299:24\n_constructFromExistingSeedNode@src/mongo/shell/replsettest.js:3298:22\nReplSetTest/<@src/mongo/shell/replsettest.js:3313:13\nretryOnNetworkError@src/mongo/shell/utils.js:57:20\nReplSetTest@src/mongo/shell/replsettest.js:3309:9\n@(shell eval):1:1369\n"
      [js_test:backup_restore_stop_start] 2021-10-19T03:56:22.122-0400 sh79139| [WaitForReplication:job0] }
      

      Increasing the number of connection retries would give the slow machine the additional time it needs to reconnect.

      Attachments

        Activity

          People

            gregory.wlodarek@mongodb.com Gregory Wlodarek
            gregory.wlodarek@mongodb.com Gregory Wlodarek
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: