Improve ShardingTest initialization to avoid hanging with some hooks

XML | Word | Printable | JSON

    • Type: Task
    • Resolution: Unresolved
    • Priority: Major - P3
    • None
    • Affects Version/s: None
    • Component/s: None
    • None
    • Catalog and Routing
    • CAR Team 2026-03-16
    • 2
    • 🟥 DDL, 🟦 Shard Catalog, 🟩 Routing and Topology
    • None
    • None
    • None
    • None
    • None
    • None

      A JS test may hang when a stepdown is executed during ShardingTest initialization, for example by the jstests/libs/override_methods/sharding_csrs_continuous_config_stepdown.js hook in the sharding_csrs_continuous_config_stepdown suite (see the trace below).
      ShardingTest has to pause such hooks during initialization and resume them once initialization has completed.

       [js_test:custom_write_concern] uncaught exception: Error: command failed: {
      [js_test:custom_write_concern] 	"ok" : 0,
      [js_test:custom_write_concern] 	"errmsg" : "operation exceeded time limit",
      [js_test:custom_write_concern] 	"code" : 50,
      [js_test:custom_write_concern] 	"codeName" : "MaxTimeMSExpired",
      [js_test:custom_write_concern] 	"$clusterTime" : {
      [js_test:custom_write_concern] 		"clusterTime" : Timestamp(1769619707, 1),
      [js_test:custom_write_concern] 		"signature" : {
      [js_test:custom_write_concern] 			"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
      [js_test:custom_write_concern] 			"keyId" : NumberLong(0)
      [js_test:custom_write_concern] 		}
      [js_test:custom_write_concern] 	},
      [js_test:custom_write_concern] 	"operationTime" : Timestamp(1769619707, 1)
      [js_test:custom_write_concern] } with original command request: {
      [js_test:custom_write_concern] 	"balancerStop" : 1,
      [js_test:custom_write_concern] 	"maxTimeMS" : 60000,
      [js_test:custom_write_concern] 	"lsid" : {
      [js_test:custom_write_concern] 		"id" : UUID("b98bc2fa-9e1b-4917-9ea6-166962be4a12")
      [js_test:custom_write_concern] 	},
      [js_test:custom_write_concern] 	"$traceCtx" : {
      [js_test:custom_write_concern] 		"traceparent" : "00-23c21a3e8100bdc4b054b4f886de273f-0aa55ef7b75615f2-01"
      [js_test:custom_write_concern] 	}
      [js_test:custom_write_concern] } on connection: connection to ip-10-128-178-238.ec2.internal:21549 with errmsg: operation exceeded time limit :
      [js_test:custom_write_concern] _getErrorWithCode@src/mongo/shell/utils.js:32:13
      [js_test:custom_write_concern] doassert@src/mongo/shell/assert.js:47:14
      [js_test:custom_write_concern] _doassert@src/mongo/shell/assert.js:181:13
      [js_test:custom_write_concern] _assertCommandWorked@src/mongo/shell/assert.js:1249:22
      [js_test:custom_write_concern] assert.commandWorked@src/mongo/shell/assert.js:1423:12
      [js_test:custom_write_concern] sh.stopBalancer@src/mongo/shell/utils_sh.js:243:19
      [js_test:custom_write_concern] _extendWithShMethods/</st[fn]@/data/mci/1cf3586db16d319b3e5b2d09ff01f537/src/jstests/libs/shardingtest.js:2084:31
      [js_test:custom_write_concern] _configureCluster@/data/mci/1cf3586db16d319b3e5b2d09ff01f537/src/jstests/libs/shardingtest.js:2097:12
      [js_test:custom_write_concern] ShardingTest@/data/mci/1cf3586db16d319b3e5b2d09ff01f537/src/jstests/libs/shardingtest.js:1844:34
      [js_test:custom_write_concern] ShardingTestWithContinuousPrimaryStepdown@/data/mci/1cf3586db16d319b3e5b2d09ff01f537/src/jstests/libs/override_methods/continuous_stepdown.js:326:13
      [js_test:custom_write_concern] ShardingTestWithContinuousFailover@/data/mci/1cf3586db16d319b3e5b2d09ff01f537/src/jstests/libs/override_methods/sharding_csrs_continuous_config_stepdown.js:41:9
      [js_test:custom_write_concern] ShardingTest@/data/mci/1cf3586db16d319b3e5b2d09ff01f537/src/jstests/libs/shardingtest.js:1063:20
      [js_test:custom_write_concern] @jstests/sharding/custom_write_concern.js:9:12
      [js_test:custom_write_concern] Error: command failed: {
      [js_test:custom_write_concern] 	"ok" : 0,
      [js_test:custom_write_concern] 	"errmsg" : "operation exceeded time limit",
      [js_test:custom_write_concern] 	"code" : 50,
      [js_test:custom_write_concern] 	"codeName" : "MaxTimeMSExpired",
      [js_test:custom_write_concern] 	"$clusterTime" : {
      [js_test:custom_write_concern] 		"clusterTime" : Timestamp(1769619707, 1),
      [js_test:custom_write_concern] 		"signature" : {
      [js_test:custom_write_concern] 			"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
      [js_test:custom_write_concern] 			"keyId" : NumberLong(0)
      [js_test:custom_write_concern] 		}
      [js_test:custom_write_concern] 	},
      [js_test:custom_write_concern] 	"operationTime" : Timestamp(1769619707, 1)
      [js_test:custom_write_concern] } with original command request: {
      [js_test:custom_write_concern] 	"balancerStop" : 1,
      [js_test:custom_write_concern] 	"maxTimeMS" : 60000,
      [js_test:custom_write_concern] 	"lsid" : {
      [js_test:custom_write_concern] 		"id" : UUID("b98bc2fa-9e1b-4917-9ea6-166962be4a12")
      [js_test:custom_write_concern] 	},
      [js_test:custom_write_concern] 	"$traceCtx" : {
      [js_test:custom_write_concern] 		"traceparent" : "00-23c21a3e8100bdc4b054b4f886de273f-0aa55ef7b75615f2-01"
      [js_test:custom_write_concern] 	}
      [js_test:custom_write_concern] } on connection: connection to ip-10-128-178-238.ec2.internal:21549 with errmsg: operation exceeded time limit
      [js_test:custom_write_concern] c21540| {"t":{"$date":"2026-01-28T17:03:46.546+00:00"},"s":"I",  "c":"COMMAND",  "id":51803,   "svc":"S", "ctx":"conn66","msg":"Slow query","attr": 

            Assignee:
            Igor Praznik
            Reporter:
            Igor Praznik
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

              Created:
              Updated: