|
Still seem to be running into this invariant with rc4.
The latest test runs, with full logs, are available at:
https://spruce.mongodb.com/version/6273538e0ae60663c6e6cfbc/tasks?sorts=STATUS%3AASC%3BBASE_STATUS%3ADESC
Mongo Version
{"t":{"$date":"2022-05-05T06:08:10.625+00:00"},"s":"I", "c":"CONTROL", "id":23403, "ctx":"initandlisten","msg":"Build Info","attr":{"buildInfo":{"version":"6.0.0-rc4","gitVersion":"ff5e921b9378126dc1a44ae4d783d39afde09a7a","openSSLVersion":"OpenSSL 1.0.2k-fips 26 Jan 2017","modules":["enterprise"],"allocator":"tcmalloc","environment":{"distmod":"amazon2","distarch":"x86_64","target_arch":"x86_64"}}}}
|
Backtrace
backtrace2.txt
Invariant
{"t":{"$date":"2022-05-05T06:09:04.746+00:00"},"s":"F", "c":"ASSERT", "id":23079, "ctx":"conn402","msg":"Invariant failure","attr":{"expr":"!opCtx->lockState()->isLocked()","file":"src/mongo/db/repl/replication_coordinator_impl.cpp","line":2017}}
|
|
|
|
xiang.gao@mongodb.com, fah.vorasucha@mongodb.com, would it be possible to have the Cloud team re-test the restore procedure with MongoDB 6.0.0-rc4? I believe this invariant failure has already been fixed by SERVER-65015.
The changes from a295897 made it so setOrphanCountersOnRangeDeletionTasks() is no longer running the getMore command while the ScopedRangeDeleterLock is held.
I confirmed I was able to reproduce the invariant failure by following the steps in repro_for_SERVER-66132.patch from pierlauro.sciarelli@mongodb.com, applied on top of MongoDB 6.0.0-rc1 with the following additional test change.
diff --git a/jstests/sharding/repro.js b/jstests/sharding/repro.js
|
index b2ac347903e..abffce5b076 100644
|
--- a/jstests/sharding/repro.js
|
+++ b/jstests/sharding/repro.js
|
@@ -12,7 +12,7 @@ TestData.skipCheckOrphans = true;
|
let st = new ShardingTest(
|
{shards: 2,
|
rs: {
|
- nodes: 1,
|
+ nodes: 2,
|
// disable the range deleter; this will maintain the persisted docs.
|
setParameter: {disableResumableRangeDeleter: true}
|
}
|
Note that a 2-node replica set is needed to avoid hitting this other invariant while waiting for write concern with a LockManager lock held.
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.565+00:00"},"s":"I", "c":"MIGRATE", "id":467560, "ctx":"conn33","msg":"Going to start blocking migrations","attr":{"reason":"setFeatureCompatibilityVersionUpgrade"}}
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.566+00:00"},"s":"I", "c":"-", "id":0, "ctx":"conn33","msg":"TROLL doRequestMore()"}
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.566+00:00"},"s":"F", "c":"ASSERT", "id":23079, "ctx":"conn33","msg":"Invariant failure","attr":{"expr":"isNoop() || getClientState() == Locker::ClientState::kInactive","file":"src/mongo/db/concurrency/locker.h","line":504}}
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.566+00:00"},"s":"F", "c":"ASSERT", "id":23080, "ctx":"conn33","msg":"\n\n***aborting after invariant() failure\n\n"}
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.566+00:00"},"s":"F", "c":"CONTROL", "id":4757800, "ctx":"conn33","msg":"Writing fatal message","attr":{"message":"Got signal: 6 (Aborted).\n"}}
|
...
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.697+00:00"},"s":"I", "c":"CONTROL", "id":31445, "ctx":"conn33","msg":"Frame","attr":{"frame":{"a":"7F594B7E1BF9","b":"7F594B6B3000","o":"12EBF9","s":"_ZN5mongo15invariantFailedEPKcS1_j","C":"mongo::invariantFailed(char const*, char const*, unsigned int)","s+":"F7"}}}
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.697+00:00"},"s":"I", "c":"CONTROL", "id":31445, "ctx":"conn33","msg":"Frame","attr":{"frame":{"a":"7F5947D070B6","b":"7F5947CEF000","o":"180B6","s":"_ZN5mongo4repl20StorageInterfaceImpl39waitForAllEarlierOplogWritesToBeVisibleEPNS_16OperationContextEb.cold.1743","C":"mongo::repl::StorageInterfaceImpl::waitForAllEarlierOplogWritesToBeVisible(mongo::OperationContext*, bool) [clone .cold.1743]","s+":"F2"}}}
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.697+00:00"},"s":"I", "c":"CONTROL", "id":31445, "ctx":"conn33","msg":"Frame","attr":{"frame":{"a":"7F5946E8EA0C","b":"7F5946E7A000","o":"14A0C","s":"_ZN5mongo27waitForNoOplogHolesIfNeededEPNS_16OperationContextE","C":"mongo::waitForNoOplogHolesIfNeeded(mongo::OperationContext*)","s+":"1BC"}}}
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.697+00:00"},"s":"I", "c":"CONTROL", "id":31445, "ctx":"conn33","msg":"Frame","attr":{"frame":{"a":"7F5946E8FFF8","b":"7F5946E7A000","o":"15FF8","s":"_ZN5mongo19waitForWriteConcernEPNS_16OperationContextERKNS_4repl6OpTimeERKNS_19WriteConcernOptionsEPNS_18WriteConcernResultE","C":"mongo::waitForWriteConcern(mongo::OperationContext*, mongo::repl::OpTime const&, mongo::WriteConcernOptions const&, mongo::WriteConcernResult*)","s+":"268"}}}
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.697+00:00"},"s":"I", "c":"CONTROL", "id":31445, "ctx":"conn33","msg":"Frame","attr":{"frame":{"a":"7F5946F2953D","b":"7F5946F1A000","o":"F53D","s":"_ZZNK5mongo23ServiceEntryPointMongod5Hooks19waitForWriteConcernEPNS_16OperationContextEPKNS_17CommandInvocationERKNS_4repl6OpTimeERNS_14BSONObjBuilderEENKUlvE_clEv","C":"mongo::ServiceEntryPointMongod::Hooks::waitForWriteConcern(mongo::OperationContext*, mongo::CommandInvocation const*, mongo::repl::OpTime const&, mongo::BSONObjBuilder&) const::{lambda()#1}::operator()() const","s+":"3ED"}}}
|
[js_test:repro] d20020| {"t":{"$date":"2022-05-04T20:22:52.697+00:00"},"s":"I", "c":"CONTROL", "id":31445, "ctx":"conn33","msg":"Frame","attr":{"frame":{"a":"7F5946F29DD6","b":"7F5946F1A000","o":"FDD6","s":"_ZNK5mongo23ServiceEntryPointMongod5Hooks19waitForWriteConcernEPNS_16OperationContextEPKNS_17CommandInvocationERKNS_4repl6OpTimeERNS_14BSONObjBuilderE","C":"mongo::ServiceEntryPointMongod::Hooks::waitForWriteConcern(mongo::OperationContext*, mongo::CommandInvocation const*, mongo::repl::OpTime const&, mongo::BSONObjBuilder&) const","s+":"176"}}}
|
|
|
Attaching the full symbolicated C++ stacktrace as backtrace.txt. A portion of its contents is inlined here for Jira searchability.
I'm a little surprised to see that reading from a cursor can lead to waiting for write concern. It appears the getMore command uses RunCommandAndWaitForWriteConcern, so perhaps the internal find command inherited the default write concern of {w: "majority"}?
$ python buildscripts/mongosymb.py --symbolizer-path /opt/mongodbtoolchain/v3/bin/llvm-symbolizer --debug-file-resolver path ~/temp_server66132/mongodb-linux-x86_64-enterprise-amazon2-6.0.0-rc1/bin/mongod.debug <backtrace.json
|
...
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/db/repl/replication_coordinator_impl.cpp:2009:5: mongo::repl::ReplicationCoordinatorImpl::awaitReplication(mongo::OperationContext*, mongo::repl::OpTime const&, mongo::WriteConcernOptions const&) (.cold.4083)
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/db/write_concern.cpp:343:89: mongo::waitForWriteConcern(mongo::OperationContext*, mongo::repl::OpTime const&, mongo::WriteConcernOptions const&, mongo::WriteConcernResult*)
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/db/service_entry_point_mongod.cpp:132:97: mongo::ServiceEntryPointMongod::Hooks::waitForWriteConcern(mongo::OperationContext*, mongo::CommandInvocation const*, mongo::repl::OpTime const&, mongo::BSONObjBuilder&) const::'lambda'()::operator()() const
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/db/service_entry_point_mongod.cpp:152:47: mongo::ServiceEntryPointMongod::Hooks::waitForWriteConcern(mongo::OperationContext*, mongo::CommandInvocation const*, mongo::repl::OpTime const&, mongo::BSONObjBuilder&) const
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/db/service_entry_point_common.cpp:1170:49: mongo::(anonymous namespace)::RunCommandAndWaitForWriteConcern::_waitForWriteConcern(mongo::BSONObjBuilder&)
|
...
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/client/dbclient_cursor.cpp:428:29: mongo::DBClientCursor::requestMore()
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/client/dbclient_cursor.cpp:510:16: mongo::DBClientCursor::more()
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/client/dbclient_cursor.cpp:500:6: mongo::DBClientCursor::more()
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/db/persistent_task_store.h:159:28: mongo::PersistentTaskStore<mongo::RangeDeletionTask>::forEach(mongo::OperationContext*, mongo::BSONObj const&, std::function<bool (mongo::RangeDeletionTask const&)>)
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/db/s/range_deletion_util.cpp:643:18: mongo::setOrphanCountersOnRangeDeletionTasks(mongo::OperationContext*)
|
/data/mci/3c43825604be2ead8488679f7d391122/src/src/mongo/db/commands/set_feature_compatibility_version_command.cpp:483:62: mongo::(anonymous namespace)::SetFeatureCompatibilityVersionCommand::run(mongo::OperationContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, mongo::BSONObj const&, mongo::BSONObjBuilder&)
|
|