-
Type:
Bug
-
Resolution: Fixed
-
Priority:
Major - P3
-
Affects Version/s: None
-
Component/s: Replication
-
None
-
Replication
-
Fully Compatible
-
ALL
-
Repl 2026-03-16
-
200
-
None
-
None
-
None
-
None
-
None
-
None
-
None
In BF-41952 we see a new data race caused by the recent commit for SERVER-110728.
WARNING: ThreadSanitizer: data race (pid=35857) Read of size 1 at 0x7254004d5a4a by thread T502 (mutexes: write M0): #0 mongo::OperationContext::isRetryableWrite() const src/mongo/db/operation_context.h:358:15 (libauto_get_rstl_for_stepup_stepdown.so+0xc430b) (BuildId: 55d61c384269e45091eab9e64ce797eb88d56ade) #1 mongo::repl::OpsAndSessionsKiller::killConflictingOpsAndSessionsOnStepUpAndStepDown() src/mongo/db/repl/auto_get_rstl_for_stepup_stepdown.cpp:240:51 (libauto_get_rstl_for_stepup_stepdown.so+0xc430b) #2 mongo::repl::AutoGetRstlForStepUpStepDown::_killOpThreadFn(mongo::Date_t) src/mongo/db/repl/auto_get_rstl_for_stepup_stepdown.cpp:137:16 (libauto_get_rstl_for_stepup_stepdown.so+0xc39bb) (BuildId: 55d61c384269e45091eab9e64ce797eb88d56ade) #3 mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0::operator()() const src/mongo/db/repl/auto_get_rstl_for_stepup_stepdown.cpp:120:71 (libauto_get_rstl_for_stepup_stepdown.so+0xc5682) (BuildId: 55d61c384269e45091eab9e64ce797eb88d56ade) #4 void std::__invoke_impl<void, mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0>(std::__invoke_other, mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0&&) external/mongo_toolchain_v5/stow/gcc-v5/include/c++/14.2.0/bits/invoke.h:61:14 (libauto_get_rstl_for_stepup_stepdown.so+0xc5682) #5 std::__invoke_result<mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0>::type std::__invoke<mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0>(mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0&&) external/mongo_toolchain_v5/stow/gcc-v5/include/c++/14.2.0/bits/invoke.h:96:14 (libauto_get_rstl_for_stepup_stepdown.so+0xc5682) #6 decltype(auto) std::__apply_impl<mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0, 
std::tuple<>>(mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0&&, std::tuple<>&&, std::integer_sequence<unsigned long, ...>) external/mongo_toolchain_v5/stow/gcc-v5/include/c++/14.2.0/tuple:2921:14 (libauto_get_rstl_for_stepup_stepdown.so+0xc5682) #7 decltype(auto) std::apply<mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0, std::tuple<>>(mongo::repl::AutoGetRstlForStepUpStepDown::_startKillOpThread(mongo::Date_t)::$_0&&, std::tuple<>&&) external/mongo_toolchain_v5/stow/gcc-v5/include/c++/14.2.0/tuple:2936:14 (libauto_get_rstl_for_stepup_stepdown.so+0xc5682) ... SUMMARY: ThreadSanitizer: data race src/mongo/db/operation_context.h:358:15 in mongo::OperationContext::isRetryableWrite() const
Previous write of size 1 at 0x7254004d5a4a by thread T373 (mutexes: write M1): #0 mongo::OperationContext::setInMultiDocumentTransaction() src/mongo/db/operation_context.h:350:37 (libshared_request_handling.so+0xb38e8) (BuildId: 83a0a9d5f49e8e537d7ebae11ed3d0dc447c76e1) #1 mongo::initializeOperationSessionInfo(mongo::OperationContext*, boost::optional<mongo::TenantId> const&, mongo::OperationSessionInfoFromClientBase const&, bool, bool, bool) src/mongo/db/initialize_operation_session_info.cpp:192:16 (libshared_request_handling.so+0xb38e8) #2 mongo::(anonymous namespace)::ExecCommandDatabase::_initiateCommand() src/mongo/db/service_entry_point_shard_role.cpp:1591:23 (libservice_context_d.so+0xee04c) (BuildId: 4a5b75e8cb760e2438fdbef1d7d06888761b26b7) #3 mongo::(anonymous namespace)::ExecCommandDatabase::run()::'lambda'()::operator()() const src/mongo/db/service_entry_point_shard_role.cpp:486:17 (libservice_context_d.so+0xee04c) #4 mongo::(anonymous namespace)::ExecCommandDatabase::run() src/mongo/db/service_entry_point_shard_role.cpp:484:23 (libservice_context_d.so+0xeb11d) (BuildId: 4a5b75e8cb760e2438fdbef1d7d06888761b26b7) #5 mongo::(anonymous namespace)::executeCommand(mongo::(anonymous namespace)::HandleRequest::ExecutionContext&) src/mongo/db/service_entry_point_shard_role.cpp:2256:16 (libservice_context_d.so+0xeb11d) #6 mongo::(anonymous namespace)::receivedCommands(mongo::(anonymous namespace)::HandleRequest::ExecutionContext&) src/mongo/db/service_entry_point_shard_role.cpp:2327:9 (libservice_context_d.so+0xeb11d) #7 mongo::(anonymous namespace)::HandleRequest::runOperation() src/mongo/db/service_entry_point_shard_role.cpp:2386:20 (libservice_context_d.so+0xe8147) (BuildId: 4a5b75e8cb760e2438fdbef1d7d06888761b26b7) #8 mongo::ServiceEntryPointShardRole::handleRequest(mongo::OperationContext*, mongo::Message const&, mongo::Date_t) src/mongo/db/service_entry_point_shard_role.cpp:2491:28 (libservice_context_d.so+0xe8147) #9 
mongo::transport::SessionWorkflow::Impl::_dispatchWork() src/mongo/transport/session_workflow.cpp:809:18 (libsession_manager.so+0x100426) (BuildId: c95f641d50a4996217978ec01b97fccc647ef8f5) #10 _ZZN5mongo9transport15SessionWorkflow4Impl15_doOneIterationEvENK3$_0clISt10unique_ptrINS2_8WorkItemESt14default_deleteIS6_EEEEDaT_ src/mongo/transport/session_workflow.cpp:877:20 (libsession_manager.so+0x10443a) (BuildId: c95f641d50a4996217978ec01b97fccc647ef8f5) #11 auto mongo::future_details::call<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&, std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&, std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>&&) src/mongo/util/future_impl.h:253:12 (libsession_manager.so+0x10443a) #12 auto mongo::future_details::throwingCall<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&, std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&, std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>&&) src/mongo/util/future_impl.h:311:16 (libsession_manager.so+0x10443a) #13 auto mongo::future_details::FutureImpl<std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>::then<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&&) &&::'lambda'(std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, 
std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>&&)::operator()(std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>&&) const src/mongo/util/future_impl.h:975:60 (libsession_manager.so+0x10443a) #14 auto mongo::future_details::FutureImpl<std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>::generalImpl<auto mongo::future_details::FutureImpl<std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>::then<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&&) &&::'lambda'(std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>&&), auto mongo::future_details::FutureImpl<std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>::then<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&&) &&::'lambda'(mongo::Status&&), auto mongo::future_details::FutureImpl<std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>::then<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&&) &&::'lambda'()>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&&, auto mongo::future_details::FutureImpl<std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, 
std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>::then<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&&) &&::'lambda'(mongo::Status&&)&&, auto mongo::future_details::FutureImpl<std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>::then<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&&) &&::'lambda'()&&) src/mongo/util/future_impl.h:1253:20 (libsession_manager.so+0x1013be) (BuildId: c95f641d50a4996217978ec01b97fccc647ef8f5) #15 auto mongo::future_details::FutureImpl<std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>::then<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0>(mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0&&) && src/mongo/util/future_impl.h:971:20 (libsession_manager.so+0x1013be) #16 auto mongo::Future<std::unique_ptr<mongo::transport::SessionWorkflow::Impl::WorkItem, std::default_delete<mongo::transport::SessionWorkflow::Impl::WorkItem>>>::then<mongo::transport::SessionWorkflow::Impl::_doOneIteration()::$_0>(T&&) && src/mongo/util/future.h:419:53 (libsession_manager.so+0x1013be) #17 mongo::transport::SessionWorkflow::Impl::_doOneIteration() src/mongo/transport/session_workflow.cpp:873:10 (libsession_manager.so+0x1013be) #18 mongo::transport::SessionWorkflow::Impl::_scheduleIteration()::$_0::operator()(mongo::Status) const src/mongo/transport/session_workflow.cpp:918:17 (libsession_manager.so+0x1058ae) (BuildId: c95f641d50a4996217978ec01b97fccc647ef8f5) #19 auto mongo::unique_function<void 
(mongo::Status)>::makeImpl<mongo::transport::SessionWorkflow::Impl::_scheduleIteration()::$_0>(mongo::transport::SessionWorkflow::Impl::_scheduleIteration()::$_0&&)::SpecificImpl::call(mongo::Status&&) src/mongo/util/functional.h:264:21 (libsession_manager.so+0x1058ae) #20 mongo::unique_function<void (mongo::Status)>::operator()(mongo::Status) const src/mongo/util/functional.h:222:22 (libsession_manager.so+0x10f02b) (BuildId: c95f641d50a4996217978ec01b97fccc647ef8f5) #21 mongo::transport::SessionWorkflow::Impl::_captureContext(mongo::unique_function<void (mongo::Status)>)::'lambda'(mongo::Status)::operator()(mongo::Status)::'lambda'()::operator()() const src/mongo/transport/session_workflow.cpp:511:38 (libsession_manager.so+0x10f02b) #22 void mongo::ClientStrand::run<mongo::transport::SessionWorkflow::Impl::_captureContext(mongo::unique_function<void (mongo::Status)>)::'lambda'(mongo::Status)::operator()(mongo::Status)::'lambda'()>(mongo::transport::SessionWorkflow::Impl::_captureContext(mongo::unique_function<void (mongo::Status)>)::'lambda'(mongo::Status)::operator()(mongo::Status)::'lambda'()) src/mongo/db/client_strand.h:177:16 (libsession_manager.so+0x10f02b) #23 mongo::transport::SessionWorkflow::Impl::_captureContext(mongo::unique_function<void (mongo::Status)>)::'lambda'(mongo::Status)::operator()(mongo::Status) src/mongo/transport/session_workflow.cpp:511:28 (libsession_manager.so+0x10ee8e) (BuildId: c95f641d50a4996217978ec01b97fccc647ef8f5) #24 auto mongo::unique_function<void (mongo::Status)>::makeImpl<mongo::transport::SessionWorkflow::Impl::_captureContext(mongo::unique_function<void (mongo::Status)>)::'lambda'(mongo::Status)>(mongo::transport::SessionWorkflow::Impl::_captureContext(mongo::unique_function<void (mongo::Status)>)::'lambda'(mongo::Status)&&)::SpecificImpl::call(mongo::Status&&) src/mongo/util/functional.h:264:21 (libsession_manager.so+0x10ee8e) #25 mongo::unique_function<void (mongo::Status)>::operator()(mongo::Status) const 
src/mongo/util/functional.h:222:22 (libservice_executor.so+0xca043) (BuildId: 3e5be340cb4cf4688cac1ce1705538e3161fe11b)
OperationContext::isRetryableWrite() accesses the opCtx's _inMultiDocumentTransaction field. killConflictingOpsAndSessionsOnStepUpAndStepDown() calls OperationContext::isRetryableWrite() while holding the lock on the client, but we call OperationContext::setInMultiDocumentTransaction() in various places without taking the lock on the corresponding client, which leads to this data race.
To fix this race we should take the lock on the opCtx's client every time we call OperationContext::setInMultiDocumentTransaction().
- is caused by
-
SERVER-110728 Retried retryable writes are not interrupted on stepDown
-
- Closed
-