-
Type: Bug
-
Resolution: Fixed
-
Priority: Major - P3
-
Affects Version/s: None
-
Component/s: None
-
None
-
Storage Execution
-
Fully Compatible
-
ALL
-
Execution Team 2023-04-03
-
136
Oplog application holds the PBWM lock in exclusive mode and is waiting on the _oplogPinnedByBackupMutex.
Thread 52: "" (Thread 0x7f6d94df3700 (LWP 23283)) #0 0x00007f6dca3e089d in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00007f6dca3d9b59 in pthread_mutex_lock () from /lib64/libpthread.so.0 #2 0x00007f6dcc519000 in __gthread_mutex_lock (__mutex=0x7f6db9d0c408) at /opt/mongodbtoolchain/revisions/11316f1e7b36f08dcdd2ad0640af18f9287876f4/stow/gcc-v4.spX/include/c++/11.3.0/x86_64-mongodb-linux/bits/gthr-default.h:749 #3 std::mutex::lock (this=0x7f6db9d0c408) at /opt/mongodbtoolchain/revisions/11316f1e7b36f08dcdd2ad0640af18f9287876f4/stow/gcc-v4.spX/include/c++/11.3.0/bits/std_mutex.h:100 #4 mongo::latch_detail::Mutex::lock (this=0x7f6db9d0c3f0) at src/mongo/platform/mutex.cpp:74 #5 mongo::latch_detail::Mutex::lock (this=0x7f6db9d0c3f0) at src/mongo/platform/mutex.cpp:66 #6 0x00007f6dc12ecc2b in std::lock_guard<mongo::latch_detail::Latch>::lock_guard (__m=..., this=<synthetic pointer>) at /opt/mongodbtoolchain/revisions/11316f1e7b36f08dcdd2ad0640af18f9287876f4/stow/gcc-v4.spX/include/c++/11.3.0/bits/std_mutex.h:228 #7 mongo::WiredTigerKVEngine::getPinnedOplog (this=0x7f6db9d0c020) at src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp:2462 #8 0x00007f6dc1312934 in mongo::WiredTigerRecordStore::getPinnedOplog (this=<optimized out>) at src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp:763 #9 mongo::WiredTigerRecordStore::OplogTruncateMarkers::_hasExcessMarkers (opCtx=<optimized out>, this=0x7f6db1f826b0) at src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp:229 #10 mongo::WiredTigerRecordStore::OplogTruncateMarkers::_hasExcessMarkers (this=0x7f6db1f826b0, opCtx=<optimized out>) at src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp:214 #11 0x00007f6dc56632ad in mongo::CollectionTruncateMarkers::pokeReclaimThreadIfNeeded (this=0x7f6db1f826b0, opCtx=<optimized out>) at src/mongo/db/storage/collection_markers.cpp:255 #12 0x00007f6dc5668efd in mongo::CollectionTruncateMarkers::createNewMarkerIfNeeded (this=0x7f6db1f826b0, opCtx=0x7f6daaa38f20, lastRecord=..., wallTime=...) at src/mongo/db/storage/collection_markers.cpp:169 #13 0x00007f6dc2290709 in mongo::RecoveryUnit::_executeCommitHandlers (this=0x7f6d9b85b020, commitTimestamp=...) at src/mongo/db/storage/recovery_unit.cpp:146 #14 0x00007f6dc2290cfe in mongo::RecoveryUnit::commitRegisteredChanges (this=this@entry=0x7f6d9b85b020, commitTimestamp=...) at src/mongo/db/storage/recovery_unit.cpp:102 #15 0x00007f6dc13354b6 in mongo::WiredTigerRecoveryUnit::_commit (this=0x7f6d9b85b020) at src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp:109 #16 mongo::WiredTigerRecoveryUnit::doCommitUnitOfWork (this=0x7f6d9b85b020) at src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp:156 #17 0x00007f6dc2291cc3 in mongo::RecoveryUnit::commitUnitOfWork (this=0x7f6d9b85b020) at src/mongo/db/storage/recovery_unit.cpp:114 #18 0x00007f6dc229cc94 in mongo::WriteUnitOfWork::commit (this=this@entry=0x7f6d94df1650) at /opt/mongodbtoolchain/revisions/11316f1e7b36f08dcdd2ad0640af18f9287876f4/stow/gcc-v4.spX/include/c++/11.3.0/bits/unique_ptr.h:173 #19 0x00007f6dc7c72338 in mongo::repl::(anonymous namespace)::_insertDocumentsToOplogAndChangeCollections (opCtx=opCtx@entry=0x7f6daaa38f20, begin=begin@entry={ stmtIds = std::vector of length 1, capacity 1 = {-1}, oplogSlot = { _timestamp = { i = 4, secs = 1679204442 }, _term = 1 }, recordId = "null RecordId", doc = owned BSONObj 102592 bytes @ 0x7f6d8fd9605f = { [op] = "i", [ns] = "test.alphaPost", [ui] = {"$binary": {"base64": "VLJexFfbTruplwvmga+U5Q==", "subType": "04"}}, [o] = {"_id": {"$oid": "6416a05a3a779e25ac478d76"}, "docNum": 8.0, "payload": "..."}, [o2] = {"_id": {"$oid": "6416a05a3a779e25ac478d76"}}, [ts] = {"$timestamp": {"t": 1679204442, "i": 4}}, [t] = 1, [v] = 2, [wall] = {"$date": "2023-03-19T05:40:42.339Z"} } }, end=end@entry={ stmtIds = std::vector of length 933982117, capacity -904252160475958434 = {<error reading variable: Cannot access memory at address 0x58>, skipWritesToOplog=<optimized out>) at src/mongo/db/repl/oplog_applier_impl.cpp:173 #20 0x00007f6dc7c729f5 in operator()<mongo::OperationContext, __gnu_cxx::__normal_iterator<const mongo::InsertStatement*, std::vector<mongo::InsertStatement> >, __gnu_cxx::__normal_iterator<const mongo::InsertStatement*, std::vector<mongo::InsertStatement> > > (end=..., begin=..., opCtx=0x7f6daaa38f20, __closure=<optimized out>) at src/mongo/db/repl/oplog_applier_impl.cpp:474 #21 operator() (__closure=<optimized out>) at src/mongo/db/storage/storage_util.h:127 #22 mongo::writeConflictRetry<mongo::storage_helpers::insertBatchAndHandleRetry<mongo::repl::scheduleWritesToOplogAndChangeCollection(mongo::OperationContext*, mongo::repl::StorageInterface*, mongo::ThreadPool*, const std::vector<mongo::repl::OplogEntry>&, bool)::<lambda(size_t, size_t)>::<lambda(auto:104)>::<lambda(auto:105*, auto:106, auto:107)> >(mongo::OperationContext*, const mongo::NamespaceStringOrUUID&, const std::vector<mongo::InsertStatement>&, mongo::repl::scheduleWritesToOplogAndChangeCollection(mongo::OperationContext*, mongo::repl::StorageInterface*, mongo::ThreadPool*, const std::vector<mongo::repl::OplogEntry>&, bool)::<lambda(size_t, size_t)>::<lambda(auto:104)>::<lambda(auto:105*, auto:106, auto:107)>&&)::<lambda()> > (f=..., ns=..., opStr=..., opCtx=0x7f6daaa38f20) at src/mongo/db/concurrency/exception_util.h:142 #23 mongo::storage_helpers::insertBatchAndHandleRetry<mongo::repl::scheduleWritesToOplogAndChangeCollection(mongo::OperationContext*, mongo::repl::StorageInterface*, mongo::ThreadPool*, const std::vector<mongo::repl::OplogEntry>&, bool)::<lambda(size_t, size_t)>::<lambda(auto:104)>::<lambda(auto:105*, auto:106, auto:107)> > (insertFn=..., docs=std::vector of length 1, capacity 1 = {...}, nsOrUUID=..., opCtx=0x7f6daaa38f20) at src/mongo/db/storage/storage_util.h:126 #24 operator()<mongo::Status> (__closure=0x7f6db145b468, status=Status::OK()) at src/mongo/db/repl/oplog_applier_impl.cpp:474 ...
Opening a backup cursor holds the _oplogPinnedByBackupMutex and is waiting on the PBWM intent lock.
Thread 43: "" (Thread 0x7f6d9b95c700 (LWP 23215)) #0 0x00007f6dca0fc7e1 in poll () from /lib64/libc.so.6 #1 0x00007f6dc98d2fa5 in poll (__timeout=100, __nfds=<optimized out>, __fds=<optimized out>) at /usr/include/bits/poll2.h:46 #2 operator() (__closure=<optimized out>) at src/mongo/transport/asio/asio_networking_baton.cpp:383 #3 mongo::transport::AsioNetworkingBaton::_poll[abi:cxx11](std::unique_lock<mongo::latch_detail::Mutex>&, mongo::ClockSource*) (this=0x7f6d944551f0, lk=..., clkSource=<optimized out>) at src/mongo/transport/asio/asio_networking_baton.cpp:390 #4 0x00007f6dc98d442c in mongo::transport::AsioNetworkingBaton::run (this=0x7f6d944551f0, clkSource=0x7f6dba0678e0) at src/mongo/transport/asio/asio_networking_baton.cpp:210 #5 0x00007f6dc98d11a4 in mongo::transport::AsioNetworkingBaton::run_until (this=this@entry=0x7f6d944551f0, clkSource=0x7f6dba0678e0, deadline=...) at src/mongo/transport/asio/asio_networking_baton.cpp:173 #6 0x00007f6dc22aec9a in mongo::Waitable::wait_until<mongo::BasicLockableAdapter>(mongo::Waitable*, mongo::ClockSource*, mongo::stdx::condition_variable&, mongo::BasicLockableAdapter&, std::chrono::time_point<std::chrono::_V2::system_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> > > const&)::{lambda()#1}::operator()() const (__closure=<optimized out>) at src/mongo/util/waitable.h:94 #7 mongo::stdx::condition_variable::_runWithNotifyable<mongo::Waitable::wait_until<mongo::BasicLockableAdapter>(mongo::Waitable*, mongo::ClockSource*, mongo::stdx::condition_variable&, mongo::BasicLockableAdapter&, std::chrono::time_point<std::chrono::_V2::system_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> > > const&)::{lambda()#1}>(mongo::Notifyable&, mongo::BasicLockableAdapter&&) (cb=..., notifyable=..., this=0x7f6d922cf158) at src/mongo/stdx/condition_variable.h:162 #8 mongo::Waitable::wait_until<mongo::BasicLockableAdapter> (timeout_time=..., lk=<synthetic pointer>..., cv=..., clkSource=<optimized out>, waitable=0x7f6d944551f0) at src/mongo/util/waitable.h:92 #9 mongo::ClockSource::waitForConditionUntil (this=<optimized out>, cv=..., bla=..., deadline=..., deadline@entry=..., waitable=0x7f6d944551f0) at src/mongo/util/clock_source.cpp:48 #10 0x00007f6dc2334fc0 in operator() (__closure=<optimized out>) at src/mongo/util/lockable_adapter.h:50 #11 mongo::OperationContext::waitForConditionOrInterruptNoAssertUntil (this=0x7f6db1e817a0, cv=..., m=..., deadline=...) at src/mongo/db/operation_context.cpp:326 #12 0x00007f6dc37bf3d6 in operator() (__closure=__closure@entry=0x7f6d9b957080, deadline=..., deadline@entry=..., speed=speed@entry=mongo::Interruptible::WakeSpeed::kFast) at src/mongo/util/interruptible.h:307 #13 0x00007f6dc37c26b5 in operator() (speed=mongo::Interruptible::WakeSpeed::kFast, deadline=..., __closure=<synthetic pointer>) at src/mongo/util/interruptible.h:340 #14 mongo::Interruptible::waitForConditionOrInterruptUntil<std::unique_lock<mongo::latch_detail::Latch>, mongo::CondVarLockGrantNotification::wait(mongo::OperationContext*, mongo::Milliseconds)::<lambda()> > (pred=..., finalDeadline=..., m=..., cv=..., this=0x7f6db1e817a0) at src/mongo/util/interruptible.h:352 #15 mongo::Interruptible::waitForConditionOrInterruptFor<std::unique_lock<mongo::latch_detail::Latch>, mongo::CondVarLockGrantNotification::wait(mongo::OperationContext*, mongo::Milliseconds)::<lambda()> > (pred=..., ms=..., m=..., cv=..., this=0x7f6db1e817a0) at src/mongo/util/interruptible.h:392 #16 mongo::CondVarLockGrantNotification::wait (this=<optimized out>, opCtx=0x7f6db1e817a0, timeout=...) at src/mongo/db/concurrency/lock_state.cpp:276 #17 0x00007f6dc37c57fa in mongo::LockerImpl::_lockComplete(mongo::OperationContext*, mongo::ResourceId, mongo::LockMode, mongo::Date_t, std::function<void ()> const&) (this=0x7f6d922cf020, opCtx=0x7f6db1e817a0, resId=..., mode=mongo::MODE_IS, deadline=..., onTimeout=...) at src/mongo/db/concurrency/lock_state.cpp:1009 #18 0x00007f6dc37c5fb9 in mongo::LockerImpl::lock (this=0x7f6d922cf020, opCtx=0x7f6db1e817a0, resId=..., mode=mongo::MODE_IS, deadline=...) at src/mongo/db/concurrency/lock_state.cpp:572 #19 0x00007f6dc37ae053 in mongo::Lock::ResourceLock::_lock (deadline=..., mode=mongo::MODE_IS, this=0x7f6d9b9575e8) at src/mongo/db/concurrency/d_concurrency.cpp:256 #20 mongo::Lock::ResourceLock::ResourceLock (deadline=..., mode=mongo::MODE_IS, rid=..., opCtx=0x7f6db1e817a0, this=0x7f6d9b9575e8) at src/mongo/db/concurrency/d_concurrency.h:56 #21 boost::optional_detail::optional_base<mongo::Lock::ResourceLock>::construct<mongo::OperationContext*&, mongo::ResourceId const&, mongo::LockMode, mongo::Date_t&> (this=0x7f6d9b9575e0) at src/third_party/boost/boost/optional/optional.hpp:428 #22 boost::optional_detail::optional_base<mongo::Lock::ResourceLock>::emplace_assign<mongo::OperationContext*&, mongo::ResourceId const&, mongo::LockMode, mongo::Date_t&> (this=0x7f6d9b9575e0) at src/third_party/boost/boost/optional/optional.hpp:436 #23 boost::optional<mongo::Lock::ResourceLock>::emplace<mongo::OperationContext*&, mongo::ResourceId const&, mongo::LockMode, mongo::Date_t&> (this=0x7f6d9b9575e0) at src/third_party/boost/boost/optional/optional.hpp:1169 #24 mongo::Lock::GlobalLock::GlobalLock (this=0x7f6d9b9575d0, opCtx=0x7f6db1e817a0, lockMode=mongo::MODE_IS, deadline=..., behavior=<optimized out>, options=...) at src/mongo/db/concurrency/d_concurrency.cpp:111 #25 0x00007f6dc12ed271 in mongo::Lock::GlobalLock::GlobalLock (lockMode=mongo::MODE_IS, opCtx=0x7f6db1e817a0, this=0x7f6d9b9575d0) at src/mongo/db/concurrency/d_concurrency.h:231 #26 mongo::WiredTigerKVEngine::beginNonBlockingBackup (this=0x7f6db9d0c020, opCtx=0x7f6db1e817a0, checkpointTimestamp=..., options=...) at src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp:1271 #27 0x00007f6dbede175c in non-virtual thunk to mongo::StorageEngineImpl::beginNonBlockingBackup(mongo::OperationContext*, boost::optional<mongo::Timestamp>, mongo::StorageEngine::BackupOptions const&) () at /opt/mongodbtoolchain/revisions/11316f1e7b36f08dcdd2ad0640af18f9287876f4/stow/gcc-v4.spX/include/c++/11.3.0/bits/unique_lock.h:192 #28 0x00007f6dbec527ca in mongo::BackupCursorService::openBackupCursor (this=0x7f6db9d02fe0, opCtx=0x7f6db1e817a0, options=...) at src/mongo/db/modules/enterprise/src/hot_backups/backup_cursor_service.cpp:121 #29 0x00007f6dc71f8365 in mongo::CommonMongodProcessInterface::openBackupCursor (this=<optimized out>, opCtx=0x7f6db1e817a0, options=...) at src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp:543 #30 0x00007f6dbec5ea2e in mongo::DocumentSourceBackupCursor::DocumentSourceBackupCursor (this=0x7f6db1779ca0, pExpCtx=..., options=...) at src/mongo/db/modules/enterprise/src/hot_backups/document_source_backup_cursor.cpp:32 #31 0x00007f6dbec5ed9f in mongo::DocumentSourceBackupCursor::createFromBson (spec=..., pExpCtx=0x7f6db1d4be20 = {...}) at src/mongo/db/modules/enterprise/src/hot_backups/document_source_backup_cursor.cpp:183 ...
This deadlock is only possible as of SERVER-46870
- is caused by
-
SERVER-46870 Generalize the WiredTigerRecordStore::OplogStones and move it out of WiredTigerRecordStore
- Closed
- is related to
-
SERVER-75435 Investigate scope of _oplogPinnedByBackupMutex in WiredTigerKVEngine::beginNonBlockingBackupCursor()
- Closed