See the call stacks below. The map/reduce (M/R) thread (Thread 6424) does not hold a DB lock, but it is stuck waiting on WiredTiger cache eviction. The problem is that the recovery unit has not been reset, and hence the thread still holds a snapshot.
The way to fix this is to add a ScopedTransaction, which should reset the recovery unit and release the snapshot.
Thread 6427 (Thread 0x7ffff7fcc700 (LWP 11443)): #0 pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:238 #1 0x00000000013188a2 in boost::condition_variable::timed_wait (this=0x160edddd0, m=..., wait_until=...) at src/third_party/boost/boost/thread/pthread/condition_variable.hpp:74 #2 0x0000000001318a4e in boost::condition_variable::timed_wait<boost::date_time::subsecond_duration<boost::posix_time::time_duration, 1000ll> > (this=0x160edddd0, m=..., wait_duration=...) at src/third_party/boost/boost/thread/pthread/condition_variable_fwd.hpp:72 #3 0x00000000013174d0 in mongo::CondVarLockGrantNotification::wait (this=0x160eddda0, timeoutMs=500) at src/mongo/db/concurrency/lock_state.cpp:189 #4 0x000000000131bdfa in mongo::LockerImpl<false>::lockComplete (this=0x160edd800, resId=..., timeoutMs=4294967295, checkDeadlock=false) at src/mongo/db/concurrency/lock_state.cpp:607 #5 0x000000000131ada2 in mongo::LockerImpl<false>::lockGlobalComplete (this=0x160edd800, timeoutMs=4294967295) at src/mongo/db/concurrency/lock_state.cpp:256 #6 0x000000000131acef in mongo::LockerImpl<false>::lockGlobal (this=0x160edd800, mode=mongo::MODE_IX, timeoutMs=4294967295) at src/mongo/db/concurrency/lock_state.cpp:232 #7 0x000000000130adc8 in mongo::Lock::DBLock::DBLock (this=0x731f440, lockState=0x160edd800, db=..., mode=mongo::MODE_IX) at src/mongo/db/concurrency/d_concurrency.cpp:145 #8 0x00000000013013ae in mongo::WriteBatchExecutor::ExecInsertsState::_lockAndCheckImpl (this=0x7ffff7fc9e50, result=0x7ffff7fc89c0, intentLock=true) at src/mongo/db/commands/write_commands/batch_executor.cpp:974 #9 0x0000000001301bea in mongo::WriteBatchExecutor::ExecInsertsState::lockAndCheck (this=0x7ffff7fc9e50, result=0x7ffff7fc89c0) at src/mongo/db/commands/write_commands/batch_executor.cpp:1029 #10 0x0000000001301d89 in mongo::insertOne (state=0x7ffff7fc9e50, result=0x7ffff7fc89c0) at src/mongo/db/commands/write_commands/batch_executor.cpp:1057 #11 
0x0000000001302013 in mongo::WriteBatchExecutor::execOneInsert (this=0x7ffff7fca260, state=0x7ffff7fc9e50, error=0x7ffff7fc9e28) at src/mongo/db/commands/write_commands/batch_executor.cpp:1086 #12 0x0000000001300a02 in mongo::WriteBatchExecutor::execInserts (this=0x7ffff7fca260, request=..., errors=0x7ffff7fca0e0) at src/mongo/db/commands/write_commands/batch_executor.cpp:877 #13 0x0000000001300475 in mongo::WriteBatchExecutor::bulkExecute (this=0x7ffff7fca260, request=..., upsertedIds=0x7ffff7fca100, errors=0x7ffff7fca0e0) at src/mongo/db/commands/write_commands/batch_executor.cpp:759 #14 0x00000000012fe479 in mongo::WriteBatchExecutor::executeBatch (this=0x7ffff7fca260, request=..., response=0x7ffff7fca2a0) at src/mongo/db/commands/write_commands/batch_executor.cpp:268 #15 0x0000000001308266 in mongo::WriteCmd::run (this=0x30bd2c0, txn=0x7ffff7fcb7e0, dbName=..., cmdObj=..., options=0, errMsg=..., result=..., fromRepl=false) at src/mongo/db/commands/write_commands/write_commands.cpp:144 #16 0x00000000013237d5 in mongo::_execCommand (txn=0x7ffff7fcb7e0, c=0x30bd2c0, dbname=..., cmdObj=..., queryOptions=0, errmsg=..., result=..., fromRepl=false) at src/mongo/db/dbcommands.cpp:1263 #17 0x0000000001324752 in mongo::Command::execCommand (txn=0x7ffff7fcb7e0, c=0x30bd2c0, queryOptions=0, cmdns=0xe89c0814 "test.$cmd", cmdObj=..., result=..., fromRepl=false) at src/mongo/db/dbcommands.cpp:1479 #18 0x0000000001325034 in mongo::_runCommands (txn=0x7ffff7fcb7e0, ns=0xe89c0814 "test.$cmd", _cmdobj=..., b=..., anObjBuilder=..., fromRepl=false, queryOptions=0) at src/mongo/db/dbcommands.cpp:1554 #19 0x00000000015271cc in mongo::runCommands (txn=0x7ffff7fcb7e0, ns=0xe89c0814 "test.$cmd", jsobj=..., curop=..., b=..., anObjBuilder=..., fromRepl=false, queryOptions=0) at src/mongo/db/query/find.cpp:135 #20 0x0000000001528f58 in mongo::runQuery (txn=0x7ffff7fcb7e0, m=..., q=..., curop=..., result=..., fromDBDirectClient=false) at src/mongo/db/query/find.cpp:569 #21 
0x000000000142eff3 in mongo::receivedQuery (txn=0x7ffff7fcb7e0, c=..., dbresponse=..., m=..., fromDBDirectClient=false) at src/mongo/db/instance.cpp:225 #22 0x0000000001430105 in mongo::assembleResponse (txn=0x7ffff7fcb7e0, m=..., dbresponse=..., remote=..., fromDBDirectClient=false) at src/mongo/db/instance.cpp:395 #23 0x0000000001132d2a in mongo::MyMessageHandler::process (this=0x307e190, m=..., port=0x3c0fde780, le=0x1f7175630) at src/mongo/db/db.cpp:197 #24 0x00000000018c5253 in mongo::PortMessageServer::handleIncomingMsg (arg=0x3c0fde780) at src/mongo/util/net/message_server_port.cpp:225 #25 0x00007ffff7bc4182 in start_thread (arg=0x7ffff7fcc700) at pthread_create.c:312 #26 0x00007ffff6cc4efd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 Thread 6424 (Thread 0x7ffff15f3700 (LWP 11422)): #0 pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:238 #1 0x0000000001eba83e in __wt_cond_wait (session=0x352b440, cond=0x30bd560, usecs=100000) at src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c:78 78 ret = pthread_cond_timedwait(&cond->cond, &cond->mtx, &ts); (gdb) p *cond $20 = {name = 0x212ee56 "eviction waiters", mtx = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 12 times>, "\001", '\000' <repeats 26 times>, __align = 0}, cond = {__data = {__lock = 0, __futex = 186927, __total_seq = 93464, __wakeup_seq = 93463, __woken_seq = 93463, __mutex = 0x30bd568, __nwaiters = 2, __broadcast_seq = 66195}, __size = "\000\000\000\000/\332\002\000\030m\001\000\000\000\000\000\027m\001\000\000\000\000\000\027m\001\000\000\000\000\000h\325\v\003\000\000\000\000\002\000\000\000\223\002\001", __align = 802845351739392}, waiters = 1} #2 0x0000000001e3ce39 in __wt_cache_full_check (session=0x352b440) at src/third_party/wiredtiger/src/include/cache.i:167 #3 0x0000000001e3cf2c in __cursor_enter 
(session=0x352b440) at src/third_party/wiredtiger/src/include/cursor.i:63 #4 0x0000000001e3cfbf in __curfile_enter (cbt=0x9ef5e840) at src/third_party/wiredtiger/src/include/cursor.i:98 #5 0x0000000001e3d134 in __cursor_func_init (cbt=0x9ef5e840, reenter=1) at src/third_party/wiredtiger/src/include/cursor.i:185 #6 0x0000000001e3d87f in __wt_btcur_search (cbt=0x9ef5e840) at src/third_party/wiredtiger/src/btree/bt_cursor.c:308 #7 0x0000000001e7dfe9 in __curfile_search (cursor=0x9ef5e840) at src/third_party/wiredtiger/src/cursor/cur_file.c:167 #8 0x0000000001724c95 in mongo::WiredTigerRecordStore::updateRecord (this=0x3524000, txn=0x7ffff15f27e0, loc=..., data=0x11f7f5b04 "Q\001", len=337, enforceQuota=false, notifier=0x0) at src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp:552 #9 0x0000000001697568 in mongo::KVCatalog::renameCollection (this=0x3398500, opCtx=0x7ffff15f27e0, fromNS=..., toNS=..., stayTemp=false) at src/mongo/db/storage/kv/kv_catalog.cpp:353 #10 0x000000000169dfec in mongo::KVDatabaseCatalogEntry::renameCollection (this=0xae7686c0, txn=0x7ffff15f27e0, fromNS=..., toNS=..., stayTemp=false) at src/mongo/db/storage/kv/kv_database_catalog_entry.cpp:276 #11 0x0000000001261181 in mongo::Database::renameCollection (this=0x137df5c00, txn=0x7ffff15f27e0, fromNS=..., toNS=..., stayTemp=false) at src/mongo/db/catalog/database.cpp:466 #12 0x00000000012d8a1f in mongo::CmdRenameCollection::run (this=0x283cb60 <mongo::cmdrenamecollection>, txn=0x7ffff15f27e0, dbname=..., cmdObj=..., errmsg=..., result=..., fromRepl=false) at src/mongo/db/commands/rename_collection.cpp:232 #13 0x00000000013237d5 in mongo::_execCommand (txn=0x7ffff15f27e0, c=0x283cb60 <mongo::cmdrenamecollection>, dbname=..., cmdObj=..., queryOptions=0, errmsg=..., result=..., fromRepl=false) at src/mongo/db/dbcommands.cpp:1263 #14 0x0000000001324752 in mongo::Command::execCommand (txn=0x7ffff15f27e0, c=0x283cb60 <mongo::cmdrenamecollection>, queryOptions=0, cmdns=0x16085f04 "admin.$cmd", 
cmdObj=..., result=..., fromRepl=false) at src/mongo/db/dbcommands.cpp:1479 #15 0x0000000001325034 in mongo::_runCommands (txn=0x7ffff15f27e0, ns=0x16085f04 "admin.$cmd", _cmdobj=..., b=..., anObjBuilder=..., fromRepl=false, queryOptions=0) at src/mongo/db/dbcommands.cpp:1554 #16 0x00000000015271cc in mongo::runCommands (txn=0x7ffff15f27e0, ns=0x16085f04 "admin.$cmd", jsobj=..., curop=..., b=..., anObjBuilder=..., fromRepl=false, queryOptions=0) at src/mongo/db/query/find.cpp:135 #17 0x0000000001528f58 in mongo::runQuery (txn=0x7ffff15f27e0, m=..., q=..., curop=..., result=..., fromDBDirectClient=true) at src/mongo/db/query/find.cpp:569 #18 0x000000000142eff3 in mongo::receivedQuery (txn=0x7ffff15f27e0, c=..., dbresponse=..., m=..., fromDBDirectClient=true) at src/mongo/db/instance.cpp:225 #19 0x0000000001430105 in mongo::assembleResponse (txn=0x7ffff15f27e0, m=..., dbresponse=..., remote=..., fromDBDirectClient=true) at src/mongo/db/instance.cpp:395 #20 0x0000000001331d5d in mongo::DBDirectClient::call (this=0x7ffff15f11f8, toSend=..., response=..., assertOk=false, actualServer=0x22d67fb58) at src/mongo/db/dbdirectclient.cpp:125 #21 0x00000000011a0d15 in mongo::DBClientCursor::init (this=0x22d67fb30) at src/mongo/client/dbclientcursor.cpp:84 #22 0x0000000001187a34 in mongo::DBClientBase::query (this=0x7ffff15f11f8, ns=..., query=..., nToReturn=1, nToSkip=0, fieldsToReturn=0x0, queryOptions=0, batchSize=0) at src/mongo/client/dbclient.cpp:1149 #23 0x0000000001331f7c in mongo::DBDirectClient::query (this=0x7ffff15f11f8, ns=..., query=..., nToReturn=1, nToSkip=0, fieldsToReturn=0x0, queryOptions=0, batchSize=0) at src/mongo/db/dbdirectclient.cpp:159 #24 0x0000000001185f80 in mongo::DBClientInterface::findN (this=0x7ffff15f11f8, out=..., ns=..., query=..., nToReturn=1, nToSkip=0, fieldsToReturn=0x0, queryOptions=0) at src/mongo/client/dbclient.cpp:990 #25 0x00000000011863b1 in mongo::DBClientInterface::findOne (this=0x7ffff15f11f8, ns=..., query=..., 
fieldsToReturn=0x0, queryOptions=0) at src/mongo/client/dbclient.cpp:1009 #26 0x00000000011813d6 in mongo::DBClientWithCommands::runCommand (this=0x7ffff15f11f8, dbname=..., cmd=..., info=..., options=0) at src/mongo/client/dbclient.cpp:451 #27 0x00000000012bdc1d in mongo::mr::State::postProcessCollectionNonAtomic (this=0x7ffff15f11f0, txn=0x7ffff15f27e0, op=0x1255d8800, pm=...) at src/mongo/db/commands/mr.cpp:595 #28 0x00000000012bd7e1 in mongo::mr::State::postProcessCollection (this=0x7ffff15f11f0, txn=0x7ffff15f27e0, op=0x1255d8800, pm=...) at src/mongo/db/commands/mr.cpp:548 #29 0x00000000012c3c3e in mongo::mr::MapReduceCommand::run (this=0x283c680 <mongo::mr::mapReduceCommand>, txn=0x7ffff15f27e0, dbname=..., cmd=..., errmsg=..., result=..., fromRepl=false) at src/mongo/db/commands/mr.cpp:1468 #30 0x00000000013237d5 in mongo::_execCommand (txn=0x7ffff15f27e0, c=0x283c680 <mongo::mr::mapReduceCommand>, dbname=..., cmdObj=..., queryOptions=0, errmsg=..., result=..., fromRepl=false) at src/mongo/db/dbcommands.cpp:1263 #31 0x0000000001324752 in mongo::Command::execCommand (txn=0x7ffff15f27e0, c=0x283c680 <mongo::mr::mapReduceCommand>, queryOptions=0, cmdns=0xd9c25814 "aggdb.$cmd", cmdObj=..., result=..., fromRepl=false) at src/mongo/db/dbcommands.cpp:1479 #32 0x0000000001325034 in mongo::_runCommands (txn=0x7ffff15f27e0, ns=0xd9c25814 "aggdb.$cmd", _cmdobj=..., b=..., anObjBuilder=..., fromRepl=false, queryOptions=0) at src/mongo/db/dbcommands.cpp:1554 #33 0x00000000015271cc in mongo::runCommands (txn=0x7ffff15f27e0, ns=0xd9c25814 "aggdb.$cmd", jsobj=..., curop=..., b=..., anObjBuilder=..., fromRepl=false, queryOptions=0) at src/mongo/db/query/find.cpp:135 #34 0x0000000001528f58 in mongo::runQuery (txn=0x7ffff15f27e0, m=..., q=..., curop=..., result=..., fromDBDirectClient=false) at src/mongo/db/query/find.cpp:569 #35 0x000000000142eff3 in mongo::receivedQuery (txn=0x7ffff15f27e0, c=..., dbresponse=..., m=..., fromDBDirectClient=false) at 
src/mongo/db/instance.cpp:225 #36 0x0000000001430105 in mongo::assembleResponse (txn=0x7ffff15f27e0, m=..., dbresponse=..., remote=..., fromDBDirectClient=false) at src/mongo/db/instance.cpp:395 #37 0x0000000001132d2a in mongo::MyMessageHandler::process (this=0x307e190, m=..., port=0x3c0fdfe50, le=0x110a42760) at src/mongo/db/db.cpp:197 #38 0x00000000018c5253 in mongo::PortMessageServer::handleIncomingMsg (arg=0x3c0fdfe50) at src/mongo/util/net/message_server_port.cpp:225 #39 0x00007ffff7bc4182 in start_thread (arg=0x7ffff15f3700) at pthread_create.c:312 #40 0x00007ffff6cc4efd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 Thread 6 (Thread 0x7ffff48fc700 (LWP 8406)): #0 pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:238 #1 0x0000000001eba83e in __wt_cond_wait (session=0x3528b00, cond=0x30bd800, usecs=60000000) at src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c:78 78 ret = pthread_cond_timedwait(&cond->cond, &cond->mtx, &ts); (gdb) p *cond $21 = {name = 0x212f235 "checkpoint server", mtx = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 12 times>, "\001", '\000' <repeats 26 times>, __align = 0}, cond = {__data = {__lock = 0, __futex = 2103, __total_seq = 1052, __wakeup_seq = 1051, __woken_seq = 1051, __mutex = 0x30bd808, __nwaiters = 2, __broadcast_seq = 0}, __size = "\000\000\000\000\067\b\000\000\034\004\000\000\000\000\000\000\033\004\000\000\000\000\000\000\033\004\000\000\000\000\000\000\b\330\v\003\000\000\000\000\002\000\000\000\000\000\000", __align = 9032316223488}, waiters = 1} #2 0x0000000001e6ec33 in __ckpt_server (arg=0x3528b00) at src/third_party/wiredtiger/src/conn/conn_ckpt.c:99 #3 0x00007ffff7bc4182 in start_thread (arg=0x7ffff48fc700) at pthread_create.c:312 #4 0x00007ffff6cc4efd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 Thread 5 
(Thread 0x7ffff50fd700 (LWP 8405)): #0 pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:238 #1 0x0000000001eba83e in __wt_cond_wait (session=0x3528840, cond=0x30bd720, usecs=1000000) at src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c:78 78 ret = pthread_cond_timedwait(&cond->cond, &cond->mtx, &ts); (gdb) p *cond $22 = {name = 0x212f563 "log server", mtx = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 12 times>, "\001", '\000' <repeats 26 times>, __align = 0}, cond = {__data = {__lock = 0, __futex = 152955, __total_seq = 76478, __wakeup_seq = 76477, __woken_seq = 76477, __mutex = 0x30bd728, __nwaiters = 2, __broadcast_seq = 1052}, __size = "\000\000\000\000{U\002\000\276*\001\000\000\000\000\000\275*\001\000\000\000\000\000\275*\001\000\000\000\000\000(\327\v\003\000\000\000\000\002\000\000\000\034\004\000", __align = 656936722759680}, waiters = 1} #2 0x0000000001e7213f in __log_server (arg=0x3528840) at src/third_party/wiredtiger/src/conn/conn_log.c:319 #3 0x00007ffff7bc4182 in start_thread (arg=0x7ffff50fd700) at pthread_create.c:312 #4 0x00007ffff6cc4efd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 Thread 4 (Thread 0x7ffff58fe700 (LWP 8404)): #0 pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:238 #1 0x0000000001eba83e in __wt_cond_wait (session=0x3528580, cond=0x30bd5d0, usecs=10000000) at src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c:78 78 ret = pthread_cond_timedwait(&cond->cond, &cond->mtx, &ts); (gdb) p *cond $23 = {name = 0x212f78f "handle sweep server", mtx = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 12 times>, "\001", '\000' <repeats 26 times>, __align = 0}, cond = 
{__data = {__lock = 0, __futex = 15721, __total_seq = 7861, __wakeup_seq = 7860, __woken_seq = 7860, __mutex = 0x30bd5d8, __nwaiters = 2, __broadcast_seq = 0}, __size = "\000\000\000\000i=\000\000\265\036\000\000\000\000\000\000\264\036\000\000\000\000\000\000\264\036\000\000\000\000\000\000\330\325\v\003\000\000\000\000\002\000\000\000\000\000\000", __align = 67521180860416}, waiters = 1} #2 0x0000000001e7519e in __sweep_server (arg=0x3528580) at src/third_party/wiredtiger/src/conn/conn_sweep.c:123 #3 0x00007ffff7bc4182 in start_thread (arg=0x7ffff58fe700) at pthread_create.c:312 #4 0x00007ffff6cc4efd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 Thread 3 (Thread 0x7ffff60ff700 (LWP 8403)): #0 pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:238 #1 0x0000000001eba83e in __wt_cond_wait (session=0x35282c0, cond=0x30bd4f0, usecs=100000) at src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c:78 78 ret = pthread_cond_timedwait(&cond->cond, &cond->mtx, &ts); (gdb) p *cond $24 = {name = 0x212ee40 "cache eviction server", mtx = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 12 times>, "\001", '\000' <repeats 26 times>, __align = 0}, cond = {__data = {__lock = 0, __futex = 2349351, __total_seq = 1174676, __wakeup_seq = 1174675, __woken_seq = 1174675, __mutex = 0x30bd4f8, __nwaiters = 2, __broadcast_seq = 459392}, __size = "\000\000\000\000'\331#\000\224\354\021\000\000\000\000\000\223\354\021\000\000\000\000\000\223\354\021\000\000\000\000\000\370\324\v\003\000\000\000\000\002\000\000\000\200\002\a", __align = 10090385711824896}, waiters = 1} #2 0x0000000001e99061 in __evict_server (arg=0x35282c0) at src/third_party/wiredtiger/src/evict/evict_lru.c:196 #3 0x00007ffff7bc4182 in start_thread (arg=0x7ffff60ff700) at pthread_create.c:312 #4 0x00007ffff6cc4efd in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:111