Uploaded image for project: 'WiredTiger'
  1. WiredTiger
  2. WT-6231

Queries hang for Linkbench with a smaller WT cache

    • Type: Bug
    • Resolution: Cannot Reproduce
    • Priority: Major - P3
    • None
    • Affects Version/s: 4.4.0-rc5
    • Component/s: None
    • None
    • 8

      Using rc5, all queries hang during Linkbench with maxid=10m and WT cacheSizeGB=4. The database fits in the OS page cache but not in the WT cache. This is intermittent – it didn't happen for 1 run with cacheSizeGB=2, but it did happen for 1 run with cacheSizeGB=4.

      By "hang" I mean that, per db.currentOp(), all operations have been running for ~140,000 seconds.

      The next step is to try this with rc6.

      I will attach ftdc, db.currentOp() output, mongod.log and stack traces from PMP. Here is a subset of the stack traces:

            5 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait_signal,__wt_cache_eviction_worker,__session_begin_transaction,mongo::WiredTigerBeginTxnBlock::WiredTigerBeginTxnBlock(__wt_session*,,mongo::WiredTigerRecoveryUnit::_txnOpen(),mongo::WiredTigerRecoveryUnit::getSession(),mongo::WiredTigerCursor::WiredTigerCursor(std::__cxx11::basic_string<char,,mongo::WiredTigerIndexUnique::newCursor(mongo::OperationContext*,,mongo::AbstractIndexAccessMethod::findSingle(mongo::OperationContext*,,mongo::IDHackStage::doWork(unsigned,mongo::PlanStage::work(unsigned,mongo::PlanExecutorImpl::_getNextImpl(mongo::Snapshotted<mongo::Document>*,,mongo::PlanExecutorImpl::getNext(mongo::Document*,,mongo::(anonymous,mongo::CommandHelpers::runCommandInvocation(mongo::OperationContext*,,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::ServiceEntryPointCommon::handleRequest(mongo::OperationContext*,,mongo::ServiceEntryPointMongod::handleRequest(mongo::OperationContext*,,mongo::ServiceStateMachine::_processMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,mongo::transport::ServiceExecutorSynchronous::schedule(std::function<void,mongo::ServiceStateMachine::_scheduleNextWithGuard(mongo::ServiceStateMachine::ThreadGuard,,mongo::ServiceStateMachine::_sourceCallback(mongo::Status),auto,mongo::ServiceStateMachine::_sourceMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,std::_Function_handler<void,std::_Function_handler<void,mongo::(anonymous,start_thread,clone
            3 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait_signal,__wt_cache_eviction_worker,__session_begin_transaction,mongo::WiredTigerBeginTxnBlock::WiredTigerBeginTxnBlock(__wt_session*,,mongo::WiredTigerRecoveryUnit::_txnOpen(),mongo::WiredTigerRecoveryUnit::getSession(),mongo::(anonymous,mongo::RequiresIndexStage::doRestoreStateRequiresCollection(),mongo::RequiresCollectionStageBase<mongo::Collection,mongo::PlanStage::restoreState(),mongo::PlanStage::restoreState(),mongo::PlanExecutorImpl::restoreStateWithoutRetrying(),mongo::PlanYieldPolicy::yieldOrInterrupt(std::function<void,mongo::PlanExecutorImpl::_getNextImpl(mongo::Snapshotted<mongo::Document>*,,mongo::PlanExecutorImpl::getNext(mongo::Document*,,mongo::(anonymous,mongo::(anonymous,mongo::CommandHelpers::runCommandInvocation(mongo::OperationContext*,,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::ServiceEntryPointCommon::handleRequest(mongo::OperationContext*,,mongo::ServiceEntryPointMongod::handleRequest(mongo::OperationContext*,,mongo::ServiceStateMachine::_processMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,mongo::transport::ServiceExecutorSynchronous::schedule(std::function<void,mongo::ServiceStateMachine::_scheduleNextWithGuard(mongo::ServiceStateMachine::ThreadGuard,,mongo::ServiceStateMachine::_sourceCallback(mongo::Status),auto,mongo::ServiceStateMachine::_sourceMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,std::_Function_handler<void,std::_Function_handler<void,mongo::(anonymous,start_thread,clone
            2 pthread_cond_wait@@GLIBC_2.3.2,std::condition_variable::wait(std::unique_lock<std::mutex>&),mongo::Interruptible::NotInterruptible::waitForConditionOrInterruptNoAssertUntil(mongo::stdx::condition_variable&,,bool,mongo::WiredTigerKVEngine::waitForJournalFlush(mongo::OperationContext*),mongo::waitForWriteConcern(mongo::OperationContext*,,mongo::ServiceEntryPointMongod::Hooks::waitForWriteConcern(mongo::OperationContext*,,mongo::ServiceEntryPointMongod::Hooks::waitForWriteConcern(mongo::OperationContext*,,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::ServiceEntryPointCommon::handleRequest(mongo::OperationContext*,,mongo::ServiceEntryPointMongod::handleRequest(mongo::OperationContext*,,mongo::ServiceStateMachine::_processMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,mongo::transport::ServiceExecutorSynchronous::schedule(std::function<void,mongo::ServiceStateMachine::_scheduleNextWithGuard(mongo::ServiceStateMachine::ThreadGuard,,mongo::ServiceStateMachine::_sourceCallback(mongo::Status),auto,mongo::ServiceStateMachine::_sourceMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,std::_Function_handler<void,std::_Function_handler<void,mongo::(anonymous,start_thread,clone
            2 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait_signal,__wt_cache_eviction_worker,__wt_btcur_search_near,__curfile_search_near,int,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::AbstractIndexAccessMethod::findSingle(mongo::OperationContext*,,mongo::IDHackStage::doWork(unsigned,mongo::PlanStage::work(unsigned,mongo::UpdateStage::doWork(unsigned,mongo::UpsertStage::doWork(unsigned,mongo::PlanStage::work(unsigned,mongo::PlanExecutorImpl::_getNextImpl(mongo::Snapshotted<mongo::Document>*,,mongo::PlanExecutorImpl::getNext(mongo::Document*,,mongo::PlanExecutorImpl::executePlan(),mongo::performUpdates(mongo::OperationContext*,,mongo::(anonymous,mongo::(anonymous,mongo::CommandHelpers::runCommandInvocation(mongo::OperationContext*,,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::ServiceEntryPointCommon::handleRequest(mongo::OperationContext*,,mongo::ServiceEntryPointMongod::handleRequest(mongo::OperationContext*,,mongo::ServiceStateMachine::_processMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,mongo::transport::ServiceExecutorSynchronous::schedule(std::function<void,mongo::ServiceStateMachine::_scheduleNextWithGuard(mongo::ServiceStateMachine::ThreadGuard,,mongo::ServiceStateMachine::_sourceCallback(mongo::Status),auto,mongo::ServiceStateMachine::_sourceMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,std::_Function_handler<void,std::_Function_handler<void,mongo::(anonymous,start_thread,clone
            1 select,__wt_sleep,__wt_gen_drain,__wt_sync_file,__checkpoint_tree.constprop.11,__txn_checkpoint_wrapper,__wt_txn_checkpoint,__session_checkpoint.cold.49,mongo::WiredTigerKVEngine::WiredTigerCheckpointThread::run(),mongo::BackgroundJob::jobBody(),std::thread::_State_impl<std::thread::_Invoker<std::tuple<mongo::stdx::thread::thread<mongo::BackgroundJob::go()::{lambda()#2},,execute_native_thread_routine,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait_signal,__wt_cond_auto_wait_signal,__wt_cond_auto_wait,__wt_evict_thread_run,__thread_run,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait_signal,__wt_cond_auto_wait_signal,__wt_cond_auto_wait,__log_wrlsn_server,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait_signal,__wt_cache_eviction_worker,__wt_page_in_func,__wt_row_search,__wt_btcur_search,__curfile_search,int,mongo::WiredTigerRecordStoreCursorBase::seekExact(mongo::RecordId,mongo::WorkingSetCommon::fetch(mongo::OperationContext*,,mongo::IDHackStage::doWork(unsigned,mongo::PlanStage::work(unsigned,mongo::PlanExecutorImpl::_getNextImpl(mongo::Snapshotted<mongo::Document>*,,mongo::PlanExecutorImpl::getNext(mongo::Document*,,mongo::(anonymous,mongo::CommandHelpers::runCommandInvocation(mongo::OperationContext*,,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::ServiceEntryPointCommon::handleRequest(mongo::OperationContext*,,mongo::ServiceEntryPointMongod::handleRequest(mongo::OperationContext*,,mongo::ServiceStateMachine::_processMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,mongo::transport::ServiceExecutorSynchronous::schedule(std::function<void,mongo::ServiceStateMachine::_scheduleNextWithGuard(mongo::ServiceStateMachine::ThreadGuard,,mongo::ServiceStateMachine::_sourceCallback(mongo::Status),auto,mongo::ServiceStateMachine::_sourceMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,std::_Function_handler<void,std::_Function_handler<void,mongo::(anonymous,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait_signal,__wt_cache_eviction_worker,__wt_btcur_prev,__curfile_prev,mongo::WiredTigerRecordStoreCursorBase::next(),mongo::WiredTigerOplogManager::waitForAllEarlierOplogWritesToBeVisible(mongo::WiredTigerRecordStore,mongo::repl::StorageInterfaceImpl::waitForAllEarlierOplogWritesToBeVisible(mongo::OperationContext*,,mongo::repl::ReplicationCoordinatorImpl::_waitUntilOpTime(mongo::OperationContext*,,mongo::repl::ReplicationCoordinatorImpl::_waitUntilClusterTimeForRead(mongo::OperationContext*,,mongo::repl::ReplicationCoordinatorImpl::waitUntilOpTimeForReadUntil(mongo::OperationContext*,,mongo::repl::ReplicationCoordinatorImpl::waitUntilOpTimeForRead(mongo::OperationContext*,,mongo::(anonymous,mongo::waitForReadConcern(mongo::OperationContext*,,mongo::ServiceEntryPointMongod::Hooks::waitForReadConcern(mongo::OperationContext*,,mongo::(anonymous,mongo::(anonymous,mongo::ServiceEntryPointCommon::handleRequest(mongo::OperationContext*,,mongo::ServiceEntryPointMongod::handleRequest(mongo::OperationContext*,,mongo::ServiceStateMachine::_processMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,mongo::transport::ServiceExecutorSynchronous::schedule(std::function<void,mongo::ServiceStateMachine::_scheduleNextWithGuard(mongo::ServiceStateMachine::ThreadGuard,,mongo::ServiceStateMachine::_sourceCallback(mongo::Status),auto,mongo::ServiceStateMachine::_sourceMessage(mongo::ServiceStateMachine::ThreadGuard),mongo::ServiceStateMachine::_runNextInGuard(mongo::ServiceStateMachine::ThreadGuard),std::_Function_handler<void,std::_Function_handler<void,std::_Function_handler<void,mongo::(anonymous,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait_signal,__wt_cache_eviction_worker,__session_begin_transaction,mongo::WiredTigerBeginTxnBlock::WiredTigerBeginTxnBlock(__wt_session*,,mongo::WiredTigerRecoveryUnit::_txnOpen(),mongo::WiredTigerRecoveryUnit::getSession(),mongo::repl::LocalOplogInfo::getNextOpTimes(mongo::OperationContext*,,mongo::repl::logOp(mongo::OperationContext*,,mongo::(anonymous,mongo::OpObserverImpl::onInternalOpMessage(mongo::OperationContext*,,mongo::OpObserverRegistry::onInternalOpMessage(mongo::OperationContext*,,mongo::repl::NoopWriter::_writeNoop(mongo::OperationContext*)::{lambda()#5}::operator()(),mongo::repl::NoopWriter::_writeNoop(mongo::OperationContext*),mongo::repl::NoopWriter::PeriodicNoopRunner::run(mongo::Duration<std::ratio<1l,,std::thread::_State_impl<std::thread::_Invoker<std::tuple<mongo::stdx::thread::thread<mongo::repl::NoopWriter::PeriodicNoopRunner::PeriodicNoopRunner(mongo::Duration<std::ratio<1l,,execute_native_thread_routine,start_thread,clone
      

            Assignee:
            keith.bostic@mongodb.com Keith Bostic (Inactive)
            Reporter:
            mark.callaghan@mongodb.com Mark Callaghan (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            7 Start watching this issue

              Created:
              Updated:
              Resolved: