Uploaded image for project: 'WiredTiger'
  1. WiredTiger
  2. WT-1605

underflow cache stat and low throughput

    XMLWordPrintable

    Details

    • Type: Task
    • Status: Closed
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: WT2.5.1
    • Component/s: None
    • Labels:

      Description

      This references https://jira.mongodb.org/browse/SERVER-17067 and I'm able to reproduce it with the current develop, changeset 1a29bbba. See that ticket for the testcase script.

      Symptom is that throughput drops, and %dirty and %used in mongostat values go "very large" (negative) and stay that way.

      insert query update delete getmore command % dirty % used flushes  vsize    res qr|qw ar|aw netIn netOut conn     set repl     time
      [... about 15-20 seconds running well...]
          *0    *0  22357     *0       0   224|0     0.0    0.0       0 892.0M 226.0M   0|0  0|79    3m    48k  102 replset  PRI 16:52:57
          *0    *0  26777     *0       0   269|0     0.1    0.1       0 892.0M 226.0M   0|0  0|76    4m    55k  102 replset  PRI 16:52:58
          *0    *0  21569     *0       0   218|0     0.0    0.0       0 893.0M 226.0M   0|0  0|83    3m    47k  102 replset  PRI 16:52:59
          *0    *0  26320     *0       0   267|0     0.1    0.1       0 894.0M 228.0M   0|0  0|97    4m    54k  102 replset  PRI 16:53:00
      insert query update delete getmore command % dirty % used flushes  vsize    res qr|qw ar|aw netIn netOut conn     set repl     time
          *0    *0  24435     *0       0   244|0     0.1    0.1       0 901.0M 234.0M   0|0  0|77    3m    51k  102 replset  PRI 16:53:01
          *0    *0  20109     *0       0   198|0 26030104824.2 26030104824.2       0 934.0M 246.0M   0|0 0|100    3m    44k  102 replset  PRI 16:53:02
          *0    *0    832     *0       0    11|0 26030104824.2 26030104824.2       0 934.0M 246.0M   0|0 0|100  116k    18k  102 replset  PRI 16:53:03
          *0    *0    899     *0       0    12|0 26030104824.2 26030104824.2       0 934.0M 246.0M   0|0 0|100  125k    18k  102 replset  PRI 16:53:04
          *0    *0    885     *0       0    10|0 26030104824.2 26030104824.2       0 934.0M 246.0M   0|0 0|100  123k    17k  102 replset  PRI 16:53:05
          *0    *0    942     *0       0    10|0 26030104824.2 26030104824.2       0 934.0M 246.0M   0|0 0|100  131k    17k  102 replset  PRI 16:53:06
          *0    *0    961     *0       0     9|0 26030104824.2 26030104824.2       0 934.0M 246.0M   0|0 0|100  133k    17k  102 replset  PRI 16:53:07
          *0    *0   2398     *0       0    23|0 26030104824.2 26030104824.2       1 934.0M 246.0M   0|0  0|75  333k    19k  102 replset  PRI 16:53:08
          *0    *0   4961     *0       0    53|0 26030104824.2 26030104824.2       0 934.0M 246.0M   0|0  0|99  688k    24k  102 replset  PRI 16:53:09
          *0    *0    667     *0       0    10|0 26030104824.2 26030104824.2       0 934.0M 246.0M   0|0 0|100   93k    17k  102 replset  PRI 16:53:10
      

      I was able to complete running pmp while this was going on:

           60 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait,__wt_cache_wait,__wt_cache_full_check,__cursor_enter,__curfile_enter,__cursor_func_init,__wt_btcur_search_near,__curfile_search_near,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::BtreeBasedAccessMethod::findSingle,mongo::IDHackStage::work,mongo::UpdateStage::work,mongo::PlanExecutor::getNext,mongo::PlanExecutor::executePlan,mongo::receivedUpdate,mongo::assembleResponse,mongo::MyMessageHandler::process,mongo::PortMessageServer::handleIncomingMsg,start_thread,clone
           33 pthread_cond_wait@@GLIBC_2.3.2,wait<boost::unique_lock<boost::recursive_mutex>,take,mongo::threadpool::Worker::loop,boost::(anonymous,start_thread,clone
           24 __lll_lock_wait,_L_cond_lock_817,__pthread_mutex_cond_lock,pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait,__wt_cache_wait,__wt_cache_full_check,__cursor_enter,__curfile_enter,__cursor_func_init,__wt_btcur_search_near,__curfile_search_near,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::BtreeBasedAccessMethod::findSingle,mongo::IDHackStage::work,mongo::UpdateStage::work,mongo::PlanExecutor::getNext,mongo::PlanExecutor::executePlan,mongo::receivedUpdate,mongo::assembleResponse,mongo::MyMessageHandler::process,mongo::PortMessageServer::handleIncomingMsg,start_thread,clone
           11 __lll_lock_wait,_L_lock_816,pthread_mutex_lock,__wt_cond_wait,__wt_cache_wait,__wt_cache_full_check,__cursor_enter,__curfile_enter,__cursor_func_init,__wt_btcur_search_near,__curfile_search_near,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::BtreeBasedAccessMethod::findSingle,mongo::IDHackStage::work,mongo::UpdateStage::work,mongo::PlanExecutor::getNext,mongo::PlanExecutor::executePlan,mongo::receivedUpdate,mongo::assembleResponse,mongo::MyMessageHandler::process,mongo::PortMessageServer::handleIncomingMsg,start_thread,clone
            2 recv,mongo::Socket::_recv,mongo::Socket::unsafe_recv,mongo::Socket::recv,mongo::MessagingPort::recv,mongo::PortMessageServer::handleIncomingMsg,start_thread,clone
            2 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait,__wt_cache_wait,__wt_cache_full_check,__wt_page_in_func,__wt_page_swap_func,__wt_row_search,__cursor_row_search,__wt_btcur_search_near,__curfile_search_near,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::BtreeBasedAccessMethod::findSingle,mongo::IDHackStage::work,mongo::UpdateStage::work,mongo::PlanExecutor::getNext,mongo::PlanExecutor::executePlan,mongo::receivedUpdate,mongo::assembleResponse,mongo::MyMessageHandler::process,mongo::PortMessageServer::handleIncomingMsg,start_thread,clone
            2 __lll_lock_wait,pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait,__wt_cache_wait,__wt_cache_full_check,__cursor_enter,__curfile_enter,__cursor_func_init,__wt_btcur_search_near,__curfile_search_near,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::BtreeBasedAccessMethod::findSingle,mongo::IDHackStage::work,mongo::UpdateStage::work,mongo::PlanExecutor::getNext,mongo::PlanExecutor::executePlan,mongo::receivedUpdate,mongo::assembleResponse,mongo::MyMessageHandler::process,mongo::PortMessageServer::handleIncomingMsg,start_thread,clone
            2 
            1 select,mongo::Listener::initAndListen,_initAndListen,mongo::initAndListen,mongoDbMain,main
            1 pthread_cond_wait@@GLIBC_2.3.2,wait<boost::unique_lock<boost::timed_mutex>
            1 pthread_cond_wait@@GLIBC_2.3.2,wait<boost::unique_lock<boost::mutex>,mongo::repl::SyncSourceFeedback::run,boost::(anonymous,start_thread,clone
            1 pthread_cond_wait@@GLIBC_2.3.2,boost::condition_variable::wait,mongo::repl::NetworkInterfaceImpl::waitForWork,mongo::repl::ReplicationExecutor::getWork,mongo::repl::ReplicationExecutor::run,boost::(anonymous,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait,__sweep_server,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait,__log_server,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait,__log_close_server,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait,__ckpt_server,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,timed_wait<boost::unique_lock<boost::timed_mutex>,timed_wait<boost::unique_lock<boost::timed_mutex>,,mongo::RangeDeleter::doWork,boost::(anonymous,start_thread,clone
            1 pthread_cond_timedwait@@GLIBC_2.3.2,timed_wait<boost::unique_lock<boost::timed_mutex>,timed_wait<boost::unique_lock<boost::timed_mutex>,mongo::BlockingQueue<mongo::BSONObj>::blockingPeek,mongo::repl::BackgroundSync::waitForMore,mongo::repl::SyncTail::tryPopAndWaitForMore,mongo::repl::SyncTail::oplogApplication
            1 pthread_cond_timedwait@@GLIBC_2.3.2,boost::condition_variable::timed_wait,timed_wait<boost::posix_time::seconds>,mongo::repl::NetworkInterfaceImpl::_consumeNetworkRequests
            1 pthread_cond_timedwait@@GLIBC_2.3.2,boost::condition_variable_any::timed_wait<boost::unique_lock<boost::timed_mutex>,timed_wait<boost::unique_lock<boost::timed_mutex>,,timed_wait<boost::unique_lock<boost::timed_mutex>,,mongo::(anonymous,mongo::BackgroundJob::jobBody,boost::(anonymous,start_thread,clone
            1 nanosleep,mongo::sleepsecs,mongo::TTLMonitor::run,mongo::BackgroundJob::jobBody,boost::(anonymous,start_thread,clone
            1 nanosleep,mongo::sleepsecs,mongo::ClientCursorMonitor::run,mongo::BackgroundJob::jobBody,boost::(anonymous,start_thread,clone
            1 nanosleep,mongo::sleepsecs
            1 nanosleep,mongo::sleepmicros,mongo::sleepmillis
            1 mongo::repl::runSyncThread,boost::(anonymous,start_thread,clone
            1 mongo::repl::NetworkInterfaceImpl::_requestProcessorThreadBody,boost::(anonymous,start_thread,clone
            1 mongo::repl::BackgroundSync::producerThread,boost::(anonymous,start_thread,clone
            1 mongo::repl::BackgroundSync::_producerThread
            1 mongo::DeadlineMonitor<mongo::V8Scope>::deadlineMonitorThread,boost::(anonymous,start_thread,clone
            1 mongo::(anonymous,mongo::BackgroundJob::jobBody,boost::(anonymous,start_thread,clone
            1 mongo::(anonymous,boost::(anonymous,start_thread,clone
            1 __lll_unlock_wake,_L_unlock_591,__pthread_mutex_unlock_usercnt,pthread_cond_timedwait@@GLIBC_2.3.2,__wt_cond_wait,__wt_cache_wait,__wt_cache_full_check,__cursor_enter,__curfile_enter,__cursor_func_init,__wt_btcur_search_near,__curfile_search_near,mongo::(anonymous,mongo::(anonymous,mongo::(anonymous,mongo::BtreeBasedAccessMethod::findSingle,mongo::IDHackStage::work,mongo::UpdateStage::work,mongo::PlanExecutor::getNext,mongo::PlanExecutor::executePlan,mongo::receivedUpdate,mongo::assembleResponse,mongo::MyMessageHandler::process,mongo::PortMessageServer::handleIncomingMsg,start_thread,clone
            1 __lll_lock_wait,_L_lock_816,pthread_mutex_lock,__wt_cond_signal,__evict_lru_walk,__evict_pass,__evict_server,start_thread,clone
            1 __evict_worker,start_thread,clone
            1 __evict_lru_pages,__evict_worker,start_thread,clone
            1 __evict_get_ref,__wt_evict_lru_page,__evict_lru_pages,__evict_worker,start_thread,clone
            1 do_sigwait,sigwait
      

      This only reproduces when replication is turned on. Always runs well without replication.

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                alexander.gorrod Alexander Gorrod
                Reporter:
                sue.loverso Susan LoVerso
              • Votes:
                0 Vote for this issue
                Watchers:
                1 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: