Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-19466

logOp fassert when primary steps down during a TTL delete

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Major - P3
    • Resolution: Fixed
    • Affects Version/s: 3.1.2
    • Fix Version/s: 3.1.7
    • Component/s: Replication
    • Labels:
    • Backwards Compatibility:
      Fully Compatible
    • Operating System:
      ALL
    • Steps To Reproduce:
      Hide

      'use strict';
       
      load('jstests/libs/parallelTester.js');
       
      function stepDownWorkload() {
          var conn = new Mongo();
       
          while (true) {
              try {
                  conn.getDB('admin').runCommand({replSetStepDown: 1, force: true});
              } catch (e) {
                  // ignore
              }
              sleep(1000);
          }
      }
       
      function insertWorkload() {
          var conn = new Mongo();
       
          while (true) {
              try {
                  var testDB = conn.getDB('test');
       
                  var bulk = testDB.foo.initializeUnorderedBulkOp();
                  for (var i = 0; i < 1500; i++) {
                      bulk.insert({a: new ISODate()});
                  }
                  bulk.execute();
              } catch (e) {
                  // ignore
              }
          }
      }
       
      // Set up the replica set
      var rs = new ReplSetTest({nodes: 3, startPort: 27017});
      rs.startSet();
      rs.initiate();
      rs.awaitSecondaryNodes();
       
      // Increase the frequency of TTL deletes
      var adminDB = rs.getPrimary().getDB('admin');
      assert.commandWorked(adminDB.runCommand({setParameter: 1, ttlMonitorSleepSecs: 1}));
       
      // Add a TTL index
      var testDB = rs.getPrimary().getDB('test');
      assert.commandWorked(testDB.foo.ensureIndex({a: 1}, {expireAfterSeconds: 1}));
       
      // Increase the frequency of yielding
      assert.commandWorked(adminDB.runCommand({configureFailPoint: 'recordNeedsFetchFail', mode: 'alwaysOn'}));
      assert.commandWorked(adminDB.runCommand({setParameter: 1, internalQueryExecYieldIterations: 1}));
      assert.commandWorked(adminDB.runCommand({setParameter: 1, internalQueryExecYieldPeriodMS: 1}));
       
      // Start the workload threads
      var threads = [];
       
      for (var i = 0; i < 10; i++) {
          var t;
       
          if (i === 0) {
              t = new ScopedThread(stepDownWorkload);
          } else {
              t = new ScopedThread(insertWorkload);
          }
       
          threads.push(t);
          t.start();
      }
       
      threads.forEach(function(t) {
          t.join();
      });
      

      Show
      'use strict';   load('jstests/libs/parallelTester.js');   function stepDownWorkload() { var conn = new Mongo();   while (true) { try { conn.getDB('admin').runCommand({replSetStepDown: 1, force: true}); } catch (e) { // ignore } sleep(1000); } }   function insertWorkload() { var conn = new Mongo();   while (true) { try { var testDB = conn.getDB('test');   var bulk = testDB.foo.initializeUnorderedBulkOp(); for (var i = 0; i < 1500; i++) { bulk.insert({a: new ISODate()}); } bulk.execute(); } catch (e) { // ignore } } }   // Set up the replica set var rs = new ReplSetTest({nodes: 3, startPort: 27017}); rs.startSet(); rs.initiate(); rs.awaitSecondaryNodes();   // Increase the frequency of TTL deletes var adminDB = rs.getPrimary().getDB('admin'); assert.commandWorked(adminDB.runCommand({setParameter: 1, ttlMonitorSleepSecs: 1}));   // Add a TTL index var testDB = rs.getPrimary().getDB('test'); assert.commandWorked(testDB.foo.ensureIndex({a: 1}, {expireAfterSeconds: 1}));   // Increase the frequency of yielding assert.commandWorked(adminDB.runCommand({configureFailPoint: 'recordNeedsFetchFail', mode: 'alwaysOn'})); assert.commandWorked(adminDB.runCommand({setParameter: 1, internalQueryExecYieldIterations: 1})); assert.commandWorked(adminDB.runCommand({setParameter: 1, internalQueryExecYieldPeriodMS: 1}));   // Start the workload threads var threads = [];   for (var i = 0; i < 10; i++) { var t;   if (i === 0) { t = new ScopedThread(stepDownWorkload); } else { t = new ScopedThread(insertWorkload); }   threads.push(t); t.start(); }   threads.forEach(function(t) { t.join(); });
    • Sprint:
      QuInt 8 08/28/15

      Description

      It's possible for a primary to step down while the TTL monitor is deleting documents. This leads to a fatal assertion on the node that's stepping down.

      This bug affects versions 3.1.2+. It does not seem to affect 3.0.5-rc0.

      [TTLMonitor] logOp() but can't accept write to collection test.foo
      [TTLMonitor] Fatal Assertion 17405
       
      * thread #18: tid = 0xccace, 0x00007fff899d96e7 libsystem_c.dylib`sprintf_l + 1
        * frame #0: 0x00007fff899d96e7 libsystem_c.dylib`sprintf_l + 1
          frame #1: 0x00007fff899ee456 libsystem_c.dylib`_yconv + 252
          frame #2: 0x00007fff899edfe4 libsystem_c.dylib`_st_fmt + 3243
          frame #3: 0x00007fff899ed263 libsystem_c.dylib`strftime_l + 139
          frame #4: 0x0000000108763248 mongod`mongo::(anonymous namespace)::_dateToISOString(date=(millis = 1437101989000), local=true, result=0x000000010c9a3ee0)::DateStringBuffer*) + 152 at time_support.cpp:210
          frame #5: 0x00000001087636bd mongod`mongo::outputDateAsISOStringLocal(os=0x00007fff7bcdb2f8, date=(millis = 1437101989000)) + 61 at time_support.cpp:296
          frame #6: 0x00000001083968a4 mongod`mongo::logger::MessageEventDetailsEncoder::encode(this=0x00007ffa40521290, event=0x000000010c9a4520, os=0x00007fff7bcdb2f8) + 84 at message_event_utf8_encoder.cpp:54
          frame #7: 0x0000000107b47aa6 mongod`mongo::logger::ConsoleAppender<mongo::logger::MessageEventEphemeral, mongo::Console>::append(this=0x00007ffa40524960, event=0x000000010c9a4520) + 198 at console_appender.h:53
          frame #8: 0x000000010839768e mongod`mongo::logger::LogDomain<mongo::logger::MessageEventEphemeral>::append(this=0x00007ffa40522468, event=0x000000010c9a4520) + 574 at log_domain-impl.h:59
          frame #9: 0x000000010839388f mongod`mongo::logger::LogstreamBuilder::~LogstreamBuilder(this=0x000000010c9a4a28) + 1359 at logstream_builder.cpp:126
          frame #10: 0x0000000108394245 mongod`mongo::logger::LogstreamBuilder::~LogstreamBuilder(this=0x000000010c9a4a28) + 21 at logstream_builder.cpp:119
          frame #11: 0x00000001086874be mongod`mongo::fassertFailed(msgid=17405) + 238 at assert_util.cpp:173
          frame #12: 0x0000000107f7abf5 mongod`mongo::repl::_logOp(txn=0x000000010c9a61e8, opstr=0x00000001092371dc, ns=0x00007ffa40706249, obj=0x000000010c9a53b0, o2=0x0000000000000000, fromMigrate=false) + 917 at oplog.cpp:270
          frame #13: 0x0000000107bee82a mongod`mongo::OpObserver::onDelete(this=0x00007ffa40601520, txn=0x000000010c9a61e8, ns=0x00007ffa40706248, idDoc=0x000000010c9a53b0, fromMigrate=false) + 426 at op_observer.cpp:86
          frame #14: 0x000000010771fa5f mongod`mongo::Collection::deleteDocument(this=0x00007ffa40706230, txn=0x000000010c9a61e8, loc=0x000000010c9a5950, cappedOK=false, noWarn=false, deletedId=0x0000000000000000) + 1151 at collection.cpp:445
          frame #15: 0x000000010835c81b mongod`mongo::TTLMonitor::doTTLForIndex(this=0x00007ffa40603550, txn=0x000000010c9a61e8, dbName=0x000000010c9a6190, idx=BSONObj at 0x000000010c9a6140) + 4523 at ttl.cpp:302
          frame #16: 0x000000010835a654 mongod`mongo::TTLMonitor::doTTLPass(this=0x00007ffa40603550) + 1780 at ttl.cpp:145
          frame #17: 0x0000000108359ca1 mongod`mongo::TTLMonitor::run(this=0x00007ffa40603550) + 1601 at ttl.cpp:113
          frame #18: 0x0000000108690564 mongod`mongo::BackgroundJob::jobBody(this=0x00007ffa40603550) + 1188 at background.cpp:152
          frame #19: 0x0000000108693748 mongod`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::__bind<void (mongo::BackgroundJob::*)(), mongo::BackgroundJob*> > >(void*) [inlined] decltype(__f=0x00007ffa40603670, __a0=0x00007ffa40603680)).*fp(std::__1::forward<>(fp1))) std::__1::__invoke<void (mongo::BackgroundJob::*&)(), mongo::BackgroundJob*&, void>(void (mongo::BackgroundJob::*&&&)(), mongo::BackgroundJob*&&&) + 134 at __functional_base:380
          frame #20: 0x00000001086936c2 mongod`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::__bind<void (mongo::BackgroundJob::*)(), mongo::BackgroundJob*> > >(void*) [inlined] std::__1::__bind_return<void (mongo::BackgroundJob::*)(), std::__1::tuple<mongo::BackgroundJob*>, std::__1::tuple<>, _is_valid_bind_return<void (mongo::BackgroundJob::*)(), std::__1::tuple<mongo::BackgroundJob*>, std::__1::tuple<> >::value>::type std::__1::__apply_functor<void (__f=0x00007ffa40603670, __bound_args=0x00007ffa40603680, (null)=__tuple_indices<0> at 0x000000010c9a6ea8, __args=0x000000010c9a6e68)(), std::__1::tuple<mongo::BackgroundJob*>, 0ul, std::__1::tuple<> >(void (mongo::BackgroundJob::*&)(), std::__1::tuple<mongo::BackgroundJob*>&, std::__1::__tuple_indices<0ul>, std::__1::tuple<>&&) + 40 at functional:2023
          frame #21: 0x000000010869369a mongod`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::__bind<void (mongo::BackgroundJob::*)(), mongo::BackgroundJob*> > >(void*) [inlined] std::__1::__bind_return<void (mongo::BackgroundJob::*)(), std::__1::tuple<mongo::BackgroundJob*>, std::__1::tuple<>, _is_valid_bind_return<void (mongo::BackgroundJob::*)(), std::__1::tuple<mongo::BackgroundJob*>, std::__1::tuple<> >::value>::type std::__1::__bind<void (this=0x00007ffa40603670)(), mongo::BackgroundJob*>::operator()<>() + 38 at functional:2086
          frame #22: 0x0000000108693674 mongod`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::__bind<void (mongo::BackgroundJob::*)(), mongo::BackgroundJob*> > >(void*) [inlined] decltype(__f=0x00007ffa40603670)(), mongo::BackgroundJob*> >(fp)(std::__1::forward<>(fp0))) std::__1::__invoke<std::__1::__bind<void (mongo::BackgroundJob::*)(), mongo::BackgroundJob*> >(std::__1::__bind<void (mongo::BackgroundJob::*)(), mongo::BackgroundJob*>&&) + 11 at __functional_base:413
          frame #23: 0x0000000108693669 mongod`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::__bind<void (mongo::BackgroundJob::*)(), mongo::BackgroundJob*> > >(void*) [inlined] void std::__1::__thread_execute<std::__1::__bind<void (mongo::BackgroundJob::*)(), mongo::BackgroundJob*> >(__t=0x00007ffa40603670, (null)=__tuple_indices<> at 0x000000010c9a6e40)(), mongo::BackgroundJob*> >&, std::__1::__tuple_indices<>) + 25 at thread:332
          frame #24: 0x0000000108693650 mongod`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::__bind<void (mongo::BackgroundJob::*)(), mongo::BackgroundJob*> > >(__vp=0x00007ffa40603670) + 368 at thread:342
          frame #25: 0x00007fff9919f268 libsystem_pthread.dylib`_pthread_body + 131
          frame #26: 0x00007fff9919f1e5 libsystem_pthread.dylib`_pthread_start + 176
          frame #27: 0x00007fff9919d41d libsystem_pthread.dylib`thread_start + 13
      


      Version: 23963a5d822bfc8a3aa4754c2fe923cdcdaa1956

        Attachments

          Activity

            People

            • Votes:
              0 Vote for this issue
              Watchers:
              7 Start watching this issue

              Dates

              • Created:
                Updated:
                Resolved: