Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-4781

replica set initial sync failure when update cannot be applied to a future version of an object received via clone

    XMLWordPrintable

Details

    • Bug
    • Status: Closed
    • Major - P3
    • Resolution: Fixed
    • None
    • 2.2.1, 2.3.0
    • Replication
    • None
    • ALL

    Description

      Some modifier operations may uassert when they cannot be applied to a selected document. It's possible for an update to uassert when applied to a future version of a document during the initial sync phase, and this aborts the initial sync. It appears that the initial sync is then retried but if new, bad updates arrive I imagine the initial sync failure could keep repeating

      Here is a test I wrote for this. The first attempt at initial sync fails, but the test stops sending updates so when the initial sync is retried. As a result, the current test passes. So it's just good as a demonstration right now.

      // Test update modifier uassert during initial sync. SERVER-4781
       
      load("jstests/replsets/rslib.js");
      basename = "jstests_initsync4";
       
      print("1. Bring up set");
      replTest = new ReplSetTest( {name: basename, nodes: 1} );
      replTest.startSet();
      replTest.initiate();
       
      m = replTest.getMaster();
      md = m.getDB("d");
      mc = m.getDB("d")["c"];
       
      print("2. Insert some data");
      N = 500000;
      for( i = 0; i < N; ++i ) {
          mc.save( {_id:i,a:{}} );
      }
      md.getLastError();
       
      print("3. Make sure synced");
      replTest.awaitReplication();
       
      print("4. Bring up a new node");
      ports = allocatePorts( 3 );
      basePath = "/data/db/" + basename;
      hostname = getHostName();
       
      s = startMongodTest (ports[2], basename, false, {replSet : basename, oplogSize : 2} );
       
      var config = replTest.getReplSetConfig();
      config.version = 2;
      config.members.push({_id:2, host:hostname+":"+ports[2]});
      try {
          m.getDB("admin").runCommand({replSetReconfig:config});
      }
      catch(e) {
          print(e);
      }
      reconnect(s);
       
      print("5. Wait for new node to start cloning");
       
      s.setSlaveOk();
      sc = s.getDB("d")["c"];
       
      wait( function() { printjson( sc.stats() ); return sc.stats().count > 0; } );
       
      print("6. Start updating documents on primary");
      for( i = N-1; i >= N-10000; --i ) {
          // If the document is cloned as {a:1}, the {$set:{'a.b':1}} modifier will uassert.
          mc.update( {_id:i}, {$set:{'a.b':1}} );
          mc.update( {_id:i}, {$set:{a:1}} );    
      }
       
      print("7. Wait for new node to become SECONDARY");
      wait(function() {
           var status = s.getDB("admin").runCommand({replSetGetStatus:1});
           printjson(status);
           return status.members &&
           (status.members[1].state == 2);
           });

      Here is some relevant logging from the test

       m31002| Wed Jan 25 20:38:29 [rsSync] replSet initial oplog application from Aaron-Staples-MacBook-Pro.local:31000 starting at Jan 25 20:37:31:9f8 to Jan 25 20:38:07:214
       m31002| Wed Jan 25 20:38:29 [rsSync] replSet initial sync failing, error applying oplog : 10145 LEFT_SUBFIELD only supports Object: a not: 1
       m31002| Wed Jan 25 20:38:29 [rsSync] replSet applying oplog from Aaron-Staples-MacBook-Pro.local:31000 failed, trying again
       m31002| Wed Jan 25 20:38:29 [rsSync] replSet not trying to sync from Aaron-Staples-MacBook-Pro.local:31000, it is vetoed for 60 more seconds

      Also I wrote a test for master slave mode and that seems to work ok

      // Test update modifier uassert during initial sync. SERVER-4781
       
      rt = new ReplTest( "repl13tests" );
       
      m = rt.start( true );
      mc = m.getDB( 'd' )[ 'c' ];
       
      for( i = 0; i < 100000; ++i ) {
          mc.save( {_id:i,a:{}} );
      }
      m.getDB( 'd' ).getLastError();
       
      s = rt.start( false );
      sc = s.getDB( 'd' )[ 'c' ];
       
      assert.soon( function() { printjson( sc.count() ); return sc.count() > 0; } );
       
      for( i = 99999; i >= 90000; --i ) {
          mc.update( {_id:i}, {$set:{'a.b':1}} );
          mc.update( {_id:i}, {$set:{a:1}} );    
      }
       
      mc.save( {} )
      assert.soon( function() { return sc.count() == 100001; } );
       
      printjson( sc.findOne( {_id:99999} ) );
      printjson( sc.findOne( {_id:90000} ) );

      I can push the tests when bb code freeze is over.

      Attachments

        Issue Links

          Activity

            People

              alerner Alberto Lerner
              aaron Aaron Staple
              Votes:
              3 Vote for this issue
              Watchers:
              13 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: