Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-21726

WiredTiger primaries replicate no-op update, leading to out-of-sync secondaries and data loss

    • Type: Icon: Bug Bug
    • Resolution: Done
    • Priority: Icon: Critical - P2 Critical - P2
    • 3.2.0-rc6
    • Affects Version/s: 3.1.2
    • Component/s: Replication
    • None
    • Minor Change
    • ALL
    • Hide

      python buildscripts/resmoke.py --storageEngine=wiredTiger --executor=no_server repro.js

      'use strict';
      
      var rst = new ReplSetTest({nodes: 2});
      rst.startSet();
      rst.initiate();
      
      var primaryColl = rst.getPrimary().getDB('test').foo;
      var oplog = rst.getPrimary().getDB('local').oplog.rs;
      var secondaryColl = rst.getSecondary().getDB('test').foo;
      
      // Note: An array element is required to repro the bug.
      var res = primaryColl.insert({a: [1]});
      assert.writeOK(res);
      
      res = primaryColl.update({}, {$pushAll: {a: []}});
      assert.writeOK(res);
      
      // nModified is 1 here
      printjson(res);
      
      rst.awaitReplication();
      
      print('oplog ' + tojson(oplog.find().toArray()));
      print('Primary doc ' + tojson(primaryColl.find().toArray()));
      print('Secondary doc ' + tojson(secondaryColl.find().toArray()));
      
      assert.eq(primaryColl.find().next(), secondaryColl.find().next());
      
      Show
      python buildscripts/resmoke.py --storageEngine=wiredTiger --executor=no_server repro.js 'use strict'; var rst = new ReplSetTest({nodes: 2}); rst.startSet(); rst.initiate(); var primaryColl = rst.getPrimary().getDB('test').foo; var oplog = rst.getPrimary().getDB('local').oplog.rs; var secondaryColl = rst.getSecondary().getDB('test').foo; // Note: An array element is required to repro the bug. var res = primaryColl.insert({a: [1]}); assert.writeOK(res); res = primaryColl.update({}, {$pushAll: {a: []}}); assert.writeOK(res); // nModified is 1 here printjson(res); rst.awaitReplication(); print('oplog ' + tojson(oplog.find().toArray())); print('Primary doc ' + tojson(primaryColl.find().toArray())); print('Secondary doc ' + tojson(secondaryColl.find().toArray())); assert.eq(primaryColl.find().next(), secondaryColl.find().next());
    • Repl D (12/11/15)
    • 0

      This bug affects versions 3.1.2+ with WiredTiger as the storage engine. It doesn't affect 3.0.7.

      Here's the test output from the repro script:

      // The primary and secondary are synchronized up until this point.
      
      // A no-op update is issued, but an op is still logged in the oplog.
      // This leads to the secondary getting out of sync with the primary
      // (and is a form of data loss).
      > res = primaryColl.update({}, {$pushAll: {a: []}});
      > assert.writeOK(res);
      
      res => { "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 }
      
      oplog [
        {
            "ts" : Timestamp(1449024242, 1),
            "h" : NumberLong("-3274359779073625624"),
            "v" : 2,
            "op" : "n",
            "ns" : "",
            "o" : {
                "msg" : "initiating set"
            }
        },
        {
            "ts" : Timestamp(1449024254, 1),
            "t" : NumberLong(1),
            "h" : NumberLong("-7260876401968846914"),
            "v" : 2,
            "op" : "n",
            "ns" : "",
            "o" : {
                "msg" : "new primary"
            }
        },
        {
            "ts" : Timestamp(1449024254, 2),
            "t" : NumberLong(1),
            "h" : NumberLong("9051320871864503306"),
            "v" : 2,
            "op" : "c",
            "ns" : "test.$cmd",
            "o" : {
                "create" : "foo"
            }
        },
        {
            "ts" : Timestamp(1449024254, 3),
            "t" : NumberLong(1),
            "h" : NumberLong("-455571380945262558"),
            "v" : 2,
            "op" : "i",
            "ns" : "test.foo",
            "o" : {
                "_id" : ObjectId("565e5afe7750e3087f8ef04f"),
                "a" : [
                    1
                ]
            }
        },
        {
            "ts" : Timestamp(1449024254, 4),
            "t" : NumberLong(1),
            "h" : NumberLong("-4618385206475519820"),
            "v" : 2,
            "op" : "u",
            "ns" : "test.foo",
            "o2" : {
                "_id" : ObjectId("565e5afe7750e3087f8ef04f")
            },
            "o" : {
      
            }
        }
      ]
      Primary doc [ { "_id" : ObjectId("565e5afe7750e3087f8ef04f"), "a" : [ 1 ] } ]
      Secondary doc [ { "_id" : ObjectId("565e5afe7750e3087f8ef04f") } ]
      

            Assignee:
            milkie@mongodb.com Eric Milkie
            Reporter:
            kamran.khan Kamran K.
            Votes:
            0 Vote for this issue
            Watchers:
            15 Start watching this issue

              Created:
              Updated:
              Resolved: