Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-39753

getMore commands on aggregate cursors with "majority" read concern may return uncommitted data on secondaries

    • Type: Icon: Bug Bug
    • Resolution: Duplicate
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: 4.0.6, 4.1.8
    • Component/s: Querying, Replication
    • Labels:
      None
    • ALL
    • Hide
      /**
       * Test if 'getMore' on an aggregate cursor returns uncommitted data.
       *
       * Starts a 4-node replica set, commits docs {1,2,3} to all nodes, pauses replication on two
       * secondaries, then inserts docs {4,5,6} with w:2 so that they are not majority committed.
       * Majority reads ('find' and aggregate + 'getMore') against the remaining secondary are then
       * expected to return only docs {1,2,3}.
       */
      (function() {
          "use strict";
      
          load("jstests/libs/write_concern_util.js");  // for [stop|restart]ServerReplication.
      
          const name = "test";
          const replTest = new ReplSetTest({name: name, nodes: 4});
          replTest.startSet();
          replTest.initiate();
      
          const dbName = name;
          const collName = "test";
      
          const primary = replTest.getPrimary();
          const secondaries = replTest.getSecondaries();
          assert.eq(secondaries.length, 3);
          const primaryDb = primary.getDB(dbName);
          // All reads below go to the one secondary whose replication is never paused.
          const secondaryDb = secondaries[0].getDB(dbName);
          const primaryColl = primaryDb[collName];
      
          // Do some initial writes on primary and commit them.
          assert.commandWorked(
              primaryColl.insert([{_id: 1}, {_id: 2}, {_id: 3}], {writeConcern: {w: 4}}));
          replTest.awaitLastOpCommitted();
      
          // Pause replication on two secondaries.
          jsTestLog("Stopping replication on 2 secondaries.");
          stopServerReplication(secondaries[1]);
          stopServerReplication(secondaries[2]);
      
          jsTestLog("Doing some writes on primary.");
          // Do a write and replicate to 1 secondary. This write is not majority committed yet.
          assert.commandWorked(
              primaryColl.insert([{_id: 4}, {_id: 5}, {_id: 6}], {writeConcern: {w: 2}}));
      
          jsTestLog("Doing reads on secondary.");
      
          // A majority 'find' query. This should only return doc ids {1,2,3}.
          const findDocs = secondaryDb[collName].find().readConcern("majority").toArray();
          assert.sameMembers(findDocs, [{_id: 1}, {_id: 2}, {_id: 3}]);
      
          // Do a majority 'getMore' on aggregate cursor. This should only return doc ids {1,2,3},
          // since docs {4,5,6} are not majority committed yet.
          // batchSize 0 opens the cursor without returning documents, so every document is
          // delivered by the subsequent 'getMore'.
          // Both commands are checked with assert.commandWorked so that a command failure is
          // reported with the server's error rather than as a TypeError on 'res.cursor'.
          const aggRes = assert.commandWorked(secondaryDb.runCommand({
              aggregate: collName,
              pipeline: [],
              readConcern: {level: "majority"},
              cursor: {batchSize: 0}
          }));
          const cursorId = aggRes.cursor.id;
          const getMoreRes =
              assert.commandWorked(secondaryDb.runCommand({getMore: cursorId, collection: collName}));
          assert.sameMembers(getMoreRes.cursor.nextBatch, [{_id: 1}, {_id: 2}, {_id: 3}]);
      
          // Restart replication to let test complete.
          restartServerReplication(secondaries[1]);
          restartServerReplication(secondaries[2]);
      
          replTest.stopSet();
      })();
      
      
      Show
      /**
       * Test if 'getMore' on an aggregate cursor returns uncommitted data.
       *
       * Starts a 4-node replica set, commits docs {1,2,3} to all nodes, pauses replication on two
       * secondaries, then inserts docs {4,5,6} with w:2 so that they are not majority committed.
       * Majority reads ('find' and aggregate + 'getMore') against the remaining secondary are then
       * expected to return only docs {1,2,3}.
       */
      (function() {
          "use strict";

          load("jstests/libs/write_concern_util.js");  // for [stop|restart]ServerReplication.

          const name = "test";
          const replTest = new ReplSetTest({name: name, nodes: 4});
          replTest.startSet();
          replTest.initiate();

          const dbName = name;
          const collName = "test";

          const primary = replTest.getPrimary();
          const secondaries = replTest.getSecondaries();
          assert.eq(secondaries.length, 3);
          const primaryDb = primary.getDB(dbName);
          // All reads below go to the one secondary whose replication is never paused.
          const secondaryDb = secondaries[0].getDB(dbName);
          const primaryColl = primaryDb[collName];

          // Do some initial writes on primary and commit them.
          assert.commandWorked(
              primaryColl.insert([{_id: 1}, {_id: 2}, {_id: 3}], {writeConcern: {w: 4}}));
          replTest.awaitLastOpCommitted();

          // Pause replication on two secondaries.
          jsTestLog("Stopping replication on 2 secondaries.");
          stopServerReplication(secondaries[1]);
          stopServerReplication(secondaries[2]);

          jsTestLog("Doing some writes on primary.");
          // Do a write and replicate to 1 secondary. This write is not majority committed yet.
          assert.commandWorked(
              primaryColl.insert([{_id: 4}, {_id: 5}, {_id: 6}], {writeConcern: {w: 2}}));

          jsTestLog("Doing reads on secondary.");

          // A majority 'find' query. This should only return doc ids {1,2,3}.
          const findDocs = secondaryDb[collName].find().readConcern("majority").toArray();
          assert.sameMembers(findDocs, [{_id: 1}, {_id: 2}, {_id: 3}]);

          // Do a majority 'getMore' on aggregate cursor. This should only return doc ids {1,2,3},
          // since docs {4,5,6} are not majority committed yet.
          // batchSize 0 opens the cursor without returning documents, so every document is
          // delivered by the subsequent 'getMore'.
          // Both commands are checked with assert.commandWorked so that a command failure is
          // reported with the server's error rather than as a TypeError on 'res.cursor'.
          const aggRes = assert.commandWorked(secondaryDb.runCommand({
              aggregate: collName,
              pipeline: [],
              readConcern: {level: "majority"},
              cursor: {batchSize: 0}
          }));
          const cursorId = aggRes.cursor.id;
          const getMoreRes =
              assert.commandWorked(secondaryDb.runCommand({getMore: cursorId, collection: collName}));
          assert.sameMembers(getMoreRes.cursor.nextBatch, [{_id: 1}, {_id: 2}, {_id: 3}]);

          // Restart replication to let test complete.
          restartServerReplication(secondaries[1]);
          restartServerReplication(secondaries[2]);

          replTest.stopSet();
      })();
    • Query 2019-04-08

      When running a getMore command against a cursor that was originally created by an aggregate command with read concern "majority", we may return data that is not majority committed if the command is run against a replica set secondary. The read concern for a getMore command is not stored directly on its OperationContext. It instead inherits its read concern from the cursor object, and we will set the timestamp read source to kMajorityCommitted for getMore commands here. This happens before we generate the getMore batch, however. For aggregate commands, locks will only be taken once we enter the batch generation phase, and, for reads, these locks will be taken inside the AutoGetCollectionForRead constructor. The AutoGetCollectionForRead constructor determines if a command should read at the lastApplied timestamp based on the read concern on the OperationContext. For a getMore command, there shouldn't be a read concern on the OperationContext since it is inherited from the cursor. This means that we will decide to read at the lastApplied timestamp on secondaries, overriding the majority committed read source we previously set.

      This has implications for getMore commands run on standard aggregate query cursors and also for change streams, since they create cursors via the aggregate command; i.e., it is possible for a change stream run against a secondary to return data that are not majority committed. I believe this issue goes back to 4.0, since that was when the logic around reading from the lastApplied timestamp on secondaries was added. The attached repro shows the same issue when run against a 4.0 server.

            Assignee:
            bernard.gorman@mongodb.com Bernard Gorman
            Reporter:
            william.schultz@mongodb.com William Schultz (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            9 Start watching this issue

              Created:
              Updated:
              Resolved: