OplogReplay query might need to scan extra oplog entries due to oplog visibility rules

XMLWordPrintableJSON

    • Type: Bug
    • Resolution: Works as Designed
    • Priority: Major - P3
    • None
    • Affects Version/s: 3.6.8, 4.0.2, 4.1.3
    • Component/s: Querying
    • None
    • Storage Execution
    • ALL
    • Hide

      When the collection has not been read from, we use a collection scan:

      /**
       * Repro, case 1: the capped collection is queried with the oplogReplay option
       * without any read having been issued against it first. The final assertion
       * expects explain() to report that all 10 inserted documents were examined.
       */
      (function() {
          load("jstests/libs/analyze_plan.js");

          const coll = db.getSiblingDB("local").oplog.jstests_query_oplogreplay;
          const cappedOptions = {capped: true, size: 16 * 1024};
          assert.commandWorked(coll.getDB().createCollection(coll.getName(), cappedOptions));

          // Insert documents with _id 1..10, each carrying ts = Timestamp(1000, _id).
          for (let seq = 1; seq <= 10; seq++) {
              const doc = {_id: seq, ts: Timestamp(1000, seq)};
              assert.writeOK(coll.insert(doc));
          }

          // Range filter whose lower and upper bounds are both Timestamp(1000, 10).
          const tsFilter = {ts: {$gte: Timestamp(1000, 10), $lte: Timestamp(1000, 10)}};
          const replayCursor = coll.find(tsFilter).addOption(DBQuery.Option.oplogReplay);
          const explainRes = assert.commandWorked(replayCursor.explain("executionStats"));

          // The full explain output is passed as the failure message for diagnosis.
          assert.eq(explainRes.executionStats.totalDocsExamined, 10, tojson(explainRes));
      }());
      

      When the collection has been read from, we use the oplog start hack and only examine 1 document:

      /**
       * Repro, case 2: identical setup to the unread-collection case, except that the
       * whole collection is iterated once before the oplogReplay query is explained.
       * The final assertion expects explain() to report exactly 1 document examined.
       */
      (function() {
          load("jstests/libs/analyze_plan.js");

          const coll = db.getSiblingDB("local").oplog.jstests_query_oplogreplay;
          const cappedOptions = {capped: true, size: 16 * 1024};
          assert.commandWorked(coll.getDB().createCollection(coll.getName(), cappedOptions));

          // Insert documents with _id 1..10, each carrying ts = Timestamp(1000, _id).
          for (let seq = 1; seq <= 10; seq++) {
              const doc = {_id: seq, ts: Timestamp(1000, seq)};
              assert.writeOK(coll.insert(doc));
          }

          // Read every document once; this prior read is the only difference from case 1.
          coll.find().itcount();

          // Range filter whose lower and upper bounds are both Timestamp(1000, 10).
          const tsFilter = {ts: {$gte: Timestamp(1000, 10), $lte: Timestamp(1000, 10)}};
          const replayCursor = coll.find(tsFilter).addOption(DBQuery.Option.oplogReplay);
          const explainRes = assert.commandWorked(replayCursor.explain("executionStats"));

          // The full explain output is passed as the failure message for diagnosis.
          assert.eq(explainRes.executionStats.totalDocsExamined, 1, tojson(explainRes));
      }());
      
      Show
      When the collection has not been read from, we use a collection scan: (function() { load( "jstests/libs/analyze_plan.js" ); let t = db.getSiblingDB( "local" ).oplog.jstests_query_oplogreplay; assert .commandWorked(t.getDB().createCollection(t.getName(), {capped: true , size: 16 * 1024})); for (let i = 1; i <= 10; i++) { assert .writeOK(t.insert({_id: i, ts: Timestamp(1000, i)})); } let res = assert .commandWorked(t.find({ts: {$gte: Timestamp(1000, 10), $lte: Timestamp(1000, 10)}}) .addOption(DBQuery.Option.oplogReplay) .explain( "executionStats" )); assert .eq(res.executionStats.totalDocsExamined, 10, tojson(res)); }()); When the collection has been read from, we use the oplog start hack and only examine 1 document: (function() { load( "jstests/libs/analyze_plan.js" ); let t = db.getSiblingDB( "local" ).oplog.jstests_query_oplogreplay; assert .commandWorked(t.getDB().createCollection(t.getName(), {capped: true , size: 16 * 1024})); for (let i = 1; i <= 10; i++) { assert .writeOK(t.insert({_id: i, ts: Timestamp(1000, i)})); } t.find().itcount(); let res = assert .commandWorked(t.find({ts: {$gte: Timestamp(1000, 10), $lte: Timestamp(1000, 10)}}) .addOption(DBQuery.Option.oplogReplay) .explain( "executionStats" )); assert .eq(res.executionStats.totalDocsExamined, 1, tojson(res)); }());
    • None
    • None
    • None
    • None
    • None
    • None
    • None

      An OplogReplay query may perform a full collection scan if the collection has not been read from, but uses the oplog start hack if the collection has been read from. This issue was introduced in 3.6 (it does not exist in 3.4). However, it does not appear to have been introduced by SERVER-29843, since the issue was not yet present as of that change.

            Assignee:
            [DO NOT USE] Backlog - Storage Execution Team
            Reporter:
            Tess Avitabile (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            8 Start watching this issue

              Created:
              Updated:
              Resolved: