Core Server / SERVER-71248

Adapt to changed batchSize in getMore

    • Type: Improvement
    • Resolution: Won't Do
    • Priority: Major - P3
    • Affects Version/s: None
    • Component/s: None
    • Labels: None
    • Assigned Teams: Query Execution

      With a sharded find, the remote cursors stored on mongos remember the batchSize from the initial find request and use it in all subsequent getMores.  This makes the following pseudocode much slower than expected:

      # this find might take about 50ms
      > db.coll.find().batchSize(1000);
      
      # open a cursor that fetches only one document in its first batch
      > let r = db.runCommand({find: "coll", batchSize: 1});
      # this getMore will take about 5 seconds, 100x slower than the original find,
      # because the remote shard cursors still use batchSize 1.
      > db.runCommand({getMore: r.cursor.id,
                       collection: "coll",
                       batchSize: 999});

      The batchSize set in the getMore controls how much data is returned to the client, but it does not change the batch size used by the internal shard cursors, so the pattern above is very inefficient.  This was surprising to me while working on SERVER-71241, and I expect it will surprise many customers.
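
      One way for a client to avoid the slow path is to not reuse a cursor that was opened with a tiny batchSize when it actually wants large batches.  The sketch below is hypothetical (not part of any proposed fix): it kills the small-batch cursor and opens a new one with the desired batch size, so that mongos creates its remote shard cursors with the larger batch size from the start.

      // Hypothetical workaround sketch: rather than issuing a large-batchSize getMore on a cursor
      // opened with batchSize: 1, kill that cursor and re-run the find with the batch size we
      // actually want, so the remote shard cursors are created with the larger batch size.
      let small = db.runCommand({find: "coll", batchSize: 1});
      assert.commandWorked(db.runCommand({killCursors: "coll", cursors: [small.cursor.id]}));
      // Fast path: one large batch per shard instead of thousands of tiny internal batches.
      let big = db.runCommand({find: "coll", batchSize: 1000});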

      A more complete, syntactically correct example is here:

      /**
       *  @tags: [
       *   requires_sharding,
       *   requires_getmore,
       *   requires_fcv_62,
       *  ]
       */
      (function() {
      "use strict";function getMillis() {
          const d = new Date();
          return d.getTime();
      }
      function runtimeMillis(f) {
          var start = getMillis();
          f();
          return (getMillis() - start);
      }
      function isError(res) {
          return !res.hasOwnProperty('ok') || !res['ok'];
      }
      const dbName = "test-SERVER-71248";
      const collName = "test-SERVER-71248-coll";
      
      // Set up a 2-shard cluster in which each shard is a single-node replica set.
      const st = new ShardingTest({name: jsTestName(), shards: 2, rs: {nodes: 1}});
      const coll = st.s0.getDB(dbName)[collName];
      assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
      st.ensurePrimaryShard(dbName, st.shard0.name);
      
      // Insert some data spread across the two shards.
      function initDb(numSamples) {
          coll.drop();
          // Use ranged sharding with 50% of the value range on the second shard.
          const splitPoint = Math.max(1, numSamples / 2);
          st.shardColl(
              coll,
              {_id: 1},              // shard key
              {_id: splitPoint},     // split point
              {_id: splitPoint + 1}  // move the chunk to the other shard
          );
          let bulk = coll.initializeUnorderedBulkOp();
          for (let i = 0; i < numSamples; i++) {
              bulk.insert({"_id": i});
          }
          assert.commandWorked(bulk.execute());
      }
      
      let nDocs = 10000;
      initDb(nDocs);
      
      // Time a find query that returns 10,000 docs in one batch.
      let fullQueryTimeMS = runtimeMillis(
          () => assert.commandWorked(coll.runCommand(
              {find: collName, maxTimeMS: 999999, allowPartialResults: true, batchSize: nDocs})));
      print("fullQueryTimeMS: " + fullQueryTimeMS);
      
      // Now time a getMore that returns 9,999 docs in one batch.  If it is executed after an
      // initial find with a batchSize of 1, the getMore takes significantly (e.g., 100x) longer
      // than the original find that returned all 10,000 docs in a single batch.
      {
          // Find a small first batch.
          const smallBatchSize = 1;
          let findRes = assert.commandWorked(coll.runCommand({
              find: collName,
              allowPartialResults: false,
              batchSize: smallBatchSize,
              maxTimeMS: 999999
          }));
          // Try to get the remaining results with a getMore.
          // Although we ask for secondBatchSize = 9,999 documents, mongos reuses its remote shard
          // cursors, which still have batchSize = 1, and therefore makes roughly 9,999 internal
          // getMore round trips to the shards.
          const secondBatchSize = nDocs - smallBatchSize;
          const getMoreTimeMS = runtimeMillis(
              () => assert.commandWorked(coll.runCommand(
                  {getMore: findRes.cursor.id, collection: collName, batchSize: secondBatchSize})));
          print("getMoreTimeMS: " + getMoreTimeMS);
          if (getMoreTimeMS > 2 * fullQueryTimeMS) {
              jsTest.log("Bailing out of the getMore test because it took too long.");
              st.stop();
              return;
          }
      }
      
      st.stop();
      }());
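
      To see where the time goes, one could also compare the shards' getmore opcounters before and after the large getMore.  The rough sketch below is illustrative only and assumes the same ShardingTest object `st` as in the test above:

      // Rough sketch: the jump in the shards' "getmore" opcounters around the large getMore shows
      // roughly how many internal round trips mongos made to the shards.
      function shardGetMoreOps() {
          return st.shard0.getDB("admin").serverStatus().opcounters.getmore +
              st.shard1.getDB("admin").serverStatus().opcounters.getmore;
      }
      const opsBefore = shardGetMoreOps();
      // ... run the batchSize-1 find and the large getMore from the test above here ...
      const opsAfter = shardGetMoreOps();
      jsTest.log("internal getMores issued to the shards: " + (opsAfter - opsBefore));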
      

            Assignee: backlog-query-execution [DO NOT USE] Backlog - Query Execution
            Reporter: Steve Tarzia (steve.tarzia@mongodb.com)
            Votes: 0
            Watchers: 2
