Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-57221

Inconsistent delete performance on collections with multiple secondary indexes on same key

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Open
    • Priority: Major - P3
    • Resolution: Unresolved
    • Affects Version/s: 4.2.14, 4.4.6, 5.0.0-rc7
    • Fix Version/s: Backlog
    • Component/s: None
    • Labels:
      None
    • Operating System:
      ALL
    • Steps To Reproduce:
      Hide

      Run the following test:

      /**
       * Reproduces inconsistent delete performance with secondary indexes.
       * Notably, deletes on collections with multiple secondary indexes have significantly
       * worse performance than deletes with one index or deletes that hint a specific index.
       */
      (function() {
       
      const time = (fn, desc) => {
          print("starting " + desc);
          let start = new Date();
          fn();
          let end = new Date();
          print(desc + ": " + (end - start) + "ms");
      };
       
      const key = 'value';
      const bulkLoad = (coll) => {
          coll.drop();
          const DOCS = 10 * 1000;
          for (let i = 0; i < DOCS / 1000; i++) {
              let bulk = coll.initializeUnorderedBulkOp();
              for (let j = 0; j < 1000; j++) {
                  let val = i * 1000 + j;
                  // a is not unique, b is
                  bulk.insert({a: key, b: val, c: val});
              }
              assert.commandWorked(bulk.execute());
          }
          print("inserted " + DOCS);
      };
       
      const testColl = db.coll;
      (function remove1() {
          bulkLoad(testColl);
          assert.commandWorked(testColl.createIndex({a: 1, b: 1}));
       
          time(() => {
              assert.commandWorked(
                  db.runCommand({delete: testColl.getName(), deletes: [{q: {a: key}, limit: 0}]}));
          }, "remove with 1 index");
      })();
       
      (function remove2() {
          bulkLoad(testColl);
          assert.commandWorked(testColl.createIndex({a: 1, b: 1}));
          assert.commandWorked(testColl.createIndex({a: 1, c: 1}));
       
          time(() => {
              assert.commandWorked(
                  db.runCommand({delete: testColl.getName(), deletes: [{q: {a: key}, limit: 0}]}));
          }, "remove with 2 indexes");
      })();
       
      (function remove2Hint() {
          bulkLoad(testColl);
          assert.commandWorked(testColl.createIndex({a: 1, b: 1}));
          assert.commandWorked(testColl.createIndex({a: 1, c: 1}));
       
          time(() => {
              assert.commandWorked(db.runCommand(
                  {delete: testColl.getName(), deletes: [{q: {a: key}, limit: 0, hint: 'a_1_b_1'}]}));
          }, "remove with 2 indexes and a hint");
      })();
      })();
      

      Output locally:

      inserted 10000
      starting remove with 1 index
      remove with 1 index: 176ms
      inserted 10000
      starting remove with 2 indexes
      remove with 2 indexes: 3386ms
      inserted 10000
      starting remove with 2 indexes and a hint
      remove with 2 indexes and a hint: 189ms
      

      Show
      Run the following test: /** * Reproduces inconsistent delete performance with secondary indexes. * Notably, deletes on collections with multiple secondary indexes have significantly * worse performance than deletes with one index or deletes that hint a specific index. */ ( function () {   const time = (fn, desc) => { print( "starting " + desc); let start = new Date(); fn(); let end = new Date(); print(desc + ": " + (end - start) + "ms" ); };   const key = 'value' ; const bulkLoad = (coll) => { coll.drop(); const DOCS = 10 * 1000; for (let i = 0; i < DOCS / 1000; i++) { let bulk = coll.initializeUnorderedBulkOp(); for (let j = 0; j < 1000; j++) { let val = i * 1000 + j; // a is not unique, b is bulk.insert({a: key, b: val, c: val}); } assert.commandWorked(bulk.execute()); } print( "inserted " + DOCS); };   const testColl = db.coll; ( function remove1() { bulkLoad(testColl); assert.commandWorked(testColl.createIndex({a: 1, b: 1}));   time(() => { assert.commandWorked( db.runCommand({ delete : testColl.getName(), deletes: [{q: {a: key}, limit: 0}]})); }, "remove with 1 index" ); })();   ( function remove2() { bulkLoad(testColl); assert.commandWorked(testColl.createIndex({a: 1, b: 1})); assert.commandWorked(testColl.createIndex({a: 1, c: 1}));   time(() => { assert.commandWorked( db.runCommand({ delete : testColl.getName(), deletes: [{q: {a: key}, limit: 0}]})); }, "remove with 2 indexes" ); })();   ( function remove2Hint() { bulkLoad(testColl); assert.commandWorked(testColl.createIndex({a: 1, b: 1})); assert.commandWorked(testColl.createIndex({a: 1, c: 1}));   time(() => { assert.commandWorked(db.runCommand( { delete : testColl.getName(), deletes: [{q: {a: key}, limit: 0, hint: 'a_1_b_1' }]})); }, "remove with 2 indexes and a hint" ); })(); })(); Output locally: inserted 10000 starting remove with 1 index remove with 1 index: 176ms inserted 10000 starting remove with 2 indexes remove with 2 indexes: 3386ms inserted 10000 starting remove with 2 indexes and a hint remove with 2 indexes and a hint: 189ms
    • Sprint:
      Execution Team 2021-06-28

      Description

      Delete performance appears to suffer greatly if there are multiple secondary indexes with the same first field in a key. This does not appear to have an obvious reason, because using a hint improves performance significantly.

      For example:

      • Say collection has two secondary indexes {a: 1, b: 1} and {a: 1, c: 1} with 10K documents
      • Perform a delete on {a: <value>}
      • This query takes 5 seconds. If an index is dropped or a hint is used, the same query performance improves to around 200ms.
      • As additional indexes are added, performance degrades linearly

      Investigate the reason for this discrepancy.

      Note: I tested this regression and it exists at least as far back as 4.2

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              backlog-query-execution Backlog - Query Execution
              Reporter:
              louis.williams Louis Williams
              Participants:
              Votes:
              0 Vote for this issue
              Watchers:
              25 Start watching this issue

                Dates

                Created:
                Updated: