Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-57221

Inconsistent delete performance on collections with multiple secondary indexes on same key

    • Type: Icon: Bug Bug
    • Resolution: Fixed
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: 4.2.14, 4.4.6, 5.0.0-rc7
    • Component/s: None
    • Labels:
      None
    • Storage Execution
    • Fully Compatible
    • ALL
    • Hide

      Run the following test:

      /**
       * Reproduces inconsistent delete performance with secondary indexes.
       * Notably, deletes on collections with multiple secondary indexes have significantly
       * worse performance than deletes with one index or deletes that hint a specific index.
       */
      (function() {
      
      const time = (fn, desc) => {
          print("starting " + desc);
          let start = new Date();
          fn();
          let end = new Date();
          print(desc + ": " + (end - start) + "ms");
      };
      
      const key = 'value';
      const bulkLoad = (coll) => {
          coll.drop();
          const DOCS = 10 * 1000;
          for (let i = 0; i < DOCS / 1000; i++) {
              let bulk = coll.initializeUnorderedBulkOp();
              for (let j = 0; j < 1000; j++) {
                  let val = i * 1000 + j;
                  // a is not unique, b is
                  bulk.insert({a: key, b: val, c: val});
              }
              assert.commandWorked(bulk.execute());
          }
          print("inserted " + DOCS);
      };
      
      const testColl = db.coll;
      (function remove1() {
          bulkLoad(testColl);
          assert.commandWorked(testColl.createIndex({a: 1, b: 1}));
      
          time(() => {
              assert.commandWorked(
                  db.runCommand({delete: testColl.getName(), deletes: [{q: {a: key}, limit: 0}]}));
          }, "remove with 1 index");
      })();
      
      (function remove2() {
          bulkLoad(testColl);
          assert.commandWorked(testColl.createIndex({a: 1, b: 1}));
          assert.commandWorked(testColl.createIndex({a: 1, c: 1}));
      
          time(() => {
              assert.commandWorked(
                  db.runCommand({delete: testColl.getName(), deletes: [{q: {a: key}, limit: 0}]}));
          }, "remove with 2 indexes");
      })();
      
      (function remove2Hint() {
          bulkLoad(testColl);
          assert.commandWorked(testColl.createIndex({a: 1, b: 1}));
          assert.commandWorked(testColl.createIndex({a: 1, c: 1}));
      
          time(() => {
              assert.commandWorked(db.runCommand(
                  {delete: testColl.getName(), deletes: [{q: {a: key}, limit: 0, hint: 'a_1_b_1'}]}));
          }, "remove with 2 indexes and a hint");
      })();
      })();
      

      Output locally:

      inserted 10000
      starting remove with 1 index
      remove with 1 index: 176ms
      inserted 10000
      starting remove with 2 indexes
      remove with 2 indexes: 3386ms
      inserted 10000
      starting remove with 2 indexes and a hint
      remove with 2 indexes and a hint: 189ms
      
      Show
      Run the following test: /** * Reproduces inconsistent delete performance with secondary indexes. * Notably, deletes on collections with multiple secondary indexes have significantly * worse performance than deletes with one index or deletes that hint a specific index. */ ( function () { const time = (fn, desc) => { print( "starting " + desc); let start = new Date(); fn(); let end = new Date(); print(desc + ": " + (end - start) + "ms" ); }; const key = 'value' ; const bulkLoad = (coll) => { coll.drop(); const DOCS = 10 * 1000; for (let i = 0; i < DOCS / 1000; i++) { let bulk = coll.initializeUnorderedBulkOp(); for (let j = 0; j < 1000; j++) { let val = i * 1000 + j; // a is not unique, b is bulk.insert({a: key, b: val, c: val}); } assert.commandWorked(bulk.execute()); } print( "inserted " + DOCS); }; const testColl = db.coll; ( function remove1() { bulkLoad(testColl); assert.commandWorked(testColl.createIndex({a: 1, b: 1})); time(() => { assert.commandWorked( db.runCommand({ delete : testColl.getName(), deletes: [{q: {a: key}, limit: 0}]})); }, "remove with 1 index" ); })(); ( function remove2() { bulkLoad(testColl); assert.commandWorked(testColl.createIndex({a: 1, b: 1})); assert.commandWorked(testColl.createIndex({a: 1, c: 1})); time(() => { assert.commandWorked( db.runCommand({ delete : testColl.getName(), deletes: [{q: {a: key}, limit: 0}]})); }, "remove with 2 indexes" ); })(); ( function remove2Hint() { bulkLoad(testColl); assert.commandWorked(testColl.createIndex({a: 1, b: 1})); assert.commandWorked(testColl.createIndex({a: 1, c: 1})); time(() => { assert.commandWorked(db.runCommand( { delete : testColl.getName(), deletes: [{q: {a: key}, limit: 0, hint: 'a_1_b_1' }]})); }, "remove with 2 indexes and a hint" ); })(); })(); Output locally: inserted 10000 starting remove with 1 index remove with 1 index: 176ms inserted 10000 starting remove with 2 indexes remove with 2 indexes: 3386ms inserted 10000 starting remove with 2 indexes and a hint remove with 2 indexes and a hint: 189ms
    • Execution Team 2021-06-28, QE 2021-11-01, QE 2021-11-15, QE 2021-11-29, QE 2021-12-13, QE 2021-12-27, QE 2022-01-10, QE 2022-02-07, QE 2022-02-21, QE 2022-01-24

      Delete performance appears to suffer greatly if there are multiple secondary indexes with the same first field in a key. This does not appear to have an obvious reason, because using a hint improves performance significantly.

      For example:

      • Say collection has two secondary indexes {a: 1, b: 1} and {a: 1, c: 1} with 10K documents
      • Perform a delete on {a: <value>}
      • This query takes 5 seconds. If an index is dropped or a hint is used, the same query performance improves to around 200ms.
      • As additional indexes are added, performance degrades linearly

      Investigate the reason for this discrepancy.

      Note: I tested this regression and it exists at least as far back as 4.2

            Assignee:
            backlog-server-execution [DO NOT USE] Backlog - Storage Execution Team
            Reporter:
            louis.williams@mongodb.com Louis Williams
            Votes:
            1 Vote for this issue
            Watchers:
            45 Start watching this issue

              Created:
              Updated:
              Resolved: