Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-42786

Ticket acquisition should more accurately adjust timeout for maxTimeMS

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Open
    • Priority: Major - P3
    • Resolution: Unresolved
    • Affects Version/s: None
    • Fix Version/s: Backlog
    • Component/s: Concurrency
    • Labels:
    • Steps To Reproduce:
      Hide

      /*
       * Ticket acquisition not respecting maxTimeMS.
       */
      load("jstests/libs/check_log.js");
      (function() {
       
      "use strict";
       
      const testName = "repro";
      const dbName = "test";
      const collName = "coll";
      // We set the number of write tickets to be a small value in order to avoid needing to spawn a
      // large number of threads to exhaust all of the available ones.
      const kNumWriteTickets = 5;
       
      var rst = new ReplSetTest({
          name: testName,
          nodes: [{}, {rsConfig: {priority: 0}}],
          nodeOptions: {setParameter: {wiredTigerConcurrentWriteTransactions: kNumWriteTickets}}
      });
      rst.startSet();
      rst.initiate();
       
      var primary = rst.getPrimary();
      const primaryDB = primary.getDB(dbName);
      const primaryAdmin = primary.getDB("admin");
      const primaryColl = primaryDB[collName];
      const collNss = primaryColl.getFullName();
       
      TestData.dbName = dbName;
      TestData.collName = collName;
       
      jsTestLog("Create Collection");
      assert.commandWorked(primaryDB.createCollection(collName));
      rst.awaitReplication();
       
      assert.commandWorked(primaryDB.setProfilingLevel(1, {slowms: 510}));
       
      // Enable fail point which makes insert thread to hang with write ticket held.
      assert.commandWorked(primaryAdmin.runCommand(
          {configureFailPoint: "hangWithLockDuringBatchInsert", mode: "alwaysOn"}));
       
      var writeFunc = () => {
          jsTestLog("Do a document write");
          primaryDB = db.getSiblingDB(TestData.dbName);
          assert.writeOK(
              primaryDB[TestData.collName].insert({x: 1}, {"writeConcern": {"w": "majority"}}));
      };
       
      let writers = [];
      // Start the write threads.
      for (let i = 0; i <= kNumWriteTickets; i++) {
          writers.push(startParallelShell(writeFunc, primary.port));
      }
       
      // We wait until all of the inserts threads to reach the fail point to know that we've exhausted
      // all of the available write tickets.
      assert.soon(
          () => {
              return primaryAdmin
                         .aggregate([
                             {$currentOp: {}},
                             {$match: {$and: [{"ns": collNss}, {"msg": "hangWithLockDuringBatchInsert"}]}}
                         ])
                         .itcount() == kNumWriteTickets;
          },
          () => {
              return `Didn't find ${kNumWriteTickets} insert commands running: ` +
                  tojson(primaryAdmin.currentOp());
          });
       
      jsTestLog("Do a blocked write that waits for write ticket.");
      assert.commandFailedWithCode(
          primaryDB.runCommand({insert: collName, documents: [{x: 2}], maxTimeMS: 510}),
          ErrorCodes.MaxTimeMSExpired);
       
      // Attempting to find insert cmd log message returned within 510ms.
      let found = false;
      assert.soon(() => {
          const logLines = checkLog.getGlobalLog(primaryDB);
          for (let line of logLines) {
              let matchResult =
                  line.match(/command: insert (.*) maxTimeMS: 510.0(.*) protocol:op_msg [5-6]/);
              if (matchResult) {
                  jsTestLog("Found the matching log entry" + matchResult);
                  found = true;
                  break;
              }
          }
          return found;
      }, 'Could not find the matching log entry');
       
      // Disable failpoint to allow insert threads to continue.
      assert.commandWorked(
          primaryAdmin.runCommand({configureFailPoint: "hangWithLockDuringBatchInsert", mode: "off"}));
       
      // Wait for the insert threads to join.
      for (let writer of writers) {
          writer();
      }
       
      rst.stopSet();
      })();
      

      Show
      /* * Ticket acquisition not respecting maxTimeMS. */ load("jstests/libs/check_log.js"); (function() {   "use strict";   const testName = "repro"; const dbName = "test"; const collName = "coll"; // We set the number of write tickets to be a small value in order to avoid needing to spawn a // large number of threads to exhaust all of the available ones. const kNumWriteTickets = 5;   var rst = new ReplSetTest({ name: testName, nodes: [{}, {rsConfig: {priority: 0}}], nodeOptions: {setParameter: {wiredTigerConcurrentWriteTransactions: kNumWriteTickets}} }); rst.startSet(); rst.initiate();   var primary = rst.getPrimary(); const primaryDB = primary.getDB(dbName); const primaryAdmin = primary.getDB("admin"); const primaryColl = primaryDB[collName]; const collNss = primaryColl.getFullName();   TestData.dbName = dbName; TestData.collName = collName;   jsTestLog("Create Collection"); assert.commandWorked(primaryDB.createCollection(collName)); rst.awaitReplication();   assert.commandWorked(primaryDB.setProfilingLevel(1, {slowms: 510}));   // Enable fail point which makes insert thread to hang with write ticket held. assert.commandWorked(primaryAdmin.runCommand( {configureFailPoint: "hangWithLockDuringBatchInsert", mode: "alwaysOn"}));   var writeFunc = () => { jsTestLog("Do a document write"); primaryDB = db.getSiblingDB(TestData.dbName); assert.writeOK( primaryDB[TestData.collName].insert({x: 1}, {"writeConcern": {"w": "majority"}})); };   let writers = []; // Start the write threads. for (let i = 0; i <= kNumWriteTickets; i++) { writers.push(startParallelShell(writeFunc, primary.port)); }   // We wait until all of the inserts threads to reach the fail point to know that we've exhausted // all of the available write tickets. assert.soon( () => { return primaryAdmin .aggregate([ {$currentOp: {}}, {$match: {$and: [{"ns": collNss}, {"msg": "hangWithLockDuringBatchInsert"}]}} ]) .itcount() == kNumWriteTickets; }, () => { return `Didn't find ${kNumWriteTickets} insert commands running: ` + tojson(primaryAdmin.currentOp()); });   jsTestLog("Do a blocked write that waits for write ticket."); assert.commandFailedWithCode( primaryDB.runCommand({insert: collName, documents: [{x: 2}], maxTimeMS: 510}), ErrorCodes.MaxTimeMSExpired);   // Attempting to find insert cmd log message returned within 510ms. let found = false; assert.soon(() => { const logLines = checkLog.getGlobalLog(primaryDB); for (let line of logLines) { let matchResult = line.match(/command: insert (.*) maxTimeMS: 510.0(.*) protocol:op_msg [5-6]/); if (matchResult) { jsTestLog("Found the matching log entry" + matchResult); found = true; break; } } return found; }, 'Could not find the matching log entry');   // Disable failpoint to allow insert threads to continue. assert.commandWorked( primaryAdmin.runCommand({configureFailPoint: "hangWithLockDuringBatchInsert", mode: "off"}));   // Wait for the insert threads to join. for (let writer of writers) { writer(); }   rst.stopSet(); })();

      Description

      Currently, for ticket acquisition, we don't respect the "maxTimeMS". We should do the same thing like we do it for the lock acquisition wait /prepare conflict wait . Though, TicketHolder::waitForTicketUntil() has the default smallest wait interval as 500 ms. If any user sets the command maxTimeMS < 500ms, then it's a problem.-

      EDIT:

      It's not just problem with maxTimeMs <500ms. Basically, ticket acquisition can wait up to maxTimeMs+ 500ms (default semaphore wait interval), Eg. If user sets maxTimeMs = 1005, the cmd wait  for 1500ms to acquire ticket. 

        Attachments

          Activity

            People

            • Votes:
              0 Vote for this issue
              Watchers:
              10 Start watching this issue

              Dates

              • Created:
                Updated: