Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-62175

Mongos fails to attach RetryableWrite Error Label For Command Interrupted In _parseCommand

    XMLWordPrintableJSON

Details

    • Minor Change
    • ALL
    • v6.0, v5.3, v5.0, v4.4
    • Hide

      The following resmoke invocation will execute the reproducing test once the git patch has been applied.

      ./buildscripts/resmoke.py run --suite=sharding jstests/sharding/mongos_insert_fails_with_shutdown.js
      

      The patch can be applied by executing the following command in the root of the repository.

      git apply git.diff
      

      diff --git a/jstests/sharding/mongos_insert_fails_with_shutdown.js b/jstests/sharding/mongos_insert_fails_with_shutdown.js
      new file mode 100644
      index 00000000000..ea21ee346c4
      --- /dev/null
      +++ b/jstests/sharding/mongos_insert_fails_with_shutdown.js
      @@ -0,0 +1,35 @@
      +/**
      + */
      + 
      + (function() {
      +    "use strict";
      +     
      +    load("jstests/libs/fail_point_util.js");
      +    load('jstests/libs/parallelTester.js');
      +     
      +    const st = new ShardingTest({
      +        mongos: 1,
      +        config: 1,
      +        shards: 2,
      +    });
      +     
      +    const hangBeforeCheckInterruptFailPoint = configureFailPoint(st.s, "hangBeforeCheckInterrupt");
      +     
      +    const dbName = "test";
      +    const collName = "mycoll";
      +     
      +    const insertThread = new Thread(function insertDoc(host, dbName, collName) {
      +        const conn = new Mongo(host);
      +        const collection = conn.getDB(dbName).getCollection(collName);
      +        const res = collection.insert({key: 1});
      +        jsTest.log(`Inserted document with _id: ${tojson(res)}`);
      +        assert.commandFailedWithCode(res, ErrorCodes.InterruptedAtShutdown);
      +    }, st.s.host, dbName, collName);
      +     
      +    insertThread.start();
      +    hangBeforeCheckInterruptFailPoint.wait();
      +    st.stopMongos(0);
      +    insertThread.join();
      +     
      +    st.stop();
      +    })();
      \ No newline at end of file
      diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp
      index b9d903e6685..abb9a892a3c 100644
      --- a/src/mongo/s/commands/strategy.cpp
      +++ b/src/mongo/s/commands/strategy.cpp
      @@ -27,6 +27,7 @@
        *    it in the license file.
        */
       
      +#include "mongo/util/time_support.h"
       #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
       
       #include "mongo/platform/basic.h"
      @@ -372,6 +373,7 @@ Future<void> ExecCommandClient::run() {
       }
       
       MONGO_FAIL_POINT_DEFINE(doNotRefreshShardsOnRetargettingError);
      +MONGO_FAIL_POINT_DEFINE(hangBeforeCheckInterrupt);
       
       /**
        * Produces a future-chain that parses the command, runs the parsed command, and captures the result
      @@ -485,7 +487,6 @@ void ParseAndRunCommand::_parseCommand() {
           const auto& m = _rec->getMessage();
           const auto& request = _rec->getRequest();
           auto replyBuilder = _rec->getReplyBuilder();
      -
           auto const command = CommandHelpers::findCommand(_commandName);
           if (!command) {
               const std::string errorMsg = "no such cmd: {}"_format(_commandName);
      @@ -528,6 +529,11 @@ void ParseAndRunCommand::_parseCommand() {
           if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) {
               opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired);
           }
      +    if (_commandName == "insert") {
      +        LOGV2(555555, "ABOUT TO PAUSE");
      +        hangBeforeCheckInterrupt.pauseWhileSet();
      +        LOGV2(555555, "PAST THE FAILPOINT");
      +    }
           opCtx->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.
       
           // If the command includes a 'comment' field, set it on the current OpCtx.
      diff --git a/src/mongo/s/mongos_main.cpp b/src/mongo/s/mongos_main.cpp
      index 0d14e523838..534a1638ea9 100644
      --- a/src/mongo/s/mongos_main.cpp
      +++ b/src/mongo/s/mongos_main.cpp
      @@ -27,6 +27,7 @@
        *    it in the license file.
        */
       
      +#include "mongo/util/time_support.h"
       #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
       
       #include "mongo/platform/basic.h"
      @@ -315,11 +316,13 @@ void cleanupTask(const ShutdownTaskArgs& shutdownArgs) {
       
               if (serviceContext) {
                   serviceContext->setKillAllOperations();
      -
      +            LOGV2(55555, "ABOUT TO TURN OFF FAILPOINT");
      +            globalFailPointRegistry().find("hangBeforeCheckInterrupt")->setMode(FailPoint::Mode::off, 0);
                   if (MONGO_unlikely(pauseWhileKillingOperationsAtShutdown.shouldFail())) {
                       LOGV2(4701800, "pauseWhileKillingOperationsAtShutdown failpoint enabled");
                       sleepsecs(1);
                   }
      +            sleepsecs(3);
               }
       
               // Perform all shutdown operations after setKillAllOperations is called in order to ensure
      
      

      Show
      The following resmoke invocation will execute the reproducing test once the git patch has been applied. /buildscripts/resmoke.py run --suite=sharding jstests/sharding/mongos_insert_fails_with_shutdown.js The patch can be applied by executing the following command in the root of the repository. git apply git.diff diff --git a/jstests/sharding/mongos_insert_fails_with_shutdown.js b/jstests/sharding/mongos_insert_fails_with_shutdown.js new file mode 100644 index 00000000000..ea21ee346c4 --- /dev/null +++ b/jstests/sharding/mongos_insert_fails_with_shutdown.js @@ -0,0 +1,35 @@ +/** + */ + + (function() { + "use strict"; + + load("jstests/libs/fail_point_util.js"); + load('jstests/libs/parallelTester.js'); + + const st = new ShardingTest({ + mongos: 1, + config: 1, + shards: 2, + }); + + const hangBeforeCheckInterruptFailPoint = configureFailPoint(st.s, "hangBeforeCheckInterrupt"); + + const dbName = "test"; + const collName = "mycoll"; + + const insertThread = new Thread(function insertDoc(host, dbName, collName) { + const conn = new Mongo(host); + const collection = conn.getDB(dbName).getCollection(collName); + const res = collection.insert({key: 1}); + jsTest.log(`Inserted document with _id: ${tojson(res)}`); + assert.commandFailedWithCode(res, ErrorCodes.InterruptedAtShutdown); + }, st.s.host, dbName, collName); + + insertThread.start(); + hangBeforeCheckInterruptFailPoint.wait(); + st.stopMongos(0); + insertThread.join(); + + st.stop(); + })(); \ No newline at end of file diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp index b9d903e6685..abb9a892a3c 100644 --- a/src/mongo/s/commands/strategy.cpp +++ b/src/mongo/s/commands/strategy.cpp @@ -27,6 +27,7 @@ * it in the license file. 
*/ +#include "mongo/util/time_support.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding #include "mongo/platform/basic.h" @@ -372,6 +373,7 @@ Future<void> ExecCommandClient::run() { } MONGO_FAIL_POINT_DEFINE(doNotRefreshShardsOnRetargettingError); +MONGO_FAIL_POINT_DEFINE(hangBeforeCheckInterrupt); /** * Produces a future-chain that parses the command, runs the parsed command, and captures the result @@ -485,7 +487,6 @@ void ParseAndRunCommand::_parseCommand() { const auto& m = _rec->getMessage(); const auto& request = _rec->getRequest(); auto replyBuilder = _rec->getReplyBuilder(); - auto const command = CommandHelpers::findCommand(_commandName); if (!command) { const std::string errorMsg = "no such cmd: {}"_format(_commandName); @@ -528,6 +529,11 @@ void ParseAndRunCommand::_parseCommand() { if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) { opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired); } + if (_commandName == "insert") { + LOGV2(555555, "ABOUT TO PAUSE"); + hangBeforeCheckInterrupt.pauseWhileSet(); + LOGV2(555555, "PAST THE FAILPOINT"); + } opCtx->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point. // If the command includes a 'comment' field, set it on the current OpCtx. diff --git a/src/mongo/s/mongos_main.cpp b/src/mongo/s/mongos_main.cpp index 0d14e523838..534a1638ea9 100644 --- a/src/mongo/s/mongos_main.cpp +++ b/src/mongo/s/mongos_main.cpp @@ -27,6 +27,7 @@ * it in the license file. 
*/ +#include "mongo/util/time_support.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding #include "mongo/platform/basic.h" @@ -315,11 +316,13 @@ void cleanupTask(const ShutdownTaskArgs& shutdownArgs) { if (serviceContext) { serviceContext->setKillAllOperations(); - + LOGV2(55555, "ABOUT TO TURN OFF FAILPOINT"); + globalFailPointRegistry().find("hangBeforeCheckInterrupt")->setMode(FailPoint::Mode::off, 0); if (MONGO_unlikely(pauseWhileKillingOperationsAtShutdown.shouldFail())) { LOGV2(4701800, "pauseWhileKillingOperationsAtShutdown failpoint enabled"); sleepsecs(1); } + sleepsecs(3); } // Perform all shutdown operations after setKillAllOperations is called in order to ensure
    • Sharding NYC 2022-04-04, Sharding NYC 2022-04-18, Sharding 2022-05-02, Sharding NYC 2022-05-16
    • 3

    Description

      This issue was originally discovered in the linked HELP ticket. It surfaced because of the shutdown that is required as part of version upgrades in Atlas.

      The root cause lies in how the ServiceEntryPoint logic in MongoS works.

      When a command fails, the ServiceEntryPoint logic calls getErrorLabels to attach the appropriate error labels to the response. What matters here is that this step relies on the session information stored in _osi to decide whether to attach the kRetryableWrite label.

      The problem is that _osi is emplaced only after a call to checkForInterrupt.

      As a result, if the command is interrupted at that checkForInterrupt call, _osi has not been set yet, so the retryable write label is not attached to the response even though it should be.

      Attachments

        Activity

          People

            rachita.dhawan@mongodb.com Rachita Dhawan
            luis.osta@mongodb.com Luis Osta (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            17 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: