Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-62175

Mongos fails to attach RetryableWrite Error Label For Command Interrupted In _parseCommand

    • Minor Change
    • ALL
    • v6.0, v5.3, v5.0, v4.4
    • Hide

      The following resmoke invocation will execute the reproducing test once the git patch has been applied.

      ./buildscripts/resmoke.py run --suite=sharding jstests/sharding/mongos_insert_fails_with_shutdown.js
      

      The patch can be applied by executing the following command in the root of the repository.

      git apply git.diff
      
      Contents of git.diff:
      diff --git a/jstests/sharding/mongos_insert_fails_with_shutdown.js b/jstests/sharding/mongos_insert_fails_with_shutdown.js
      new file mode 100644
      index 00000000000..ea21ee346c4
      --- /dev/null
      +++ b/jstests/sharding/mongos_insert_fails_with_shutdown.js
      @@ -0,0 +1,35 @@
      +/**
      + */
      + 
      + (function() {
      +    "use strict";
      +     
      +    load("jstests/libs/fail_point_util.js");
      +    load('jstests/libs/parallelTester.js');
      +     
      +    const st = new ShardingTest({
      +        mongos: 1,
      +        config: 1,
      +        shards: 2,
      +    });
      +     
      +    const hangBeforeCheckInterruptFailPoint = configureFailPoint(st.s, "hangBeforeCheckInterrupt");
      +     
      +    const dbName = "test";
      +    const collName = "mycoll";
      +     
      +    const insertThread = new Thread(function insertDoc(host, dbName, collName) {
      +        const conn = new Mongo(host);
      +        const collection = conn.getDB(dbName).getCollection(collName);
      +        const res = collection.insert({key: 1});
      +        jsTest.log(`Inserted document with _id: ${tojson(res)}`);
      +        assert.commandFailedWithCode(res, ErrorCodes.InterruptedAtShutdown);
      +    }, st.s.host, dbName, collName);
      +     
      +    insertThread.start();
      +    hangBeforeCheckInterruptFailPoint.wait();
      +    st.stopMongos(0);
      +    insertThread.join();
      +     
      +    st.stop();
      +    })();
      \ No newline at end of file
      diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp
      index b9d903e6685..abb9a892a3c 100644
      --- a/src/mongo/s/commands/strategy.cpp
      +++ b/src/mongo/s/commands/strategy.cpp
      @@ -27,6 +27,7 @@
        *    it in the license file.
        */
       
      +#include "mongo/util/time_support.h"
       #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
       
       #include "mongo/platform/basic.h"
      @@ -372,6 +373,7 @@ Future<void> ExecCommandClient::run() {
       }
       
       MONGO_FAIL_POINT_DEFINE(doNotRefreshShardsOnRetargettingError);
      +MONGO_FAIL_POINT_DEFINE(hangBeforeCheckInterrupt);
       
       /**
        * Produces a future-chain that parses the command, runs the parsed command, and captures the result
      @@ -485,7 +487,6 @@ void ParseAndRunCommand::_parseCommand() {
           const auto& m = _rec->getMessage();
           const auto& request = _rec->getRequest();
           auto replyBuilder = _rec->getReplyBuilder();
      -
           auto const command = CommandHelpers::findCommand(_commandName);
           if (!command) {
               const std::string errorMsg = "no such cmd: {}"_format(_commandName);
      @@ -528,6 +529,11 @@ void ParseAndRunCommand::_parseCommand() {
           if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) {
               opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired);
           }
      +    if (_commandName == "insert") {
      +        LOGV2(555555, "ABOUT TO PAUSE");
      +        hangBeforeCheckInterrupt.pauseWhileSet();
      +        LOGV2(555555, "PAST THE FAILPOINT");
      +    }
           opCtx->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.
       
           // If the command includes a 'comment' field, set it on the current OpCtx.
      diff --git a/src/mongo/s/mongos_main.cpp b/src/mongo/s/mongos_main.cpp
      index 0d14e523838..534a1638ea9 100644
      --- a/src/mongo/s/mongos_main.cpp
      +++ b/src/mongo/s/mongos_main.cpp
      @@ -27,6 +27,7 @@
        *    it in the license file.
        */
       
      +#include "mongo/util/time_support.h"
       #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
       
       #include "mongo/platform/basic.h"
      @@ -315,11 +316,13 @@ void cleanupTask(const ShutdownTaskArgs& shutdownArgs) {
       
               if (serviceContext) {
                   serviceContext->setKillAllOperations();
      -
      +            LOGV2(55555, "ABOUT TO TURN OFF FAILPOINT");
      +            globalFailPointRegistry().find("hangBeforeCheckInterrupt")->setMode(FailPoint::Mode::off, 0);
                   if (MONGO_unlikely(pauseWhileKillingOperationsAtShutdown.shouldFail())) {
                       LOGV2(4701800, "pauseWhileKillingOperationsAtShutdown failpoint enabled");
                       sleepsecs(1);
                   }
      +            sleepsecs(3);
               }
       
               // Perform all shutdown operations after setKillAllOperations is called in order to ensure
      
      
      Show
      The following resmoke invocation will execute the reproducing test once the git patch has been applied. Unable to find source-code formatter for language: shell. Available languages are: actionscript, ada, applescript, bash, c, c#, c++, cpp, css, erlang, go, groovy, haskell, html, java, javascript, js, json, lua, none, nyan, objc, perl, php, python, r, rainbow, ruby, scala, sh, sql, swift, visualbasic, xml, yaml /buildscripts/resmoke.py run --suite=sharding jstests/sharding/mongos_insert_fails_with_shutdown.js The patch can be applied by executing the following command in the root of the repository. Unable to find source-code formatter for language: shell. Available languages are: actionscript, ada, applescript, bash, c, c#, c++, cpp, css, erlang, go, groovy, haskell, html, java, javascript, js, json, lua, none, nyan, objc, perl, php, python, r, rainbow, ruby, scala, sh, sql, swift, visualbasic, xml, yaml git apply git.diff Unable to find source-code formatter for language: diff. Available languages are: actionscript, ada, applescript, bash, c, c#, c++, cpp, css, erlang, go, groovy, haskell, html, java, javascript, js, json, lua, none, nyan, objc, perl, php, python, r, rainbow, ruby, scala, sh, sql, swift, visualbasic, xml, yaml diff --git a/jstests/sharding/mongos_insert_fails_with_shutdown.js b/jstests/sharding/mongos_insert_fails_with_shutdown.js new file mode 100644 index 00000000000..ea21ee346c4 --- /dev/ null +++ b/jstests/sharding/mongos_insert_fails_with_shutdown.js @@ -0,0 +1,35 @@ +/** + */ + + (function() { + "use strict" ; + + load( "jstests/libs/fail_point_util.js" ); + load( 'jstests/libs/parallelTester.js' ); + + const st = new ShardingTest({ + mongos: 1, + config: 1, + shards: 2, + }); + + const hangBeforeCheckInterruptFailPoint = configureFailPoint(st.s, "hangBeforeCheckInterrupt" ); + + const dbName = "test" ; + const collName = "mycoll" ; + + const insertThread = new Thread (function insertDoc(host, dbName, collName) { + const conn = new 
Mongo(host); + const collection = conn.getDB(dbName).getCollection(collName); + const res = collection.insert({key: 1}); + jsTest.log(`Inserted document with _id: ${tojson(res)}`); + assert .commandFailedWithCode(res, ErrorCodes.InterruptedAtShutdown); + }, st.s.host, dbName, collName); + + insertThread.start(); + hangBeforeCheckInterruptFailPoint.wait(); + st.stopMongos(0); + insertThread.join(); + + st.stop(); + })(); \ No newline at end of file diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp index b9d903e6685..abb9a892a3c 100644 --- a/src/mongo/s/commands/strategy.cpp +++ b/src/mongo/s/commands/strategy.cpp @@ -27,6 +27,7 @@ * it in the license file. */ +#include "mongo/util/time_support.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding #include "mongo/platform/basic.h" @@ -372,6 +373,7 @@ Future<void> ExecCommandClient::run() { } MONGO_FAIL_POINT_DEFINE(doNotRefreshShardsOnRetargettingError); +MONGO_FAIL_POINT_DEFINE(hangBeforeCheckInterrupt); /** * Produces a future -chain that parses the command, runs the parsed command, and captures the result @@ -485,7 +487,6 @@ void ParseAndRunCommand::_parseCommand() { const auto& m = _rec->getMessage(); const auto& request = _rec->getRequest(); auto replyBuilder = _rec->getReplyBuilder(); - auto const command = CommandHelpers::findCommand(_commandName); if (!command) { const std::string errorMsg = "no such cmd: {}" _format(_commandName); @@ -528,6 +529,11 @@ void ParseAndRunCommand::_parseCommand() { if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) { opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired); } + if (_commandName == "insert" ) { + LOGV2(555555, "ABOUT TO PAUSE" ); + hangBeforeCheckInterrupt.pauseWhileSet(); + LOGV2(555555, "PAST THE FAILPOINT" ); + } opCtx->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point. 
// If the command includes a 'comment' field, set it on the current OpCtx. diff --git a/src/mongo/s/mongos_main.cpp b/src/mongo/s/mongos_main.cpp index 0d14e523838..534a1638ea9 100644 --- a/src/mongo/s/mongos_main.cpp +++ b/src/mongo/s/mongos_main.cpp @@ -27,6 +27,7 @@ * it in the license file. */ +#include "mongo/util/time_support.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding #include "mongo/platform/basic.h" @@ -315,11 +316,13 @@ void cleanupTask( const ShutdownTaskArgs& shutdownArgs) { if (serviceContext) { serviceContext->setKillAllOperations(); - + LOGV2(55555, "ABOUT TO TURN OFF FAILPOINT" ); + globalFailPointRegistry().find( "hangBeforeCheckInterrupt" )->setMode(FailPoint::Mode::off, 0); if (MONGO_unlikely(pauseWhileKillingOperationsAtShutdown.shouldFail())) { LOGV2(4701800, "pauseWhileKillingOperationsAtShutdown failpoint enabled" ); sleepsecs(1); } + sleepsecs(3); } // Perform all shutdown operations after setKillAllOperations is called in order to ensure
    • Sharding NYC 2022-04-04, Sharding NYC 2022-04-18, Sharding 2022-05-02, Sharding NYC 2022-05-16
    • 3

      This issue was originally discovered in the linked HELP ticket. It was found due to the shutdown that is required as part of version upgrades in Atlas.

      The fundamental issue is due to how the ServiceEntryPoint logic in MongoS works.

      When a command fails, mongos calls getErrorLabels to attach the appropriate error labels to the response. Relevant here is that getErrorLabels uses the session information in _osi to determine whether or not to attach the kRetryableWrite label.

      The problem is that _osi is emplaced only after a call to checkForInterrupt. If that call throws (for example, because mongos is shutting down), _osi has not been set yet, so the retryable write label is not attached to the response even though it should be.

            Assignee:
            rachita.dhawan@mongodb.com Rachita Dhawan
            Reporter:
            luis.osta@mongodb.com Luis Osta (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            17 Start watching this issue

              Created:
              Updated:
              Resolved: