Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-25497

Fix sharded query path to handle shutdown of the mongos process

    • Fully Compatible
    • ALL
    • Sharding 2016-10-10, Query 2018-02-26, Query 2020-05-04, Query 2020-10-05, Query 2020-10-19
    • 15

      The AsyncResultsMerger is the component of the new mongos find execution path (introduced in version 3.2.0) that uses the TaskExecutor to run find and getMore commands against the shards. When the mongos process shuts down, the TaskExecutor goes through its own shutdown sequence; at that point mongos cursors may still exist, but the TaskExecutor can no longer be used to run commands on remote nodes.

      The AsyncResultsMerger does not behave correctly during TaskExecutor shutdown. This can cause mongos to crash during shutdown in one of several ways.

      Original description

      Hi all,

      I found the following stack trace and I am not sure what the root cause is. It happened on all of the mongos instances within a few minutes of each other. It occurred after a signal 15, so I do not know whether that is the reason.

      2016-08-05T08:03:08.269Z I -        [conn798228] Invariant failure event.isValid() src/mongo/executor/thread_pool_task_executor.cpp 234
      2016-08-05T08:03:08.269Z I -        [conn798228] 
      
      ***aborting after invariant() failure
      
      
      2016-08-05T08:03:08.271Z F -        [conn799245] Got signal: 6 (Aborted).
      
       0xc5dca2 0xc5cbc9 0xc5d3d2 0x7f03252f2cb0 0x7f0324f5b035 0x7f0324f5e79b 0xbe245b 0xa13cb8 0xb8a68c 0xb81536 0xb8777a 0xba0ee7 0xb8e8fb 0x672115 0xc07715 0x7f03252eae9a 0x7f032501836d
      ----- BEGIN BACKTRACE -----
      {"backtrace":[{"b":"400000","o":"85DCA2","s":"_ZN5mongo15printStackTraceERSo"},{"b":"400000","o":"85CBC9"},{"b":"400000","o":"85D3D2"},{"b":"7F03252E3000","o":"FCB0"},{"b":"7F0324F25000","o":"36035","s":"gsignal"},{"b":"7F0324F25000","o":"3979B","s":"abort"},{"b":"400000","o":"7E245B","s":"_ZN5mongo15invariantFailedEPKcS1_j"},{"b":"400000","o":"613CB8"},{"b":"400000","o":"78A68C","s":"_ZN5mongo16RouterStageMerge4killEv"},{"b":"400000","o":"781536","s":"_ZN5mongo24ClusterClientCursorGuardD1Ev"},{"b":"400000","o":"78777A","s":"_ZN5mongo11ClusterFind8runQueryEPNS_16OperationContextERKNS_14CanonicalQueryERKNS_21ReadPreferenceSettingEPSt6vectorINS_7BSONObjESaISA_EE"},{"b":"400000","o":"7A0EE7","s":"_ZN5mongo8Strategy7queryOpEPNS_16OperationContextERNS_7RequestE"},{"b":"400000","o":"78E8FB","s":"_ZN5mongo7Request7processEPNS_16OperationContextEi"},{"b":"400000","o":"272115","s":"_ZN5mongo21ShardedMessageHandler7processERNS_7MessageEPNS_21AbstractMessagingPortE"},{"b":"400000","o":"807715","s":"_ZN5mongo17PortMessageServer17handleIncomingMsgEPv"},{"b":"7F03252E3000","o":"7E9A"},{"b":"7F0324F25000","o":"F336D","s":"clone"}],"processInfo":{ "mongodbVersion" : "3.2.8", "gitVersion" : "ed70e33130c977bda0024c125b56d159573dbaf0", "compiledModules" : [], "uname" : { "sysname" : "Linux", "release" : "3.2.0-77-virtual", "version" : "#114-Ubuntu SMP Tue Mar 10 17:38:02 UTC 2015", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "16C848A3171CB2CF2FDC800A602C9F785595A08F" }, { "b" : "7FFF0700B000", "elfType" : 3, "buildId" : "4CF95530ACFE32D062E1E8F7524D5E956AF54586" }, { "b" : "7F03261FA000", "path" : "/lib/x86_64-linux-gnu/libssl.so.1.0.0", "elfType" : 3, "buildId" : "05BB9627FE4CCFC087FC0A1E064098BAAAF0ABE2" }, { "b" : "7F0325E1E000", "path" : "/lib/x86_64-linux-gnu/libcrypto.so.1.0.0", "elfType" : 3, "buildId" : "88FFD2D95DF12BF9CA931AF6E50F42D83A98C3FE" }, { "b" : "7F0325C16000", "path" : "/lib/x86_64-linux-gnu/librt.so.1", "elfType" : 3, 
"buildId" : "8C19981216B96C14A1C82A4C884D1FA7FECB7979" }, { "b" : "7F0325A12000", "path" : "/lib/x86_64-linux-gnu/libdl.so.2", "elfType" : 3, "buildId" : "68048F961CC6B636BC27B40DE0DD8B8632B10180" }, { "b" : "7F0325716000", "path" : "/lib/x86_64-linux-gnu/libm.so.6", "elfType" : 3, "buildId" : "97900BE41183B1AB4E6322A8FE08D68315FED60A" }, { "b" : "7F0325500000", "path" : "/lib/x86_64-linux-gnu/libgcc_s.so.1", "elfType" : 3, "buildId" : "ECF322A96E26633C5D10F18215170DD4395AF82C" }, { "b" : "7F03252E3000", "path" : "/lib/x86_64-linux-gnu/libpthread.so.0", "elfType" : 3, "buildId" : "9E17F98B86F7723CA1DC42235AEAB33B86FCB89D" }, { "b" : "7F0324F25000", "path" : "/lib/x86_64-linux-gnu/libc.so.6", "elfType" : 3, "buildId" : "FC161CFC250D850EB08A7B76BAD5D2022AAEBB10" }, { "b" : "7F0326458000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "B60D4C2FCB331CEC2764B936F83C0622676CF9ED" }, { "b" : "7F0324D0E000", "path" : "/lib/x86_64-linux-gnu/libz.so.1", "elfType" : 3, "buildId" : "F695ECFCF3918D5D34989398A14B7ECDD9F46CD0" } ] }}
       mongos(_ZN5mongo15printStackTraceERSo+0x32) [0xc5dca2]
       mongos(+0x85CBC9) [0xc5cbc9]
       mongos(+0x85D3D2) [0xc5d3d2]
       libpthread.so.0(+0xFCB0) [0x7f03252f2cb0]
       libc.so.6(gsignal+0x35) [0x7f0324f5b035]
       libc.so.6(abort+0x17B) [0x7f0324f5e79b]
       mongos(_ZN5mongo15invariantFailedEPKcS1_j+0xCB) [0xbe245b]
       mongos(+0x613CB8) [0xa13cb8]
       mongos(_ZN5mongo16RouterStageMerge4killEv+0x2C) [0xb8a68c]
       mongos(_ZN5mongo24ClusterClientCursorGuardD1Ev+0x46) [0xb81536]
       mongos(_ZN5mongo11ClusterFind8runQueryEPNS_16OperationContextERKNS_14CanonicalQueryERKNS_21ReadPreferenceSettingEPSt6vectorINS_7BSONObjESaISA_EE+0xCFA) [0xb8777a]
       mongos(_ZN5mongo8Strategy7queryOpEPNS_16OperationContextERNS_7RequestE+0x567) [0xba0ee7]
       mongos(_ZN5mongo7Request7processEPNS_16OperationContextEi+0x5CB) [0xb8e8fb]
       mongos(_ZN5mongo21ShardedMessageHandler7processERNS_7MessageEPNS_21AbstractMessagingPortE+0x65) [0x672115]
       mongos(_ZN5mongo17PortMessageServer17handleIncomingMsgEPv+0x325) [0xc07715]
       libpthread.so.0(+0x7E9A) [0x7f03252eae9a]
       libc.so.6(clone+0x6D) [0x7f032501836d]
      -----  END BACKTRACE  -----
      

            Assignee:
            ruoxin.xu@mongodb.com Ruoxin Xu
            Reporter:
            davenson.lombard@mongodb.com Davenson Lombard
            Votes:
            0 Vote for this issue
            Watchers:
            23 Start watching this issue

              Created:
              Updated:
              Resolved: