Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-36061

Mongos crashes due to invariant failure

    • Type: Bug
    • Resolution: Duplicate
    • Priority: Major - P3
    • None
    • Affects Version/s: 3.6.4
    • Component/s: Sharding
    • Labels:
      None
    • ALL

      We are using a sharded MongoDB cluster running version 3.6.4. The mongos instances keep crashing with an invariant failure on the bulk write path when the ordered flag is set to false.

      Following is the full stack trace:

      2018-07-10T23:35:41.802+0000 F -        [conn107] Invariant failure targetedBatches->find(batch->getEndpoint().shardName) == targetedBatches->end() src/mo
      ngo/s/write_ops/batch_write_op.cpp 353
      2018-07-10T23:35:41.802+0000 F -        [conn107] ***aborting after invariant() failure
      2018-07-10T23:35:41.812+0000 F -        [conn107] Got signal: 6 (Aborted). 0x558882319431 0x558882318649 0x558882318b2d 0x7fe1c83cc390 0x7fe1c8026428 0x7fe1c802802a 0x55888174f7f2 0x55888188f4c6 0x5588818925ef 0x55888189f5fc 0x5
      58881860a00 0x558881c510cf 0x55888187f095 0x55888187f9d3 0x5588818800b9 0x5588817a1d15 0x5588817be55a 0x5588817b9f17 0x5588817bd351 0x558881c116a2 0x55888
      17b8d7f 0x5588817bb2c5 0x5588817bbbbb 0x5588817b9f9d 0x5588817bd351 0x558881c11c05 0x5588821cadf4 0x7fe1c83c26ba 0x7fe1c80f841d
      ----- BEGIN BACKTRACE -----
      {"backtrace":[{"b":"55888124E000","o":"10CB431","s":"_ZN5mongo15printStackTraceERSo"},{"b":"55888124E000","o":"10CA649"},{"b":"55888124E000","o":"10CAB2D"
      },{"b":"7FE1C83BB000","o":"11390"},{"b":"7FE1C7FF1000","o":"35428","s":"gsignal"},{"b":"7FE1C7FF1000","o":"3702A","s":"abort"},{"b":"55888124E000","o":"50
      17F2","s":"_ZN5mongo17invariantOKFailedEPKcRKNS_6StatusES1_j"},{"b":"55888124E000","o":"6414C6","s":"_ZN5mongo12BatchWriteOp11targetBatchERKNS_10NSTargete
      rEbPSt3mapINS_7ShardIdEPNS_18TargetedWriteBatchESt4lessIS5_ESaISt4pairIKS5_S7_EEE"},{"b":"55888124E000","o":"6445EF","s":"_ZN5mongo14BatchWriteExec12execu
      teBatchEPNS_16OperationContextERNS_10NSTargeterERKNS_21BatchedCommandRequestEPNS_22BatchedCommandResponseEPNS_19BatchWriteExecStatsE"},{"b":"55888124E000"
      ,"o":"6515FC","s":"_ZN5mongo13ClusterWriter5writeEPNS_16OperationContextERKNS_21BatchedCommandRequestEPNS_19BatchWriteExecStatsEPNS_22BatchedCommandRespon
      seE"},{"b":"55888124E000","o":"612A00"},{"b":"55888124E000","o":"A030CF","s":"_ZN5mongo7Command9publicRunEPNS_16OperationContextERKNS_12OpMsgRequestERNS_1
      4BSONObjBuilderE"},{"b":"55888124E000","o":"631095"},{"b":"55888124E000","o":"6319D3"},{"b":"55888124E000","o":"6320B9","s":"_ZN5mongo8Strategy13clientCom
      mandEPNS_16OperationContextERKNS_7MessageE"},{"b":"55888124E000","o":"553D15","s":"_ZN5mongo23ServiceEntryPointMongos13handleRequestEPNS_16OperationContex
      tERKNS_7MessageE"},{"b":"55888124E000","o":"57055A","s":"_ZN5mongo19ServiceStateMachine15_processMessageENS0_11ThreadGuardE"},{"b":"55888124E000","o":"56B
      F17","s":"_ZN5mongo19ServiceStateMachine15_runNextInGuardENS0_11ThreadGuardE"},{"b":"55888124E000","o":"56F351"},{"b":"55888124E000","o":"9C36A2","s":"_ZN
      5mongo9transport26ServiceExecutorSynchronous8scheduleESt8functionIFvvEENS0_15ServiceExecutor13ScheduleFlagsENS0_23ServiceExecutorTaskNameE"},{"b":"5588812
      4E000","o":"56AD7F","s":"_ZN5mongo19ServiceStateMachine22_scheduleNextWithGuardENS0_11ThreadGuardENS_9transport15ServiceExecutor13ScheduleFlagsENS2_23Serv
      iceExecutorTaskNameENS0_9OwnershipE"},{"b":"55888124E000","o":"56D2C5","s":"_ZN5mongo19ServiceStateMachine15_sourceCallbackENS_6StatusE"},{"b":"55888124E0
      00","o":"56DBBB","s":"_ZN5mongo19ServiceStateMachine14_sourceMessageENS0_11ThreadGuardE"},{"b":"55888124E000","o":"56BF9D","s":"_ZN5mongo19ServiceStateMac
      hine15_runNextInGuardENS0_11ThreadGuardE"},{"b":"55888124E000","o":"56F351"},{"b":"55888124E000","o":"9C3C05"},{"b":"55888124E000","o":"F7CDF4"},{"b":"7FE
      1C83BB000","o":"76BA"},{"b":"7FE1C7FF1000","o":"10741D","s":"clone"}],"processInfo":{ "mongodbVersion" : "3.6.4", "gitVersion" : "d0181a711f7e7f39e60b5aeb
      1dc7097bf6ae5856", "compiledModules" : [], "uname" : { "sysname" : "Linux", "release" : "4.13.0-1012-gcp", "version" : "#16-Ubuntu SMP Thu Mar 15 12:00:42
       UTC 2018", "machine" : "x86_64" }, "somap" : [ { "b" : "55888124E000", "elfType" : 3, "buildId" : "2B07F3DDB25F64780B1BB21D3BE3EB969E862FF7" }, { "b" : "
      7FFF80BCC000", "elfType" : 3, "buildId" : "688440B93CEB9CEBD909F93F8A330CBE51E78F92" }, { "b" : "7FE1C95B0000", "path" : "/lib/x86_64-linux-gnu/libresolv.
      so.2", "elfType" : 3, "buildId" : "6EF73266978476EF9F2FD2CF31E57F4597CB74F8" }, { "b" : "7FE1C9347000", "path" : "/lib/x86_64-linux-gnu/libssl.so.1.0.0", 
      "elfType" : 3, "buildId" : "513282AC7EB386E2C0133FD9E1B6B8A0F38B047D" }, { "b" : "7FE1C8F03000", "path" : "/lib/x86_64-linux-gnu/libcrypto.so.1.0.0", "elfType" : 3, "buildId" : "250E875F74377DFC74DE48BF80CCB237BB4EFF1D" }, { "b" : "7FE1C8CFB000", "path" : "/lib/x86_64-linux-gnu/librt.so.1", "elfType" : 3, "
      buildId" : "89C34D7A182387D76D5CDA1F7718F5D58824DFB3" }, { "b" : "7FE1C8AF7000", "path" : "/lib/x86_64-linux-gnu/libdl.so.2", "elfType" : 3, "buildId" : "
      8CC8D0D119B142D839800BFF71FB71E73AEA7BD4" }, { "b" : "7FE1C87EE000", "path" : "/lib/x86_64-linux-gnu/libm.so.6", "elfType" : 3, "buildId" : "DFB85DE42DAFF
      D09640C8FE377D572DE3E168920" }, { "b" : "7FE1C85D8000", "path" : "/lib/x86_64-linux-gnu/libgcc_s.so.1", "elfType" : 3, "buildId" : "68220AE2C65D65C1B6AAA1
      2FA6765A6EC2F5F434" }, { "b" : "7FE1C83BB000", "path" : "/lib/x86_64-linux-gnu/libpthread.so.0", "elfType" : 3, "buildId" : "CE17E023542265FC11D9BC8F534BB
      4F070493D30" }, { "b" : "7FE1C7FF1000", "path" : "/lib/x86_64-linux-gnu/libc.so.6", "elfType" : 3, "buildId" : "B5381A457906D279073822A5CEB24C4BFEF94DDB" 
      }, { "b" : "7FE1C97CB000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "5D7B6259552275A3C17BD4C3FD05F5A6BF40CAA5" }, { "b" : "7FE1C
      7DDF000", "path" : "/lib/x86_64-linux-gnu/libnss_files.so.2", "elfType" : 3, "buildId" : "747EF0B680F5A347531D0F464DC6460E89AD2111" }, { "b" : "7FE1C7BD80
      00", "path" : "/lib/x86_64-linux-gnu/libnss_dns.so.2", "elfType" : 3, "buildId" : "D5297928BDA8B2F7703CBA3CD914E7A102C0FB5A" } ] }}
       mongos(_ZN5mongo15printStackTraceERSo+0x41) [0x558882319431]
       mongos(+0x10CA649) [0x558882318649]
       mongos(+0x10CAB2D) [0x558882318b2d]
       libpthread.so.0(+0x11390) [0x7fe1c83cc390]
       libc.so.6(gsignal+0x38) [0x7fe1c8026428]
       libc.so.6(abort+0x16A) [0x7fe1c802802a]
       mongos(_ZN5mongo17invariantOKFailedEPKcRKNS_6StatusES1_j+0x0) [0x55888174f7f2]
       mongos(_ZN5mongo12BatchWriteOp11targetBatchERKNS_10NSTargeterEbPSt3mapINS_7ShardIdEPNS_18TargetedWriteBatchESt4lessIS5_ESaISt4pairIKS5_S7_EEE+0xA06) [0x55888188f4c6]
       mongos(_ZN5mongo14BatchWriteExec12executeBatchEPNS_16OperationContextERNS_10NSTargeterERKNS_21BatchedCommandRequestEPNS_22BatchedCommandResponseEPNS_19BatchWriteExecStatsE+0x23F) [0x5588818925ef]
       mongos(_ZN5mongo13ClusterWriter5writeEPNS_16OperationContextERKNS_21BatchedCommandRequestEPNS_19BatchWriteExecStatsEPNS_22BatchedCommandResponseE+0x53C) [0x55888189f5fc]
       mongos(+0x612A00) [0x558881860a00]
       mongos(_ZN5mongo7Command9publicRunEPNS_16OperationContextERKNS_12OpMsgRequestERNS_14BSONObjBuilderE+0x1F) [0x558881c510cf]
       mongos(+0x631095) [0x55888187f095]
       mongos(+0x6319D3) [0x55888187f9d3]
       mongos(_ZN5mongo8Strategy13clientCommandEPNS_16OperationContextERKNS_7MessageE+0x59) [0x5588818800b9]
       mongos(_ZN5mongo23ServiceEntryPointMongos13handleRequestEPNS_16OperationContextERKNS_7MessageE+0x5B5) [0x5588817a1d15]
       mongos(_ZN5mongo19ServiceStateMachine15_processMessageENS0_11ThreadGuardE+0xBA) [0x5588817be55a]
       mongos(_ZN5mongo19ServiceStateMachine15_runNextInGuardENS0_11ThreadGuardE+0x97) [0x5588817b9f17]
       mongos(+0x56F351) [0x5588817bd351]
       mongos(_ZN5mongo9transport26ServiceExecutorSynchronous8scheduleESt8functionIFvvEENS0_15ServiceExecutor13ScheduleFlagsENS0_23ServiceExecutorTaskNameE+0x1A2) [0x558881c116a2]
       mongos(_ZN5mongo19ServiceStateMachine22_scheduleNextWithGuardENS0_11ThreadGuardENS_9transport15ServiceExecutor13ScheduleFlagsENS2_23ServiceExecutorTaskNameENS0_9OwnershipE+0x15F) [0x5588817b8d7f]
       mongos(_ZN5mongo19ServiceStateMachine15_sourceCallbackENS_6StatusE+0xAF5) [0x5588817bb2c5]
       mongos(_ZN5mongo19ServiceStateMachine14_sourceMessageENS0_11ThreadGuardE+0x23B) [0x5588817bbbbb]
       mongos(_ZN5mongo19ServiceStateMachine15_runNextInGuardENS0_11ThreadGuardE+0x11D) [0x5588817b9f9d]
       mongos(+0x56F351) [0x5588817bd351]
       mongos(+0x9C3C05) [0x558881c11c05]
       mongos(+0xF7CDF4) [0x5588821cadf4]
       libpthread.so.0(+0x76BA) [0x7fe1c83c26ba]
       libc.so.6(clone+0x6D) [0x7fe1c80f841d]
      -----  END BACKTRACE  -----
      
      

      The bulk write operations fail intermittently. I cannot say for sure, but in every failure I have observed, the bulk write batch spanned multiple shards.

            Assignee:
            kelsey.schubert@mongodb.com Kelsey Schubert
            Reporter:
            ashu210890 Ashu Pachauri
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: