Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-24874

Mongod Crash on flakey network

    • Type: Bug
    • Resolution: Duplicate
    • Priority: Major - P3
    • None
    • Affects Version/s: 3.2.7
    • Component/s: Replication, WiredTiger
    • Labels:
      None
    • Environment:
      Centos 7.2. MongoDB 3.2.7
    • ALL
    • Hide

      We have not seen this issue before; in fact, we have never really had a mongod crash. The only thing we noticed is that there was a lot of network flapping or loss at the time of this crash.

      Show
      We have not seen this issue before; in fact, we have never really had a mongod crash. The only thing we noticed is that there was a lot of network flapping or loss at the time of this crash.

      We have a 3-node replica set: two nodes (chi3 and chi4) are in one data center, and one node (can1) is in another. The mongod on chi4 crashed with the stack trace and log below.

      2016-07-01T04:18:39.826-0500 I NETWORK  [conn158753] end connection 167.114.100.50:43176 (464 connections now open)
      2016-07-01T04:18:42.655-0500 I REPL     [ReplicationExecutor] Error in heartbeat request to can1:27017; ExceededTimeLimit: Couldn't get a connection within the time limit
      2016-07-01T04:18:52.690-0500 I -        [NetworkInterfaceASIO-Replication-0] Invariant failure _connection.is_initialized() src/mongo/executor/network_interface_asio_operation.cpp 142
      2016-07-01T04:18:52.690-0500 I -        [NetworkInterfaceASIO-Replication-0]
      
      ***aborting after invariant() failure
      
      
      2016-07-01T04:18:52.811-0500 F -        [NetworkInterfaceASIO-Replication-0] Got signal: 6 (Aborted).
      
       0x131a0d2 0x1319229 0x1319a32 0x7f06a1339100 0x7f06a0f9d5f7 0x7f06a0f9ece8 0x12a393b 0x10e779d 0x10c6773 0x10c776c 0x10c7cd7 0x10c8409 0x1335de1 0x1336001 0x133a19f 0x10d37e5 0x1b34290 0x7f06a1331dc5 0x7f06a105eced
      ----- BEGIN BACKTRACE -----
      {"backtrace":[{"b":"400000","o":"F1A0D2","s":"_ZN5mongo15printStackTraceERSo"},{"b":"400000","o":"F19229"},{"b":"400000","o":"F19A32"},{"b":"7F06A132A000","o":"F100"},{"b":"7F06A0F68000","o":"355F7","s":"gsignal"},{"b":"7F06A0F68000","o":"36CE8","s":"abort"},{"b":"400000","o":"EA393B","s":"_ZN5mongo15invariantFailedEPKcS1_j"},{"b":"400000","o":"CE779D"},{"b":"400000","o":"CC6773"},{"b":"400000","o":"CC776C"},{"b":"400000","o":"CC7CD7"},{"b":"400000","o":"CC8409"},{"b":"400000","o":"F35DE1","s":"_ZN4asio6detail9scheduler10do_run_oneERNS0_11scoped_lockINS0_11posix_mutexEEERNS0_21scheduler_thread_infoERKSt10error_code"},{"b":"400000","o":"F36001","s":"_ZN4asio6detail9scheduler3runERSt10error_code"},{"b":"400000","o":"F3A19F","s":"_ZN4asio10io_service3runEv"},{"b":"400000","o":"CD37E5"},{"b":"400000","o":"1734290","s":"execute_native_thread_routine"},{"b":"7F06A132A000","o":"7DC5"},{"b":"7F06A0F68000","o":"F6CED","s":"clone"}],"processInfo":{ "mongodbVersion" : "3.2.7", "gitVersion" : "4249c1d2b5999ebbf1fdf3bc0e0e3b3ff5c0aaf2", "compiledModules" : [], "uname" : { "sysname" : "Linux", "release" : "3.10.0-327.18.2.el7.x86_64", "version" : "#1 SMP Thu May 12 11:03:55 UTC 2016", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "05C2980D41C615E7C1AB7B5330630B8AB5F5B9D0" }, { "b" : "7FFD42485000", "elfType" : 3, "buildId" : "627B075D566CF4BFF68497DAB7DF9B024F8E5A83" }, { "b" : "7F06A2252000", "path" : "/usr/lib64/libssl.so.10", "elfType" : 3, "buildId" : "478D01A08B923A251D755BB421F3EBAF9F2982C1" }, { "b" : "7F06A1E6A000", "path" : "/usr/lib64/libcrypto.so.10", "elfType" : 3, "buildId" : "42AAFD25E9B5F4CE2EFE6309491445B1A92A575D" }, { "b" : "7F06A1C62000", "path" : "/usr/lib64/librt.so.1", "elfType" : 3, "buildId" : "CB0D2C9F29DBD13C47E7D2EEFB94B35835698CCA" }, { "b" : "7F06A1A5E000", "path" : "/usr/lib64/libdl.so.2", "elfType" : 3, "buildId" : "091060A163E7EDA25572F3B1BAF2E8F80209C00E" }, { "b" : "7F06A175C000", "path" : 
"/usr/lib64/libm.so.6", "elfType" : 3, "buildId" : "F9DF294FB70243549DCB643F1322BB20E70E9FE8" }, { "b" : "7F06A1546000", "path" : "/usr/lib64/libgcc_s.so.1", "elfType" : 3, "buildId" : "6AA1DCC4DE7F1836344949857FC2017278631FFD" }, { "b" : "7F06A132A000", "path" : "/usr/lib64/libpthread.so.0", "elfType" : 3, "buildId" : "723F0AC75EF88E778940AE8A8BC30141D85B116A" }, { "b" : "7F06A0F68000", "path" : "/usr/lib64/libc.so.6", "elfType" : 3, "buildId" : "088D48A9AB5A512D9F75BA3D66B6CF77EB6588F9" }, { "b" : "7F06A24BF000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "09E1BB4D034C7263810A41100647068858A7ECB6" }, { "b" : "7F06A0D1C000", "path" : "/usr/lib64/libgssapi_krb5.so.2", "elfType" : 3, "buildId" : "D46A230FFF4A7B808B3CFC213D31FCAC542FB504" }, { "b" : "7F06A0A37000", "path" : "/usr/lib64/libkrb5.so.3", "elfType" : 3, "buildId" : "6D6136A0E795420B05854DEF13A10C226FE9CCB2" }, { "b" : "7F06A0833000", "path" : "/usr/lib64/libcom_err.so.2", "elfType" : 3, "buildId" : "3A1166709F88740C49E060731832E3FAD2DFB66B" }, { "b" : "7F06A0601000", "path" : "/usr/lib64/libk5crypto.so.3", "elfType" : 3, "buildId" : "AA97A848DD7C9E57B06EC913E10D420AEBBCE027" }, { "b" : "7F06A03EB000", "path" : "/usr/lib64/libz.so.1", "elfType" : 3, "buildId" : "1982C8CDAE90F898D1AD26DC07E807333B4789D0" }, { "b" : "7F06A01DC000", "path" : "/usr/lib64/libkrb5support.so.0", "elfType" : 3, "buildId" : "AEF6C3D3C5152F339942041519A106FC055DAF71" }, { "b" : "7F069FFC5000", "path" : "/usr/lib64/tls/libkeyutils.so.1", "elfType" : 3, "buildId" : "A2F343295160ECFB175883419207826BB467BD69" }, { "b" : "7F069FDAB000", "path" : "/usr/lib64/libresolv.so.2", "elfType" : 3, "buildId" : "D02DC134F38F06F3885231FD2486D5EF4796E5F9" }, { "b" : "7F069FB86000", "path" : "/usr/lib64/libselinux.so.1", "elfType" : 3, "buildId" : "82FF6B18E1E42825CC2D060F969479AD4AF2F62C" }, { "b" : "7F069F925000", "path" : "/usr/lib64/libpcre.so.1", "elfType" : 3, "buildId" : "AE64AA461A26E01F60408013D361749D56DD0AE1" }, { 
"b" : "7F069F700000", "path" : "/usr/lib64/liblzma.so.5", "elfType" : 3, "buildId" : "98131C9354279ABD39FD80D4BE5B3EC5678BD9E0" } ] }}
       mongod(_ZN5mongo15printStackTraceERSo+0x32) [0x131a0d2]
       mongod(+0xF19229) [0x1319229]
       mongod(+0xF19A32) [0x1319a32]
       libpthread.so.0(+0xF100) [0x7f06a1339100]
       libc.so.6(gsignal+0x37) [0x7f06a0f9d5f7]
       libc.so.6(abort+0x148) [0x7f06a0f9ece8]
       mongod(_ZN5mongo15invariantFailedEPKcS1_j+0xCB) [0x12a393b]
       mongod(+0xCE779D) [0x10e779d]
       mongod(+0xCC6773) [0x10c6773]
       mongod(+0xCC776C) [0x10c776c]
       mongod(+0xCC7CD7) [0x10c7cd7]
       mongod(+0xCC8409) [0x10c8409]
       mongod(_ZN4asio6detail9scheduler10do_run_oneERNS0_11scoped_lockINS0_11posix_mutexEEERNS0_21scheduler_thread_infoERKSt10error_code+0x2F1) [0x1335de1]
       mongod(_ZN4asio6detail9scheduler3runERSt10error_code+0xC1) [0x1336001]
       mongod(_ZN4asio10io_service3runEv+0x2F) [0x133a19f]
       mongod(+0xCD37E5) [0x10d37e5]
       mongod(execute_native_thread_routine+0x20) [0x1b34290]
       libpthread.so.0(+0x7DC5) [0x7f06a1331dc5]
       libc.so.6(clone+0x6D) [0x7f06a105eced]
      -----  END BACKTRACE  -----
      

            Assignee:
            Unassigned
            Reporter:
            amit.gupta@sendergen.com Amit Gupta
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Created:
              Updated:
              Resolved: