Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-25161

MongoDB sharding data node crashed when adding new config server

    • Type: Icon: Bug Bug
    • Resolution: Duplicate
    • Priority: Icon: Critical - P2 Critical - P2
    • None
    • Affects Version/s: 3.2.7
    • None
    • ALL
    • Hide

      1. Set up a sharded cluster.
      2. Add a config server to its replica set while the new config server is still unreachable.
      3. Monitor all the data node logs and look for a crash.

      Happened only once.

      Show
      1. Set up a sharded cluster. 2. Add a config server to its replica set while the new config server is still unreachable. 3. Monitor all the data node logs and look for a crash. Happened only once.

      From the source code and the backtrace, it is most likely that one thread (such as the server discovery thread) was adding or removing server list elements while ConnectionString::forReplicaSet was iterating over that list, without any locking between them.

      2016-07-19T21:41:42.689+0000 W NETWORK  [ReplicaSetMonitorWatcher] Failed to connect to ###.###.###.###:27017, reason: errno:111 Connection refused
      2016-07-19T21:41:42.691+0000 W NETWORK  [ReplicaSetMonitorWatcher] Failed to connect to ###.###.###.###:27017, reason: errno:111 Connection refused
      2016-07-19T21:41:52.739+0000 F -        [ReplicaSetMonitorWatcher] Invalid access at address: 0x2116000
      2016-07-19T21:41:52.752+0000 F -        [ReplicaSetMonitorWatcher] Got signal: 11 (Segmentation fault).
      
       0x1319fa2 0x13190f9 0x1319478 0x7fc43dad1340 0x7fc43d7940c7 0x137d5de 0x1b3de8d 0xa095d8 0xa0967e 0x11e517e 0xa0b2ad 0xa0cc88 0xa0ce11 0xa0d035 0xa5fbe4 0xa60af1 0x12a8200 0x1b34160 0x7fc43dac9182 0x7fc43d7f647d
      ----- BEGIN BACKTRACE -----
      {"backtrace":[{"b":"400000","o":"F19FA2","s":"_ZN5mongo15printStackTraceERSo"},{"b":"400000","o":"F190F9"},{"b":"400000","o":"F19478"},{"b":"7FC43DAC1000","o":"10340"},{"b":"7FC43D6FC000","o":"980C7"},{"b":"400000","o":"F7D5DE","s":"_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag"},{"b":"400000","o":"173DE8D","s":"_ZNSsC1EPKcmRKSaIcE"},{"b":"400000","o":"6095D8","s":"_ZN5mongo16ConnectionStringC1ENS_10StringDataESt6vectorINS_11HostAndPortESaIS3_EE"},{"b":"400000","o":"60967E","s":"_ZN5mongo16ConnectionString13forReplicaSetENS_10StringDataESt6vectorINS_11HostAndPortESaIS3_EE"},{"b":"400000","o":"DE517E","s":"_ZN5mongo22ShardingConnectionHook8onCreateEPNS_12DBClientBaseE"},{"b":"400000","o":"60B2AD","s":"_ZN5mongo16DBConnectionPool8onCreateEPNS_12DBClientBaseE"},{"b":"400000","o":"60CC88","s":"_ZN5mongo16DBConnectionPool13_finishCreateERKSsdPNS_12DBClientBaseE"},{"b":"400000","o":"60CE11","s":"_ZN5mongo16DBConnectionPool3getERKNS_16ConnectionStringEd"},{"b":"400000","o":"60D035","s":"_ZN5mongo18ScopedDbConnectionC1ERKNS_16ConnectionStringEd"},{"b":"400000","o":"65FBE4","s":"_ZN5mongo17ReplicaSetMonitor9Refresher20_refreshUntilMatchesEPKNS_21ReadPreferenceSettingE"},{"b":"400000","o":"660AF1"},{"b":"400000","o":"EA8200","s":"_ZN5mongo13BackgroundJob7jobBodyEv"},{"b":"400000","o":"1734160","s":"execute_native_thread_routine"},{"b":"7FC43DAC1000","o":"8182"},{"b":"7FC43D6FC000","o":"FA47D","s":"clone"}],"processInfo":{ "mongodbVersion" : "3.2.7", "gitVersion" : "4249c1d2b5999ebbf1fdf3bc0e0e3b3ff5c0aaf2", "compiledModules" : [], "uname" : { "sysname" : "Linux", "release" : "3.13.0-79-generic", "version" : "#123-Ubuntu SMP Fri Feb 19 14:27:58 UTC 2016", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "A8DC02B241532EEAD31FFE2633EE8C8E565C1F1A" }, { "b" : "7FFED88E7000", "elfType" : 3, "buildId" : "E37313376D77AA284BA26F3EFE65368125B2020B" }, { "b" : "7FC43E9E3000", "path" : "/lib/x86_64-linux-gnu/libssl.so.1.0.0", 
"elfType" : 3, "buildId" : "E21720F2804EF30440F2B39CD409252C26F58F73" }, { "b" : "7FC43E607000", "path" : "/lib/x86_64-linux-gnu/libcrypto.so.1.0.0", "elfType" : 3, "buildId" : "9BC22F9457E3D7E9CF8DDC135C0DAC8F7742135D" }, { "b" : "7FC43E3FF000", "path" : "/lib/x86_64-linux-gnu/librt.so.1", "elfType" : 3, "buildId" : "B376100CAB1EAC4E5DE066EACFC282BF7C0B54F3" }, { "b" : "7FC43E1FB000", "path" : "/lib/x86_64-linux-gnu/libdl.so.2", "elfType" : 3, "buildId" : "67699FFDA9FD2A552032E0652A242E82D65AA10D" }, { "b" : "7FC43DEF5000", "path" : "/lib/x86_64-linux-gnu/libm.so.6", "elfType" : 3, "buildId" : "EF3F6DFFA1FBE48436EC6F45CD3AABA157064BB4" }, { "b" : "7FC43DCDF000", "path" : "/lib/x86_64-linux-gnu/libgcc_s.so.1", "elfType" : 3, "buildId" : "36311B4457710AE5578C4BF00791DED7359DBB92" }, { "b" : "7FC43DAC1000", "path" : "/lib/x86_64-linux-gnu/libpthread.so.0", "elfType" : 3, "buildId" : "AF06068681750736E0524DF17D5A86CB2C3F765C" }, { "b" : "7FC43D6FC000", "path" : "/lib/x86_64-linux-gnu/libc.so.6", "elfType" : 3, "buildId" : "5382058B69031CAA9B9996C11061CD164C9398FF" }, { "b" : "7FC43EC42000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "2A816C3EBBA4E12813FBD34B06FBD25BC892A67F" } ] }}
       mongod(_ZN5mongo15printStackTraceERSo+0x32) [0x1319fa2]
       mongod(+0xF190F9) [0x13190f9]
       mongod(+0xF19478) [0x1319478]
       libpthread.so.0(+0x10340) [0x7fc43dad1340]
       libc.so.6(+0x980C7) [0x7fc43d7940c7]
       mongod(_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag+0x7E) [0x137d5de]
       mongod(_ZNSsC1EPKcmRKSaIcE+0x1D) [0x1b3de8d]
       mongod(_ZN5mongo16ConnectionStringC1ENS_10StringDataESt6vectorINS_11HostAndPortESaIS3_EE+0x78) [0xa095d8]
       mongod(_ZN5mongo16ConnectionString13forReplicaSetENS_10StringDataESt6vectorINS_11HostAndPortESaIS3_EE+0x4E) [0xa0967e]
       mongod(_ZN5mongo22ShardingConnectionHook8onCreateEPNS_12DBClientBaseE+0xCBE) [0x11e517e]
       mongod(_ZN5mongo16DBConnectionPool8onCreateEPNS_12DBClientBaseE+0x2D) [0xa0b2ad]
       mongod(_ZN5mongo16DBConnectionPool13_finishCreateERKSsdPNS_12DBClientBaseE+0x128) [0xa0cc88]
       mongod(_ZN5mongo16DBConnectionPool3getERKNS_16ConnectionStringEd+0x91) [0xa0ce11]
       mongod(_ZN5mongo18ScopedDbConnectionC1ERKNS_16ConnectionStringEd+0x65) [0xa0d035]
       mongod(_ZN5mongo17ReplicaSetMonitor9Refresher20_refreshUntilMatchesEPKNS_21ReadPreferenceSettingE+0x194) [0xa5fbe4]
       mongod(+0x660AF1) [0xa60af1]
       mongod(_ZN5mongo13BackgroundJob7jobBodyEv+0x160) [0x12a8200]
       mongod(execute_native_thread_routine+0x20) [0x1b34160]
       libpthread.so.0(+0x8182) [0x7fc43dac9182]
       libc.so.6(clone+0x6D) [0x7fc43d7f647d]
      -----  END BACKTRACE  -----
      

            Assignee:
            Unassigned Unassigned
            Reporter:
            e7988c Bin Bai
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: