Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-73597

Sporadic seg faults days/weeks after mongod server running

    • Type: Icon: Bug Bug
    • Resolution: Done
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: 5.0.14
    • Component/s: None
    • Server Triage
    • ALL
    • Hide

      Unable to reproduce. It happens somewhat randomly (though the servers seem to be pretty busy when it happens).

      Show
      Unable to reproduce. It happens somewhat randomly (though the servers seem to be pretty busy when it happens).

      Environment:

      • Ubuntu 20.04 latest patch
      • MongoDB 5.0.14
      • 128 GB RAM server

       

      Hi, we have experienced random crashes on 2 different servers. Below are the stack traces from mongod.log. I have the crash files from /var/crash (they are about 16 GB each); I can put them on a private SFTP server for analysis.

       

      Each server is a member of a 3-node replica set (and part of a sharded cluster). I was wondering if anyone has any clues about what is causing the seg fault. Each server has 128 GB of RAM and 16 threads (Intel Xeon E-2288G CPU @ 3.70 GHz).

      At the time of each crash, the servers seemed to be pretty busy.

       

       

      crash log #1

      {"t":

      {"$date":"2023-01-30T12:47:32.140-05:00"}

      ,"s":"F", "c":"CONTROL", "id":6384300, "ctx":"initandlisten","msg":"Writing fatal message","attr":{"message":"Invalid access at address: 0x9ae8c\n"}} {"t":

      {"$date":"2023-01-30T12:47:32.140-05:00"}

      ,"s":"F", "c":"CONTROL", "id":6384300, "ctx":"initandlisten","msg":"Writing fatal message","attr":{"message":"Got signal: 11 (Segmentation fault).\n"}} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31380, "ctx":"initandlisten","msg":"BACKTRACE","attr":{"bt":{"backtrace":[

      {"a":"55D2950F40A5","b":"55D2911E4000","o":"3F100A5","s":"_ZN5mongo18stack_trace_detail12_GLOBAL__N_119printStackTraceImplERKNS1_7OptionsEPNS_14StackTraceSinkE.constprop.361","s+":"215"}

      ,

      {"a":"55D2950F6B29","b":"55D2911E4000","o":"3F12B29","s":"_ZN5mongo15printStackTraceEv","s+":"29"}

      ,

      {"a":"55D2950EF09C","b":"55D2911E4000","o":"3F0B09C","s":"abruptQuitWithAddrSignal","s+":"EC"}

      ,

      {"a":"7F8A6A9F8420","b":"7F8A6A9E4000","o":"14420","s":"funlockfile","s+":"60"}

      ,

      {"a":"7F8A6A9F3376","b":"7F8A6A9E4000","o":"F376","s":"pthread_cond_wait","s+":"216"}

      ,

      {"a":"55D29529B76C","b":"55D2911E4000","o":"40B776C","s":"_ZNSt18condition_variable4waitERSt11unique_lockISt5mutexE","s+":"C"}

      ,

      {"a":"55D2950EA987","b":"55D2911E4000","o":"3F06987","s":"_ZN5mongo15waitForShutdownEv","s+":"107"}

      ,

      {"a":"55D29274CB91","b":"55D2911E4000","o":"1568B91","s":"_ZN5mongo12_GLOBAL__N_114_initAndListenEPNS_14ServiceContextEi.isra.1929","s+":"13E1"}

      ,

      {"a":"55D29274E5AF","b":"55D2911E4000","o":"156A5AF","s":"_ZN5mongo11mongod_mainEiPPc","s+":"CDF"}

      ,

      {"a":"55D2925E2F2E","b":"55D2911E4000","o":"13FEF2E","s":"main","s+":"E"}

      ,

      {"a":"7F8A6A816083","b":"7F8A6A7F2000","o":"24083","s":"__libc_start_main","s+":"F3"}

      ,

      {"a":"55D2927489DE","b":"55D2911E4000","o":"15649DE","s":"_start","s+":"2E"}

      ],"processInfo":{"mongodbVersion":"5.0.14","gitVersion":"1b3b0073a0b436a8a502b612f24fb2bd572772e5","compiledModules":[],"uname":

      {"sysname":"Linux","release":"5.4.0-137-generic","version":"#154-Ubuntu SMP Thu Jan 5 17:03:22 UTC 2023","machine":"x86_64"}

      ,"somap":[

      {"b":"55D2911E4000","elfType":3,"buildId":"44AD2830EB7E90ABFF5F592CAAA6392F81AEC690"}

      ,

      {"b":"7F8A6A9E4000","path":"/lib/x86_64-linux-gnu/libpthread.so.0","elfType":3,"buildId":"7B4536F41CDAA5888408E82D0836E33DCF436466"}

      ,

      {"b":"7F8A6A7F2000","path":"/lib/x86_64-linux-gnu/libc.so.6","elfType":3,"buildId":"1878E6B475720C7C51969E69AB2D276FAE6D1DEE"}

      ]}}}} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"55D2950F40A5","b":"55D2911E4000","o":"3F100A5","s":"_ZN5mongo18stack_trace_detail12_GLOBAL__N_119printStackTraceImplERKNS1_7OptionsEPNS_14StackTraceSinkE.constprop.361","s+":"215"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"55D2950F6B29","b":"55D2911E4000","o":"3F12B29","s":"_ZN5mongo15printStackTraceEv","s+":"29"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"55D2950EF09C","b":"55D2911E4000","o":"3F0B09C","s":"abruptQuitWithAddrSignal","s+":"EC"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"7F8A6A9F8420","b":"7F8A6A9E4000","o":"14420","s":"funlockfile","s+":"60"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"7F8A6A9F3376","b":"7F8A6A9E4000","o":"F376","s":"pthread_cond_wait","s+":"216"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"55D29529B76C","b":"55D2911E4000","o":"40B776C","s":"_ZNSt18condition_variable4waitERSt11unique_lockISt5mutexE","s+":"C"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"55D2950EA987","b":"55D2911E4000","o":"3F06987","s":"_ZN5mongo15waitForShutdownEv","s+":"107"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"55D29274CB91","b":"55D2911E4000","o":"1568B91","s":"_ZN5mongo12_GLOBAL__N_114_initAndListenEPNS_14ServiceContextEi.isra.1929","s+":"13E1"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"55D29274E5AF","b":"55D2911E4000","o":"156A5AF","s":"_ZN5mongo11mongod_mainEiPPc","s+":"CDF"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"55D2925E2F2E","b":"55D2911E4000","o":"13FEF2E","s":"main","s+":"E"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"7F8A6A816083","b":"7F8A6A7F2000","o":"24083","s":"__libc_start_main","s+":"F3"}

      }} {"t":

      {"$date":"2023-01-30T12:47:32.226-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"55D2927489DE","b":"55D2911E4000","o":"15649DE","s":"_start","s+":"2E"}

      }}

      crash log #2

      {"t":

      {"$date":"2023-02-01T11:55:24.317-05:00"}

      ,"s":"F", "c":"CONTROL", "id":6384300, "ctx":"initandlisten","msg":"Writing fatal message","attr":{"message":"Invalid access at address: 0x3dfff\n"}} {"t":

      {"$date":"2023-02-01T11:55:24.317-05:00"}

      ,"s":"F", "c":"CONTROL", "id":6384300, "ctx":"initandlisten","msg":"Writing fatal message","attr":{"message":"Got signal: 11 (Segmentation fault).\n"}} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31380, "ctx":"initandlisten","msg":"BACKTRACE","attr":{"bt":{"backtrace":[

      {"a":"56098E17A0A5","b":"56098A26A000","o":"3F100A5","s":"_ZN5mongo18stack_trace_detail12_GLOBAL__N_119printStackTraceImplERKNS1_7OptionsEPNS_14StackTraceSinkE.constprop.361","s+":"215"}

      ,

      {"a":"56098E17CB29","b":"56098A26A000","o":"3F12B29","s":"_ZN5mongo15printStackTraceEv","s+":"29"}

      ,

      {"a":"56098E17509C","b":"56098A26A000","o":"3F0B09C","s":"abruptQuitWithAddrSignal","s+":"EC"}

      ,

      {"a":"7FD7CF902420","b":"7FD7CF8EE000","o":"14420","s":"funlockfile","s+":"60"}

      ,

      {"a":"7FD7CF8FD376","b":"7FD7CF8EE000","o":"F376","s":"pthread_cond_wait","s+":"216"}

      ,

      {"a":"56098E32176C","b":"56098A26A000","o":"40B776C","s":"_ZNSt18condition_variable4waitERSt11unique_lockISt5mutexE","s+":"C"}

      ,

      {"a":"56098E170987","b":"56098A26A000","o":"3F06987","s":"_ZN5mongo15waitForShutdownEv","s+":"107"}

      ,

      {"a":"56098B7D2B91","b":"56098A26A000","o":"1568B91","s":"_ZN5mongo12_GLOBAL__N_114_initAndListenEPNS_14ServiceContextEi.isra.1929","s+":"13E1"}

      ,

      {"a":"56098B7D45AF","b":"56098A26A000","o":"156A5AF","s":"_ZN5mongo11mongod_mainEiPPc","s+":"CDF"}

      ,

      {"a":"56098B668F2E","b":"56098A26A000","o":"13FEF2E","s":"main","s+":"E"}

      ,

      {"a":"7FD7CF720083","b":"7FD7CF6FC000","o":"24083","s":"__libc_start_main","s+":"F3"}

      ,

      {"a":"56098B7CE9DE","b":"56098A26A000","o":"15649DE","s":"_start","s+":"2E"}

      ],"processInfo":{"mongodbVersion":"5.0.14","gitVersion":"1b3b0073a0b436a8a502b612f24fb2bd572772e5","compiledModules":[],"uname":

      {"sysname":"Linux","release":"5.4.0-137-generic","version":"#154-Ubuntu SMP Thu Jan 5 17:03:22 UTC 2023","machine":"x86_64"}

      ,"somap":[

      {"b":"56098A26A000","elfType":3,"buildId":"44AD2830EB7E90ABFF5F592CAAA6392F81AEC690"}

      ,

      {"b":"7FD7CF8EE000","path":"/lib/x86_64-linux-gnu/libpthread.so.0","elfType":3,"buildId":"7B4536F41CDAA5888408E82D0836E33DCF436466"}

      ,

      {"b":"7FD7CF6FC000","path":"/lib/x86_64-linux-gnu/libc.so.6","elfType":3,"buildId":"1878E6B475720C7C51969E69AB2D276FAE6D1DEE"}

      ]}}}} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"56098E17A0A5","b":"56098A26A000","o":"3F100A5","s":"_ZN5mongo18stack_trace_detail12_GLOBAL__N_119printStackTraceImplERKNS1_7OptionsEPNS_14StackTraceSinkE.constprop.361","s+":"215"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"56098E17CB29","b":"56098A26A000","o":"3F12B29","s":"_ZN5mongo15printStackTraceEv","s+":"29"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"56098E17509C","b":"56098A26A000","o":"3F0B09C","s":"abruptQuitWithAddrSignal","s+":"EC"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"7FD7CF902420","b":"7FD7CF8EE000","o":"14420","s":"funlockfile","s+":"60"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"7FD7CF8FD376","b":"7FD7CF8EE000","o":"F376","s":"pthread_cond_wait","s+":"216"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"56098E32176C","b":"56098A26A000","o":"40B776C","s":"_ZNSt18condition_variable4waitERSt11unique_lockISt5mutexE","s+":"C"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"56098E170987","b":"56098A26A000","o":"3F06987","s":"_ZN5mongo15waitForShutdownEv","s+":"107"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"56098B7D2B91","b":"56098A26A000","o":"1568B91","s":"_ZN5mongo12_GLOBAL__N_114_initAndListenEPNS_14ServiceContextEi.isra.1929","s+":"13E1"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"56098B7D45AF","b":"56098A26A000","o":"156A5AF","s":"_ZN5mongo11mongod_mainEiPPc","s+":"CDF"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"56098B668F2E","b":"56098A26A000","o":"13FEF2E","s":"main","s+":"E"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"7FD7CF720083","b":"7FD7CF6FC000","o":"24083","s":"__libc_start_main","s+":"F3"}

      }} {"t":

      {"$date":"2023-02-01T11:55:24.398-05:00"}

      ,"s":"I", "c":"CONTROL", "id":31445, "ctx":"initandlisten","msg":"Frame","attr":{"frame":

      {"a":"56098B7CE9DE","b":"56098A26A000","o":"15649DE","s":"_start","s+":"2E"}

      }}

            Assignee:
            yuan.fang@mongodb.com Yuan Fang
            Reporter:
            amit.gupta@opensense.com Amit Gupta
            Votes:
            0 Vote for this issue
            Watchers:
            6 Start watching this issue

              Created:
              Updated:
              Resolved: