Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-41326

Initial replica member sync failing due to OOM

    • Type: Icon: Bug Bug
    • Resolution: Done
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: 3.4.16
    • Component/s: Replication
    • Labels:
      None
    • ALL
      1. Stop the mongod process on the replica
      2. Delete the DB
      3. Start the mongod process

      Hey guys, we needed to do a re-sync of some of our smaller(CPU/memory) replica set members that are used for DB backups. During the process we noticed that on few occasions the Mongo process dies with the following logged error: 

      2019-05-27T09:40:39.863+0000 F -        [InitialSyncInserters-metrilodata.mails_events_by_customer_campa] out of memory. 0x558e91b4d181 0x558e91b4c7b4 0x558e91d0fb5b 0x558e9264b992 0x558e91241d61 0x558e91241df2 0x558e9124901a 0x558e912491cd 0x558e912495f1 0x558e9123b1d2 0x558e90feacf1 0x558e90fb6d05 0x558e914e6fea 0x558e914e6e55 0x558e916375b1 0x558e916377b9 0x558e9163891f 0x558e91ac4aec 0x558e91ac559c 0x558e91ac5f86 0x558e925c9a60 0x7f37532cf6ba 0x7f375300541d
      ----- BEGIN BACKTRACE -----
      {"backtrace":[{"b":"558E905D2000","o":"157B181","s":"_ZN5mongo15printStackTraceERSo"},{"b":"558E905D2000","o":"157A7B4","s":"_ZN5mongo29reportOutOfMemoryErrorAndExitEv"},{"b":"558E905D2000","o":"173DB5B"},{"b":"558E905D2000","o":"2079992","s":"tc_newarray_nothrow"},{"b":"558E905D2000","o":"C6FD61","s":"_ZSt20get_temporary_bufferISt4pairIN5mongo7BSONObjENS1_8RecordIdEEES0_IPT_lEl"},{"b":"558E905D2000","o":"C6FDF2","s":"_ZNSt17_Temporary_bufferISt15_Deque_iteratorISt4pairIN5mongo7BSONObjENS2_8RecordIdEERS5_PS5_ES5_EC1ES8_S8_"},{"b":"558E905D2000","o":"C7701A","s":"_ZN5mongo6sorter13NoLimitSorterINS_7BSONObjENS_8RecordIdENS_27BtreeExternalSortComparisonEE4sortEv"},{"b":"558E905D2000","o":"C771CD","s":"_ZN5mongo6sorter13NoLimitSorterINS_7BSONObjENS_8RecordIdENS_27BtreeExternalSortComparisonEE5spillEv"},{"b":"558E905D2000","o":"C775F1","s":"_ZN5mongo6sorter13NoLimitSorterINS_7BSONObjENS_8RecordIdENS_27BtreeExternalSortComparisonEE3addERKS2_RKS3_"},{"b":"558E905D2000","o":"C691D2","s":"_ZN5mongo17IndexAccessMethod11BulkBuilder6insertEPNS_16OperationContextERKNS_7BSONObjERKNS_8RecordIdERKNS_19InsertDeleteOptionsEPl"},{"b":"558E905D2000","o":"A18CF1","s":"_ZN5mongo15MultiIndexBlock6insertERKNS_7BSONObjERKNS_8RecordIdE"},{"b":"558E905D2000","o":"9E4D05","s":"_ZN5mongo10Collection14insertDocumentEPNS_16OperationContextERKNS_7BSONObjERKSt6vectorIPNS_15MultiIndexBlockESaIS8_EEb"},{"b":"558E905D2000","o":"F14FEA"},{"b":"558E905D2000","o":"F14E55"},{"b":"558E905D2000","o":"10655B1"},{"b":"558E905D2000","o":"10657B9"},{"b":"558E905D2000","o":"106691F","s":"_ZN5mongo4repl10TaskRunner9_runTasksEv"},{"b":"558E905D2000","o":"14F2AEC","s":"_ZN5mongo10ThreadPool10_doOneTaskEPSt11unique_lockISt5mutexE"},{"b":"558E905D2000","o":"14F359C","s":"_ZN5mongo10ThreadPool13_consumeTasksEv"},{"b":"558E905D2000","o":"14F3F86","s":"_ZN5mongo10ThreadPool17_workerThreadBodyEPS0_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"},{"b":"558E905D2000","o":"1FF7A60"},{"b":"7F37532C8000","o":"76BA"},{"b":"7F3752EFE000","o":"10741D","s":"clone"}],"processInfo":{ "mongodbVersion" : "3.4.16", "gitVersion" : "0d6a9242c11b99ddadcfb6e86a850b6ba487530a", "compiledModules" : [], "uname" : { "sysname" : "Linux", "release" : "4.15.0-1032-gcp", "version" : "#34~16.04.1-Ubuntu SMP Wed May 8 13:26:26 UTC 2019", "machine" : "x86_64" }, "somap" : [ { "b" : "558E905D2000", "elfType" : 3, "buildId" : "A81F940E9E89093155964B0A2D677A914A37DFD5" }, { "b" : "7FFC9576D000", "elfType" : 3, "buildId" : "66B53182A7BB051F9714B540CC1F984B12D5C551" }, { "b" : "7F3754255000", "path" : "/lib/x86_64-linux-gnu/libssl.so.1.0.0", "elfType" : 3, "buildId" : "FF69EA60EBE05F2DD689D2B26FC85A73E5FBC3A0" }, { "b" : "7F3753E10000", "path" : "/lib/x86_64-linux-gnu/libcrypto.so.1.0.0", "elfType" : 3, "buildId" : "15FFEB43278726B025F020862BF51302822A40EC" }, { "b" : "7F3753C08000", "path" : "/lib/x86_64-linux-gnu/librt.so.1", "elfType" : 3, "buildId" : "89C34D7A182387D76D5CDA1F7718F5D58824DFB3" }, { "b" : "7F3753A04000", "path" : "/lib/x86_64-linux-gnu/libdl.so.2", "elfType" : 3, "buildId" : "8CC8D0D119B142D839800BFF71FB71E73AEA7BD4" }, { "b" : "7F37536FB000", "path" : "/lib/x86_64-linux-gnu/libm.so.6", "elfType" : 3, "buildId" : "DFB85DE42DAFFD09640C8FE377D572DE3E168920" }, { "b" : "7F37534E5000", "path" : "/lib/x86_64-linux-gnu/libgcc_s.so.1", "elfType" : 3, "buildId" : "68220AE2C65D65C1B6AAA12FA6765A6EC2F5F434" }, { "b" : "7F37532C8000", "path" : "/lib/x86_64-linux-gnu/libpthread.so.0", "elfType" : 3, "buildId" : "CE17E023542265FC11D9BC8F534BB4F070493D30" }, { "b" : "7F3752EFE000", "path" : "/lib/x86_64-linux-gnu/libc.so.6", "elfType" : 3, "buildId" : "B5381A457906D279073822A5CEB24C4BFEF94DDB" }, { "b" : "7F37544BE000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "5D7B6259552275A3C17BD4C3FD05F5A6BF40CAA5" } ] }}
       mongod(_ZN5mongo15printStackTraceERSo+0x41) [0x558e91b4d181]
       mongod(_ZN5mongo29reportOutOfMemoryErrorAndExitEv+0x84) [0x558e91b4c7b4]
       mongod(+0x173DB5B) [0x558e91d0fb5b]
       mongod(tc_newarray_nothrow+0x212) [0x558e9264b992]
       mongod(_ZSt20get_temporary_bufferISt4pairIN5mongo7BSONObjENS1_8RecordIdEEES0_IPT_lEl+0x41) [0x558e91241d61]
       mongod(_ZNSt17_Temporary_bufferISt15_Deque_iteratorISt4pairIN5mongo7BSONObjENS2_8RecordIdEERS5_PS5_ES5_EC1ES8_S8_+0x72) [0x558e91241df2]
       mongod(_ZN5mongo6sorter13NoLimitSorterINS_7BSONObjENS_8RecordIdENS_27BtreeExternalSortComparisonEE4sortEv+0xBA) [0x558e9124901a]
       mongod(_ZN5mongo6sorter13NoLimitSorterINS_7BSONObjENS_8RecordIdENS_27BtreeExternalSortComparisonEE5spillEv+0x4D) [0x558e912491cd]
       mongod(_ZN5mongo6sorter13NoLimitSorterINS_7BSONObjENS_8RecordIdENS_27BtreeExternalSortComparisonEE3addERKS2_RKS3_+0xD1) [0x558e912495f1]
       mongod(_ZN5mongo17IndexAccessMethod11BulkBuilder6insertEPNS_16OperationContextERKNS_7BSONObjERKNS_8RecordIdERKNS_19InsertDeleteOptionsEPl+0x2A2) [0x558e9123b1d2]
       mongod(_ZN5mongo15MultiIndexBlock6insertERKNS_7BSONObjERKNS_8RecordIdE+0xF1) [0x558e90feacf1]
       mongod(_ZN5mongo10Collection14insertDocumentEPNS_16OperationContextERKNS_7BSONObjERKSt6vectorIPNS_15MultiIndexBlockESaIS8_EEb+0x1B5) [0x558e90fb6d05]
       mongod(+0xF14FEA) [0x558e914e6fea]
       mongod(+0xF14E55) [0x558e914e6e55]
       mongod(+0x10655B1) [0x558e916375b1]
       mongod(+0x10657B9) [0x558e916377b9]
       mongod(_ZN5mongo4repl10TaskRunner9_runTasksEv+0xAF) [0x558e9163891f]
       mongod(_ZN5mongo10ThreadPool10_doOneTaskEPSt11unique_lockISt5mutexE+0x14C) [0x558e91ac4aec]
       mongod(_ZN5mongo10ThreadPool13_consumeTasksEv+0xBC) [0x558e91ac559c]
       mongod(_ZN5mongo10ThreadPool17_workerThreadBodyEPS0_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x96) [0x558e91ac5f86]
       mongod(+0x1FF7A60) [0x558e925c9a60]
       libpthread.so.0(+0x76BA) [0x7f37532cf6ba]
       libc.so.6(clone+0x6D) [0x7f375300541d]
      -----  END BACKTRACE  -----

      Similar issues

      I noticed this issue: https://jira.mongodb.org/browse/SERVER-28241, but it is related to MongoDB 3.4.1 and we're running 3.4.16

      Setup information

      2019-05-27T08:31:55.653+0000 I CONTROL  [initandlisten] MongoDB starting : pid=23408 port=27017 dbpath=/data/db 64-bit host=monolith-data-mongodbak-rs2
      2019-05-27T08:31:55.653+0000 I CONTROL  [initandlisten] db version v3.4.16
      2019-05-27T08:31:55.653+0000 I CONTROL  [initandlisten] git version: 0d6a9242c11b99ddadcfb6e86a850b6ba487530a
      2019-05-27T08:31:55.654+0000 I CONTROL  [initandlisten] OpenSSL version: OpenSSL 1.0.2g  1 Mar 2016
      2019-05-27T08:31:55.654+0000 I CONTROL  [initandlisten] allocator: tcmalloc
      2019-05-27T08:31:55.654+0000 I CONTROL  [initandlisten] modules: none
      2019-05-27T08:31:55.654+0000 I CONTROL  [initandlisten] build environment:
      2019-05-27T08:31:55.654+0000 I CONTROL  [initandlisten]     distmod: ubuntu1604
      2019-05-27T08:31:55.655+0000 I CONTROL  [initandlisten]     distarch: x86_64
      2019-05-27T08:31:55.655+0000 I CONTROL  [initandlisten]     target_arch: x86_64
      2019-05-27T08:31:55.655+0000 I CONTROL  [initandlisten] options: { config: "/etc/mongod.conf", net: { port: 27017 }, processManagement: { pidFilePath: "/var/run/mongo/mongod.pid" }, replication: { replSet: "rs2" }, sharding: { clusterRole: "shardsvr" }, storage: { dbPath: "/data/db", engine: "wiredTiger", mmapv1: { smallFiles: true }, wiredTiger: { engineConfig: { cacheSizeGB: 1.5 } } }, systemLog: { destination: "file", logAppend: true, logRotate: "reopen", path: "/var/log/mongodb/mongod.log" } }

        1. diagnostic.data.tar
          1.55 MB
        2. primary_diagnostic.data.tar
          29.94 MB
        3. Screen Shot 2019-05-30 at 10.48.37 AM.png
          Screen Shot 2019-05-30 at 10.48.37 AM.png
          505 kB
        4. Screen Shot 2019-06-06 at 2.49.56 PM.png
          Screen Shot 2019-06-06 at 2.49.56 PM.png
          256 kB

            Assignee:
            eric.sedor@mongodb.com Eric Sedor
            Reporter:
            adamof Stefan Adamov
            Votes:
            0 Vote for this issue
            Watchers:
            7 Start watching this issue

              Created:
              Updated:
              Resolved: