[SERVER-20835] Out of memory crash during queries Created: 08/Oct/15  Updated: 09/Jan/16  Resolved: 09/Jan/16

Status: Closed
Project: Core Server
Component/s: Admin
Affects Version/s: 3.0.4
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Maxime Santerre Assignee: Kelsey Schubert
Resolution: Incomplete Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Operating System: ALL
Participants:

 Description   

From time to time, MongoDB will crash with an out-of-memory error.

I'm not quite sure how to reproduce, but I've included as many relevant logs as I can in this gist:

https://gist.github.com/msanterre/d4da445a15e3f905b916

2015-10-08T20:02:12.156+0000 I NETWORK  [conn275701] end connection 10.0.250.179:48299 (355 connections now open)
2015-10-08T20:02:12.158+0000 I NETWORK  [initandlisten] connection accepted from 10.0.250.179:48311 #275716 (356 connections now open)
2015-10-08T20:02:13.582+0000 I NETWORK  [initandlisten] connection accepted from 10.0.0.175:40750 #275717 (357 connections now open)
2015-10-08T20:02:13.631+0000 I NETWORK  [conn275717] end connection 10.0.0.175:40750 (356 connections now open)
2015-10-08T20:02:15.964+0000 I NETWORK  [initandlisten] connection accepted from 10.0.0.176:45914 #275718 (357 connections now open)
2015-10-08T20:02:16.281+0000 F -        [conn275713] out of memory.
 
 0xf5bfc9 0xf5baf9 0x1405c4f 0xa06bed 0xbb149b 0xbb281b 0xbb32e5 0xbd2959 0xbd5837 0xbd5b9e 0xbd4e7d 0xbe0ae6 0xb9f4de 0xba33ce 0x953434 0x9d6234 0x9d71bd 0x9d7ecb 0xb9cac6 0xab2e20 0x80e0ad 0xf0ef0b 0x7f6ea0fc5182 0x7f6e9fa8d47d
----- BEGIN BACKTRACE -----
{"backtrace":[{"b":"400000","o":"B5BFC9"},{"b":"400000","o":"B5BAF9"},{"b":"400000","o":"1005C4F"},{"b":"400000","o":"606BED"},{"b":"400000","o":"7B149B"},{"b":"400000","o":"7B281B"},{"b":"400000","o":"7B32E5"},{"b":"400000","o":"7D2959"},{"b":"400000","o":"7D5837"},{"b":"400000","o":"7D5B9E"},{"b":"400000","o":"7D4E7D"},{"b":"400000","o":"7E0AE6"},{"b":"400000","o":"79F4DE"},{"b":"400000","o":"7A33CE"},{"b":"400000","o":"553434"},{"b":"400000","o":"5D6234"},{"b":"400000","o":"5D71BD"},{"b":"400000","o":"5D7ECB"},{"b":"400000","o":"79CAC6"},{"b":"400000","o":"6B2E20"},{"b":"400000","o":"40E0AD"},{"b":"400000","o":"B0EF0B"},{"b":"7F6EA0FBD000","o":"8182"},{"b":"7F6E9F993000","o":"FA47D"}],"processInfo":{ "mongodbVersion" : "3.0.6", "gitVersion" : "1ef45a23a4c5e3480ac919b28afcba3c615488f2", "uname" : { "sysname" : "Linux", "release" : "3.13.0-48-generic", "version" : "#80-Ubuntu SMP Thu Mar 12 11:16:15 UTC 2015", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "BF5AC37B50D416FD8D6D427E561426ED60291032" }, { "b" : "7FFF2C0FB000", "elfType" : 3, "buildId" : "341443CC258F9F27709CAA36C8CB321E4BBFFF95" }, { "b" : "7F6EA0FBD000", "path" : "/lib/x86_64-linux-gnu/libpthread.so.0", "elfType" : 3, "buildId" : "9318E8AF0BFBE444731BB0461202EF57F7C39542" }, { "b" : "7F6EA0D5F000", "path" : "/lib/x86_64-linux-gnu/libssl.so.1.0.0", "elfType" : 3, "buildId" : "FF43D0947510134A8A494063A3C1CF3CEBB27791" }, { "b" : "7F6EA0984000", "path" : "/lib/x86_64-linux-gnu/libcrypto.so.1.0.0", "elfType" : 3, "buildId" : "B927879B878D90DD9FF4B15B00E7799AA8E0272F" }, { "b" : "7F6EA077C000", "path" : "/lib/x86_64-linux-gnu/librt.so.1", "elfType" : 3, "buildId" : "92FCF41EFE012D6186E31A59AD05BDBB487769AB" }, { "b" : "7F6EA0578000", "path" : "/lib/x86_64-linux-gnu/libdl.so.2", "elfType" : 3, "buildId" : "C1AE4CB7195D337A77A3C689051DABAA3980CA0C" }, { "b" : "7F6EA0274000", "path" : "/usr/lib/x86_64-linux-gnu/libstdc++.so.6", "elfType" : 3, "buildId" : 
"4BF6F7ADD8244AD86008E6BF40D90F8873892197" }, { "b" : "7F6E9FF6E000", "path" : "/lib/x86_64-linux-gnu/libm.so.6", "elfType" : 3, "buildId" : "1D76B71E905CB867B27CEF230FCB20F01A3178F5" }, { "b" : "7F6E9FD58000", "path" : "/lib/x86_64-linux-gnu/libgcc_s.so.1", "elfType" : 3, "buildId" : "8D0AA71411580EE6C08809695C3984769F25725B" }, { "b" : "7F6E9F993000", "path" : "/lib/x86_64-linux-gnu/libc.so.6", "elfType" : 3, "buildId" : "30C94DC66A1FE95180C3D68D2B89E576D5AE213C" }, { "b" : "7F6EA11DB000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "9F00581AB3C73E3AEA35995A0C50D24D59A01D47" } ] }}
 mongod(_ZN5mongo15printStackTraceERSo+0x29) [0xf5bfc9]
 mongod(_ZN5mongo29reportOutOfMemoryErrorAndExitEv+0x49) [0xf5baf9]
 mongod(tc_new+0x1AF) [0x1405c4f]
 mongod(_ZNSt6vectorIN5mongo8IntervalESaIS1_EE19_M_emplace_back_auxIIS1_EEEvDpOT_+0x3D) [0xa06bed]
 mongod(_ZN5mongo18IndexBoundsBuilder17translateEqualityERKNS_11BSONElementEbPNS_19OrderedIntervalListEPNS0_15BoundsTightnessE+0xAB) [0xbb149b]
 mongod(_ZN5mongo18IndexBoundsBuilder9translateEPKNS_15MatchExpressionERKNS_11BSONElementERKNS_10IndexEntryEPNS_19OrderedIntervalListEPNS0_15BoundsTightnessE+0xC8B) [0xbb281b]
 mongod(_ZN5mongo18IndexBoundsBuilder21translateAndIntersectEPKNS_15MatchExpressionERKNS_11BSONElementERKNS_10IndexEntryEPNS_19OrderedIntervalListEPNS0_15BoundsTightnessE+0x45) [0xbb32e5]
 mongod(_ZN5mongo18QueryPlannerAccess17mergeWithLeafNodeEPNS_15MatchExpressionEPNS0_17ScanBuildingStateE+0x269) [0xbd2959]
 mongod(_ZN5mongo18QueryPlannerAccess17processIndexScansERKNS_14CanonicalQueryEPNS_15MatchExpressionEbRKSt6vectorINS_10IndexEntryESaIS7_EERKNS_18QueryPlannerParamsEPS6_IPNS_17QuerySolutionNodeESaISG_EE+0xB7) [0xbd5837]
 mongod(_ZN5mongo18QueryPlannerAccess15buildIndexedAndERKNS_14CanonicalQueryEPNS_15MatchExpressionEbRKSt6vectorINS_10IndexEntryESaIS7_EERKNS_18QueryPlannerParamsE+0x8E) [0xbd5b9e]
 mongod(_ZN5mongo18QueryPlannerAccess22buildIndexedDataAccessERKNS_14CanonicalQueryEPNS_15MatchExpressionEbRKSt6vectorINS_10IndexEntryESaIS7_EERKNS_18QueryPlannerParamsE+0x10D) [0xbd4e7d]
 mongod(_ZN5mongo12QueryPlanner4planERKNS_14CanonicalQueryERKNS_18QueryPlannerParamsEPSt6vectorIPNS_13QuerySolutionESaIS9_EE+0x1EF6) [0xbe0ae6]
 mongod(+0x79F4DE) [0xb9f4de]
 mongod(_ZN5mongo16getExecutorCountEPNS_16OperationContextEPNS_10CollectionERKNS_12CountRequestENS_12PlanExecutor11YieldPolicyEPPS7_+0x2CE) [0xba33ce]
 mongod(_ZN5mongo8CmdCount3runEPNS_16OperationContextERKSsRNS_7BSONObjEiRSsRNS_14BSONObjBuilderEb+0x134) [0x953434]
 mongod(_ZN5mongo12_execCommandEPNS_16OperationContextEPNS_7CommandERKSsRNS_7BSONObjEiRSsRNS_14BSONObjBuilderEb+0x34) [0x9d6234]
 mongod(_ZN5mongo7Command11execCommandEPNS_16OperationContextEPS0_iPKcRNS_7BSONObjERNS_14BSONObjBuilderEb+0xC1D) [0x9d71bd]
 mongod(_ZN5mongo12_runCommandsEPNS_16OperationContextEPKcRNS_7BSONObjERNS_11_BufBuilderINS_16TrivialAllocatorEEERNS_14BSONObjBuilderEbi+0x28B) [0x9d7ecb]
 mongod(_ZN5mongo8runQueryEPNS_16OperationContextERNS_7MessageERNS_12QueryMessageERKNS_15NamespaceStringERNS_5CurOpES3_+0x746) [0xb9cac6]
 mongod(_ZN5mongo16assembleResponseEPNS_16OperationContextERNS_7MessageERNS_10DbResponseERKNS_11HostAndPortE+0xB10) [0xab2e20]
 mongod(_ZN5mongo16MyMessageHandler7processERNS_7MessageEPNS_21AbstractMessagingPortEPNS_9LastErrorE+0xDD) [0x80e0ad]
 mongod(_ZN5mongo17PortMessageServer17handleIncomingMsgEPv+0x34B) [0xf0ef0b]
 libpthread.so.0(+0x8182) [0x7f6ea0fc5182]
 libc.so.6(clone+0x6D) [0x7f6e9fa8d47d]
-----  END BACKTRACE  -----
 
 
Result of `grep conn275713 mongod.log`
----------------------------------------
2015-09-05T14:22:59.368+0000 I NETWORK  [conn275713] end connection 10.0.250.179:40190 (214 connections now open)
2015-10-08T20:01:52.834+0000 I QUERY    [conn275713] query pandabot_production.newsletters query: { merchant_id: "330024", _id: ObjectId('5616b22d353234002d630200') } planSummary: IXSCAN { _id: 1 } cursorid:36044368289 ntoreturn:0 ntoskip:0 nscanned:1 nscannedObjects:1 keyUpdates:0 writeConflicts:0 numYields:0 nreturned:1 reslen:14124344 locks:{ Global: { acquireCount: { r: 2 } }, MMAPV1Journal: { acquireCount: { r: 1 } }, Database: { acquireCount: { r: 1 } }, Collection: { acquireCount: { R: 1 } } } 104ms
2015-10-08T20:02:16.281+0000 F -        [conn275713] out of memory.
----------------------------------------



 Comments   
Comment by Ramon Fernandez Marina [ 09/Jan/16 ]

m.santerre, we haven't heard back from you for some time, so I'm going to close this ticket for now. I don't see anything unusual in the data you sent us; the connection that triggers the out-of-memory condition is a scan of the _id index, so I can only think that, given the size of the pandabot_production database, this server may need more memory. If this is still an issue for you, I'd recommend upgrading to 3.0.8 and letting us know if the problem persists.

Regards,
Ramón.

Comment by Kelsey Schubert [ 07/Dec/15 ]

Thank you, m.santerre.

Can you please provide the following information?

  • The level of memory consumption preceding the crash.
  • The logs preceding the crash, preferably with verbosity 1.

Kind regards,
Thomas

Comment by Maxime Santerre [ 04/Dec/15 ]

Hi Thomas,

I'm using MMAPv1.

output

rs0:PRIMARY> db.serverStatus()["storageEngine"]
{ "name" : "mmapv1" }

Comment by Kelsey Schubert [ 04/Dec/15 ]

Hi m.santerre,

Sorry for the long delay getting back to you.

To continue to examine this issue, can you please specify whether you are using MMAPv1 or WiredTiger?

Please note that if you are using WiredTiger, there have been a number of memory consumption bugs that have been fixed in subsequent versions. I would recommend that you consider upgrading to 3.0.7, and if the issue persists we will continue to investigate.

Thank you,
Thomas

Comment by Maxime Santerre [ 28/Oct/15 ]

Hi Ramon,

We have a 3 machine replica-set, no sharding.

We're running on ubuntu 14.04 LTS on AWS (m4.2xlarge)

m4.2xlarge

mongod.conf (no comments)

ubuntu@sequoia2:~$ grep ^[^#] /etc/mongod.conf
dbpath=/data
logpath=/log/mongod.log
logappend=true
bind_ip = 0.0.0.0
replSet=rs0

show dbs

rs0:PRIMARY> show dbs
admin                  0.078GB
local                 48.055GB
pandabot_production  251.831GB

db.stats

rs0:SECONDARY> db.stats()
{
	"db" : "pandabot_production",
	"collections" : 34,
	"objects" : 401804777,
	"avgObjSize" : 356.7545534482284,
	"dataSize" : 143345683792,
	"storageSize" : 154461244528,
	"numExtents" : 347,
	"indexes" : 64,
	"indexSize" : 110308474416,
	"fileSize" : 285409804288,
	"nsSizeMB" : 16,
	"extentFreeList" : {
		"num" : 1068,
		"totalSize" : 1318690816
	},
	"dataFileVersion" : {
		"major" : 4,
		"minor" : 22
	},
	"ok" : 1
}

db.newsletters.stats

{
	"ns" : "pandabot_production.newsletters",
	"count" : 1397,
	"size" : 195639984,
	"avgObjSize" : 140042,
	"numExtents" : 6,
	"storageSize" : 222617600,
	"lastExtentSize" : 82563072,
	"paddingFactor" : 1,
	"paddingFactorNote" : "paddingFactor is unused and unmaintained in 3.0. It remains hard coded to 1.0 for compatibility only.",
	"userFlags" : 1,
	"capped" : false,
	"nindexes" : 2,
	"totalIndexSize" : 122640,
	"indexSizes" : {
		"_id_" : 73584,
		"mid_1" : 49056
	},
	"ok" : 1
}

db.newsletters.getIndexes()

[
	{
		"v" : 1,
		"key" : {
			"_id" : 1
		},
		"name" : "_id_",
		"ns" : "pandabot_production.newsletters"
	},
	{
		"v" : 1,
		"key" : {
			"mid" : 1
		},
		"name" : "mid_1",
		"ns" : "pandabot_production.newsletters"
	}
]

Comment by Ramon Fernandez Marina [ 09/Oct/15 ]

m.santerre, we'll need more details to understand this issue. Please provide the following information:

  • Your deployment configuration (stand-alone node, replica set, sharded cluster)
  • Hardware configuration information for your node(s)
  • mongod.conf configuration file(s) and/or startup flags
  • the output of the following commands:

    show dbs
    use pandabot_production
    db.stats()
    db.newsletters.stats()
    db.newsletters.getIndexes()
    

Thanks,
Ramón.

Generated at Thu Feb 08 03:55:26 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.