[SERVER-19230] WT seg fault on pure read work load Created: 30/Jun/15  Updated: 04/Aug/15  Resolved: 06/Jul/15

Status: Closed
Project: Core Server
Component/s: WiredTiger
Affects Version/s: None
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Charlie Page Assignee: Alexander Gorrod
Resolution: Cannot Reproduce Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Attachments: File cut.log.tgz     Text File repair.txt    
Issue Links:
Related
related to SERVER-12259 startup option to provide core dumps Closed
Backwards Compatibility: Fully Compatible
Operating System: ALL
Participants:

 Description   

0xf51af9 0xf513c2 0xf51776 0x7fc34d557650 0x7fc34d5575d7 0x7fc34d558cc8 0x7fc34d550546 0x7fc34d5505f2 0x10463e9 0x1046440 0x104423e 0x12ced25 0x12f04f8 0x12f4b95 0x12f1e93 0x130c489 0x12e4700 0x1323173 0xd690c9 0x913390 0xa0524b 0xbd2824 0xbd2bd4 0xb9f02a 0xab28de 0xab7c8d 0x80e96d 0xf04c2b 0x7fc34eb69df5 0x7fc34d6181ad
----- BEGIN BACKTRACE -----
{"backtrace":[{"b":"400000","o":"B51AF9"},{"b":"400000","o":"B513C2"},{"b":"400000","o":"B51776"},{"b":"7FC34D522000","o":"35650"},{"b":"7FC34D522000","o":"355D7"},{"b":"7FC34D522000","o":"36CC8"},{"b":"7FC34D522000","o":"2E546"},{"b":"7FC34D522000","o":"2E5F2"},{"b":"400000","o":"C463E9"},{"b":"400000","o":"C46440"},{"b":"400000","o":"C4423E"},{"b":"400000","o":"ECED25"},{"b":"400000","o":"EF04F8"},{"b":"400000","o":"EF4B95"},{"b":"400000","o":"EF1E93"},{"b":"400000","o":"F0C489"},{"b":"400000","o":"EE4700"},{"b":"400000","o":"F23173"},{"b":"400000","o":"9690C9"},{"b":"400000","o":"513390"},{"b":"400000","o":"60524B"},{"b":"400000","o":"7D2824"},{"b":"400000","o":"7D2BD4"},{"b":"400000","o":"79F02A"},{"b":"400000","o":"6B28DE"},{"b":"400000","o":"6B7C8D"},{"b":"400000","o":"40E96D"},{"b":"400000","o":"B04C2B"},{"b":"7FC34EB62000","o":"7DF5"},{"b":"7FC34D522000","o":"F61AD"}],"processInfo":{ "mongodbVersion" : "3.0.3", "gitVersion" : "b40106b36eecd1b4407eb1ad1af6bc60593c6105", "uname" : { "sysname" : "Linux", "release" : "3.10.0-229.1.2.el7.x86_64", "version" : "#1 SMP Fri Mar 27 03:04:26 UTC 2015", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "4A59CC17954D13BF1713A509D071A50E6BD1B3FF" }, { "b" : "7FFF3D9F9000", "elfType" : 3, "buildId" : "64DE62EAA6D0191EAD9358297D64406988D7ED66" }, { "b" : "7FC34EB62000", "path" : "/lib64/libpthread.so.0", "elfType" : 3, "buildId" : "12F30315D4F4A2FE58B1977405C8B5515861E66B" }, { "b" : "7FC34E8F5000", "path" : "/lib64/libssl.so.10", "elfType" : 3, "buildId" : "B54FE20525AE27B81127E04A2B006FD758E42E55" }, { "b" : "7FC34E50E000", "path" : "/lib64/libcrypto.so.10", "elfType" : 3, "buildId" : "D3ED02D380B3CDCF52EC6E23DD35CDF03B6E046A" }, { "b" : "7FC34E306000", "path" : "/lib64/librt.so.1", "elfType" : 3, "buildId" : "7376A07360DC57189A8F92B20AA4AA1CAEA80551" }, { "b" : "7FC34E102000", "path" : "/lib64/libdl.so.2", "elfType" : 3, "buildId" : "4DFEE4EA9AE8FDD4C71BD4CCC0727222F19DF810" }, { "b" : 
"7FC34DDFB000", "path" : "/lib64/libstdc++.so.6", "elfType" : 3, "buildId" : "405EACD649720B8668FFBBA197CBF030A7EF6296" }, { "b" : "7FC34DAF9000", "path" : "/lib64/libm.so.6", "elfType" : 3, "buildId" : "A1AA62B29765BE03A36BF927B047EEEF8696EEC6" }, { "b" : "7FC34D8E3000", "path" : "/lib64/libgcc_s.so.1", "elfType" : 3, "buildId" : "5D3D7256AE68BCFF41E312A24825ED80ECA88A73" }, { "b" : "7FC34D522000", "path" : "/lib64/libc.so.6", "elfType" : 3, "buildId" : "C31FFE7942BFD77B2FCA8F9BD5709D387A86D3BC" }, { "b" : "7FC34ED7E000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "9866E1D2BA61EBB4CE4F009FACDAACC24EF3B804" }, { "b" : "7FC34D2D6000", "path" : "/lib64/libgssapi_krb5.so.2", "elfType" : 3, "buildId" : "34672D541C8C9C5C1C25CB4F3F332CC9D3E604AD" }, { "b" : "7FC34CFF3000", "path" : "/lib64/libkrb5.so.3", "elfType" : 3, "buildId" : "45CB7F6CD322F5B55FF8B635F7EC1578631CCAEA" }, { "b" : "7FC34CDEF000", "path" : "/lib64/libcom_err.so.2", "elfType" : 3, "buildId" : "3A1166709F88740C49E060731832E3FAD2DFB66B" }, { "b" : "7FC34CBBD000", "path" : "/lib64/libk5crypto.so.3", "elfType" : 3, "buildId" : "23A2D854538903E2B84EF0882046DD95522C8B59" }, { "b" : "7FC34C9A7000", "path" : "/lib64/libz.so.1", "elfType" : 3, "buildId" : "E45643F27F3B3E960F3691AFC6EC27A98EF7B46B" }, { "b" : "7FC34C798000", "path" : "/lib64/libkrb5support.so.0", "elfType" : 3, "buildId" : "F4A3D5E7E23F871751CA8F250421F8CF83447AD2" }, { "b" : "7FC34C594000", "path" : "/lib64/libkeyutils.so.1", "elfType" : 3, "buildId" : "2E01D5AC08C1280D013AAB96B292AC58BC30A263" }, { "b" : "7FC34C37A000", "path" : "/lib64/libresolv.so.2", "elfType" : 3, "buildId" : "AC596E865AF0D14B10F7B707F47D2031AD6D68DC" }, { "b" : "7FC34C155000", "path" : "/lib64/libselinux.so.1", "elfType" : 3, "buildId" : "82FF6B18E1E42825CC2D060F969479AD4AF2F62C" }, { "b" : "7FC34BEF4000", "path" : "/lib64/libpcre.so.1", "elfType" : 3, "buildId" : "298B19C64B19995F2AA4DA7B852E90BA5302F630" }, { "b" : "7FC34BCCF000", "path" : 
"/lib64/liblzma.so.5", "elfType" : 3, "buildId" : "218D03D1F6CF1A099A4D467B5E8ECF4F2BF45750" } ] }}
 mongod(_ZN5mongo15printStackTraceERSo+0x29) [0xf51af9]
 mongod(+0xB513C2) [0xf513c2]
 mongod(+0xB51776) [0xf51776]
 libc.so.6(+0x35650) [0x7fc34d557650]
 libc.so.6(gsignal+0x37) [0x7fc34d5575d7]
 libc.so.6(abort+0x148) [0x7fc34d558cc8]
 libc.so.6(+0x2E546) [0x7fc34d550546]
 libc.so.6(+0x2E5F2) [0x7fc34d5505f2]
 mongod(_ZN6snappy13RawUncompressEPNS_6SourceEPc+0x5E9) [0x10463e9]
 mongod(_ZN6snappy13RawUncompressEPKcmPc+0x30) [0x1046440]
 mongod(snappy_uncompress+0x5E) [0x104423e]
 mongod(+0xECED25) [0x12ced25]
 mongod(__wt_bt_read+0x3E8) [0x12f04f8]
 mongod(__wt_cache_read+0x1C5) [0x12f4b95]
 mongod(__wt_page_in_func+0x403) [0x12f1e93]
 mongod(__wt_row_search+0xA59) [0x130c489]
 mongod(__wt_btcur_search+0x670) [0x12e4700]
 mongod(+0xF23173) [0x1323173]
 mongod(_ZNK5mongo21WiredTigerRecordStore7dataForEPNS_16OperationContextERKNS_8RecordIdE+0x69) [0xd690c9]
 mongod(_ZNK5mongo10Collection6docForEPNS_16OperationContextERKNS_8RecordIdE+0x20) [0x913390]
 mongod(_ZN5mongo10FetchStage4workEPm+0x2CB) [0xa0524b]
 mongod(_ZN5mongo12PlanExecutor18getNextSnapshottedEPNS_11SnapshottedINS_7BSONObjEEEPNS_8RecordIdE+0xA4) [0xbd2824]
 mongod(_ZN5mongo12PlanExecutor7getNextEPNS_7BSONObjEPNS_8RecordIdE+0x34) [0xbd2bd4]
 mongod(_ZN5mongo7getMoreEPNS_16OperationContextEPKcixRNS_5CurOpEiRbPb+0x49A) [0xb9f02a]
 mongod(_ZN5mongo15receivedGetMoreEPNS_16OperationContextERNS_10DbResponseERNS_7MessageERNS_5CurOpE+0x35E) [0xab28de]
 mongod(_ZN5mongo16assembleResponseEPNS_16OperationContextERNS_7MessageERNS_10DbResponseERKNS_11HostAndPortE+0x13CD) [0xab7c8d]
 mongod(_ZN5mongo16MyMessageHandler7processERNS_7MessageEPNS_21AbstractMessagingPortEPNS_9LastErrorE+0xDD) [0x80e96d]
 mongod(_ZN5mongo17PortMessageServer17handleIncomingMsgEPv+0x34B) [0xf04c2b]
 libpthread.so.0(+0x7DF5) [0x7fc34eb69df5]
 libc.so.6(clone+0x6D) [0x7fc34d6181ad]
-----  END BACKTRACE  -----



 Comments   
Comment by Charlie Page [ 03/Jul/15 ]

OK, thanks for the update, I've removed the data.

Comment by Alexander Gorrod [ 03/Jul/15 ]

charlie.page@10gen.com I can't think of any other information we can get from the databases. Feel free to blow them away and restart other workloads.

It's frustrating because the crash came out of Snappy - and we don't expect snappy to crash. We couldn't uncover any corruption in the database, so without a way to replay the crashing read there isn't much we can discover.

We will review the WiredTiger wrapper around Snappy to ensure that we never call it with invalid options. Apart from that I'm stumped on the root cause.

Comment by Alexander Gorrod [ 03/Jul/15 ]

What we are wondering is whether you can reproduce the crash via mongod. We have verified the content of the underlying database files - a corrupted block in one of those pages would have been the obvious cause for the crash you reported, but we did not find one. So we are looking for another way to track down the problem.

One other thing I noticed was that the database files in that directory are owned by a combination of root and mongod. Is it possible that the problem was a symptom of a file permissions issue?

Comment by Michael Cahill (Inactive) [ 02/Jul/15 ]

charlie.page@10gen.com I have completed low-level verification and the only issue I have seen is this one:

[1435875972:356588][1031676:0x7f9cd0bca740], file:collection/2-2274131897907400228.wt, session.verify: checkpoint ranges never verified: 3
[1435875980:765484][1031676:0x7f9cd0bca740], file:collection/2-2274131897907400228.wt, session.verify: file ranges never verified: 3

That is basically benign and does not explain the segfault.

alexander.gorrod is going to follow up, but what I'm looking for now is a way to catch this again so that we can see what's going on. Can you tell from the query that was running when the crash happened where it was up to? In other words, could you construct a MongoDB query that should repeat the query starting from near where it failed?

Comment by Michael Cahill (Inactive) [ 02/Jul/15 ]

I've run a verify on the failing table overnight and that succeeded without either crashing or producing any errors.

I'll try the other collections in case I am missing something, but if there is a page image that causes snappy_uncompress to segfault, I would have expected this to find it.

Comment by Michael Cahill (Inactive) [ 02/Jul/15 ]

As I said, I've never seen snappy segfault during uncompress before. We will need to get to the bottom of that to figure out what is going wrong. A bad read does indeed cause an error message and the read operation fails.

Comment by Charlie Page [ 01/Jul/15 ]

Yes, I'll follow up in an email with the login credentials.

Should a bad read bring down the server? It seems that reporting it in the log would be a better alternative.

Comment by Charlie Page [ 30/Jun/15 ]

xfs_repair output (repair.txt), I believe it's clean, but it's the first time I've had to run it.

Comment by Ramon Fernandez Marina [ 30/Jun/15 ]

michael.cahill, can you please look at the stack trace for clues? If we need more information to track this one down maybe bruce.lucas@10gen.com can work with Charlie to find the root cause of this issue.

Thanks,
Ramón.

Comment by Charlie Page [ 30/Jun/15 ]

Failed again in 3.0.4, but now it seg faults. I uploaded the last 100k lines of the log. I'll run fsck on the disk tonight, just to be sure, but I doubt it's file system corruption given that these are new disks with xfs. I can save the data files for a few days, but I will need to remove them in the near future to get (hopefully) working ones. (As a reminder, the data is ~400G compressed.)

2015-06-30T17:37:34.481-0400 F -        [conn32] Invalid access at address: 0xffffea03a7fb7920
2015-06-30T17:37:34.684-0400 I QUERY    [conn66] getmore hub.inv_i_trans_60_days cursorid:10471517199 ntoreturn:0 keyUpdates:0 writeConflicts:0 numYields:279 nreturned:5448 reslen:4194832 locks:{ Global: { acquireCount: { r: 560 } }, Database: { acquireCount: { r: 280 } }, Collection: { acquireCount: { r: 280 } } } 11441ms
2015-06-30T17:37:34.749-0400 I QUERY    [conn42] getmore hub.inv_i_trans_60_days cursorid:10683984646 ntoreturn:0 keyUpdates:0 writeConflicts:0 numYields:286 nreturned:5448 reslen:4194812 locks:{ Global: { acquireCount: { r: 574 } }, Database: { acquireCount: { r: 287 } }, Collection: { acquireCount: { r: 287 } } } 12435ms
2015-06-30T17:37:34.786-0400 F -        [conn32] Got signal: 11 (Segmentation fault).
 
 0xf5e489 0xf5dd52 0xf5e0ae 0x7fa52fee9130 0x7fa52e9e3c4d 0x1052dd5 0x1053050 0x1050e4e 0x12db935 0x12fe268 0x1302925 0x12ffc0b 0x131b067 0x12f1920 0x1332053 0xd75e49 0x912a80 0xa0320b 0xbd09a4 0xbd0d54 0xb9d062 0xab079e 0xab5b4d 0x80fc1d 0xf115bb 0x7fa52fee1df5 0x7fa52e9901ad
----- BEGIN BACKTRACE -----
{"backtrace":[{"b":"400000","o":"B5E489"},{"b":"400000","o":"B5DD52"},{"b":"400000","o":"B5E0AE"},{"b":"7FA52FEDA000","o":"F130"},{"b":"7FA52E89A000","o":"149C4D"},{"b":"400000","o":"C52DD5"},{"b":"400000","o":"C53050"},{"b":"400000","o":"C50E4E"},{"b":"400000","o":"EDB935"},{"b":"400000","o":"EFE268"},{"b":"400000","o":"F02925"},{"b":"400000","o":"EFFC0B"},{"b":"400000","o":"F1B067"},{"b":"400000","o":"EF1920"},{"b":"400000","o":"F32053"},{"b":"400000","o":"975E49"},{"b":"400000","o":"512A80"},{"b":"400000","o":"60320B"},{"b":"400000","o":"7D09A4"},{"b":"400000","o":"7D0D54"},{"b":"400000","o":"79D062"},{"b":"400000","o":"6B079E"},{"b":"400000","o":"6B5B4D"},{"b":"400000","o":"40FC1D"},{"b":"400000","o":"B115BB"},{"b":"7FA52FEDA000","o":"7DF5"},{"b":"7FA52E89A000","o":"F61AD"}],"processInfo":{ "mongodbVersion" : "3.0.4", "gitVersion" : "0481c958daeb2969800511e7475dc66986fa9ed5", "uname" : { "sysname" : "Linux", "release" : "3.10.0-229.1.2.el7.x86_64", "version" : "#1 SMP Fri Mar 27 03:04:26 UTC 2015", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "69E53C6FFEE320204F7099893CA3B40309400897" }, { "b" : "7FFF1F0FE000", "elfType" : 3, "buildId" : "64DE62EAA6D0191EAD9358297D64406988D7ED66" }, { "b" : "7FA52FEDA000", "path" : "/lib64/libpthread.so.0", "elfType" : 3, "buildId" : "12F30315D4F4A2FE58B1977405C8B5515861E66B" }, { "b" : "7FA52FC6D000", "path" : "/lib64/libssl.so.10", "elfType" : 3, "buildId" : "BB96EE99138B19FECDAB55E80A1728B648ECAD50" }, { "b" : "7FA52F886000", "path" : "/lib64/libcrypto.so.10", "elfType" : 3, "buildId" : "B154203FB7C05AEE29D5D6F6C000305191209FE4" }, { "b" : "7FA52F67E000", "path" : "/lib64/librt.so.1", "elfType" : 3, "buildId" : "7376A07360DC57189A8F92B20AA4AA1CAEA80551" }, { "b" : "7FA52F47A000", "path" : "/lib64/libdl.so.2", "elfType" : 3, "buildId" : "4DFEE4EA9AE8FDD4C71BD4CCC0727222F19DF810" }, { "b" : "7FA52F173000", "path" : "/lib64/libstdc++.so.6", "elfType" : 3, "buildId" : 
"405EACD649720B8668FFBBA197CBF030A7EF6296" }, { "b" : "7FA52EE71000", "path" : "/lib64/libm.so.6", "elfType" : 3, "buildId" : "A1AA62B29765BE03A36BF927B047EEEF8696EEC6" }, { "b" : "7FA52EC5B000", "path" : "/lib64/libgcc_s.so.1", "elfType" : 3, "buildId" : "5D3D7256AE68BCFF41E312A24825ED80ECA88A73" }, { "b" : "7FA52E89A000", "path" : "/lib64/libc.so.6", "elfType" : 3, "buildId" : "C31FFE7942BFD77B2FCA8F9BD5709D387A86D3BC" }, { "b" : "7FA5300F6000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "9866E1D2BA61EBB4CE4F009FACDAACC24EF3B804" }, { "b" : "7FA52E64E000", "path" : "/lib64/libgssapi_krb5.so.2", "elfType" : 3, "buildId" : "34672D541C8C9C5C1C25CB4F3F332CC9D3E604AD" }, { "b" : "7FA52E36B000", "path" : "/lib64/libkrb5.so.3", "elfType" : 3, "buildId" : "45CB7F6CD322F5B55FF8B635F7EC1578631CCAEA" }, { "b" : "7FA52E167000", "path" : "/lib64/libcom_err.so.2", "elfType" : 3, "buildId" : "3A1166709F88740C49E060731832E3FAD2DFB66B" }, { "b" : "7FA52DF35000", "path" : "/lib64/libk5crypto.so.3", "elfType" : 3, "buildId" : "23A2D854538903E2B84EF0882046DD95522C8B59" }, { "b" : "7FA52DD1F000", "path" : "/lib64/libz.so.1", "elfType" : 3, "buildId" : "E45643F27F3B3E960F3691AFC6EC27A98EF7B46B" }, { "b" : "7FA52DB10000", "path" : "/lib64/libkrb5support.so.0", "elfType" : 3, "buildId" : "F4A3D5E7E23F871751CA8F250421F8CF83447AD2" }, { "b" : "7FA52D90C000", "path" : "/lib64/libkeyutils.so.1", "elfType" : 3, "buildId" : "2E01D5AC08C1280D013AAB96B292AC58BC30A263" }, { "b" : "7FA52D6F2000", "path" : "/lib64/libresolv.so.2", "elfType" : 3, "buildId" : "AC596E865AF0D14B10F7B707F47D2031AD6D68DC" }, { "b" : "7FA52D4CD000", "path" : "/lib64/libselinux.so.1", "elfType" : 3, "buildId" : "82FF6B18E1E42825CC2D060F969479AD4AF2F62C" }, { "b" : "7FA52D26C000", "path" : "/lib64/libpcre.so.1", "elfType" : 3, "buildId" : "298B19C64B19995F2AA4DA7B852E90BA5302F630" }, { "b" : "7FA52D047000", "path" : "/lib64/liblzma.so.5", "elfType" : 3, "buildId" : 
"218D03D1F6CF1A099A4D467B5E8ECF4F2BF45750" } ] }}
 mongod(_ZN5mongo15printStackTraceERSo+0x29) [0xf5e489]
 mongod(+0xB5DD52) [0xf5dd52]
 mongod(+0xB5E0AE) [0xf5e0ae]
 libpthread.so.0(+0xF130) [0x7fa52fee9130]
 libc.so.6(+0x149C4D) [0x7fa52e9e3c4d]
 mongod(_ZN6snappy13RawUncompressEPNS_6SourceEPc+0x3C5) [0x1052dd5]
 mongod(_ZN6snappy13RawUncompressEPKcmPc+0x30) [0x1053050]
 mongod(snappy_uncompress+0x5E) [0x1050e4e]
 mongod(+0xEDB935) [0x12db935]
 mongod(__wt_bt_read+0x3E8) [0x12fe268]
 mongod(__wt_cache_read+0x1C5) [0x1302925]
 mongod(__wt_page_in_func+0x40B) [0x12ffc0b]
 mongod(__wt_row_search+0xA27) [0x131b067]
 mongod(__wt_btcur_search+0x760) [0x12f1920]
 mongod(+0xF32053) [0x1332053]
 mongod(_ZNK5mongo21WiredTigerRecordStore7dataForEPNS_16OperationContextERKNS_8RecordIdE+0x69) [0xd75e49]
 mongod(_ZNK5mongo10Collection6docForEPNS_16OperationContextERKNS_8RecordIdE+0x20) [0x912a80]
 mongod(_ZN5mongo10FetchStage4workEPm+0x2BB) [0xa0320b]
 mongod(_ZN5mongo12PlanExecutor18getNextSnapshottedEPNS_11SnapshottedINS_7BSONObjEEEPNS_8RecordIdE+0xA4) [0xbd09a4]
 mongod(_ZN5mongo12PlanExecutor7getNextEPNS_7BSONObjEPNS_8RecordIdE+0x34) [0xbd0d54]
 mongod(_ZN5mongo7getMoreEPNS_16OperationContextEPKcixRNS_5CurOpEiRbPb+0x522) [0xb9d062]
 mongod(_ZN5mongo15receivedGetMoreEPNS_16OperationContextERNS_10DbResponseERNS_7MessageERNS_5CurOpE+0x35E) [0xab079e]
 mongod(_ZN5mongo16assembleResponseEPNS_16OperationContextERNS_7MessageERNS_10DbResponseERKNS_11HostAndPortE+0x13CD) [0xab5b4d]
 mongod(_ZN5mongo16MyMessageHandler7processERNS_7MessageEPNS_21AbstractMessagingPortEPNS_9LastErrorE+0xDD) [0x80fc1d]
 mongod(_ZN5mongo17PortMessageServer17handleIncomingMsgEPv+0x34B) [0xf115bb]
 libpthread.so.0(+0x7DF5) [0x7fa52fee1df5]
 libc.so.6(clone+0x6D) [0x7fa52e9901ad]
-----  END BACKTRACE  -----

conn32

2015-06-30T17:36:26.173-0400 I QUERY    [conn32] getmore hub.inv_i_trans_60_days cursorid:8975538869 ntoreturn:0 keyUpdates:0 writeConflicts:0 numYields:257 nreturned:5448 reslen:4194808 locks:{ Global: { acquireCount: { r: 516 } }, Database: { acquireCount: { r: 258 } }, Collection: { acquireCount: { r: 258 } } } 10950ms
2015-06-30T17:36:32.206-0400 I QUERY    [conn32] getmore hub.inv_i_trans_60_days cursorid:8975538869 ntoreturn:0 keyUpdates:0 writeConflicts:0 numYields:206 nreturned:5448 reslen:4194764 locks:{ Global: { acquireCount: { r: 414 } }, Database: { acquireCount: { r: 207 } }, Collection: { acquireCount: { r: 207 } } } 5890ms
2015-06-30T17:36:48.265-0400 I QUERY    [conn32] getmore hub.inv_i_trans_60_days cursorid:8975538869 ntoreturn:0 keyUpdates:0 writeConflicts:0 numYields:285 nreturned:5448 reslen:4194800 locks:{ Global: { acquireCount: { r: 572 } }, Database: { acquireCount: { r: 286 } }, Collection: { acquireCount: { r: 286 } } } 11925ms
2015-06-30T17:36:56.201-0400 I QUERY    [conn32] getmore hub.inv_i_trans_60_days cursorid:8975538869 ntoreturn:0 keyUpdates:0 writeConflicts:0 numYields:273 nreturned:5448 reslen:4194828 locks:{ Global: { acquireCount: { r: 548 } }, Database: { acquireCount: { r: 274 } }, Collection: { acquireCount: { r: 274 } } } 6709ms
2015-06-30T17:37:04.245-0400 I QUERY    [conn32] getmore hub.inv_i_trans_60_days cursorid:8975538869 ntoreturn:0 keyUpdates:0 writeConflicts:0 numYields:258 nreturned:5448 reslen:4194760 locks:{ Global: { acquireCount: { r: 518 } }, Database: { acquireCount: { r: 259 } }, Collection: { acquireCount: { r: 259 } } } 5666ms
2015-06-30T17:37:21.071-0400 I QUERY    [conn32] getmore hub.inv_i_trans_60_days cursorid:8975538869 ntoreturn:0 keyUpdates:0 writeConflicts:0 numYields:314 nreturned:5448 reslen:4194848 locks:{ Global: { acquireCount: { r: 630 } }, Database: { acquireCount: { r: 315 } }, Collection: { acquireCount: { r: 315 } } } 10447ms
2015-06-30T17:37:34.481-0400 F -        [conn32] Invalid access at address: 0xffffea03a7fb7920
2015-06-30T17:37:34.786-0400 F -        [conn32] Got signal: 11 (Segmentation fault).

Comment by Charlie Page [ 30/Jun/15 ]

ramon.fernandez the full logs are ~65G (currently), too large to upload even compressed. The setup is mongod 3.0.3 with WiredTiger (otherwise defaults), a ~450G collection, and 30 reader threads. I've upgraded to 3.0.4 and am trying again (60 reader threads).

The couple of log lines before the signal 6 (abort) are operations taking ~7000ms.

Comment by Ramon Fernandez Marina [ 30/Jun/15 ]

charlie.page@10gen.com, can you please upload full logs and post details of your setup?

Generated at Thu Feb 08 03:50:15 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.