[SERVER-18379] DB fails to recover when specifying LSM, after system crash Created: 08/May/15  Updated: 06/Dec/22  Resolved: 15/Apr/19

Status: Closed
Project: Core Server
Component/s: Storage, WiredTiger
Affects Version/s: 3.1.2
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Jonathan Abrahams Assignee: Backlog - Storage Execution Team
Resolution: Won't Fix Votes: 0
Labels: 32powercycle, 32qa
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Attachments: File crashserver.sh     Text File mongod-wiredTiger-recovery.log     Text File mongod-wiredTiger-recovery.log     Text File mongod-wiredTiger.log     Text File mongod-wiredTiger.log     File powertest.sh     File startmongo.sh     File wiredTiger-afterrecovery.tar.gz     File wiredTiger-afterrecovery.tar.gz     File wiredTiger-beforerecovery.tar.gz     File wiredTiger-beforerecovery.tar.gz     File wiredTiger-firstrun.tar.gz     File wiredTiger-firstrun.tar.gz     File wiredTiger.tar.gz     File wiredTiger.tar.gz    
Issue Links:
Tested
Assigned Teams:
Storage Execution
Operating System: ALL
Steps To Reproduce:
  1. Start mongod with specified options on secret port (recovery mode)
    --storageEngine wiredTiger --wiredTigerCollectionConfigString type=lsm --wiredTigerIndexConfigString type=lsm
  2. If startup is successful, kill and restart on start port
  3. Run mongo shell to randomly perform CRUD operations
  4. Crash host system at a random period (40-50 seconds after step 2)
  5. After reboot start from step 1
Participants:

 Description   

DB recovery failed for WT when the following options were specified:
--wiredTigerCollectionConfigString type=lsm --wiredTigerIndexConfigString type=lsm

2015-05-07T15:57:46.891-0400 I CONTROL  ***** SERVER RESTARTED *****
2015-05-07T15:57:46.932-0400 W -        [initandlisten] Detected unclean shutdown - /data/wiredTiger/mongod.lock is not empty.
2015-05-07T15:57:46.932-0400 W STORAGE  [initandlisten] Recovering data from the last clean checkpoint.
2015-05-07T15:57:46.932-0400 I STORAGE  [initandlisten] wiredtiger_open config: create,cache_size=1G,session_max=20000,eviction=(threads_max=4),statistics=(fast),log=(enabled=true,archive=true,path=journal,compressor=snappy),checkpoint=(wait=60,log_size=2GB),statistics_log=(wait=0),
2015-05-07T15:57:49.803-0400 F -        [initandlisten] Invalid access at address: 0x18
2015-05-07T15:57:49.932-0400 F -        [initandlisten] Got signal: 11 (Segmentation fault).
 
 0xf4ba16 0xf4b132 0xf4b46e 0x7fad6a0d3340 0x12c6cd3 0x12c2b5c 0x12c3982 0x12ea313 0x134d0d3 0x134d25d 0x12f6191 0x134b4da 0x13102b5 0x13120dc 0xd5f6a3 0xd5f85c 0xd5ff13 0xd60216 0xd5a498 0xcdbf86 0xcdffa9 0xd5940c 0xcaa860 0x8291ce 0x7ed989 0x7fad69d1fec5 0x8272f7
----- BEGIN BACKTRACE -----
{"backtrace":[{"b":"400000","o":"B4BA16"},{"b":"400000","o":"B4B132"},{"b":"400000","o":"B4B46E"},{"b":"7FAD6A0C3000","o":"10340"},{"b":"400000","o":"EC6CD3"},{"b":"400000","o":"EC2B5C"},{"b":"400000","o":"EC3982"},{"b":"400000","o":"EEA313"},{"b":"400000","o":"F4D0D3"},{"b":"400000","o":"F4D25D"},{"b":"400000","o":"EF6191"},{"b":"400000","o":"F4B4DA"},{"b":"400000","o":"F102B5"},{"b":"400000","o":"F120DC"},{"b":"400000","o":"95F6A3"},{"b":"400000","o":"95F85C"},{"b":"400000","o":"95FF13"},{"b":"400000","o":"960216"},{"b":"400000","o":"95A498"},{"b":"400000","o":"8DBF86"},{"b":"400000","o":"8DFFA9"},{"b":"400000","o":"95940C"},{"b":"400000","o":"8AA860"},{"b":"400000","o":"4291CE"},{"b":"400000","o":"3ED989"},{"b":"7FAD69CFE000","o":"21EC5"},{"b":"400000","o":"4272F7"}],"processInfo":{ "mongodbVersion" : "3.1.2", "gitVersion" : "aa0066050f0a9db81aa47181d0fbd18c109ae991", "uname" : { "sysname" : "Linux", "release" : "3.16.0-31-generic", "version" : "#41~14.04.1-Ubuntu SMP Wed Feb 11 19:30:13 UTC 2015", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "5A1AB53E82C3717157B58AE3409311D6B24478D5" }, { "b" : "7FFFE552A000", "elfType" : 3, "buildId" : "5552B9335DDE9349419BA10896C1E75C9432A946" }, { "b" : "7FAD6B2E7000", "path" : "/lib/x86_64-linux-gnu/libssl.so.1.0.0", "elfType" : 3, "buildId" : "FF43D0947510134A8A494063A3C1CF3CEBB27791" }, { "b" : "7FAD6AF0D000", "path" : "/lib/x86_64-linux-gnu/libcrypto.so.1.0.0", "elfType" : 3, "buildId" : "379F80D2768BA6A21F52781895EE9F47B34A0A85" }, { "b" : "7FAD6AD05000", "path" : "/lib/x86_64-linux-gnu/librt.so.1", "elfType" : 3, "buildId" : "92FCF41EFE012D6186E31A59AD05BDBB487769AB" }, { "b" : "7FAD6AB01000", "path" : "/lib/x86_64-linux-gnu/libdl.so.2", "elfType" : 3, "buildId" : "C1AE4CB7195D337A77A3C689051DABAA3980CA0C" }, { "b" : "7FAD6A7FD000", "path" : "/usr/lib/x86_64-linux-gnu/libstdc++.so.6", "elfType" : 3, "buildId" : "19EFDDAB11B3BF5C71570078C59F91CF6592CE9E" }, { "b" : "7FAD6A4F7000", "path" : "/lib/x86_64-linux-gnu/libm.so.6", "elfType" : 3, "buildId" : "1D76B71E905CB867B27CEF230FCB20F01A3178F5" }, { "b" : "7FAD6A2E1000", "path" : "/lib/x86_64-linux-gnu/libgcc_s.so.1", "elfType" : 3, "buildId" : "8D0AA71411580EE6C08809695C3984769F25725B" }, { "b" : "7FAD6A0C3000", "path" : "/lib/x86_64-linux-gnu/libpthread.so.0", "elfType" : 3, "buildId" : "9318E8AF0BFBE444731BB0461202EF57F7C39542" }, { "b" : "7FAD69CFE000", "path" : "/lib/x86_64-linux-gnu/libc.so.6", "elfType" : 3, "buildId" : "30C94DC66A1FE95180C3D68D2B89E576D5AE213C" }, { "b" : "7FAD6B545000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "9F00581AB3C73E3AEA35995A0C50D24D59A01D47" } ] }}
 mongod(_ZN5mongo15printStackTraceERSo+0x26) [0xf4ba16]
 mongod(+0xB4B132) [0xf4b132]
 mongod(+0xB4B46E) [0xf4b46e]
 libpthread.so.0(+0x10340) [0x7fad6a0d3340]
 mongod(__wt_page_inmem+0x23) [0x12c6cd3]
 mongod(__wt_btree_tree_open+0x8C) [0x12c2b5c]
 mongod(__wt_btree_open+0xD02) [0x12c3982]
 mongod(__wt_conn_btree_get+0x243) [0x12ea313]
 mongod(__wt_session_get_btree+0x2C3) [0x134d0d3]
 mongod(__wt_session_get_btree_ckpt+0xBD) [0x134d25d]
 mongod(__wt_curfile_open+0xE1) [0x12f6191]
 mongod(__wt_open_cursor+0x26A) [0x134b4da]
 mongod(+0xF102B5) [0x13102b5]
 mongod(+0xF120DC) [0x13120dc]
 mongod(_ZN5mongo21WiredTigerRecordStore8Iterator7_locateERKNS_8RecordIdEb+0x173) [0xd5f6a3]
 mongod(_ZN5mongo21WiredTigerRecordStore8IteratorC2ERKS0_PNS_16OperationContextERKNS_8RecordIdERKNS_20CollectionScanParams9DirectionEb+0x9C) [0xd5f85c]
 mongod(_ZNK5mongo21WiredTigerRecordStore11getIteratorEPNS_16OperationContextERKNS_8RecordIdERKNS_20CollectionScanParams9DirectionE+0x43) [0xd5ff13]
 mongod(_ZN5mongo21WiredTigerRecordStoreC1EPNS_16OperationContextENS_10StringDataES3_bllPNS_28CappedDocumentDeleteCallbackEPNS_20WiredTigerSizeStorerE+0x2B6) [0xd60216]
 mongod(_ZN5mongo18WiredTigerKVEngine14getRecordStoreEPNS_16OperationContextENS_10StringDataES3_RKNS_17CollectionOptionsE+0x158) [0xd5a498]
 mongod(_ZN5mongo22KVDatabaseCatalogEntry14initCollectionEPNS_16OperationContextERKSsb+0x1D6) [0xcdbf86]
 mongod(_ZN5mongo15KVStorageEngineC1EPNS_8KVEngineERKNS_22KVStorageEngineOptionsE+0x579) [0xcdffa9]
 mongod(+0x95940C) [0xd5940c]
 mongod(_ZN5mongo20ServiceContextMongoD22setGlobalStorageEngineERKSs+0x350) [0xcaa860]
 mongod(_ZN5mongo13initAndListenEi+0x3EE) [0x8291ce]
 mongod(main+0x139) [0x7ed989]
 libc.so.6(__libc_start_main+0xF5) [0x7fad69d1fec5]
 mongod(+0x4272F7) [0x8272f7]
-----  END BACKTRACE  -----



 Comments   
Comment by Jonathan Abrahams [ 26/May/15 ]

Still seeing a recovery failure in 3.1.3:

2015-05-26T13:35:59.848-0400 I CONTROL  ***** SERVER RESTARTED *****
2015-05-26T13:35:59.909-0400 W -        [initandlisten] Detected unclean shutdown - /home/jonathan/pt-lsm/data/wiredTiger/mongod.lock is not empty.
2015-05-26T13:35:59.909-0400 W STORAGE  [initandlisten] Recovering data from the last clean checkpoint.
2015-05-26T13:35:59.909-0400 I STORAGE  [initandlisten] wiredtiger_open config: create,cache_size=7G,session_max=20000,eviction=(threads_max=4),statistics=(fast),log=(enabled=true,archive=true,path=journal,compressor=snappy),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics_log=(wait=0),
2015-05-26T13:36:08.654-0400 E STORAGE  [initandlisten] WiredTiger (0) [1432661768:654809][2582:0x7f8dad135b80], file:collection-2--4585210651247373089-004620.bf, WT_CURSOR.prev: read checksum error [4096B @ 4096, 3440205268 != 1611150116]
2015-05-26T13:36:08.654-0400 E STORAGE  [initandlisten] WiredTiger (0) [1432661768:654884][2582:0x7f8dad135b80], file:collection-2--4585210651247373089-004620.bf, WT_CURSOR.prev: collection-2--4585210651247373089-004620.bf: encountered an illegal file format or internal value
2015-05-26T13:36:08.654-0400 E STORAGE  [initandlisten] WiredTiger (-31804) [1432661768:654908][2582:0x7f8dad135b80], file:collection-2--4585210651247373089-004620.bf, WT_CURSOR.prev: the process must exit and restart: WT_PANIC: WiredTiger library panic
2015-05-26T13:36:08.654-0400 I -        [initandlisten] Fatal Assertion 28558
2015-05-26T13:36:08.702-0400 I CONTROL  [initandlisten]
 0xfbbeb6 0xf70828 0xf577cd 0xdd1194 0x13c4e19 0x13c4fc5 0x13c53d4 0x1321ee2 0x133a2a0 0x133ed55 0x133bfcb 0x134f3cf 0x134fc9c 0x133917f 0x1361147 0x13c3de6 0x13c416f 0x13c42cd 0x136d185 0x13c23a2 0x1323e9e 0x13243cc 0x1386e64 0x138a8d9 0xdc1953 0xdc1b6c 0xdc2813 0xdc2b18 0xdbc088 0xd2a466 0xd2f66e 0xdbabac 0xcf3b00 0x803191 0x8097a4 0x7f8dabd46af5 0x801319
----- BEGIN BACKTRACE -----
{"backtrace":[{"b":"400000","o":"BBBEB6"},{"b":"400000","o":"B70828"},{"b":"400000","o":"B577CD"},{"b":"400000","o":"9D1194"},{"b":"400000","o":"FC4E19"},{"b":"400000","o":"FC4FC5"},{"b":"400000","o":"FC53D4"},{"b":"400000","o":"F21EE2"},{"b":"400000","o":"F3A2A0"},{"b":"400000","o":"F3ED55"},{"b":"400000","o":"F3BFCB"},{"b":"400000","o":"F4F3CF"},{"b":"400000","o":"F4FC9C"},{"b":"400000","o":"F3917F"},{"b":"400000","o":"F61147"},{"b":"400000","o":"FC3DE6"},{"b":"400000","o":"FC416F"},{"b":"400000","o":"FC42CD"},{"b":"400000","o":"F6D185"},{"b":"400000","o":"FC23A2"},{"b":"400000","o":"F23E9E"},{"b":"400000","o":"F243CC"},{"b":"400000","o":"F86E64"},{"b":"400000","o":"F8A8D9"},{"b":"400000","o":"9C1953"},{"b":"400000","o":"9C1B6C"},{"b":"400000","o":"9C2813"},{"b":"400000","o":"9C2B18"},{"b":"400000","o":"9BC088"},{"b":"400000","o":"92A466"},{"b":"400000","o":"92F66E"},{"b":"400000","o":"9BABAC"},{"b":"400000","o":"8F3B00"},{"b":"400000","o":"403191"},{"b":"400000","o":"4097A4"},{"b":"7F8DABD25000","o":"21AF5"},{"b":"400000","o":"401319"}],"processInfo":{ "mongodbVersion" : "3.1.3", "gitVersion" : "51ad8eff0d50387f0565d23a15e7ba1db0ea962c", "compiledModules" : [], "uname" : { "sysname" : "Linux", "release" : "3.10.0-229.4.2.el7.x86_64", "version" : "#1 SMP Wed May 13 10:06:09 UTC 2015", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000" }, { "b" : "7FFFE66FE000", "elfType" : 3 }, { "b" : "7F8DACD25000", "path" : "/lib64/librt.so.1", "elfType" : 3 }, { "b" : "7F8DACB21000", "path" : "/lib64/libdl.so.2", "elfType" : 3 }, { "b" : "7F8DAC81A000", "path" : "/lib64/libstdc++.so.6", "elfType" : 3 }, { "b" : "7F8DAC518000", "path" : "/lib64/libm.so.6", "elfType" : 3 }, { "b" : "7F8DAC302000", "path" : "/lib64/libgcc_s.so.1", "elfType" : 3 }, { "b" : "7F8DAC0E6000", "path" : "/lib64/libpthread.so.0", "elfType" : 3 }, { "b" : "7F8DABD25000", "path" : "/lib64/libc.so.6", "elfType" : 3 }, { "b" : "7F8DACF2D000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3 } ] }}
 mongod(_ZN5mongo15printStackTraceERSo+0x26) [0xfbbeb6]
 mongod(_ZN5mongo10logContextEPKc+0x138) [0xf70828]
 mongod(_ZN5mongo13fassertFailedEi+0x4D) [0xf577cd]
 mongod(+0x9D1194) [0xdd1194]
 mongod(__wt_eventv+0x3E9) [0x13c4e19]
 mongod(__wt_err+0x95) [0x13c4fc5]
 mongod(__wt_panic+0x24) [0x13c53d4]
 mongod(__wt_bm_read+0x72) [0x1321ee2]
 mongod(__wt_bt_read+0x1D0) [0x133a2a0]
 mongod(__wt_cache_read+0x1C5) [0x133ed55]
 mongod(__wt_page_in_func+0x41B) [0x133bfcb]
 mongod(+0xF4F3CF) [0x134f3cf]
 mongod(__wt_tree_walk+0x2CC) [0x134fc9c]
 mongod(__wt_btree_open+0x11FF) [0x133917f]
 mongod(__wt_conn_btree_open+0x147) [0x1361147]
 mongod(__wt_session_get_btree+0x176) [0x13c3de6]
 mongod(__wt_session_get_btree+0x4FF) [0x13c416f]
 mongod(__wt_session_get_btree_ckpt+0xBD) [0x13c42cd]
 mongod(__wt_curfile_open+0x265) [0x136d185]
 mongod(__wt_open_cursor+0x292) [0x13c23a2]
 mongod(+0xF23E9E) [0x1323e9e]
 mongod(__wt_bloom_open+0x4C) [0x13243cc]
 mongod(+0xF86E64) [0x1386e64]
 mongod(+0xF8A8D9) [0x138a8d9]
 mongod(_ZN5mongo21WiredTigerRecordStore8Iterator7_locateERKNS_8RecordIdEb+0x163) [0xdc1953]
 mongod(_ZN5mongo21WiredTigerRecordStore8IteratorC2ERKS0_PNS_16OperationContextERKNS_8RecordIdERKNS_20CollectionScanParams9DirectionEb+0x9C) [0xdc1b6c]
 mongod(_ZNK5mongo21WiredTigerRecordStore11getIteratorEPNS_16OperationContextERKNS_8RecordIdERKNS_20CollectionScanParams9DirectionE+0x43) [0xdc2813]
 mongod(_ZN5mongo21WiredTigerRecordStoreC1EPNS_16OperationContextENS_10StringDataES3_bllPNS_28CappedDocumentDeleteCallbackEPNS_20WiredTigerSizeStorerE+0x2B8) [0xdc2b18]
 mongod(_ZN5mongo18WiredTigerKVEngine14getRecordStoreEPNS_16OperationContextENS_10StringDataES3_RKNS_17CollectionOptionsE+0x158) [0xdbc088]
 mongod(_ZN5mongo22KVDatabaseCatalogEntry14initCollectionEPNS_16OperationContextERKSsb+0x1D6) [0xd2a466]
 mongod(_ZN5mongo15KVStorageEngineC1EPNS_8KVEngineERKNS_22KVStorageEngineOptionsE+0x65E) [0xd2f66e]
 mongod(+0x9BABAC) [0xdbabac]
 mongod(_ZN5mongo20ServiceContextMongoD22setGlobalStorageEngineERKSs+0x350) [0xcf3b00]
 mongod(_ZN5mongo13initAndListenEi+0x3E1) [0x803191]
 mongod(main+0x134) [0x8097a4]
 libc.so.6(__libc_start_main+0xF5) [0x7f8dabd46af5]
 mongod(+0x401319) [0x801319]
-----  END BACKTRACE  -----
2015-05-26T13:36:08.702-0400 I -        [initandlisten]
 
***aborting after fassert() failure

Generated at Thu Feb 08 03:47:29 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.