Description
A couple of Jenkins jobs are hung running column store formats.
The configuration file:
############################################
|
# RUN PARAMETERS
|
############################################
|
abort=0
|
auto_throttle=1
|
firstfit=0
|
bitcnt=4
|
bloom=1
|
bloom_bit_count=48
|
bloom_hash_count=12
|
bloom_oldest=0
|
cache=40
|
checkpoints=1
|
checksum=uncompressed
|
chunk_size=1
|
compaction=0
|
compression=none
|
data_extend=0
|
data_source=file
|
delete_pct=43
|
dictionary=0
|
evict_max=4
|
file_type=variable-length column-store
|
backups=0
|
huffman_key=0
|
huffman_value=0
|
insert_pct=16
|
internal_key_truncation=1
|
internal_page_max=14
|
isolation=read-committed
|
key_gap=18
|
key_max=27
|
key_min=27
|
leak_memory=0
|
leaf_page_max=9
|
logging=0
|
logging_archive=0
|
logging_prealloc=0
|
lsm_worker_threads=4
|
merge_max=9
|
mmap=0
|
ops=100000
|
prefix_compression=1
|
prefix_compression_min=7
|
repeat_data_pct=68
|
reverse=0
|
rows=100000
|
runs=100
|
split_pct=75
|
statistics=0
|
statistics_server=0
|
threads=20
|
timer=20
|
value_max=910
|
value_min=8
|
wiredtiger_config=
|
write_pct=73
|
############################################
|
The call stacks from the process. It's not that obvious how it's stuck - i.e, I don't think this is a deadlock:
(gdb) thread apply all where
|
|
Thread 10 (Thread 0x7f333d2c7700 (LWP 17262)):
|
#0 0x0000003467a0b98e in pthread_cond_timedwait@@GLIBC_2.3.2 ()
|
from /lib64/libpthread.so.0
|
WT-1 0x00000000004421d8 in __wt_cond_wait (session=0x2144040, cond=0x215dbc0,
|
usecs=100000) at ../src/os_posix/os_mtx_cond.c:79
|
WT-2 0x000000000042b09b in __evict_server (arg=0x2144040)
|
at ../src/evict/evict_lru.c:210
|
WT-3 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0
|
WT-4 0x00000034676e8b6d in clone () from /lib64/libc.so.6
|
|
Thread 9 (Thread 0x7f33361fc700 (LWP 17263)):
|
#0 0x0000003467a0b98e in pthread_cond_timedwait@@GLIBC_2.3.2 ()
|
from /lib64/libpthread.so.0
|
WT-1 0x00000000004421d8 in __wt_cond_wait (session=0x2144b40, cond=0x218ee50,
|
usecs=10000000) at ../src/os_posix/os_mtx_cond.c:79
|
WT-2 0x000000000042151a in __sweep_server (arg=0x2144b40)
|
at ../src/conn/conn_sweep.c:171
|
WT-3 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0
|
WT-4 0x00000034676e8b6d in clone () from /lib64/libc.so.6
|
|
Thread 8 (Thread 0x7f33357fb700 (LWP 17284)):
|
#0 0x00000000004eb9b3 in __col_insert_search_match (inshead=0x7f32e0080a70,
|
recno=64587) at ../src/include/column.i:111
|
WT-1 0x00000000004eca07 in __cursor_var_prev (cbt=0x7f32c403aaf0, newpage=0)
|
at ../src/btree/bt_curprev.c:341
|
WT-2 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f32c403aaf0, truncating=0)
|
at ../src/btree/bt_curprev.c:589
|
WT-3 0x00000000004b8e8e in __curfile_prev (cursor=0x7f32c403aaf0)
|
at ../src/cursor/cur_file.c:156
|
WT-4 0x0000000000410ab9 in nextprev (cursor=0x7f32c403aaf0, next=0,
|
notfoundp=0x7f33357fada4) at ../../../test/format/ops.c:674
|
WT-5 0x0000000000410337 in ops (arg=0x24f3400) at ../../../test/format/ops.c:475
|
WT-6 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0
|
WT-7 0x00000034676e8b6d in clone () from /lib64/libc.so.6
|
|
Thread 7 (Thread 0x7f333dcc8700 (LWP 17287)):
|
#0 __col_insert_search_lt (inshead=0x7f32f40851b0, recno=39196)
|
at ../src/include/column.i:82
|
WT-1 0x00000000004ecb5f in __cursor_var_prev (cbt=0x7f32b403aaf0, newpage=0)
|
at ../src/btree/bt_curprev.c:379
|
WT-2 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f32b403aaf0, truncating=0)
|
at ../src/btree/bt_curprev.c:589
|
WT-3 0x00000000004b8e8e in __curfile_prev (cursor=0x7f32b403aaf0)
|
at ../src/cursor/cur_file.c:156
|
WT-4 0x0000000000410ab9 in nextprev (cursor=0x7f32b403aaf0, next=0,
|
notfoundp=0x7f333dcc7da4) at ../../../test/format/ops.c:674
|
WT-5 0x0000000000410337 in ops (arg=0x24f3508) at ../../../test/format/ops.c:475
|
WT-6 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0
|
---Type <return> to continue, or q <return> to quit---
|
WT-7 0x00000034676e8b6d in clone () from /lib64/libc.so.6
|
|
Thread 6 (Thread 0x7f3337fff700 (LWP 17288)):
|
#0 0x00000000004eca73 in __cursor_var_prev (cbt=0x7f32ac03aaf0, newpage=0)
|
at ../src/btree/bt_curprev.c:346
|
WT-1 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f32ac03aaf0, truncating=0)
|
at ../src/btree/bt_curprev.c:589
|
WT-2 0x00000000004b8e8e in __curfile_prev (cursor=0x7f32ac03aaf0)
|
at ../src/cursor/cur_file.c:156
|
WT-3 0x0000000000410ab9 in nextprev (cursor=0x7f32ac03aaf0, next=0,
|
notfoundp=0x7f3337ffeda4) at ../../../test/format/ops.c:674
|
WT-4 0x0000000000410337 in ops (arg=0x24f3560) at ../../../test/format/ops.c:475
|
WT-5 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0
|
WT-6 0x00000034676e8b6d in clone () from /lib64/libc.so.6
|
|
Thread 5 (Thread 0x7f3336bfd700 (LWP 17290)):
|
#0 __col_insert_search_match (inshead=0x7f3300000e60, recno=38371)
|
at ../src/include/column.i:118
|
WT-1 0x00000000004eca07 in __cursor_var_prev (cbt=0x7f33241896f0, newpage=0)
|
at ../src/btree/bt_curprev.c:341
|
WT-2 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f33241896f0, truncating=0)
|
at ../src/btree/bt_curprev.c:589
|
WT-3 0x00000000004b8e8e in __curfile_prev (cursor=0x7f33241896f0)
|
at ../src/cursor/cur_file.c:156
|
WT-4 0x0000000000410ab9 in nextprev (cursor=0x7f33241896f0, next=0,
|
notfoundp=0x7f3336bfcda4) at ../../../test/format/ops.c:674
|
WT-5 0x0000000000410337 in ops (arg=0x24f3610) at ../../../test/format/ops.c:475
|
WT-6 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0
|
WT-7 0x00000034676e8b6d in clone () from /lib64/libc.so.6
|
|
Thread 4 (Thread 0x7f3331bf5700 (LWP 17296)):
|
#0 0x00000000004e9f3e in __wt_cell_unpack_safe (cell=0x238794d,
|
unpack=0x7f3331bf4ad0, end=0x0) at ../src/include/cell.i:622
|
WT-1 0x00000000004ea1c3 in __wt_cell_unpack (cell=0x238794d,
|
unpack=0x7f3331bf4ad0) at ../src/include/cell.i:710
|
WT-2 0x00000000004ecb15 in __cursor_var_prev (cbt=0x7f332c086a10, newpage=0)
|
at ../src/btree/bt_curprev.c:365
|
WT-3 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f332c086a10, truncating=0)
|
at ../src/btree/bt_curprev.c:589
|
WT-4 0x00000000004b8e8e in __curfile_prev (cursor=0x7f332c086a10)
|
at ../src/cursor/cur_file.c:156
|
WT-5 0x0000000000410ab9 in nextprev (cursor=0x7f332c086a10, next=0,
|
notfoundp=0x7f3331bf4da4) at ../../../test/format/ops.c:674
|
WT-6 0x0000000000410337 in ops (arg=0x24f3820) at ../../../test/format/ops.c:475
|
WT-7 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0
|
WT-8 0x00000034676e8b6d in clone () from /lib64/libc.so.6
|
|
Thread 3 (Thread 0x7f33311f4700 (LWP 17297)):
|
#0 __cursor_var_prev (cbt=0x7f3320086610, newpage=1)
|
---Type <return> to continue, or q <return> to quit---
|
at ../src/btree/bt_curprev.c:342
|
WT-1 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f3320086610, truncating=0)
|
at ../src/btree/bt_curprev.c:589
|
WT-2 0x00000000004b8e8e in __curfile_prev (cursor=0x7f3320086610)
|
at ../src/cursor/cur_file.c:156
|
WT-3 0x0000000000410ab9 in nextprev (cursor=0x7f3320086610, next=0,
|
notfoundp=0x7f33311f3da4) at ../../../test/format/ops.c:674
|
WT-4 0x0000000000410337 in ops (arg=0x24f3878) at ../../../test/format/ops.c:475
|
WT-5 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0
|
WT-6 0x00000034676e8b6d in clone () from /lib64/libc.so.6
|
|
Thread 2 (Thread 0x7f332a1fc700 (LWP 17301)):
|
#0 0x00000000004ebddf in __col_var_search (page=0x25332e0, recno=84228,
|
start_recnop=0x7f332a1fbac8) at ../src/include/column.i:259
|
WT-1 0x00000000004ec94a in __cursor_var_prev (cbt=0x7f32ec03aaf0, newpage=0)
|
at ../src/btree/bt_curprev.c:335
|
WT-2 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f32ec03aaf0, truncating=0)
|
at ../src/btree/bt_curprev.c:589
|
WT-3 0x00000000004b8e8e in __curfile_prev (cursor=0x7f32ec03aaf0)
|
at ../src/cursor/cur_file.c:156
|
WT-4 0x0000000000410ab9 in nextprev (cursor=0x7f32ec03aaf0, next=0,
|
notfoundp=0x7f332a1fbda4) at ../../../test/format/ops.c:674
|
WT-5 0x0000000000410337 in ops (arg=0x24f39d8) at ../../../test/format/ops.c:475
|
WT-6 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0
|
WT-7 0x00000034676e8b6d in clone () from /lib64/libc.so.6
|
|
Thread 1 (Thread 0x7f333eed2720 (LWP 13068)):
|
#0 0x00000034676accdd in nanosleep () from /lib64/libc.so.6
|
WT-1 0x00000034676e1e54 in usleep () from /lib64/libc.so.6
|
WT-2 0x000000000040f8b7 in wts_ops (lastrun=0) at ../../../test/format/ops.c:168
|
WT-3 0x0000000000412937 in main (argc=0, argv=0x7ffff9709f40)
|
at ../../../test/format/t.c:202
|
(gdb) thread 2
|
[Switching to thread 2 (Thread 0x7f332a1fc700 (LWP 17301))]#0 0x00000000004ebddf in __col_var_search (page=0x25332e0, recno=84228,
|
start_recnop=0x7f332a1fbac8) at ../src/include/column.i:259
|
259 repeat = page->pg_var_repeats + indx;
|
(gdb) p page->pg_var_repeats
|
There is no member named pg_var_repeats.
|
(gdb) p page
|
$1 = (WT_PAGE *) 0x25332e0
|
(gdb) p page->pg_var_repeats
|
There is no member named pg_var_repeats.
|
(gdb) p page->u.colv_var.repeats
|
There is no member named colv_var.
|
(gdb) p page->u.col_var.repeats
|
$2 = (WT_COL_RLE *) 0x2533610
|
(gdb) p *$2
|
$3 = {recno = 84216, rle = 4, indx = 4}
|
(gdb) p limit
|
$4 = 1
|
(gdb) n
|
260 if (recno >= repeat->recno &&
|
(gdb)
|
261 recno < repeat->recno + repeat->rle) {
|
(gdb)
|
260 if (recno >= repeat->recno &&
|
(gdb)
|
262 if (start_recnop != NULL)
|
(gdb)
|
263 *start_recnop = repeat->recno;
|
(gdb)
|
264 return (page->pg_var_d + repeat->indx);
|
(gdb)
|
289 }
|
(gdb)
|
__cursor_var_prev (cbt=0x7f32ec03aaf0, newpage=0)
|
at ../src/btree/bt_curprev.c:334
|
334 if ((cip =
|
(gdb) n
|
337 cbt->slot = WT_COL_SLOT(page, cip);
|
(gdb) p cip
|
$5 = (WT_COL *) 0x2533350
|
(gdb) n
|
340 cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
|
(gdb)
|
341 cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
|
The failed builds:
http://build.wiredtiger.com:8080/job/wiredtiger-test-format-stress/6524/console
http://build.wiredtiger.com:8080/job/wiredtiger-test-format-recovery/306/
It seems likely due to the recent column store changes from WT-1833
Attachments
Issue Links
- related to
-
WT-1833 Column store name-space gap handling.
- Closed