Details
Description
Our Jenkins testing revealed a case where the new code the eviction server uses to select a random point for its walk could dereference a NULL pointer.
The relevant call stacks:
Thread 2 (Thread 0x3ffb9475710 (LWP 7786)):
|
#0 __wt_block_buffer_to_addr (block=0x8c815900,
|
p=0x8d27261f "\315M\201\344VM\253\016+0000002928`\210\315N\201\344\361-l\356G0000002929/LMNOPQ`\210\315O\201\344G\221\251\220[0000002930/LMNOPQRSTUV02903/LMN", offsetp=0x3ffc2f7cfe8, sizep=0x3ffc2f7cfe0, checksump=0x3ffc2f7cfe4) at ../src/block/block_addr.c:83
|
#1 0x0000000080186d80 in __wt_block_verify_addr (session=0x8c5083b0, block=0x8c815900,
|
addr=0x8d27261f "\315M\201\344VM\253\016+0000002928`\210\315N\201\344\361-l\356G0000002929/LMNOPQ`\210\315O\201\344G\221\251\220[0000002930/LMNOPQRSTUV02903/LMN", addr_size=8) at ../src/block/block_vrfy.c:352
|
#2 0x0000000080182f20 in __bm_verify_addr (bm=0x8c8157e0, session=0x8c5083b0,
|
addr=0x8d27261f "\315M\201\344VM\253\016+0000002928`\210\315N\201\344\361-l\356G0000002929/LMNOPQ`\210\315O\201\344G\221\251\220[0000002930/LMNOPQRSTUV02903/LMN", addr_size=8) at ../src/block/block_mgr.c:453
|
#3 0x0000000080116d12 in __verify_overflow (session=0x8c5083b0,
|
addr=0x8d27261f "\315M\201\344VM\253\016+0000002928`\210\315N\201\344\361-l\356G0000002929/LMNOPQ`\210\315O\201\344G\221\251\220[0000002930/LMNOPQRSTUV02903/LMN", addr_size=8, vs=0x3ffc2f7dd78) at ../src/btree/bt_vrfy.c:740
|
#4 0x0000000080116ade in __verify_overflow_cell (session=0x8c5083b0, ref=0x8d2713c0, found=0x3ffc2f7d397, vs=0x3ffc2f7dd78) at ../src/btree/bt_vrfy.c:697
|
#5 0x0000000080115df4 in __verify_tree (session=0x8c5083b0, ref=0x8d2713c0, vs=0x3ffc2f7dd78) at ../src/btree/bt_vrfy.c:459
|
#6 0x000000008011633a in __verify_tree (session=0x8c5083b0, ref=0x8d269da0, vs=0x3ffc2f7dd78) at ../src/btree/bt_vrfy.c:539
|
#7 0x000000008011633a in __verify_tree (session=0x8c5083b0, ref=0x8cee7ec0, vs=0x3ffc2f7dd78) at ../src/btree/bt_vrfy.c:539
|
#8 0x000000008011633a in __verify_tree (session=0x8c5083b0, ref=0x8d068c10, vs=0x3ffc2f7dd78) at ../src/btree/bt_vrfy.c:539
|
#9 0x000000008011633a in __verify_tree (session=0x8c5083b0, ref=0x8c574f28, vs=0x3ffc2f7dd78) at ../src/btree/bt_vrfy.c:539
|
#10 0x0000000080115328 in __wt_verify (session=0x8c5083b0, cfg=0x3ffc2f7e638) at ../src/btree/bt_vrfy.c:233
|
#11 0x0000000080099796 in __wt_schema_worker (session=0x8c5083b0, uri=0x8c4ee4f0 "file:wt", file_func=0x80114d18 <__wt_verify>, name_func=0x0,
|
cfg=0x3ffc2f7e638, open_flags=2097160) at ../src/schema/schema_worker.c:60
|
#12 0x00000000800a54e2 in __session_verify (wt_session=0x8c5083b0, uri=0x8c4ee4f0 "file:wt", config=0x801b3b34 "strict") at ../src/session/session_api.c:1372
|
#13 0x0000000080010da2 in wts_verify (tag=0x801b2796 "post-ops verify") at ../../../test/format/wts.c:529
|
#14 0x000000008000cb50 in main (argc=6, argv=0x3ffc2f7ea90) at ../../../test/format/t.c:230
|
|
Thread 1 (Thread 0x3ffb558b910 (LWP 7796)):
|
#0 0x00000000800f0da4 in __wt_random_descent (session=0x8c506470, refp=0x8c574fa8, eviction=true) at ../src/btree/bt_random.c:204
|
#1 0x000000008003c674 in __evict_walk_file (session=0x8c506470, queue=0x8c505138, max_entries=400, slotp=0x3ffb558ab0c) at ../src/evict/evict_lru.c:1665
|
#2 0x000000008003bdec in __evict_walk (session=0x8c506470, queue=0x8c505138) at ../src/evict/evict_lru.c:1435
|
#3 0x000000008003b490 in __evict_lru_walk (session=0x8c507410) at ../src/evict/evict_lru.c:1167
|
#4 0x0000000080039dba in __evict_pass (session=0x8c507410) at ../src/evict/evict_lru.c:664
|
#5 0x0000000080039276 in __evict_server (session=0x8c507410, did_work=0x3ffb558af07) at ../src/evict/evict_lru.c:387
|
#6 0x0000000080038e8e in __wt_evict_thread_run (session=0x8c507410, thread=0x8c56d4c0) at ../src/evict/evict_lru.c:308
|
#7 0x00000000800bb92c in __wt_thread_run (arg=0x8c56d4c0) at ../src/support/thread_group.c:25
|
#8 0x000003ffb91881f2 in start_thread (arg=0x3ffb558b910) at pthread_create.c:310
|
#9 0x000003ffb8f02cea in thread_start () at ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:76
|
The random descent code has not needed to handle NULL page pointers in the past, because it was not used on handles that are held with exclusive access.