Description
Core dump from commit 3c153e9162bc57858ee7e4f2b0ae97d2c9b503dc with this CONFIG:
file_type=row
|
data_source=file
|
cache=5
|
compression=none
|
leaf_page_max=9
|
internal_page_max=9
|
ops=200000
|
rows=1000
|
key_min=200
|
threads=8
|
insert_pct=0
|
Here's the stack:
#0 0x00000000004b5e92 in __ovfl_txnc_skip_search (head=0x803fa3850,
|
addr=0x80279c472, addr_size=8) at ../src/btree/rec_track.c:631
|
#1 0x00000000004b6287 in __wt_ovfl_txnc_search (page=0x8098c0b40,
|
addr=0x80279c472 "?5\202?\227?*`\210? \201????]?\210?\234\201?\001\022@??\210ư\201???\2113?\210?J\201?\001cfR?\210?V\201??\016??\210?w\202?k7???\210?\"\201?\025\020\031??\210?[\202?\224??\020?\210?[\201?\231&?$?\210?f\204?3b\202\016?\210??\201??J*??\210ͧ\202??d?\237`\210?l\201?\v\214\220?\210?\235\202?????`\210?)\201?\237@H??\210??\204?av?B`\210?\020\201?2??", addr_size=8,
|
store=0x8033f39e8) at ../src/btree/rec_track.c:765
|
#2 0x00000000004ae54c in __wt_ovfl_read (session=0x802500180,
|
page=0x8098c0b40, unpack=0x7fffff3f9c80, store=0x8033f39e8)
|
at ../src/btree/bt_ovfl.c:64
|
#3 0x00000000004b09c4 in __cell_data_ref (session=0x802500180,
|
page=0x8098c0b40, page_type=7, unpack=0x7fffff3f9c80, store=0x8033f39e8)
|
at cell.i:788
|
#4 0x00000000004b087b in __wt_page_cell_data_ref (session=0x802500180,
|
page=0x8098c0b40, unpack=0x7fffff3f9c80, store=0x8033f39e8) at cell.i:823
|
#5 0x00000000004af93f in __wt_kv_return (session=0x802500180, cbt=0x8033f3900)
|
at ../src/btree/bt_ret.c:107
|
#6 0x00000000004a6cee in __wt_btcur_search (cbt=0x8033f3900)
|
at ../src/btree/bt_cursor.c:246
|
#7 0x000000000047da3a in __curfile_search (cursor=0x8033f3900)
|
at ../src/cursor/cur_file.c:177
|
#8 0x0000000000407cd2 in row_remove (cursor=0x8033f3900, key=0x7fffff3f9f30,
|
keyno=83, notfoundp=0x7fffff3f9ee4) at ../../../test/format/ops.c:979
|
|
|
(gdb) frame 0
|
#0 0x00000000004b5e92 in __ovfl_txnc_skip_search (head=0x803fa3850,
|
addr=0x80279c472, addr_size=8) at ../src/btree/rec_track.c:631
|
631 len = WT_MIN((*e)->addr_size, addr_size);
|
(gdb) p e
|
$19 = (WT_OVFL_TXNC **) 0x803fa3850
|
(gdb) p *e
|
$20 = (WT_OVFL_TXNC *) 0x0
|
The code tests *e == NULL a few lines before the line that dumped core, yet *e is NULL at the point of the crash:
for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
|
if (*e == NULL) { /* Empty levels */
|
--i;
|
--e;
|
continue;
|
}
|
|
/*
|
* Return any exact matches: we don't care in what search level
|
* we found a match.
|
*/
|
len = WT_MIN((*e)->addr_size, addr_size);
|
cmp = memcmp(WT_OVFL_TXNC_ADDR(*e), addr, len);
|
if (cmp == 0 && (*e)->addr_size == addr_size)
|
return (*e);
|
I'm guessing it's a race. The caller (__wt_ovfl_read) is not protected against another thread concurrently removing the overflow key from the cache:
/*
|
* WT_CELL_VALUE_OVFL_RM cells: If reconciliation deleted an overflow
|
* value, but there was still a reader in the system that might need it,
|
* the on-page cell type will have been reset to WT_CELL_VALUE_OVFL_RM
|
* and we will be passed a page so we can look-aside into the cache of
|
* such values.
|
*/
|
if (unpack->raw == WT_CELL_VALUE_OVFL_RM)
|
return (
|
__wt_ovfl_txnc_search(page, unpack->data, unpack->size, store));
|
|
/*
|
* Acquire the overflow lock, and retest the on-page cell's value inside
|
* the lock.
|
*/
|
WT_RET(__wt_readlock(session, S2BT(session)->ovfl_lock));
|
ret = __wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM ?
|
__wt_ovfl_txnc_search(page, unpack->data, unpack->size, store) :
|
__ovfl_read(session, unpack->data, unpack->size, store);
|
WT_TRET(__wt_rwunlock(session, S2BT(session)->ovfl_lock));
|
But that should be OK: if the on-page cell is set to VALUE_OVFL_RM, the cached item can't be removed until all readers that might access it have left the system?
If so, this may mean there's a case where we delete an item from the cache based on the transaction tests in that code, even though there's still a reader in the system that needs the item.
@michaelcahill, can you please review?
Attachments
Issue Links
- related to
-
WT-1 placeholder WT-1
- Closed
-
WT-2 What does metadata look like?
- Closed
-
WT-3 What file formats are required?
- Closed
-
WT-4 Flexible cursor traversals
- Closed
-
WT-5 How does pget work: is it necessary?
- Closed
-
WT-6 Complex schema example
- Closed
-
WT-7 Do we need the handle->err/errx methods?
- Closed
-
WT-8 Do we need table load, bulk-load and/or dump methods?
- Closed