diff --git a/dist/api_data.py b/dist/api_data.py index 7d8a58c..ffb8d89 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -585,7 +585,7 @@ connection_runtime_config = [ type='list', undoc=True, choices=[ 'checkpoint_slow', 'lookaside_sweep_race', 'split_1', 'split_2', - 'split_3', 'split_4', 'split_5', 'split_6', 'split_7']), + 'split_3', 'split_4', 'split_5', 'split_6', 'split_7', 'split_8']), Config('verbose', '', r''' enable messages for various events. Options are given as a list, such as "verbose=[evictserver,read]"''', diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index 02cceab..3a031b4 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -429,7 +429,8 @@ __cursor_key_order_check_row( WT_ERR(__wt_scr_alloc(session, 512, &b)); WT_PANIC_ERR(session, EINVAL, - "WT_CURSOR.%s out-of-order returns: returned key %s then key %s", + "WT_CURSOR.%s out-of-order returns: returned key %.1024s then " + "key %.1024s", next ? "next" : "prev", __wt_buf_set_printable_format(session, cbt->lastkey->data, cbt->lastkey->size, btree->key_format, a), diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 566157a..16e25c1 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -805,11 +805,13 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) WT_PAGE_INDEX *pindex; WT_PAGE_MODIFY *mod; WT_SESSION_IMPL *session; + uint64_t split_gen; uint32_t entries; session = ds->session; page = ref->page; mod = page->modify; + split_gen = 0; WT_RET(ds->f(ds, "%p", (void *)ref)); @@ -818,6 +820,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno)); WT_INTL_INDEX_GET(session, page, pindex); entries = pindex->entries; + split_gen = page->pg_intl_split_gen; break; case WT_PAGE_COL_FIX: WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno)); @@ -830,6 +833,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) case WT_PAGE_ROW_INT: WT_INTL_INDEX_GET(session, page, pindex); entries = pindex->entries; + 
split_gen = page->pg_intl_split_gen; break; case WT_PAGE_ROW_LEAF: entries = page->entries; @@ -845,8 +849,6 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) WT_RET(ds->f(ds, ", entries %" PRIu32, entries)); WT_RET(ds->f(ds, ", %s", __wt_page_is_modified(page) ? "dirty" : "clean")); - WT_RET(ds->f(ds, - ", memory_size %" WT_SIZET_FMT, page->memory_footprint)); if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS)) WT_RET(ds->f(ds, ", keys-built")); @@ -878,9 +880,12 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) break; WT_ILLEGAL_VALUE(session); } + if (split_gen != 0) + WT_RET(ds->f(ds, ", split-gen=%" PRIu64, split_gen)); if (mod != NULL) - WT_RET( - ds->f(ds, ", write generation=%" PRIu32, mod->write_gen)); + WT_RET(ds->f(ds, ", write-gen=%" PRIu32, mod->write_gen)); + WT_RET(ds->f(ds, + ", memory-size %" WT_SIZET_FMT, page->memory_footprint)); WT_RET(ds->f(ds, "\n")); return (0); diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index 1749756..4f310b2 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -262,7 +262,7 @@ restart: /* * holding nothing on failure. */ descend: if ((ret = __wt_page_swap( - session, current, descent, false, flags)) == 0) { + session, current, descent, flags)) == 0) { current = descent; continue; } diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index dfc5621..a2386d9 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -176,44 +176,96 @@ __ref_ascend(WT_SESSION_IMPL *session, } /* - * __ref_initial_descent_prev -- - * Descend the tree one level, when setting up the initial cursor position - * for a previous-cursor walk. + * __split_prev_race -- + * Check for races when descending the tree during a previous-cursor walk. 
*/ static inline bool -__ref_initial_descent_prev( +__split_prev_race( WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp) { WT_PAGE_INDEX *pindex; /* - * When splitting an internal page into its parent, we move the WT_REF - * structures and update the parent's page index before updating the - * split page's page index, and it's not an atomic update. A thread can - * read the parent page's replacement page index, then read the split - * page's original index, or the parent page's original and the split - * page's replacement. + * Handle a cursor moving backwards through the tree or setting up at + * the end of the tree. We're passed the child page into which we're + * descending, and the parent page's page-index we used to find that + * child page. * - * This isn't a problem for a cursor setting up at the start of the tree - * because we do right-hand splits on internal pages and the initial - * part of the split page's namespace won't change as part of a split. - * A thread reading the parent page's and split page's indexes will move - * to the same slot no matter what order of indexes are read. - * - * Handle a cursor setting up at the end of the tree. + * When splitting an internal page into its parent, we move the split + * page's WT_REF structures, then update the parent's page index, then + * update the split page's page index, and nothing is atomic. A thread + * can read the parent page's replacement page index and then the split + * page's original index, or vice-versa, and either change can cause a + * cursor moving backwards through the tree to skip pages. * - * We're passed a child page into which we're descending, and on which - * we have a hazard pointer. 
+ * This isn't a problem for a cursor setting up at the start of the tree + * or moving forward through the tree because we do right-hand splits on + * internal pages and the initial part of the split page's namespace + * won't change as part of a split (in other words, a thread reading the + * parent page's and split page's indexes will move to the same slot no + * matter what order of indexes are read). * - * Acquire a page index for the child page and then confirm we haven't - * raced with a parent split. + * Acquire the child's page index, then confirm the parent's page index + * hasn't changed, to check for reading an old version of the parent's + * page index and then reading a new version of the child's page index. */ WT_INTL_INDEX_GET(session, ref->page, pindex); if (__wt_split_descent_race(session, ref, *pindexp)) - return (false); + return (true); + + /* + * That doesn't check if we read a new version of parent's page index + * and then an old version of the child's page index. For example, if + * a thread were in a newly created split page subtree, the split + * completes into the parent before the thread reads it and descends + * into the child (where the split hasn't yet completed). + * + * Imagine an internal page with 3 child pages, with the namespaces a-f, + * g-h and i-j; the first child page splits. The parent starts out with + * the following page-index: + * + * | ... | a | g | i | ... | + * + * The split page starts out with the following page-index: + * + * | a | b | c | d | e | f | + * + * The first step is to move the c-f ranges into a new subtree, so, for + * example we might have two new internal pages 'c' and 'e', where the + * new 'c' page references the c-d namespace and the new 'e' page + * references the e-f namespace. The top of the subtree references the + * parent page, but until the parent's page index is updated, threads in + * the subtree won't be able to ascend out of the subtree. 
However, once + * the parent page's page index is updated to this: + * + * | ... | a | c | e | g | i | ... | + * + * threads in the subtree can ascend into the parent. Imagine a cursor + * in the c-d part of the namespace that ascends to the parent's 'c' + * slot. It would then decrement to the slot before the 'c' slot, the + * 'a' slot. + * + * The previous-cursor movement selects the last slot in the 'a' page; + * if the split page's page-index hasn't been updated yet, it selects + * the 'f' slot, which is incorrect. Once the split page's page index is + * updated to this: + * + * | a | b | + * + * the previous-cursor movement will select the 'b' slot, which is + * correct. + * + * If the last slot on the page no longer points to the current page as + * its "home", the page is being split and part of its namespace moved, + * restart. (We probably don't have to restart, I think we could spin + * until the page-index is updated, but I'm not willing to debug that + * one if I'm wrong.) + */ + if (pindex->index[pindex->entries - 1]->home != ref->page) + return (true); *pindexp = pindex; - return (true); + return (false); } /* @@ -229,22 +281,21 @@ __tree_walk_internal(WT_SESSION_IMPL *session, WT_BTREE *btree; WT_DECL_RET; WT_PAGE_INDEX *pindex; - WT_REF *couple, *couple_orig, *ref; + WT_REF *couple, *ref, *ref_orig; uint64_t sleep_usecs, yield_count; uint32_t current_state, slot; - bool empty_internal, initial_descent, prev, skip; + bool empty_internal, prev, skip; btree = S2BT(session); pindex = NULL; sleep_usecs = yield_count = 0; - empty_internal = initial_descent = false; + empty_internal = false; /* - * Tree walks are special: they look inside page structures that splits - * may want to free. Publish that the tree is active during this - * window. + * We're not supposed to walk trees without root pages. As this has not + * always been the case, assert to debug that change. 
*/ - WT_ENTER_PAGE_INDEX(session); + WT_ASSERT(session, btree->root.page != NULL); /* Check whether deleted pages can be skipped. */ if (!LF_ISSET(WT_READ_DELETED_SKIP)) @@ -284,36 +335,41 @@ __tree_walk_internal(WT_SESSION_IMPL *session, * new leaf, couple to the next page to which we're descending, it * saves a hazard-pointer swap for each cursor page movement. * - * !!! - * NOTE: we depend on the fact it's OK to release a page we don't hold, - * that is, it's OK to release couple when couple is set to NULL. - * - * Take a copy of any held page and clear the return value. Remember - * the hazard pointer we're currently holding. - * - * Clear the returned value, it makes future error handling easier. + * The hazard pointer on the original location is held until the end of + * the movement, in case we have to restart the movement. Take a copy + * of any held page and clear the return value (it makes future error + * handling easier). */ - couple = couple_orig = ref = *refp; + couple = NULL; + ref_orig = *refp; *refp = NULL; + /* + * Tree walks are special: they look inside page structures that splits + * may want to free. Publish the tree is active during this window. + */ + WT_ENTER_PAGE_INDEX(session); + /* If no page is active, begin a walk from the start/end of the tree. */ - if (ref == NULL) { -restart: /* - * We can be here with a NULL or root WT_REF; the page release - * function handles them internally, don't complicate this code - * by calling them out. - */ - WT_ERR(__wt_page_release(session, couple, flags)); + if ((ref = ref_orig) == NULL) { + if (0) { +restart: /* + * Yield before retrying, and if we've yielded enough + * times, start sleeping so we don't burn CPU to no + * purpose. + */ + __wt_spin_backoff(&yield_count, &sleep_usecs); - /* - * We're not supposed to walk trees without root pages. As this - * has not always been the case, assert to debug that change. 
- */ - WT_ASSERT(session, btree->root.page != NULL); + WT_ERR(__wt_page_release(session, couple, flags)); + couple = NULL; + } - couple = couple_orig = ref = &btree->root; - initial_descent = true; - goto descend; + if ((ref = ref_orig) == NULL) { + ref = &btree->root; + WT_INTL_INDEX_GET(session, ref->page, pindex); + slot = prev ? pindex->entries - 1 : 0; + goto descend; + } } /* @@ -340,12 +396,9 @@ restart: /* /* * If at the root and returning internal pages, return - * the root page, otherwise we're done. Regardless, no - * hazard pointer is required, release the one we hold. + * the root page, otherwise we're done. */ if (__wt_ref_is_root(ref)) { - WT_ERR(__wt_page_release( - session, couple, flags)); if (!LF_ISSET(WT_READ_SKIP_INTL)) *refp = ref; goto done; @@ -367,17 +420,18 @@ restart: /* * handle restart or not-found returns, it would require * additional complexity and is not a possible return: * we're moving to the parent of the current child page, - * the parent can't have been evicted. (This is why we - * don't pass "prev" to the page-swap function, we can't - * handle the restart error returned if the parent page - * is currently splitting.) + * the parent can't have been evicted. */ if (!LF_ISSET(WT_READ_SKIP_INTL)) { WT_ERR(__wt_page_swap( - session, couple, ref, false, flags)); + session, couple, ref, flags)); + couple = NULL; *refp = ref; goto done; } + + /* Encourage races. */ + __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_8); } if (prev) @@ -389,9 +443,9 @@ restart: /* ++*walkcntp; for (;;) { - /* - * Move to the next slot, and set the reference hint if - * it's wrong (used when we continue the walk). We don't +descend: /* + * Get a reference, setting the reference hint if it's + * wrong (used when we continue the walk). We don't * always update the hints when splitting, it's expected * for them to be incorrect in some workloads. 
*/ @@ -452,12 +506,41 @@ restart: /* break; } - ret = __wt_page_swap(session, couple, ref, prev, + ret = __wt_page_swap(session, couple, ref, WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags); + if (ret == 0) { + /* Success, so "couple" has been released. */ + couple = NULL; + + /* Return leaf pages to our caller. */ + if (!WT_PAGE_IS_INTERNAL(ref->page)) { + *refp = ref; + goto done; + } + + /* Set the new "couple" value. */ + couple = ref; + + /* Configure traversal of any internal page. */ + empty_internal = true; + if (prev) { + if (__split_prev_race( + session, ref, &pindex)) + goto restart; + slot = pindex->entries - 1; + } else { + WT_INTL_INDEX_GET( + session, ref->page, pindex); + slot = 0; + } + continue; + } /* - * Not-found is an expected return when only walking + * Not-found is an expected return when walking only * in-cache pages, or if we see a deleted page. + * + * An expected error, so "couple" is unchanged. */ if (ret == WT_NOTFOUND) { WT_NOT_READ(ret, 0); @@ -466,94 +549,24 @@ restart: /* /* * The page we're moving to might have split, in which - * case move to the last position we held. - */ - if (ret == WT_RESTART) { - ret = 0; - - /* - * Yield before retrying, and if we've yielded - * enough times, start sleeping so we don't burn - * CPU to no purpose. - */ - __wt_spin_backoff( - &yield_count, &sleep_usecs); - - /* - * If a cursor is setting up at the end of the - * tree, we can't use our parent page's index, - * because it may have already split; restart - * the walk. - */ - if (prev && initial_descent) - goto restart; - - /* - * If a new walk that never coupled from the - * root to a new saved position in the tree, - * restart the walk. - */ - if (couple == &btree->root) - goto restart; - - /* - * If restarting from some original position, - * repeat the increment or decrement we made at - * that time. Otherwise, couple is an internal - * page we've acquired after moving from that - * starting position and we can treat it as a - * new page. 
This works because we never acquire - * a hazard pointer on a leaf page we're not - * going to return to our caller, this will quit - * working if that ever changes. - */ - WT_ASSERT(session, - couple == couple_orig || - WT_PAGE_IS_INTERNAL(couple->page)); - ref = couple; - __ref_index_slot(session, ref, &pindex, &slot); - if (couple == couple_orig) - break; - } - WT_ERR(ret); - couple = ref; - - /* - * A new page: configure for traversal of any internal - * page's children, else return the leaf page. + * case restart the movement. + * + * An expected error, so "couple" is unchanged. */ - if (WT_PAGE_IS_INTERNAL(ref->page)) { -descend: empty_internal = true; - - /* - * There's a split race when a cursor is setting - * up at the end of the tree. - */ - if (prev && initial_descent) { - if (!__ref_initial_descent_prev( - session, ref, &pindex)) - goto restart; - } else - WT_INTL_INDEX_GET( - session, ref->page, pindex); - slot = prev ? pindex->entries - 1 : 0; - continue; - } + if (ret == WT_RESTART) + goto restart; - /* - * The tree-walk restart code knows we return any leaf - * page we acquire (never hazard-pointer coupling on - * after acquiring a leaf page), and asserts no restart - * happens while holding a leaf page. This page must be - * returned to our caller. - */ - *refp = ref; - goto done; + /* Unexpected error, so "couple" was released. */ + couple = NULL; + goto err; } } done: -err: WT_LEAVE_PAGE_INDEX(session); +err: + WT_TRET(__wt_page_release(session, couple, flags)); + WT_TRET(__wt_page_release(session, ref_orig, flags)); + WT_LEAVE_PAGE_INDEX(session); return (ret); } diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c index 8cc6630..f57e49c 100644 --- a/src/btree/col_srch.c +++ b/src/btree/col_srch.c @@ -192,7 +192,7 @@ descend: /* * holding nothing on failure. 
*/ if ((ret = __wt_page_swap(session, - current, descent, false, WT_READ_RESTART_OK)) == 0) { + current, descent, WT_READ_RESTART_OK)) == 0) { current = descent; continue; } diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 20acda8..9b8e206 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -444,7 +444,7 @@ descend: /* * holding nothing on failure. */ if ((ret = __wt_page_swap(session, - current, descent, false, WT_READ_RESTART_OK)) == 0) { + current, descent, WT_READ_RESTART_OK)) == 0) { current = descent; continue; } diff --git a/src/config/config_def.c b/src/config/config_def.c index 7dee7a5..7451d3f 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -189,7 +189,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { { "timing_stress_for_test", "list", NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\"," "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," - "\"split_6\",\"split_7\"]", + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\"," @@ -879,7 +879,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "timing_stress_for_test", "list", NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\"," "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," - "\"split_6\",\"split_7\"]", + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0 }, { "transaction_sync", "category", NULL, NULL, @@ -982,7 +982,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "timing_stress_for_test", "list", NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\"," "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," - "\"split_6\",\"split_7\"]", + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0 }, { "transaction_sync", "category", NULL, NULL, @@ -1082,7 +1082,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { { "timing_stress_for_test", "list", NULL, 
"choices=[\"checkpoint_slow\",\"lookaside_sweep_race\"," "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," - "\"split_6\",\"split_7\"]", + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0 }, { "transaction_sync", "category", NULL, NULL, @@ -1180,7 +1180,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { { "timing_stress_for_test", "list", NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\"," "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," - "\"split_6\",\"split_7\"]", + "\"split_6\",\"split_7\",\"split_8\"]", NULL, 0 }, { "transaction_sync", "category", NULL, NULL, diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index d322caa..a398d50 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -2024,6 +2024,7 @@ __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[]) { "split_5", WT_TIMING_STRESS_SPLIT_5 }, { "split_6", WT_TIMING_STRESS_SPLIT_6 }, { "split_7", WT_TIMING_STRESS_SPLIT_7 }, + { "split_8", WT_TIMING_STRESS_SPLIT_8 }, { NULL, 0 } }; WT_CONFIG_ITEM cval, sval; diff --git a/src/include/btree.i b/src/include/btree.i index 81c166e..d05fd4d 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1605,6 +1605,8 @@ __wt_split_descent_race( * update. A thread can read the parent page's original page index and * then read the split page's replacement index. * + * For example, imagine a search descending the tree. + * * Because internal page splits work by truncating the original page to * the initial part of the original page, the result of this race is we * will have a search key that points past the end of the current page. @@ -1649,73 +1651,17 @@ __wt_split_descent_race( * work by truncating the split page, so the split page search is for * content the split page retains after the split, and we ignore this * race. 
- */ - WT_INTL_INDEX_GET(session, ref->home, pindex); - return (pindex != saved_pindex); -} - -/* - * __wt_split_prev_race -- - * Return if we raced with an internal page split when moving backwards - * through the tree. - */ -static inline bool -__wt_split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref) -{ - WT_PAGE_INDEX *pindex; - - /* - * There's a split race when a cursor moving backwards through the tree - * descends the tree. If we're splitting an internal page into its - * parent, we move the WT_REF structures and update the parent's page - * index before updating the split page's page index, and it's not an - * atomic update. A thread can read the parent and split page's original - * indexes during a split, or read the parent page's replacement page - * index and then read the split page's original index, either of which - * can lead to skipping pages. - * - * For example, imagine an internal page with 3 child pages, with the - * namespaces a-f, g-h and i-j; the first child page splits. The parent - * starts out with the following page-index: - * - * | ... | a | g | i | ... | - * - * The split page starts out with the following page-index: - * - * | a | b | c | d | e | f | - * - * The first step is to move the c-f ranges into a new subtree, so, for - * example we might have two new internal pages 'c' and 'e', where the - * new 'c' page references the c-d namespace and the new 'e' page - * references the e-f namespace. The top of the subtree references the - * parent page, but until the parent's page index is updated, threads in - * the subtree won't be able to ascend out of the subtree. However, once - * the parent page's page index is updated to this: - * - * | ... | a | c | e | g | i | ... | - * - * threads in the subtree can ascend into the parent. Imagine a cursor - * in the c-d part of the namespace that ascends to the parent's 'c' - * slot. It would then decrement to the slot before the 'c' slot, the - * 'a' slot. 
- * - * The previous-cursor movement selects the last slot in the 'a' page; - * if the split page's page-index hasn't been updated yet, it selects - * the 'f' slot, which is incorrect. Once the split page's page index is - * updated to this: * - * | a | b | - * - * the previous-cursor movement will select the 'b' slot, which is - * correct. + * This code is a general purpose check for a descent race and we call + * it in other cases, for example, a cursor traversing backwards through + * the tree. * - * This function takes an argument which is the internal page into which - * we're coupling. If the last slot on the page no longer points to - * the current page as its "home", the page is being split and part of - * its namespace moved, we have to restart. + * Presumably we acquired a page index on the child page before calling + * this code, don't re-order that acquisition with this check. */ - WT_INTL_INDEX_GET(session, ref->page, pindex); - return (pindex->index[pindex->entries - 1]->home != ref->page); + WT_BARRIER(); + WT_INTL_INDEX_GET(session, ref->home, pindex); + return (pindex != saved_pindex); } /* @@ -1724,8 +1670,8 @@ __wt_split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref) * coupling up/down the tree. */ static inline int -__wt_page_swap_func(WT_SESSION_IMPL *session, - WT_REF *held, WT_REF *want, bool prev_race, uint32_t flags +__wt_page_swap_func( + WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags #ifdef HAVE_DIAGNOSTIC , const char *file, int line #endif @@ -1755,18 +1701,6 @@ __wt_page_swap_func(WT_SESSION_IMPL *session, ); /* - * We can race when descending into an internal page as part of moving - * backwards through the tree, and we have to detect that race before - * releasing the page from which we are coupling, else we can't restart - * the movement. 
- */ - if (ret == 0 && prev_race && WT_PAGE_IS_INTERNAL(want->page) && - __wt_split_prev_race(session, want)) { - ret = WT_RESTART; - WT_TRET(__wt_page_release(session, want, flags)); - } - - /* * Expected failures: page not found or restart. Our callers list the * errors they're expecting to handle. */ diff --git a/src/include/connection.h b/src/include/connection.h index d0bebe8..c67f4a9 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -458,6 +458,7 @@ struct __wt_connection_impl { #define WT_TIMING_STRESS_SPLIT_5 0x040u #define WT_TIMING_STRESS_SPLIT_6 0x080u #define WT_TIMING_STRESS_SPLIT_7 0x100u +#define WT_TIMING_STRESS_SPLIT_8 0x200u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint64_t timing_stress_flags; diff --git a/src/include/misc.h b/src/include/misc.h index 1507e2d..d76560d 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -294,16 +294,15 @@ typedef void wt_timestamp_t; __wt_scr_alloc_func(session, size, scratchp, __func__, __LINE__) #define __wt_page_in(session, ref, flags) \ __wt_page_in_func(session, ref, flags, __func__, __LINE__) -#define __wt_page_swap(session, held, want, prev_race, flags) \ - __wt_page_swap_func( \ - session, held, want, prev_race, flags, __func__, __LINE__) +#define __wt_page_swap(session, held, want, flags) \ + __wt_page_swap_func(session, held, want, flags, __func__, __LINE__) #else #define __wt_scr_alloc(session, size, scratchp) \ __wt_scr_alloc_func(session, size, scratchp) #define __wt_page_in(session, ref, flags) \ __wt_page_in_func(session, ref, flags) -#define __wt_page_swap(session, held, want, prev_race, flags) \ - __wt_page_swap_func(session, held, want, prev_race, flags) +#define __wt_page_swap(session, held, want, flags) \ + __wt_page_swap_func(session, held, want, flags) #endif /* Called on unexpected code path: locate the failure. 
*/ diff --git a/test/format/config.h b/test/format/config.h index 51dc906..e1b081a 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -366,6 +366,10 @@ static CONFIG c[] = { "configure slow splits (#7)", /* 2% */ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_7, NULL }, + { "timing_stress_split_8", + "configure slow splits (#8)", /* 2% */ + C_BOOL, 2, 0, 0, &g.c_timing_stress_split_8, NULL }, + { "transaction_timestamps", /* 10% */ "enable transaction timestamp support", C_BOOL, 10, 0, 0, &g.c_txn_timestamps, NULL }, diff --git a/test/format/format.h b/test/format/format.h index 0eca665..83b75f7 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -221,6 +221,7 @@ typedef struct { uint32_t c_timing_stress_split_5; uint32_t c_timing_stress_split_6; uint32_t c_timing_stress_split_7; + uint32_t c_timing_stress_split_8; uint32_t c_truncate; uint32_t c_txn_freq; uint32_t c_txn_timestamps; diff --git a/test/format/wts.c b/test/format/wts.c index dd87ade..6452f74 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -262,6 +262,8 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) CONFIG_APPEND(p, ",split_6"); if (g.c_timing_stress_split_7) CONFIG_APPEND(p, ",split_7"); + if (g.c_timing_stress_split_8) + CONFIG_APPEND(p, ",split_8"); CONFIG_APPEND(p, "]"); /* Extensions. */