diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c
index 74c66d730..b087f2189 100644
--- a/src/btree/bt_vrfy.c
+++ b/src/btree/bt_vrfy.c
@@ -267,7 +267,7 @@ __verify_key_hs(WT_SESSION_IMPL *session, WT_ITEM *key, WT_CELL_UNPACK *unpack,
     hs_btree_id = btree->id;
     /* Set the data store timestamp and transactions to initiate timestamp range verification */
     prev_start.timestamp = unpack->start_ts;
-    prev_start.txnid = unpack->start_txn;
+    prev_start.txnid = WT_TXN_MAX;
     session_flags = 0;
     stop.timestamp = 0;
     stop.txnid = 0;
@@ -604,7 +604,7 @@ __verify_btree_id_with_meta(WT_SESSION_IMPL *session, uint32_t btree_id, const c
     const char *meta_value;
 
     meta_cursor = NULL;
-    WT_RET(__wt_metadata_cursor(session, &meta_cursor));
+    WT_ERR(__wt_metadata_cursor(session, &meta_cursor));
     while ((ret = meta_cursor->next(meta_cursor)) == 0) {
         WT_ERR(meta_cursor->get_value(meta_cursor, &meta_value));
         if ((ret = __wt_config_getones(session, meta_value, "id", &id)) == 0 &&
@@ -626,36 +626,77 @@ err:
  * storing the previous key.
  */
 int
-__wt_verify_history_store_tree(WT_SESSION_IMPL *session)
+__wt_verify_history_store_tree(WT_SESSION_IMPL *session, const char *uri)
 {
     WT_CURSOR *cursor, *data_cursor;
+    WT_CURSOR_BTREE *cbt;
     WT_DECL_ITEM(tmp);
     WT_DECL_RET;
     WT_ITEM hs_key, prev_hs_key;
     WT_TIME_PAIR hs_start, hs_stop;
-    uint32_t btree_id, session_flags, prev_btree_id;
+    uint32_t session_flags, curr_btree_id, prev_btree_id;
     int cmp;
-    const char *uri;
+    int exact;
+    uint32_t btree_id;
+    bool early_exit;
 
     session_flags = 0;
+    exact = 0;
     prev_btree_id = 0; /* [-Wconditional-uninitialized] */
+    curr_btree_id = 0; /* [-Wconditional-uninitialized] */
+    btree_id = 0;
     data_cursor = NULL;
-    uri = NULL;
+    early_exit = false;
     WT_CLEAR(prev_hs_key);
     WT_CLEAR(hs_key);
 
     WT_ERR(__wt_scr_alloc(session, 0, &tmp));
     WT_ERR(__wt_hs_cursor(session, &session_flags));
     cursor = session->hs_cursor;
+    if (uri != NULL) {
+        early_exit = true;
+        /*
+         * Open a cursor on the data store to read its btree ID, and mark it raw like every other
+         * data cursor this function opens. NOTE(review): the cast assumes the URI yields a btree
+         * cursor; confirm for non-file URIs.
+         */
+        WT_ERR(__wt_open_cursor(session, uri, NULL, NULL, &data_cursor));
+        F_SET(data_cursor, WT_CURSOR_RAW_OK);
+        cbt = (WT_CURSOR_BTREE *)data_cursor;
+        btree_id = cbt->btree->id;
+
+        /*
+         * Position the history store cursor on the first record for this btree. WT_NOTFOUND here
+         * means there is nothing to position on, so leave through the loop's end-of-data exit.
+         */
+        cursor->set_key(cursor, btree_id, &hs_key, 0, 0, 0, 0);
+        if ((ret = cursor->search_near(cursor, &exact)) == WT_NOTFOUND)
+            goto done;
+        WT_ERR(ret);
+        if (exact < 0) {
+            if ((ret = cursor->next(cursor)) == WT_NOTFOUND)
+                goto done;
+            WT_ERR(ret);
+            WT_ERR(cursor->get_key(cursor, &curr_btree_id, &hs_key, &hs_start.timestamp,
+              &hs_start.txnid, &hs_stop.timestamp, &hs_stop.txnid));
+            if (curr_btree_id > btree_id)
+                goto done; /* btree id does not exist */
+        }
+        /* Step back one record: the loop below advances the cursor before reading it. */
+        WT_ERR_NOTFOUND_OK(cursor->prev(cursor));
+        /* The open data cursor belongs to this btree, so the loop must not close and reopen it. */
+        prev_btree_id = btree_id;
+    }
 
     while ((ret = cursor->next(cursor)) == 0) {
-        WT_ERR(cursor->get_key(cursor, &btree_id, &hs_key, &hs_start.timestamp, &hs_start.txnid,
+        WT_ERR(cursor->get_key(cursor, &curr_btree_id, &hs_key, &hs_start.timestamp, &hs_start.txnid,
           &hs_stop.timestamp, &hs_stop.txnid));
         /*
          * Keep track of the previous comparison. The history store is stored in order, so we can
          * avoid redundant comparisons. Previous btree ID isn't set, until data cursor is open.
          */
-        if (data_cursor == NULL || (prev_btree_id != btree_id)) {
+        if (data_cursor == NULL || (prev_btree_id != curr_btree_id)) {
+            if (early_exit && (curr_btree_id != btree_id))
+                break;
             /*
              * Find the URI from the metadata and validate the btree ID. Using this URI, verify the
              * history store key with the data store.
@@ -665,8 +706,10 @@ __wt_verify_history_store_tree(WT_SESSION_IMPL *session)
                 /* Setting data_cursor to null, to avoid double free */
                 data_cursor = NULL;
             }
-            WT_ERR(__verify_btree_id_with_meta(session, btree_id, &uri));
-            WT_ERR(__wt_open_cursor(session, uri, NULL, NULL, &data_cursor));
+            if (!early_exit) {
+                WT_ERR(__verify_btree_id_with_meta(session, curr_btree_id, &uri));
+                WT_ERR(__wt_open_cursor(session, uri, NULL, NULL, &data_cursor));
+            }
             F_SET(data_cursor, WT_CURSOR_RAW_OK);
         } else {
             WT_ERR(__wt_compare(session, NULL, &hs_key, &prev_hs_key, &cmp));
@@ -674,7 +717,7 @@ __wt_verify_history_store_tree(WT_SESSION_IMPL *session)
                 continue;
         }
         WT_ERR(__wt_buf_set(session, &prev_hs_key, hs_key.data, hs_key.size));
-        prev_btree_id = btree_id;
+        prev_btree_id = curr_btree_id;
 
         data_cursor->set_key(data_cursor, &hs_key);
         ret = data_cursor->search(data_cursor);
@@ -687,6 +730,9 @@ __wt_verify_history_store_tree(WT_SESSION_IMPL *session)
         WT_ERR(ret);
     }
 
+/* Early exits and normal loop termination share the same end-of-data handling. */
+done:
+    WT_ERR_NOTFOUND_OK(ret);
 err:
     if (data_cursor != NULL)
         data_cursor->close(data_cursor);
diff --git a/src/include/extern.h b/src/include/extern.h
index 3bf75a242..1cbdf4f2a 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -1548,7 +1548,7 @@ extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *b
 extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag,
   const WT_PAGE_HEADER *dsk, size_t size, WT_ADDR *addr, bool empty_page_ok)
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verify_history_store_tree(WT_SESSION_IMPL *session)
+extern int __wt_verify_history_store_tree(WT_SESSION_IMPL *session, const char *uri)
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size)
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/session/session_api.c b/src/session/session_api.c
index f3a064cb3..881f6a838 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -1580,11 +1580,15 @@ __session_verify(WT_SESSION *wt_session, const char *uri, const char *config)
     WT_ERR(__wt_config_gets(session, cfg, "hs_verify", &cval));
     if (cval.val == true) {
         WT_WITH_CHECKPOINT_LOCK(
-          session, WT_WITH_SCHEMA_LOCK(session, ret = __wt_verify_history_store_tree(session)));
+          session, WT_WITH_SCHEMA_LOCK(session, ret = __wt_verify_history_store_tree(session, NULL)));
     } else {
         WT_WITH_CHECKPOINT_LOCK(session,
           WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_verify, NULL,
                                          cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)));
+        /* Don't let the history store check overwrite a failure from the btree verify. */
+        WT_ERR(ret);
+        WT_WITH_CHECKPOINT_LOCK(
+          session, WT_WITH_SCHEMA_LOCK(session, ret = __wt_verify_history_store_tree(session, uri)));
     }
 err:
     if (ret != 0)