diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c
index 53b68ae1c..65c562818 100644
--- a/src/btree/bt_delete.c
+++ b/src/btree/bt_delete.c
@@ -408,17 +408,22 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
      * We no longer need the WT_PAGE_DELETED structure, all of its information should have been
      * transferred to the list of WT_UPDATE structures (if any).
      *
-     * Except when the tree is read-only; in a read-only tree, eviction will just discard the
+     * Except when the page is clean; in a read-only tree, eviction will just discard the
      * instantiated page instead of saving it, so instead of this being a permanent transition we
      * need to be able to regenerate the instantiated page arbitrarily many times. Note that keeping
      * the structure around would cause horrible things to happen in reconciliation if we ever
      * reached that code; but we won't.
+     *
+     * Flag the ref only when the structure is kept: clean eviction uses the flag to put the ref
+     * back into the deleted state, and reconciliation uses it to free the retained structure.
      */
-    if (!F_ISSET(btree, WT_BTREE_READONLY))
+    if (page->modify->page_state == WT_PAGE_CLEAN)
+        F_SET(ref, WT_REF_FLAG_WAS_DELETED);
+    else
         __wt_overwrite_and_free(session, ref->ft_info.del);
     if (update_list != NULL)
         ref->ft_info.update = update_list;
 
     return (0);
 
 err:
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index e944952f6..aa5262fba 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -324,8 +324,12 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
     if (ref->addr == NULL) {
         WT_WITH_PAGE_INDEX(session, ret = __evict_delete_ref(session, ref, flags));
         WT_RET_BUSY_OK(ret);
-    } else
-        WT_REF_SET_STATE(ref, WT_REF_DISK);
+    } else {
+        if (F_ISSET(ref, WT_REF_FLAG_WAS_DELETED))
+            WT_REF_SET_STATE(ref, WT_REF_DELETED);
+        else
+            WT_REF_SET_STATE(ref, WT_REF_DISK);
+    }
 
     return (0);
 }
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 68c688729..ab47f499a 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -927,10 +927,11 @@ struct __wt_ref {
      * depending on it to be "!leaf" instead.
      */
 /* AUTOMATIC FLAG VALUE GENERATION START 0 */
-#define WT_REF_FLAG_INTERNAL 0x1u /* Page is an internal page */
-#define WT_REF_FLAG_LEAF 0x2u     /* Page is a leaf page */
-#define WT_REF_FLAG_READING 0x4u  /* Page is being read in */
-                                  /* AUTOMATIC FLAG VALUE GENERATION STOP 8 */
+#define WT_REF_FLAG_INTERNAL 0x1u    /* Page is an internal page */
+#define WT_REF_FLAG_LEAF 0x2u        /* Page is a leaf page */
+#define WT_REF_FLAG_READING 0x4u     /* Page is being read in */
+#define WT_REF_FLAG_WAS_DELETED 0x8u /* Page was in deleted state */
+                                     /* AUTOMATIC FLAG VALUE GENERATION STOP 8 */
     uint8_t flags;
 
 #define WT_REF_DISK 0       /* Page is on disk */
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index fad9e8fd3..196f47223 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -64,6 +64,14 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage
     /* It's an error to be called with a clean page. */
     WT_ASSERT(session, __wt_page_is_modified(page));
 
+    /*
+     * Free up the WT_PAGE_DELETED structure that was retained as part of the page instantiation.
+     */
+    if (F_ISSET(ref, WT_REF_FLAG_WAS_DELETED)) {
+        F_CLR(ref, WT_REF_FLAG_WAS_DELETED);
+        __wt_overwrite_and_free(session, ref->ft_info.del);
+    }
+
     /*
      * Reconciliation acquires and releases pages, and in rare cases that page release triggers
      * eviction. If the page is dirty, eviction can trigger reconciliation, and we re-enter this
diff --git a/test/suite/test_truncate10.py b/test/suite/test_truncate10.py
index 1270bd83d..e5761e4c9 100644
--- a/test/suite/test_truncate10.py
+++ b/test/suite/test_truncate10.py
@@ -117,6 +117,20 @@ class test_truncate10(wttest.WiredTigerTestCase):
         self.session.rollback_transaction()
         cursor.close()
 
+    def evict_cursor(self, uri, ds, nrows, ts):
+        s = self.conn.open_session()
+        s.begin_transaction('read_timestamp=' + self.timestamp_str(ts))
+        # Configure debug behavior on a cursor to evict the page positioned on when the reset API is used.
+        evict_cursor = s.open_cursor(uri, None, "debug=(release_evict)")
+        for i in range(1, nrows + 1):
+            evict_cursor.set_key(ds.key(i))
+            evict_cursor.search()
+            evict_cursor.reset()
+        s.rollback_transaction()
+        evict_cursor.close()
+        # Close the helper session so repeated calls do not accumulate open sessions.
+        s.close()
+
     def test_truncate10(self):
         nrows = 10000
 
@@ -178,17 +192,21 @@ class test_truncate10(wttest.WiredTigerTestCase):
 
         # At time 10 we should see all value_a.
         self.check(ds.uri, ds.key, nrows, 0, value_a, 10)
+        self.evict_cursor(ds.uri, ds, nrows, 10)
 
         # At time 20 we should still see all value_a.
         self.check(ds.uri, ds.key, nrows, 0, value_a, 20)
+        self.evict_cursor(ds.uri, ds, nrows, 20)
 
         # At time 25 we should still see half value_a, and for FLCS, half zeros.
         # (Note that reading between commit and durable can be problematic, but for
         # now at least it remains permitted.)
         self.check(ds.uri, ds.key, nrows // 2, nrows // 2, value_a, 25)
+        self.evict_cursor(ds.uri, ds, nrows // 2, 25)
 
         # At time 30 we should also see half value_a, and for FLCS, half zeros.
         self.check(ds.uri, ds.key, nrows // 2, nrows // 2, value_a, 30)
+        self.evict_cursor(ds.uri, ds, nrows // 2, 30)
 
         # Move the stable timestamp forward before exiting so we don't waste time rolling
         # back the changes during shutdown.