Fix: disagg split rewrite may change the ref address even when the rewrite fails

XML | Word | Printable | JSON

    • Storage Engines - Transactions
    • SE Transactions - 2026-01-16
    • 3

          case WT_PM_REC_REPLACE:
              /*
               * 1-for-1 page swap: Update the parent to reference the replacement page.
               *
               * It's possible to see an empty disk address if the previous reconciliation skipped writing
               * the page.
               */
              if (mod->mod_replace.block_cookie != NULL) {
                  WT_RET(__wt_calloc_one(session, &addr));
                  *addr = mod->mod_replace;
                  mod->mod_replace.block_cookie = NULL;
                  mod->mod_replace.block_cookie_size = 0;
                  __wt_tsan_suppress_store_wt_addr_ptr(&ref->addr, addr);
              } else
                  WT_ASSERT(session, F_ISSET(S2BT(session), WT_BTREE_DISAGGREGATED) && ref->addr != NULL);
      
              /*
               * Eviction wants to keep this page if we have a disk image, re-instantiate the page in
               * memory, else discard the page.
               */
              if (mod->mod_disk_image == NULL) {
                  __wt_page_modify_clear(session, ref->page);
                  __wt_ref_out(session, ref);
                  WT_REF_SET_STATE(ref, WT_REF_DISK);
              } else {
                  /* The split code works with WT_MULTI structures, build one for the disk image. */
                  memset(&multi, 0, sizeof(multi));
                  multi.disk_image = mod->mod_disk_image;
                  if (ref->page->disagg_info != NULL) {
                      WT_RET(__wt_calloc_one(session, &multi.block_meta));
                      *multi.block_meta = ref->page->disagg_info->block_meta;
                  }
                  WT_ASSERT(session, mod->mod_replace.block_cookie == NULL);
                  /*
                   * Store the disk image to a temporary pointer in case we fail to rewrite the page and
                   * we need to link the new disk image back to the old disk image.
                   */
                  tmp = mod->mod_disk_image;
                  mod->mod_disk_image = NULL;
                  ret = __wt_split_rewrite(session, ref, &multi, true);
                  __wt_free(session, multi.block_meta);
                  if (ret != 0) {
                      mod->mod_disk_image = tmp;
                      return (ret);
                  }
              }
      
              break;
      

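      When doing a page rewrite for eviction, we change the ref address and the ref page in two separate steps. This can leave us having successfully changed the ref address but failing to update the ref page: in the snippet above, `ref->addr` is set to the replacement address before `__wt_split_rewrite` is called, and the error path only restores `mod->mod_disk_image`. We should ensure these two values are always updated together.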

            Assignee:
            Chenhao Qu
            Reporter:
            Chenhao Qu
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

              Created:
              Updated:
              Resolved: