-
Type:
Bug
-
Resolution: Done
-
Priority:
Major - P3
-
Affects Version/s: None
-
Component/s: None
-
Storage - Ra 2022-04-04, Storage - Ra 2022-04-18
-
8
While working on WT-8001, haribabu.kommi and I decided to check whether the global durable_timestamp moves backwards while we update its value during a transaction_commit.
I made this change on top of the change for WT-8001:
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 30fdebef6..11bcfe558 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -1572,6 +1572,37 @@ __txn_mod_compare(const void *a, const void *b)
return (aopt->u.op_col.recno < bopt->u.op_col.recno);
}
+/*
+ * __get_all_durable_ts --
+ * Return the global durable timestamp, capped at one below the smallest pinned durable timestamp.
+ */
+static wt_timestamp_t
+__get_all_durable_ts(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_SHARED *s;
+ wt_timestamp_t ts, tmpts;
+ uint32_t i, session_cnt;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ ts = txn_global->durable_timestamp;
+ __wt_readlock(session, &txn_global->rwlock);
+
+ /* Walk the per-session shared transaction array, skipping entries with no pinned timestamp. */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_ORDERED_READ(tmpts, s->pinned_durable_timestamp);
+ if (tmpts != WT_TS_NONE && --tmpts < ts)
+ ts = tmpts;
+ }
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ return (ts);
+}
+
/*
* __wt_txn_commit --
* Commit the current transaction.
@@ -1588,6 +1619,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN_OP *op;
WT_UPDATE *upd;
wt_timestamp_t candidate_durable_timestamp, prev_durable_timestamp;
+ wt_timestamp_t ts_prev, ts_after;
uint32_t fileid;
uint8_t previous_state;
u_int i, ft_resolution;
@@ -1847,6 +1879,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
update_durable_ts = candidate_durable_timestamp > prev_durable_timestamp;
}
+ ts_prev = __get_all_durable_ts(session);
+
/*
* If it looks like we'll need to move the global durable timestamp, attempt atomic cas and
* re-check.
@@ -1861,6 +1895,12 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
prev_durable_timestamp = txn_global->durable_timestamp;
}
+ ts_after = __get_all_durable_ts(session);
+ if (ts_after < ts_prev)
+ WT_ERR_PANIC(session, WT_PANIC,
+ "All durable timestamp moved backwards from %"PRIu64" to %"PRIu64".", ts_prev, ts_after);
+
+
/*
* We're between transactions, if we need to block for eviction, it's a good time to do so. Note
* that we must ignore any error return because the user's data is committed.
I already see that, although the Python tests pass, test/checkpoint/smoke.sh fails:
[1631171089:109686][7063:0x7efcf77fe700], t, WT_SESSION.commit_transaction: __wt_txn_commit, 1901: All durable timestamp moved backwards from 5 to 4.: WT_PANIC: WiredTiger library panic [1631171089:109715][7063:0x7efcf77fe700], t, WT_SESSION.commit_transaction: __wt_txn_commit, 1901: the process must exit and restart: WT_PANIC: WiredTiger library panic [1631171089:109729][7063:0x7efcf77fe700], t, WT_SESSION.commit_transaction: __wt_abort, 28: aborting WiredTiger library Aborted (core dumped)
It will be worthwhile running the full WT test suite and stress tests to investigate when and why the global durable timestamp moves backwards in our testing.
- related to
-
WT-9066 format uses all_durable to set the stable timestamp
-
- Backlog
-