Uploaded image for project: 'WiredTiger'
  1. WiredTiger
  2. WT-7053

Fix race that could result in eviction and checkpoint deadlocking

    • 5
    • 2023-05-30 - 7.0 Readiness

      In the testing of migrating the old history store cursor to the new history store cursor, we found test_wt4333_handle_locks is failing with checkpoint blocked by waiting the eviction gen to drain. While one eviction thread is blocked by opening a hs dhandle.

      #0  0x00007f07609b1fb9 in futex_reltimed_wait_cancelable (private=<optimized out>, reltime=0x7f0737ffdac0, expected=0, futex_word=0x22bfadc) at ../sysdeps/unix/sysv/linux/futex-internal.h:142
      #1  __pthread_cond_wait_common (abstime=0x7f0737ffdb80, mutex=0x22bfa88, cond=0x22bfab0) at pthread_cond_wait.c:533
      #2  __pthread_cond_timedwait (cond=cond@entry=0x22bfab0, mutex=mutex@entry=0x22bfa88, abstime=abstime@entry=0x7f0737ffdb80) at pthread_cond_wait.c:667
      #3  0x000000000048fcb2 in __wt_cond_wait_signal () at ../src/os_posix/os_mtx_cond.c:115
      #4  0x00000000004c5f4e in __wt_cond_wait (run_func=0x0, usecs=10000, cond=<optimized out>, session=0x7f07613ba770) at ../src/include/misc_inline.h:19
      #5  __wt_readlock () at ../src/support/mtx_rw.c:219
      #6  0x00000000004c12e2 in __session_find_shared_dhandle (checkpoint=0x0, uri=0x61aa78 "file:WiredTigerHS.wt", session=0x7f07613ba770) at ../src/session/session_dhandle.c:408
      #7  __session_get_dhandle (checkpoint=0x0, uri=0x61aa78 "file:WiredTigerHS.wt", session=0x7f07613ba770) at ../src/session/session_dhandle.c:445
      #8  __wt_session_get_dhandle () at ../src/session/session_dhandle.c:474
      #9  0x00000000004c193e in __wt_session_get_btree_ckpt (session=session@entry=0x7f07613ba770, uri=uri@entry=0x61aa78 "file:WiredTigerHS.wt", cfg=cfg@entry=0x7f0737ffded0, flags=0) at ../src/session/session_dhandle.c:319
      #10 0x0000000000456080 in __wt_curfile_open () at ../src/cursor/cur_file.c:801
      #11 0x00000000004bbf2f in __session_open_cursor_int () at ../src/session/session_api.c:479
      #12 0x00000000004bbc07 in __wt_open_cursor (session=session@entry=0x7f07613ba770, uri=uri@entry=0x61aa78 "file:WiredTigerHS.wt", owner=owner@entry=0x0, cfg=cfg@entry=0x7f0737ffded0,
          cursorp=cursorp@entry=0x7f0737ffdec8) at ../src/session/session_api.c:543
      #13 0x00000000005992e5 in __hs_cursor_open_int (cursorp=0x7f072447aca8, session=0x7f07613ba770) at ../src/cursor/cur_hs.c:26
      #14 __wt_curhs_open (session=session@entry=0x7f07613ba770, owner=owner@entry=0x0, cursorp=cursorp@entry=0x7f0737ffdf70) at ../src/cursor/cur_hs.c:1015
      #15 0x00000000005bf00f in __wt_hs_delete_key_from_ts () at ../src/history/hs_rec.c:652
      #16 0x00000000005bfea1 in __wt_hs_insert_updates () at ../src/history/hs_rec.c:458
      #17 0x000000000049fff6 in __rec_hs_wrapup (r=0x7f0724b100e0, session=0x7f07613ba770) at ../src/reconcile/rec_write.c:2302
      #18 __rec_write_wrapup () at ../src/reconcile/rec_write.c:2127
      #19 0x00000000004a1ffe in __reconcile (page_lockedp=<synthetic pointer>, flags=560, salvage=<optimized out>, ref=0x7f06e4105ce0, session=0x7f07613ba770) at ../src/reconcile/rec_write.c:214
      #20 __wt_reconcile () at ../src/reconcile/rec_write.c:99
      #21 0x000000000046ddef in __evict_review (inmem_splitp=<synthetic pointer>, evict_flags=0, ref=0x7f06e4105ce0, session=0x7f07613ba770) at ../src/evict/evict_page.c:709
      #22 __wt_evict () at ../src/evict/evict_page.c:160
      #23 0x00000000004653c3 in __evict_page () at ../src/evict/evict_lru.c:2280
      #24 0x000000000046696e in __wt_cache_eviction_worker () at ../src/evict/evict_lru.c:2386
      #25 0x00000000004d84fa in __wt_cache_eviction_check (busy=false, readonly=false, didworkp=0x0, didworkp=0x0, readonly=false, busy=false, session=0x7f07613ba770) at ../src/include/cache_inline.h:512
      #26 __wt_txn_commit () at ../src/txn/txn.c:1653
      #27 0x000000000045209a in __curfile_insert () at ../src/cursor/cur_file.c:282
      #28 0x00000000004324b9 in op (session=<optimized out>, rnd=rnd@entry=0x7f0737ffee80, cpp=cpp@entry=0x7f0737ffeea0) at ../../../test/csuite/wt4333_handle_locks/main.c:133
      #29 0x000000000043277b in wthread (arg=<optimized out>) at ../../../test/csuite/wt4333_handle_locks/main.c:166
      #30 0x00007f07609ab6db in start_thread (arg=0x7f0737fff700) at pthread_create.c:463
      #31 0x00007f07602af71f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
      
      #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
      #1  0x00007f07601ce921 in __GI_abort () at abort.c:79
      #2  0x000000000041076d in __wt_abort (session=session@entry=0x7f07613b87f0) at ../src/os_common/os_abort.c:30
      #3  0x00000000004189d4 in __wt_gen_drain (session=0x7f07613b87f0, which=2, generation=1639) at ../src/support/generation.c:169
      #4  0x00000000004c3111 in __wt_gen_next_drain (session=session@entry=0x7f07613b87f0, which=which@entry=2) at ../src/support/generation.c:97
      #5  0x0000000000561423 in __wt_sync_file () at ../src/btree/bt_sync.c:543
      #6  0x00000000004dbf33 in __checkpoint_tree.constprop.11 () at ../src/txn/txn_ckpt.c:1672
      #7  0x00000000004dc3b0 in __wt_checkpoint () at ../src/txn/txn_ckpt.c:1832
      #8  0x0000000000484c1c in __wt_meta_track_off () at ../src/meta/meta_track.c:287
      #9  0x00000000004de8e4 in __wt_checkpoint_close () at ../src/txn/txn_ckpt.c:1912
      #10 0x0000000000440cf3 in __wt_conn_dhandle_close () at ../src/conn/conn_dhandle.c:342
      #11 0x00000000004410d6 in __conn_dhandle_close_one (session=0x7f07613bcae0, uri=<optimized out>, checkpoint=<optimized out>, removed=<optimized out>, mark_dead=<optimized out>) at ../src/conn/conn_dhandle.c:707
      #12 0x0000000000442047 in __wt_conn_dhandle_close_all () at ../src/conn/conn_dhandle.c:746
      #13 0x00000000004adb63 in __wt_exclusive_handle_operation () at ../src/schema/schema_worker.c:26
      #14 0x00000000004ade9f in __wt_schema_worker.localalias.0 () at ../src/schema/schema_worker.c:70
      #15 0x00000000004ae063 in __wt_schema_worker.localalias.0 () at ../src/schema/schema_worker.c:100
      #16 0x000000000041590b in __session_verify (wt_session=0x7f07613bcae0, uri=0x232af30 "table:466", config=0x0) at ../src/session/session_api.c:1475
      #17 0x0000000000432679 in vthread (arg=<optimized out>) at ../../../test/csuite/wt4333_handle_locks/main.c:195
      #18 0x00007f07609ab6db in start_thread (arg=0x7f07237fe700) at pthread_create.c:463
      #19 0x00007f07602af71f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
      (gdb) f 4
      #4  0x00000000004c3111 in __wt_gen_next_drain (session=session@entry=0x7f07613b87f0, which=which@entry=2) at ../src/support/generation.c:97
      97	../src/support/generation.c: No such file or directory.
      (gdb) p session->dhandle
      $2 = (WT_DATA_HANDLE *) 0x22df1e0
      (gdb) p *session->dhandle
      $3 = {rwlock = {u = {v = 81604378881, s = {current = 1 '\001', next = 1 '\001', reader = 0 '\000', readers_queued = 0 '\000', readers_active = 19}}, stat_read_count_off = -1, stat_write_count_off = -1,
          stat_app_usecs_off = -1, stat_int_usecs_off = -1, stat_session_usecs_off = 0, cond_readers = 0x22df660, cond_writers = 0x22e0970}, q = {tqe_next = 0x0, tqe_prev = 0x22f40a8}, hashq = {tqe_next = 0x0,
          tqe_prev = 0x40d00b8}, name = 0x22c98d0 "file:WiredTiger.wt", name_hash = 1045034099109282882, checkpoint = 0x0, cfg = 0x22c82a0,
        meta_base = 0x22e3190 "access_pattern_hint=none,allocation_size=4KB,app_metadata=,assert=(read_timestamp=none,write_timestamp=off),block_allocation=best,block_compressor=,cache_resident=false,checksum=uncompressed,collator="..., session_ref = 19, session_inuse = 19, excl_ref = 0, timeofdeath = 0, excl_session = 0x0, dsrc = 0x0, handle = 0x22df3a0, type = WT_DHANDLE_TYPE_BTREE, compact_skip = false, close_lock = {lock = {__data = {
              __lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0},
          name = 0x61a987 "data handle close", stat_count_off = -1, stat_app_usecs_off = -1, stat_int_usecs_off = -1, stat_session_usecs_off = 0, initialized = 1 '\001'}, stats = {0x22e34b0, 0x22e3b18, 0x22e4180, 0x22e47e8,
          0x22e4e50, 0x22e54b8, 0x22e5b20, 0x22e6188, 0x22e67f0, 0x22e6e58, 0x22e74c0, 0x22e7b28, 0x22e8190, 0x22e87f8, 0x22e8e60, 0x22e94c8, 0x22e9b30, 0x22ea198, 0x22ea800, 0x22eae68, 0x22eb4d0, 0x22ebb38, 0x22ec1a0},
        stat_array = 0x22e34b0, flags = 160, ts_flags = 0}
      (gdb)
      

      The failure signature is the same as WT-5785. We attempted to solve it by caching the history store cursor in session before each eviction in WT-5785. However, it is not perfect as there is a chance that the cached hs cursor might be swept away by __session_dhandle_sweep.

      This is likely what is happening in this failure. Here's a stack that I found the hs cursor might be swept away:

      #0  0x00007fc25329ffb7 in raise () from /lib/x86_64-linux-gnu/libc.so.6
      #1  0x00007fc2532a1921 in abort () from /lib/x86_64-linux-gnu/libc.so.6
      #2  0x00000000004107bd in __wt_abort (session=session@entry=0x297e800) at ../src/os_common/os_abort.c:30
      #3  0x000000000041807b in __session_dhandle_sweep (session=0x297e800) at ../src/session/session_dhandle.c:395
      #4  __session_get_dhandle (checkpoint=0x0, uri=0x2a24620 "table:49", session=0x297e800) at ../src/session/session_dhandle.c:443
      #5  __wt_session_get_dhandle (session=session@entry=0x297e800, uri=uri@entry=0x2a24620 "table:49", checkpoint=checkpoint@entry=0x0, cfg=cfg@entry=0x0, flags=flags@entry=0) at ../src/session/session_dhandle.c:478
      #6  0x00000000004a8221 in __wt_schema_get_table_uri (session=session@entry=0x297e800, uri=uri@entry=0x2a24620 "table:49", ok_incomplete=ok_incomplete@entry=false, flags=flags@entry=0,
          tablep=tablep@entry=0x7fc1f47f7c28) at ../src/schema/schema_list.c:27
      #7  0x00000000005ba7fa in __wt_curtable_open (session=session@entry=0x297e800, uri=<optimized out>, uri@entry=0x2a24620 "table:49", owner=owner@entry=0x0, cfg=cfg@entry=0x7fc1f47f7d40,
          cursorp=cursorp@entry=0x7fc1f47f7d30) at ../src/cursor/cur_table.c:970
      #8  0x00000000004bbd23 in __session_open_cursor_int (session=0x297e800, uri=<optimized out>, owner=0x0, other=<optimized out>, cfg=0x7fc1f47f7d40, cursorp=0x7fc1f47f7d30) at ../src/session/session_api.c:447
      #9  0x00000000004bc685 in __session_open_cursor (wt_session=0x297e800, uri=<optimized out>, to_dup=0x0, config=<optimized out>, cursorp=0x7fc1f47f7db8) at ../src/session/session_api.c:563
      #10 0x0000000000432456 in op (session=0x297e800, rnd=rnd@entry=0x7fc1f47f7e80, cpp=cpp@entry=0x7fc1f47f7eb0) at ../../../test/csuite/wt4333_handle_locks/main.c:115
      #11 0x00000000004327fb in wthread (arg=<optimized out>) at ../../../test/csuite/wt4333_handle_locks/main.c:166
      #12 0x00007fc253a7e6db in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
      #13 0x00007fc25338271f in clone () from /lib/x86_64-linux-gnu/libc.so.6
      

      I managed to reproduce the issue on an evergreen spawn host. Disabling dhandle sweep prevented the problem from reproducing.

            Assignee:
            alexander.gorrod@mongodb.com Alexander Gorrod
            Reporter:
            chenhao.qu@mongodb.com Chenhao Qu
            Votes:
            0 Vote for this issue
            Watchers:
            11 Start watching this issue

              Created:
              Updated:
              Resolved: