Uploaded image for project: 'WiredTiger'
  1. WiredTiger
  2. WT-4299

Segfault in test/format stress test

    • Type: Icon: Improvement Improvement
    • Resolution: Duplicate
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: None
    • Component/s: None
    • Labels:
      None
    • Storage Engines 2018-09-10

      There was a Jenkins test failure:

      t: process 16794
         1: table, row-store (16 seconds)
      ../../../test/format/smoke.sh: line 10: 22777 Segmentation fault      (core dumped) $TEST_WRAPPER ./t $args file_type=row data_source=lsm 
      

      Despite the above message, the test wasn't running using LSM. The failure didn't reproduce quickly for me locally.

      Some interesting GDB output:

      Thread 1 (Thread 0x7ff73f2d0700 (LWP 25331)):
      #0  0x00007ff7427350f6 in __wt_ref_info (ref=0x264c380, addrp=0x7ff73f2cd6a0, 
          sizep=0x7ff73f2cd6b0, typep=0x0) at ../src/include/btree.i:1109
      #1  0x00007ff742736cc2 in __page_read (session=0x7ff742cf24f0, ref=0x264c380, flags=3234)
          at ../src/btree/bt_read.c:468
      #2  0x00007ff74273746e in __wt_page_in_func (session=0x7ff742cf24f0, ref=0x264c380, 
          flags=3234, func=0x7ff74289b9c0 <__func__.33543> "__tree_walk_internal", line=527)
          at ../src/btree/bt_read.c:652
      #3  0x00007ff7427581c0 in __wt_page_swap_func (session=0x7ff742cf24f0, held=0x0, 
          want=0x264c380, flags=3234, func=0x7ff74289b9c0 <__func__.33543> "__tree_walk_internal", 
          line=527) at ../src/include/btree.i:1692
      
      (gdb) p *ref
      $1 = {page = 0x0, home = 0x2800fa0, pindex_hint = 59, state = 3, addr = 0x3f71811da00, key = {
          recno = 55834577657, ikey = 0xd00000af9}, page_del = 0x7ff728072860, page_las = 0x0}
      (gdb) p *addrp
      $2 = (const uint8_t *) 0x1007ff73f2cd6d0 <error: Cannot access memory at address 0x1007ff73f2cd6d0>
      (gdb) p addrp
      $3 = (const uint8_t **) 0x7ff73f2cd6a0
      (gdb) up
      #1  0x00007ff742736cc2 in __page_read (session=0x7ff742cf24f0, ref=0x264c380, flags=3234)
          at ../src/btree/bt_read.c:468
      468		__wt_ref_info(ref, &addr, &addr_size, NULL);
      (gdb) p previous_state
      $4 = 1
      

      It's dealing with a WT_REF_DELETED ref. That points towards WT-4259, but WT-4243 was also merged recently and it played around with ref states as well.

      All the call stacks from the failure:

      (gdb) thread apply all bt 
      
      Thread 11 (Thread 0x7ff73fad1700 (LWP 25332)):
      #0  0x00007ff7420ae730 in futex_wait (private=<optimized out>, expected=17424, 
          futex_word=0x61e7dc <g+8412>) at ../sysdeps/unix/sysv/linux/futex-internal.h:61
      #1  futex_wait_simple (private=<optimized out>, expected=17424, futex_word=0x61e7dc <g+8412>)
          at ../sysdeps/nptl/futex-internal.h:135
      #2  __pthread_rwlock_wrlock_slow (rwlock=0x61e7d0 <g+8400>) at pthread_rwlock_wrlock.c:67
      #3  0x0000000000406d91 in begin_transaction (tinfo=0x2870e10, session=0x7ff742cf2958, 
          iso_configp=0x7ff73faceacc) at ../../../test/format/ops.c:548
      #4  0x0000000000407678 in ops (arg=0x2870e10) at ../../../test/format/ops.c:784
      #5  0x00007ff7420a96ba in start_thread (arg=0x7ff73fad1700) at pthread_create.c:333
      #6  0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      
      Thread 10 (Thread 0x7ff73dacd700 (LWP 25336)):
      #0  0x00007ff741dd55d3 in select () at ../sysdeps/unix/syscall-template.S:84
      #1  0x00007ff74281fb2a in __wt_sleep (seconds=0, micro_seconds=1500000)
          at ../src/os_posix/os_sleep.c:32
      #2  0x000000000040c2d5 in random_sleep (rnd=0x61e7c8 <g+8392>, max_seconds=15)
          at ../../../test/format/format.h:411
      #3  0x000000000040da26 in timestamp (arg=0x25f3240) at ../../../test/format/util.c:615
      #4  0x00007ff7420a96ba in start_thread (arg=0x7ff73dacd700) at pthread_create.c:333
      #5  0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      
      Thread 9 (Thread 0x7ff73e2ce700 (LWP 25335)):
      #0  0x00007ff741dd55d3 in select () at ../sysdeps/unix/syscall-template.S:84
      #1  0x00007ff74281fb2a in __wt_sleep (seconds=1, micro_seconds=0)
          at ../src/os_posix/os_sleep.c:32
      #2  0x000000000040d6a4 in checkpoint (arg=0x0) at ../../../test/format/util.c:524
      #3  0x00007ff7420a96ba in start_thread (arg=0x7ff73e2ce700) at pthread_create.c:333
      #4  0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      
      Thread 8 (Thread 0x7ff741ad5700 (LWP 23048)):
      #0  0x00007ff74286c9dd in __wt_huffman_decode (session=0x7ff742cf1350, huffman_arg=0x2585fe0, 
          from_arg=0x7ff718199e28 "G\217\036<x\366\211\343ǀ=\275\276\n\320nQ+\327\001\233\367'\033\265\264.\350_\312\033\233\261\305Z\r\312%z\340\063~\344\343v\266\205\335\v\371Csv8\253A\271D\257\\\006oܜn\326л\241\177(nn\307\025h7(\225\353\200\315\373\223\215\332\332\027t/\345\r\315\330\342\255\006\345\022\275p\031\277rq\273[B\356\205\374\241\271\273\034U\240ܢW\256\003\067\356N7kh]п\224\067\067c\212\264\033\224J\365\300f\375\311\306\355m\v\272\027\362\206\346\354qV\203r\211^\270\f߹8ݭ\241wB\376P\334ݎ*\320nQ+\327\001\233\367'\033\265\264.\350_\312\033\233\261\305Z", <incomplete sequence \312>..., from_len=64200, to_buf=0x7ff718002590)
          at ../src/support/huffman.c:808
      #1  0x00007ff742754304 in __cell_data_ref (session=0x7ff742cf1350, page=0x0, page_type=7, 
          unpack=0x7ff741ad4750, store=0x7ff718002590) at ../src/include/cell.i:784
      #2  0x00007ff742754391 in __wt_dsk_cell_data_ref (session=0x7ff742cf1350, page_type=7, 
          unpack=0x7ff741ad4750, store=0x7ff718002590) at ../src/include/cell.i:808
      #3  0x00007ff742755310 in __verify_dsk_row (session=0x7ff742cf1350, 
          tag=0x7ff7428b7de0 "[reconcile-image]", dsk=0x7ff71821a400)
          at ../src/btree/bt_vrfy_dsk.c:322
      #4  0x00007ff742754d1c in __wt_verify_dsk_image (session=0x7ff742cf1350, 
          tag=0x7ff7428b7de0 "[reconcile-image]", dsk=0x7ff71821a400, size=0, empty_page_ok=true)
          at ../src/btree/bt_vrfy_dsk.c:166
      #5  0x00007ff74283753e in __rec_split_write (session=0x7ff742cf1350, r=0x7ff718219260, 
          chunk=0x7ff718219320, compressed_image=0x0, last_block=true)
          at ../src/reconcile/rec_write.c:3846
      #6  0x00007ff742836390 in __rec_split_finish (session=0x7ff742cf1350, r=0x7ff718219260)
          at ../src/reconcile/rec_write.c:3334
      #7  0x00007ff74283bb65 in __rec_row_leaf (session=0x7ff742cf1350, r=0x7ff718219260, 
          page=0x7ff724030e10, salvage=0x0) at ../src/reconcile/rec_write.c:5779
      ---Type <return> to continue, or q <return> to quit---
      #8  0x00007ff74283086a in __wt_reconcile (session=0x7ff742cf1350, ref=0x27825b0, salvage=0x0, 
          flags=162, lookaside_retryp=0x0) at ../src/reconcile/rec_write.c:481
      #9  0x00007ff7427e1520 in __evict_review (session=0x7ff742cf1350, ref=0x27825b0, 
          closing=false, inmem_splitp=0x7ff741ad4cee) at ../src/evict/evict_page.c:669
      #10 0x00007ff7427e01e5 in __wt_evict (session=0x7ff742cf1350, ref=0x27825b0, closing=false, 
          previous_state=5) at ../src/evict/evict_page.c:174
      #11 0x00007ff7427dd28e in __evict_page (session=0x7ff742cf1350, is_server=true)
          at ../src/evict/evict_lru.c:2322
      #12 0x00007ff7427d99a9 in __evict_lru_pages (session=0x7ff742cf1350, is_server=true)
          at ../src/evict/evict_lru.c:1173
      #13 0x00007ff7427d8935 in __evict_pass (session=0x7ff742cf1350) at ../src/evict/evict_lru.c:729
      #14 0x00007ff7427d7d6a in __evict_server (session=0x7ff742cf1350, did_work=0x7ff741ad4eef)
          at ../src/evict/evict_lru.c:384
      #15 0x00007ff7427d797a in __wt_evict_thread_run (session=0x7ff742cf1350, thread=0x25b56c0)
          at ../src/evict/evict_lru.c:290
      #16 0x00007ff7428792c5 in __thread_run (arg=0x25b56c0) at ../src/support/thread_group.c:31
      #17 0x00007ff7420a96ba in start_thread (arg=0x7ff741ad5700) at pthread_create.c:333
      #18 0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      
      Thread 7 (Thread 0x7ff737fff700 (LWP 25333)):
      #0  0x00007ff741dc2827 in sched_yield () at ../sysdeps/unix/syscall-template.S:84
      #1  0x00007ff74282001c in __wt_yield () at ../src/os_posix/os_yield.c:27
      #2  0x00007ff7427672e2 in __wt_row_search (session=0x7ff742cf3af8, srch_key=0x7ff72c19f1e0, 
          leaf=0x0, cbt=0x7ff72c19f0e0, insert=true, restore=false) at ../src/btree/row_srch.c:435
      #3  0x00007ff74271738e in __cursor_row_search (session=0x7ff742cf3af8, cbt=0x7ff72c19f0e0, 
          leaf=0x0, insert=true) at ../src/btree/bt_cursor.c:385
      #4  0x00007ff742717ebf in __wt_btcur_search_near (cbt=0x7ff72c19f0e0, exactp=0x7ff737ffca60)
          at ../src/btree/bt_cursor.c:638
      #5  0x00007ff7427a0352 in __curfile_search_near (cursor=0x7ff72c19f0e0, exact=0x7ff737ffca60)
          at ../src/cursor/cur_file.c:231
      #6  0x0000000000408ad0 in read_row_worker (cursor=0x7ff72c19f0e0, keyno=3460, key=0x2871a90, 
          value=0x2871ac0, sn=true) at ../../../test/format/ops.c:1196
      #7  0x0000000000408c98 in read_row (tinfo=0x2871a10, cursor=0x7ff72c19f0e0)
          at ../../../test/format/ops.c:1269
      #8  0x0000000000407d19 in ops (arg=0x2871a10) at ../../../test/format/ops.c:906
      #9  0x00007ff7420a96ba in start_thread (arg=0x7ff737fff700) at pthread_create.c:333
      #10 0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      
      Thread 6 (Thread 0x7ff7412d4700 (LWP 23049)):
      #0  pthread_cond_timedwait@@GLIBC_2.3.2 ()
          at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
      #1  0x00007ff74281f5ab in __wt_cond_wait_signal (session=0x7ff742cf17b8, cond=0x25ab860, 
          usecs=10000000, run_func=0x7ff7427d78a2 <__wt_evict_thread_chk>, signalled=0x7ff7412d3f0f)
          at ../src/os_posix/os_mtx_cond.c:122
      #2  0x00007ff742879244 in __wt_cond_wait (session=0x7ff742cf17b8, cond=0x25ab860, 
          usecs=10000000, run_func=0x7ff7427d78a2 <__wt_evict_thread_chk>)
          at ../src/include/misc.i:19
      #3  0x00007ff7428792ad in __thread_run (arg=0x25b5b30) at ../src/support/thread_group.c:29
      #4  0x00007ff7420a96ba in start_thread (arg=0x7ff7412d4700) at pthread_create.c:333
      #5  0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      
      Thread 5 (Thread 0x7ff740ad3700 (LWP 23050)):
      #0  pthread_cond_timedwait@@GLIBC_2.3.2 ()
          at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
      #1  0x00007ff74281f5ab in __wt_cond_wait_signal (session=0x7ff742cf1c20, cond=0x25b4f40, 
          usecs=10000000, run_func=0x7ff7427d78a2 <__wt_evict_thread_chk>, signalled=0x7ff740ad2f0f)
          at ../src/os_posix/os_mtx_cond.c:122
      #2  0x00007ff742879244 in __wt_cond_wait (session=0x7ff742cf1c20, cond=0x25b4f40, 
      ---Type <return> to continue, or q <return> to quit---
          usecs=10000000, run_func=0x7ff7427d78a2 <__wt_evict_thread_chk>)
          at ../src/include/misc.i:19
      #3  0x00007ff7428792ad in __thread_run (arg=0x25aba10) at ../src/support/thread_group.c:29
      #4  0x00007ff7420a96ba in start_thread (arg=0x7ff740ad3700) at pthread_create.c:333
      #5  0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      
      Thread 4 (Thread 0x7ff73eacf700 (LWP 25334)):
      #0  0x00007ff742764f10 in __wt_ref_key (page=0x25b5860, ref=0x1b, keyp=0xa, 
          sizep=0x7ff72408aec8) at ../src/include/btree.i:672
      #1  0x00007ff742767136 in __wt_row_search (session=0x7ff742cf3690, srch_key=0x7ff72408ad10, 
          leaf=0x0, cbt=0x7ff72408ac10, insert=false, restore=false) at ../src/btree/row_srch.c:378
      #2  0x00007ff74271738e in __cursor_row_search (session=0x7ff742cf3690, cbt=0x7ff72408ac10, 
          leaf=0x0, insert=false) at ../src/btree/bt_cursor.c:385
      #3  0x00007ff7427179fc in __wt_btcur_search (cbt=0x7ff72408ac10)
          at ../src/btree/bt_cursor.c:530
      #4  0x00007ff742719c6d in __wt_btcur_modify (cbt=0x7ff72408ac10, entries=0x7ff73eacc980, 
          nentries=2) at ../src/btree/bt_cursor.c:1458
      #5  0x00007ff7427a187e in __curfile_modify (cursor=0x7ff72408ac10, entries=0x7ff73eacc980, 
          nentries=2) at ../src/cursor/cur_file.c:333
      #6  0x0000000000409814 in row_modify (tinfo=0x2872810, cursor=0x7ff72408ac10, positioned=false)
          at ../../../test/format/ops.c:1570
      #7  0x0000000000407bdc in ops (arg=0x2872810) at ../../../test/format/ops.c:892
      #8  0x00007ff7420a96ba in start_thread (arg=0x7ff73eacf700) at pthread_create.c:333
      #9  0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      
      Thread 3 (Thread 0x7ff742d14700 (LWP 22777)):
      #0  0x00007ff741dd55d3 in select () at ../sysdeps/unix/syscall-template.S:84
      #1  0x00007ff74281fb2a in __wt_sleep (seconds=0, micro_seconds=250000)
          at ../src/os_posix/os_sleep.c:32
      #2  0x000000000040613b in wts_ops (lastrun=1) at ../../../test/format/ops.c:237
      #3  0x000000000040bac9 in main (argc=12, argv=0x7ffda8b1ce18) at ../../../test/format/t.c:209
      
      Thread 2 (Thread 0x7ff7402d2700 (LWP 23051)):
      #0  pthread_cond_timedwait@@GLIBC_2.3.2 ()
          at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
      #1  0x00007ff74281f5ab in __wt_cond_wait_signal (session=0x7ff742cf2088, cond=0x25be4e0, 
          usecs=2000000, run_func=0x7ff74278dd41 <__sweep_server_run_chk>, signalled=0x7ff7402d1edf)
          at ../src/os_posix/os_mtx_cond.c:122
      #2  0x00007ff74278cc72 in __wt_cond_wait (session=0x7ff742cf2088, cond=0x25be4e0, 
          usecs=2000000, run_func=0x7ff74278dd41 <__sweep_server_run_chk>)
          at ../src/include/misc.i:19
      #3  0x00007ff74278dde8 in __sweep_server (arg=0x7ff742cf2088) at ../src/conn/conn_sweep.c:295
      #4  0x00007ff7420a96ba in start_thread (arg=0x7ff7402d2700) at pthread_create.c:333
      #5  0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      
      Thread 1 (Thread 0x7ff73f2d0700 (LWP 25331)):
      #0  0x00007ff7427350f6 in __wt_ref_info (ref=0x264c380, addrp=0x7ff73f2cd6a0, 
          sizep=0x7ff73f2cd6b0, typep=0x0) at ../src/include/btree.i:1109
      #1  0x00007ff742736cc2 in __page_read (session=0x7ff742cf24f0, ref=0x264c380, flags=3234)
          at ../src/btree/bt_read.c:468
      #2  0x00007ff74273746e in __wt_page_in_func (session=0x7ff742cf24f0, ref=0x264c380, 
          flags=3234, func=0x7ff74289b9c0 <__func__.33543> "__tree_walk_internal", line=527)
          at ../src/btree/bt_read.c:652
      #3  0x00007ff7427581c0 in __wt_page_swap_func (session=0x7ff742cf24f0, held=0x0, 
          want=0x264c380, flags=3234, func=0x7ff74289b9c0 <__func__.33543> "__tree_walk_internal", 
          line=527) at ../src/include/btree.i:1692
      #4  0x00007ff742758d8c in __tree_walk_internal (session=0x7ff742cf24f0, refp=0x7ff7280ca848, 
          walkcntp=0x0, skip_func=0x0, func_cookie=0x0, flags=2178) at ../src/btree/bt_walk.c:526
      ---Type <return> to continue, or q <return> to quit---
      #5  0x00007ff742758fdc in __wt_tree_walk (session=0x7ff742cf24f0, refp=0x7ff7280ca848, 
          flags=2176) at ../src/btree/bt_walk.c:597
      #6  0x00007ff74270ec76 in __wt_btcur_next (cbt=0x7ff7280ca6d0, truncating=false)
          at ../src/btree/bt_curnext.c:696
      #7  0x00007ff74279ea1d in __curfile_next (cursor=0x7ff7280ca6d0) at ../src/cursor/cur_file.c:96
      #8  0x0000000000408d06 in nextprev (tinfo=0x2870210, cursor=0x7ff7280ca6d0, next=true)
          at ../../../test/format/ops.c:1291
      #9  0x0000000000408441 in ops (arg=0x2870210) at ../../../test/format/ops.c:1033
      #10 0x00007ff7420a96ba in start_thread (arg=0x7ff73f2d0700) at pthread_create.c:333
      #11 0x00007ff741ddf41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      

      Configuration file:

      ############################################
      #  RUN PARAMETERS
      ############################################
      abort=0
      alter=0
      auto_throttle=0
      backups=0
      bitcnt=7
      bloom=1
      bloom_bit_count=47
      bloom_hash_count=22
      bloom_oldest=0
      cache=31
      cache_minimum=0
      checkpoints=on
      checkpoint_log_size=152
      checkpoint_wait=94
      checksum=uncompressed
      chunk_size=2
      compaction=0
      compression=none
      data_extend=0
      data_source=table
      delete_pct=35
      dictionary=0
      direct_io=0
      encryption=none
      evict_max=3
      file_type=row-store
      firstfit=0
      huffman_key=1
      huffman_value=0
      independent_thread_rng=1
      in_memory=0
      insert_pct=40
      internal_key_truncation=1
      internal_page_max=12
      isolation=snapshot
      key_gap=7
      key_max=106
      key_min=18
      leaf_page_max=12
      leak_memory=0
      logging=0
      logging_archive=1
      logging_compression=none
      logging_file_max=18327
      logging_prealloc=0
      long_running_txn=0
      lsm_worker_threads=3
      merge_max=17
      mmap=1
      modify_pct=11
      ops=50000
      prefix_compression=1
      prefix_compression_min=2
      prepare=1
      quiet=1
      read_pct=10
      rebalance=1
      repeat_data_pct=6
      reverse=0
      rows=10000
      runs=1
      salvage=1
      split_pct=92
      statistics=0
      statistics_server=0
      threads=4
      timer=360
      timing_stress_checkpoint=0
      timing_stress_lookaside_sweep=0
      timing_stress_split_1=0
      timing_stress_split_2=0
      timing_stress_split_3=0
      timing_stress_split_4=0
      timing_stress_split_5=0
      timing_stress_split_6=0
      timing_stress_split_7=0
      timing_stress_split_8=0
      transaction_timestamps=1
      transaction-frequency=75
      truncate=1
      value_max=233
      value_min=6
      verify=1
      wiredtiger_config=
      write_pct=4
      ############################################
      

            Assignee:
            keith.bostic@mongodb.com Keith Bostic (Inactive)
            Reporter:
            alexander.gorrod@mongodb.com Alexander Gorrod
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: