(Disagg=switch) Test/format data mismatch during step-down

XML Word Printable JSON

    • Type: Bug
    • Resolution: Unresolved
    • Priority: Critical - P2
    • None
    • Affects Version/s: None
    • Component/s: None
    • Storage Engines - Foundations, Storage Engines - Transactions
    • None
    • None

      Branched off from WT-16531. WT-16531 has 166 BFGs, and we found a data mismatch that differs from the one being investigated in that ticket.

      Config:

      ############################################
      #  RUN PARAMETERS: V3
      ############################################
      assert.read_timestamp=0
      background_compact=0
      background_compact.free_space_target=26
      backup=0
      backup.incremental=off
      backup.incr_granularity=7776
      backup.live_restore=0
      backup.live_restore_read_size=4
      backup.live_restore_threads=1
      block_cache=0
      block_cache.cache_on_checkpoint=1
      block_cache.cache_on_writes=0
      block_cache.size=21
      cache=3072
      cache.evict_max=5
      cache.eviction_dirty_target=0
      cache.eviction_dirty_trigger=0
      cache.eviction_updates_target=0
      cache.eviction_updates_trigger=0
      cache.minimum=0
      cache.maximum=0
      checkpoint=on
      checkpoint.log_size=155
      checkpoint.wait=24
      chunk_cache=0
      chunk_cache.capacity=1087
      chunk_cache.chunk_size=1
      chunk_cache.storage_path=off
      chunk_cache.type=off
      compact.free_space_target=99
      debug.background_compact=1
      debug.checkpoint_retention=8
      debug.cursor_reposition=0
      debug.eviction=0
      debug.log_retention=6
      debug.realloc_exact=0
      debug.realloc_malloc=0
      debug.slow_checkpoint=0
      debug.table_logging=0
      debug.update_restore_evict=0
      disagg.internal_page_delta=1
      disagg.leaf_page_delta=1
      disagg.multi=0
      disagg.multi_validation=0
      disagg.enabled=1
      disagg.layered=1
      disagg.mode=switch
      disagg.page_log=palite
      disagg.key_provider=1
      disagg.page_log.verbose=0
      disagg.drain_threads=7
      disk.data_extend=0
      disk.encryption=rotn-7
      disk.mmap=0
      disk.mmap_all=0
      eviction.evict_use_softptr=0
      file_manager.close_handle_minimum=25
      file_manager.close_idle_time=26
      file_manager.close_scan_interval=17
      format.abort=0
      format.independent_thread_rng=1
      format.major_timeout=0
      import=0
      logging=0
      logging.compression=none
      logging.file_max=198375
      logging.prealloc=1
      logging.remove=1
      obsolete_cleanup.method=off
      obsolete_cleanup.wait=378
      ops.alter=0
      ops.compaction=0
      ops.hs_cursor=0
      ops.pct.modify=0
      ops.bound_cursor=0
      ops.prepare=0
      ops.random_cursor=0
      ops.salvage=0
      ops.throttle=0
      ops.throttle.sleep_us=103713
      ops.verify=1
      prefetch=0
      precise_checkpoint=1
      preserve_prepared=0
      quiet=0
      random.data_seed=8042511
      random.extra_seed=16597361
      rollback_to_stable_threads=9
      runs.in_memory=0
      runs.ops=0
      runs.predictable_replay=0
      runs.source=layered
      runs.tables=3
      runs.threads=22
      runs.timer=2
      runs.type=row-store
      runs.verify_failure_dump=0
      statistics.mode=fast
      statistics_log.sources=off
      stress.aggressive_stash_free=0
      stress.aggressive_sweep=0
      stress.checkpoint=0
      stress.checkpoint_evict_page=0
      stress.checkpoint_prepare=0
      stress.compact_slow=0
      stress.evict_reposition=0
      stress.failpoint_eviction_split=0
      stress.failpoint_hs_delete_key_from_ts=0
      stress.failpoint_rec_before_wrapup=0
      stress.hs_checkpoint_delay=0
      stress.hs_search=0
      stress.hs_sweep=0
      stress.prefetch_delay=0
      stress.prepare_resolution_1=0
      stress.sleep_before_read_overflow_onpage=0
      stress.split_1=0
      stress.split_2=0
      stress.split_3=0
      stress.split_4=0
      stress.split_5=0
      stress.split_6=0
      stress.split_7=0
      stress.split_8=0
      tiered_storage.flush_frequency=0
      tiered_storage.storage_source=off
      transaction.implicit=0
      transaction.operation_timeout_ms=2000
      transaction.timestamps=1
      wiredtiger.config=off
      wiredtiger.rwlock=1
      wiredtiger.leak_memory=0
      ############################################
      #  TABLE PARAMETERS: table 1
      ############################################
      table1.btree.compression=none
      table1.btree.dictionary=1
      table1.btree.internal_key_truncation=1
      table1.btree.internal_page_max=10
      table1.btree.key_max=57
      table1.btree.key_min=32
      table1.btree.leaf_page_max=16
      table1.btree.memory_page_max=8
      table1.btree.prefix_len=0
      table1.btree.prefix_compression=1
      table1.btree.prefix_compression_min=0
      table1.btree.reverse=0
      table1.btree.split_pct=100
      table1.btree.value_max=1361
      table1.btree.value_min=14
      table1.disk.checksum=uncompressed
      table1.disk.firstfit=0
      table1.ops.pareto=0
      table1.ops.pareto.skew=16
      table1.ops.pct.delete=0
      table1.ops.pct.insert=100
      table1.ops.pct.read=0
      table1.ops.pct.write=0
      table1.ops.truncate=1
      table1.runs.mirror=1
      table1.runs.rows=239688
      ############################################
      #  TABLE PARAMETERS: table 2
      ############################################
      table2.btree.compression=none
      table2.btree.dictionary=0
      table2.btree.internal_key_truncation=1
      table2.btree.internal_page_max=14
      table2.btree.key_max=77
      table2.btree.key_min=19
      table2.btree.leaf_page_max=12
      table2.btree.memory_page_max=6
      table2.btree.prefix_len=0
      table2.btree.prefix_compression=1
      table2.btree.prefix_compression_min=7
      table2.btree.reverse=0
      table2.btree.split_pct=57
      table2.btree.value_max=2115
      table2.btree.value_min=18
      table2.disk.checksum=on
      table2.disk.firstfit=0
      table2.ops.pareto=0
      table2.ops.pareto.skew=51
      table2.ops.pct.delete=13
      table2.ops.pct.insert=83
      table2.ops.pct.read=0
      table2.ops.pct.write=4
      table2.ops.truncate=1
      table2.runs.mirror=1
      table2.runs.rows=239688
      ############################################
      #  TABLE PARAMETERS: table 3
      ############################################
      table3.btree.compression=none
      table3.btree.dictionary=0
      table3.btree.internal_key_truncation=1
      table3.btree.internal_page_max=12
      table3.btree.key_max=120
      table3.btree.key_min=16
      table3.btree.leaf_page_max=15
      table3.btree.memory_page_max=1
      table3.btree.prefix_len=0
      table3.btree.prefix_compression=1
      table3.btree.prefix_compression_min=4
      table3.btree.reverse=0
      table3.btree.split_pct=98
      table3.btree.value_max=1800
      table3.btree.value_min=4
      table3.disk.checksum=unencrypted
      table3.disk.firstfit=0
      table3.ops.pareto=0
      table3.ops.pareto.skew=65
      table3.ops.pct.delete=2
      table3.ops.pct.insert=15
      table3.ops.pct.read=74
      table3.ops.pct.write=9
      table3.ops.truncate=1
      table3.runs.mirror=1
      table3.runs.rows=239688
      

      This happens when performing a test/format step-down. In test/format we shut down and restart, and then fetch the latest checkpoint as the follower. The follower then performs a mirror check and finds a mirror mismatch.

        1. Stack.txt
          87 kB
          Ravi Giri

            Assignee:
            Vamsi Boyapati
            Reporter:
            Jie Chen
            Votes:
            0 Vote for this issue
            Watchers:
            7 Start watching this issue

              Created:
              Updated: