batch_write_command_update.js fails in disagg_passthrough with DataCorruptionDetected error - WT_Cursor::next -- returned out-of-order keys

XMLWordPrintableJSON

    • Type: Bug
    • Resolution: Gone away
    • Priority: Major - P3
    • None
    • Affects Version/s: None
    • Component/s: Btree
    • Security Level: Public (Available to anyone on the web)
    • Storage Engines
    • 8,852.16
    • None
    • None

      This test failure was reported by the CI system in the WT Disagg Integration Canary builder. The error is logged in WiredTigerRecordStoreCursor::reportOutOfOrderRead() and most likely originated in WiredTigerRecordStoreCursor::next() because this is happening in a non-capped user collection test.batch_write_update.

      Task: https://spruce.mongodb.com/task/mongod_v8.0_wt_disagg_integration_canary_disagg_passthrough_7098bf73d51c3cfcbe9276e4ab9f3e0f0c331bff_25_04_04_19_47_48?execution=0&sortBy=STATUS&sortDir=ASC

      Logs: https://parsley.mongodb.com/test/mongod_v8.0_wt_disagg_integration_canary_disagg_passthrough_7098bf73d51c3cfcbe9276e4ab9f3e0f0c331bff_25_04_04_19_47_48/0/01abc899e135161c3bd8fca9053a00c1?bookmarks=0,648707&shareLine=0

      [DisaggFixture:j2] | 2025-04-05T00:30:06.131+00:00 D3 DISAGG   1200123900     [Disagg-3] "PageLogProvider materialized LSN","attr":{"lsn":7489619831109451955}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.142+00:00 D3 DISAGG   1200094705     [Disagg-3] "LogServerManager::_primaryLoopRead read one","attr":{"segmentId":0,"response":"committed_lsns { lsn: 7489619831109451966 } committed_lsns { domain_id: 1 lsn: 7489619831109451955 } write_concern_lsns { key: \"majority\" value: 7489619831109451966 }"}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.142+00:00 D3 DISAGG   1200032302     [Disagg-3] "Deleting all up to LSN","attr":{"lsn":7489619831109451955}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.142+00:00 D3 DISAGG   1200032309     [Disagg-3] "Deleted up to LSN","attr":{"nRemoved":0,"lsn":7489619831109451955}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.142+00:00 D3 DISAGG   1200062301     [Disagg-3] "Set stable timestamp based on lastApplied, allDurable (no holes) and optime from log server","attr":{"newStableTimestamp":{"$timestamp":{"t":1743813006,"i":190}},"lastApplied":"{ ts: Timestamp(1743813006, 190), t: 2 }, 2025-04-05T00:30:06.126+00:00","allDurableOpTime":{"ts":{"$timestamp":{"t":1743813006,"i":190}},"t":2},"committedOpTime":{"ts":{"$timestamp":{"t":1743813006,"i":190}},"t":2}}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.264+00:00 E  STORAGE  22406       [conn65] "WT_Cursor::next -- returned out-of-order keys","attr":{"forward":true,"next":"11","last":"11","ident":"collection-ac3814e9-8c79-4b2e-b0c5-e4e201c59654","ns":"test.batch_write_update"}
      [DisaggFixture:j2] ERROR(22406): WT_Cursor::next -- returned out-of-order keys
      [DisaggFixture:j2] {"t":{"$date":"2025-04-05T00:30:06.265+00:00"},"s":"I",  "c":"STORAGE",  "id":20320,   "ctx":"healthlog writer0","msg":"createCollection","attr":{"namespace":"local.system.healthlog","uuidDisposition":"generated","uuid":{"uuid":{"$uuid":"a55ea4d8-e67f-462b-90b7-265950b94edc"}},"options":{"capped":true,"size":100000000,"autoIndexId":false}}}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.268+00:00 D4 DISAGG   1200032300     [conn65] "Pushing entry onto PageLogProvider queue","attr":{"lsn":7489619831109451967,"size":229,"qsize":1}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.268+00:00 D5 DISAGG   1200100201     [conn65] "paliHandlePut: Entry successfully written to pageLogProvider queue","attr":{"tableId":3577,"pageId":130,"backlinkLSN":7489619831109451955,"baseLSN":0,"LSN":7489619831109451967,"delta":false,"size":96}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.269+00:00 I  COMMAND  51803       [conn65] "Slow query","attr":{"type":"command","isFromUserConnection":true,"ns":"test.batch_write_update","collectionType":"normal","appName":"MongoDB Shell","command":{"count":"batch_write_update","query":{"a":100000},"lsid":{"id":{"$uuid":"0716d7d9-32ca-47fb-adcf-58d453724ca2"}},"$clusterTime":{"clusterTime":{"$timestamp":{"t":1743813006,"i":190}},"signature":{"hash":{"$binary":{"base64":"AAAAAAAAAAAAAAAAAAAAAAAAAAA=","subType":"0"}},"keyId":0}},"$db":"test"},"planSummary":"COLLSCAN","planningTimeMicros":91,"numYields":0,"ok":0,"errMsg":"WT_Cursor::next -- returned out-of-order keys","errName":"DataCorruptionDetected","errCode":301,"reslen":268,"locks":{"Global":{"acquireCount":{"r":1}}},"readConcern":{"level":"local","provenance":"implicitDefault"},"storage":{"data":{"bytesWritten":96,"timeWritingMicros":427}},"cpuNanos":115860701,"remote":"127.0.0.1:58546","protocol":"op_msg","queues":{"ingress":{"admissions":1},"execution":{"admissions":2}},"workingMillis":125,"durationMillis":125}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.269+00:00 D4 DISAGG   1200032305     [PageLogProvider-0] "Popping entry from page log provider queue (delayed)","attr":{"lsn":7489619831109451967,"size":252,"qsize":2}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.269+00:00 D3 DISAGG   1200094702     [Disagg-5] "pushed result for provider","attr":{"domainId":1,"domainLastLSN":7489619831109451967,"error":{"code":0,"codeName":"OK"}}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.269+00:00 D3 DISAGG   1200094700     [Disagg-0] "Got log entry for provider","attr":{"domainId":1,"lsn_domain_id":1,"lsn_lsn":7489619831109451967,"entry_type":3}
      [DisaggFixture:j2] | 2025-04-05T00:30:06.269+00:00 D3 DISAGG   1200094703     [Disagg-0] "LogServerManager::sendBatch","attr":{"segmentId":0,"batchSize":252,"batchEntries":1,"previousLSN":"lsn: 7489619831109451966","batchLastLSN":"domain_id: 1 lsn: 7489619831109451967"}
      [js_test:batch_write_command_update] uncaught exception: Error: count failed: {
      [js_test:batch_write_command_update] 	"ok" : 0,
      [js_test:batch_write_command_update] 	"errmsg" : "WT_Cursor::next -- returned out-of-order keys",
      [js_test:batch_write_command_update] 	"code" : 301,
      [js_test:batch_write_command_update] 	"codeName" : "DataCorruptionDetected",
      [js_test:batch_write_command_update] 	"$clusterTime" : {
      [js_test:batch_write_command_update] 		"clusterTime" : Timestamp(1743813006, 191),
      [js_test:batch_write_command_update] 		"signature" : {
      [js_test:batch_write_command_update] 			"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
      [js_test:batch_write_command_update] 			"keyId" : NumberLong(0)
      [js_test:batch_write_command_update] 		}
      [js_test:batch_write_command_update] 	},
      [js_test:batch_write_command_update] 	"operationTime" : Timestamp(1743813006, 190)
      [js_test:batch_write_command_update] } :
      [js_test:batch_write_command_update] _getErrorWithCode@src/mongo/shell/utils.js:24:13
      [js_test:batch_write_command_update] DBQuery.prototype.count@src/mongo/shell/query.js:346:11
      [js_test:batch_write_command_update] @jstests/core/write/update/batch_write_command_update.js:210:43
      [js_test:batch_write_command_update] failed to load: jstests/core/write/update/batch_write_command_update.js
      [js_test:batch_write_command_update] exiting with code -3
      

            Assignee:
            Chenhao Qu
            Reporter:
            Benety Goh
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

              Created:
              Updated:
              Resolved: