[SERVER-63145] Query $densify produces incorrect results with optimizations enabled Created: 31/Jan/22  Updated: 29/Oct/23  Resolved: 01/Feb/22

Status: Closed
Project: Core Server
Component/s: None
Affects Version/s: None
Fix Version/s: 5.3.0, 5.2.1

Type: Bug Priority: Major - P3
Reporter: Anna Wawrzyniak Assignee: Ted Tuckman
Resolution: Fixed Votes: 0
Labels: query-director-triage
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Attachments: File repro1.js     File repro1.out     File repro2.js     File repro2.out    
Issue Links:
Backports
Depends
Backwards Compatibility: Minor Change
Operating System: ALL
Backport Requested:
v5.2
Steps To Reproduce:

See attached repro1.js and repro2.js and their outputs. To repro locally run the following:

buildscripts/resmoke.py run --suites=generational_fuzzer --installDir=dist-test/bin --spawnUsing=python --jobs=1 --mongodSetParameters='{logComponentVerbosity: {command: 2}}' --mongodSetParameters='{internalQueryForceClassicEngine: true}' --storageEngineCacheSizeGB=1 repro1.js
 
buildscripts/resmoke.py run --suites=generational_fuzzer --installDir=dist-test/bin --spawnUsing=python --jobs=1 --mongodSetParameters='{logComponentVerbosity: {command: 2}}' --mongodSetParameters='{internalQueryForceClassicEngine: true}' --storageEngineCacheSizeGB=1 repro2.js

Participants:
Linked BF Score: 174

 Description   

$densify produces incorrect results, in some cases when used with $count or $count-like operators.
 
From initial investigation it appears like query optimization and plan translation in some cases is disregarding the fact that $densify can add more documents to the output and instead the $count appears to be based on documents that are below $densify in the pipeline.
 
 
The issue was discovered by fuzzer tests https://jira.mongodb.org/browse/BF-24110

We found at least 2 scenarios when this happen. For 
 
Scenario 1
Input

const documentList = [
{_id: 379, "date": new Date("2019-12-03T02:14:31.296Z"), "obj": {_id: 380, "obj": {_id: 381, "obj": {_id: 382, "date": new Date("2020-01-10T11:20:05.778Z"), }, }, }, }, // 61    
{_id: 409, "date": new Date("2019-08-29T01:15:43.084Z"), "obj": {_id: 412, "obj": {_id: 413, "obj": {_id: 414, "date": new Date("2020-01-10T00:20:05.778Z"), }, }, }, }, // 66];
]
 
const aggregationList = [    
[{$sort: {"date": 1, "date": 1, "obj.obj.obj.obj.str": 1, "obj.obj.obj.num": -1, _id : 1}}, {$densify: {field: "obj.obj.obj.date", range: {step: 1, unit: "hour", bounds: "full"}}}, {$sort: {_id: 1}}, {$sortByCount: {$firstN: {'n': NumberInt(7), 'input': []}}}], // 2233
];

Results

// There is index on {"date": 1}
 
// Result with optimization enabled (incorrect):
[ { "_id" : [ ], "count" : 2 } ]
 
// Results with optimization disabled (correct):
[ { "_id" : [ ], "count" : 12 } ]

Plan with optimization enabled:

 

WARNING: explain plans may not be the same ones as the plan chosen in the real query!
Explain method for version "5.3.0-alpha0" aggregation:{
	"explainVersion" : "1",
	"stages" : [
		{
			"$cursor" : {
				"queryPlanner" : {
					"namespace" : "fuzzer.fuzzer_coll",
					"indexFilterSet" : false,
					"parsedQuery" : {
 
					},
					"queryHash" : "D348D454",
					"planCacheKey" : "D348D454",
					"maxIndexedOrSolutionsReached" : false,
					"maxIndexedAndSolutionsReached" : false,
					"maxScansToExplodeReached" : false,
					"winningPlan" : {
						"stage" : "PROJECTION_SIMPLE",
						"transformBy" : {
							"_id" : 1
						},
						"inputStage" : {
							"stage" : "FETCH",
							"inputStage" : {
								"stage" : "IXSCAN",
								"keyPattern" : {
									"obj.obj.obj.date" : -1
								},
								"indexName" : "obj.obj.obj.date_-1",
								"isMultiKey" : false,
								"multiKeyPaths" : {
									"obj.obj.obj.date" : [ ]
								},
								"isUnique" : false,
								"isSparse" : false,
								"isPartial" : false,
								"indexVersion" : 2,
								"direction" : "backward",
								"indexBounds" : {
									"obj.obj.obj.date" : [
										"[MinKey, MaxKey]"
									]
								}
							}
						}
					},
					"rejectedPlans" : [ ]
				},
				"executionStats" : {
					"executionSuccess" : true,
					"nReturned" : 2,
					"executionTimeMillis" : 5,
					"totalKeysExamined" : 2,
					"totalDocsExamined" : 2,
					"executionStages" : {
						"stage" : "PROJECTION_SIMPLE",
						"nReturned" : 2,
						"executionTimeMillisEstimate" : 0,
						"works" : 3,
						"advanced" : 2,
						"needTime" : 0,
						"needYield" : 0,
						"saveState" : 1,
						"restoreState" : 1,
						"isEOF" : 1,
						"transformBy" : {
							"_id" : 1
						},
						"inputStage" : {
							"stage" : "FETCH",
							"nReturned" : 2,
							"executionTimeMillisEstimate" : 0,
							"works" : 3,
							"advanced" : 2,
							"needTime" : 0,
							"needYield" : 0,
							"saveState" : 1,
							"restoreState" : 1,
							"isEOF" : 1,
							"docsExamined" : 2,
							"alreadyHasObj" : 0,
							"inputStage" : {
								"stage" : "IXSCAN",
								"nReturned" : 2,
								"executionTimeMillisEstimate" : 0,
								"works" : 3,
								"advanced" : 2,
								"needTime" : 0,
								"needYield" : 0,
								"saveState" : 1,
								"restoreState" : 1,
								"isEOF" : 1,
								"keyPattern" : {
									"obj.obj.obj.date" : -1
								},
								"indexName" : "obj.obj.obj.date_-1",
								"isMultiKey" : false,
								"multiKeyPaths" : {
									"obj.obj.obj.date" : [ ]
								},
								"isUnique" : false,
								"isSparse" : false,
								"isPartial" : false,
								"indexVersion" : 2,
								"direction" : "backward",
								"indexBounds" : {
									"obj.obj.obj.date" : [
										"[MinKey, MaxKey]"
									]
								},
								"keysExamined" : 2,
								"seeks" : 1,
								"dupsTested" : 0,
								"dupsDropped" : 0
							}
						}
					},
					"allPlansExecution" : [ ]
				}
			},
			"nReturned" : NumberLong(2),
			"executionTimeMillisEstimate" : NumberLong(0)
		},
		{
			"$_internalDensify" : {
				"field" : "obj.obj.obj.date",
				"partitionByFields" : [ ],
				"range" : {
					"step" : 1,
					"bounds" : "full",
					"unit" : "hour"
				}
			},
			"nReturned" : NumberLong(2),
			"executionTimeMillisEstimate" : NumberLong(0)
		},
		{
			"$sort" : {
				"sortKey" : {
					"_id" : 1
				}
			},
			"totalDataSizeSortedBytesEstimate" : NumberLong(260),
			"usedDisk" : false,
			"spills" : NumberLong(0),
			"nReturned" : NumberLong(2),
			"executionTimeMillisEstimate" : NumberLong(0)
		},
		{
			"$group" : {
				"_id" : {
					"$firstN" : {
						"n" : {
							"$const" : 7
						},
						"input" : [ ]
					}
				},
				"count" : {
					"$sum" : {
						"$const" : 1
					}
				}
			},
			"maxAccumulatorMemoryUsageBytes" : {
				"count" : NumberLong(80)
			},
			"totalOutputDataSizeBytes" : NumberLong(269),
			"usedDisk" : true,
			"spills" : NumberLong(1),
			"nReturned" : NumberLong(1),
			"executionTimeMillisEstimate" : NumberLong(0)
		},
		{
			"$sort" : {
				"sortKey" : {
					"count" : -1
				}
			},
			"totalDataSizeSortedBytesEstimate" : NumberLong(285),
			"usedDisk" : false,
			"spills" : NumberLong(0),
			"nReturned" : NumberLong(1),
			"executionTimeMillisEstimate" : NumberLong(0)
		}
	],
	"serverInfo" : {
		"host" : "ip-10-122-11-183",
		"port" : 20020,
		"version" : "5.3.0-alpha0",
		"gitVersion" : "unknown"
	},
	"serverParameters" : {
		"internalQueryFacetBufferSizeBytes" : 104857600,
		"internalQueryFacetMaxOutputDocSizeBytes" : 104857600,
		"internalLookupStageIntermediateDocumentMaxSizeBytes" : 104857600,
		"internalDocumentSourceGroupMaxMemoryBytes" : 104857600,
		"internalQueryMaxBlockingSortMemoryUsageBytes" : 104857600,
		"internalQueryProhibitBlockingMergeOnMongoS" : 0,
		"internalQueryMaxAddToSetBytes" : 104857600,
		"internalDocumentSourceSetWindowFieldsMaxMemoryBytes" : 104857600
	},
	"command" : {
		"aggregate" : "fuzzer_coll",
		"pipeline" : [
			{
				"$sort" : {
					"date" : 1,
					"obj.obj.obj.obj.str" : 1,
					"obj.obj.obj.num" : -1,
					"_id" : 1
				}
			},
			{
				"$densify" : {
					"field" : "obj.obj.obj.date",
					"range" : {
						"step" : 1,
						"unit" : "hour",
						"bounds" : "full"
					}
				}
			},
			{
				"$sort" : {
					"_id" : 1
				}
			},
			{
				"$sortByCount" : {
					"$firstN" : {
						"n" : 7,
						"input" : [ ]
					}
				}
			}
		],
		"cursor" : {
 
		},
		"maxTimeMS" : 30000,
		"$db" : "fuzzer"
	},
	"ok" : 1
}
 

 
Scenario 2
Input

const documentList = [    
{_id: 409, "date": new Date("2019-12-03T01:15:43.084Z")}, // 66    {_id: 379, "date": new Date("2019-12-03T12:14:31.296Z")}, // 61
];
 
const aggregationList = [    
[{$sort: {"date": 1}}, {$densify: {field: "date", range: {step: 1, unit: "hour", bounds: "full"}}}, {$count: "count"}], // 2233  repros
];

Results

// There is index on {"date": 1} 
 
// Result with optimization enabled (incorrect):
[ { "count" : 2 } ]
 
 // Results with optimization disabled (correct): 
[ { "count" : 12 } ]

Plan with optimization enabled:

WARNING: explain plans may not be the same ones as the plan chosen in the real query!
Explain method for version "5.3.0-alpha0" aggregation:{
	"explainVersion" : "1",
	"stages" : [
		{
			"$cursor" : {
				"queryPlanner" : {
					"namespace" : "fuzzer.fuzzer_coll",
					"indexFilterSet" : false,
					"parsedQuery" : {
 
					},
					"queryHash" : "4DA3A4C7",
					"planCacheKey" : "4DA3A4C7",
					"maxIndexedOrSolutionsReached" : false,
					"maxIndexedAndSolutionsReached" : false,
					"maxScansToExplodeReached" : false,
					"winningPlan" : {
						"stage" : "COUNT_SCAN",
						"keyPattern" : {
							"date" : -1
						},
						"indexName" : "date_-1",
						"isMultiKey" : false,
						"multiKeyPaths" : {
							"date" : [ ]
						},
						"isUnique" : false,
						"isSparse" : false,
						"isPartial" : false,
						"indexVersion" : 2,
						"indexBounds" : {
							"startKey" : {
								"date" : { "$maxKey" : 1 }
							},
							"startKeyInclusive" : true,
							"endKey" : {
								"date" : { "$minKey" : 1 }
							},
							"endKeyInclusive" : true
						}
					},
					"rejectedPlans" : [ ]
				},
				"executionStats" : {
					"executionSuccess" : true,
					"nReturned" : 2,
					"executionTimeMillis" : 5,
					"totalKeysExamined" : 3,
					"totalDocsExamined" : 0,
					"executionStages" : {
						"stage" : "COUNT_SCAN",
						"nReturned" : 2,
						"executionTimeMillisEstimate" : 0,
						"works" : 3,
						"advanced" : 2,
						"needTime" : 0,
						"needYield" : 0,
						"saveState" : 1,
						"restoreState" : 1,
						"isEOF" : 1,
						"keysExamined" : 3,
						"keyPattern" : {
							"date" : -1
						},
						"indexName" : "date_-1",
						"isMultiKey" : false,
						"multiKeyPaths" : {
							"date" : [ ]
						},
						"isUnique" : false,
						"isSparse" : false,
						"isPartial" : false,
						"indexVersion" : 2,
						"indexBounds" : {
							"startKey" : {
								"date" : { "$maxKey" : 1 }
							},
							"startKeyInclusive" : true,
							"endKey" : {
								"date" : { "$minKey" : 1 }
							},
							"endKeyInclusive" : true
						}
					},
					"allPlansExecution" : [ ]
				}
			},
			"nReturned" : NumberLong(2),
			"executionTimeMillisEstimate" : NumberLong(0)
		},
		{
			"$_internalDensify" : {
				"field" : "date",
				"partitionByFields" : [ ],
				"range" : {
					"step" : 1,
					"bounds" : "full",
					"unit" : "hour"
				}
			},
			"nReturned" : NumberLong(2),
			"executionTimeMillisEstimate" : NumberLong(0)
		},
		{
			"$group" : {
				"_id" : {
					"$const" : null
				},
				"count" : {
					"$sum" : {
						"$const" : 1
					}
				}
			},
			"maxAccumulatorMemoryUsageBytes" : {
				"count" : NumberLong(80)
			},
			"totalOutputDataSizeBytes" : NumberLong(229),
			"usedDisk" : true,
			"spills" : NumberLong(1),
			"nReturned" : NumberLong(1),
			"executionTimeMillisEstimate" : NumberLong(0)
		},
		{
			"$project" : {
				"count" : true,
				"_id" : false
			},
			"nReturned" : NumberLong(1),
			"executionTimeMillisEstimate" : NumberLong(0)
		}
	],
	"serverInfo" : {
		"host" : "ip-10-122-11-183",
		"port" : 20020,
		"version" : "5.3.0-alpha0",
		"gitVersion" : "unknown"
	},
	"serverParameters" : {
		"internalQueryFacetBufferSizeBytes" : 104857600,
		"internalQueryFacetMaxOutputDocSizeBytes" : 104857600,
		"internalLookupStageIntermediateDocumentMaxSizeBytes" : 104857600,
		"internalDocumentSourceGroupMaxMemoryBytes" : 104857600,
		"internalQueryMaxBlockingSortMemoryUsageBytes" : 104857600,
		"internalQueryProhibitBlockingMergeOnMongoS" : 0,
		"internalQueryMaxAddToSetBytes" : 104857600,
		"internalDocumentSourceSetWindowFieldsMaxMemoryBytes" : 104857600
	},
	"command" : {
		"aggregate" : "fuzzer_coll",
		"pipeline" : [
			{
				"$sort" : {
					"date" : 1
				}
			},
			{
				"$densify" : {
					"field" : "date",
					"range" : {
						"step" : 1,
						"unit" : "hour",
						"bounds" : "full"
					}
				}
			},
			{
				"$count" : "count"
			}
		],
		"cursor" : {
 
		},
		"maxTimeMS" : 30000,
		"$db" : "fuzzer"
	},
	"ok" : 1
}



 Comments   
Comment by Githook User [ 03/Feb/22 ]

Author:

{'name': 'Ted Tuckman', 'email': 'ted.tuckman@mongodb.com', 'username': 'TedTuckman'}

Message: SERVER-63145 Add proper dependency tracking to $densify

(Cherry-picked from commit 51e13de10058a9048b71d3be179116df117b5c70)
Branch: v5.2
https://github.com/mongodb/mongo/commit/0b22fc40930c8fec72c80e18bc9039a65cacbea3

Comment by Githook User [ 01/Feb/22 ]

Author:

{'name': 'Ted Tuckman', 'email': 'ted.tuckman@mongodb.com', 'username': 'TedTuckman'}

Message: SERVER-63145 Add proper dependency tracking to $densify
Branch: master
https://github.com/mongodb/mongo/commit/51e13de10058a9048b71d3be179116df117b5c70

Generated at Thu Feb 08 05:57:00 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.