[SERVER-28454] Make more of an effort to move $match stages through $redact stages Created: 23/Mar/17  Updated: 06/Dec/22

Status: Backlog
Project: Core Server
Component/s: Aggregation Framework
Affects Version/s: None
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Asya Kamsky Assignee: Backlog - Query Optimization
Resolution: Unresolved Votes: 0
Labels: optimization, query-44-grooming
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Assigned Teams:
Query Optimization
Operating System: ALL
Participants:

 Description   

A $redact stage can copy a portion of a subsequent $match before the $redact, but our current optimization strategy does not attempt to move the new, copied match further forward in the pipeline.

Original Description

// correct
db.Account.explain().aggregate({$lookup:{"from" : "Opp", "as" : "Opp", "localField" : "Id", "foreignField" : "AccountId"}}, {$match:{ID_18__c:"001A0000008XXXXXXX"}})
{
	"stages" : [
		{
			"$cursor" : {
				"query" : {
					"ID_18__c" : "001A0000008XXXXXXX"
				},
				"queryPlanner" : {
					"plannerVersion" : 1,
					"namespace" : "source_staging.Account",
					"indexFilterSet" : false,
					"parsedQuery" : {
						"ID_18__c" : {
							"$eq" : "001A0000008XXXXXXX"
						}
					},
					"winningPlan" : {
						"stage" : "FETCH",
						"inputStage" : {
							"stage" : "IXSCAN",
							"keyPattern" : {
								"ID_18__c" : 1
							},
							"indexName" : "ID_18__c_1",
							"isMultiKey" : false,
							"multiKeyPaths" : {
								"ID_18__c" : [ ]
							},
							"isUnique" : false,
							"isSparse" : false,
							"isPartial" : false,
							"indexVersion" : 2,
							"direction" : "forward",
							"indexBounds" : {
								"ID_18__c" : [
									"[\"001A0000008XXXXXXX\", \"001A0000008XXXXXXX\"]"
								]
							}
						}
					},
					"rejectedPlans" : [ ]
				}
			}
		},
		{
			"$lookup" : {
				"from" : "Opp",
				"as" : "Opp",
				"localField" : "Id",
				"foreignField" : "AccountId"
			}
		}
	],
	"ok" : 1
}

//correct
db.Account.explain().aggregate( {$redact:"$$KEEP"},{$lookup:{"from" : "Opp", "as" : "Opp", "localField" : "Id", "foreignField" : "AccountId"}}, {$match:{ID_18__c:"001A0000008XXXXXXX"}})
{
	"stages" : [
		{
			"$cursor" : {
				"query" : {
					"ID_18__c" : "001A0000008XXXXXXX"
				},
				"queryPlanner" : {
					"plannerVersion" : 1,
					"namespace" : "source_staging.Account",
					"indexFilterSet" : false,
					"parsedQuery" : {
						"ID_18__c" : {
							"$eq" : "001A0000008XXXXXXX"
						}
					},
					"winningPlan" : {
						"stage" : "FETCH",
						"inputStage" : {
							"stage" : "IXSCAN",
							"keyPattern" : {
								"ID_18__c" : 1
							},
							"indexName" : "ID_18__c_1",
							"isMultiKey" : false,
							"multiKeyPaths" : {
								"ID_18__c" : [ ]
							},
							"isUnique" : false,
							"isSparse" : false,
							"isPartial" : false,
							"indexVersion" : 2,
							"direction" : "forward",
							"indexBounds" : {
								"ID_18__c" : [
									"[\"001A0000008XXXXXXX\", \"001A0000008XXXXXXX\"]"
								]
							}
						}
					},
					"rejectedPlans" : [ ]
				}
			}
		},
		{
			"$match" : {
				"ID_18__c" : "001A0000008XXXXXXX"
			}
		},
		{
			"$redact" : "$$KEEP"
		},
		{
			"$match" : {
				"ID_18__c" : "001A0000008XXXXXXX"
			}
		},
		{
			"$lookup" : {
				"from" : "Opp",
				"as" : "Opp",
				"localField" : "Id",
				"foreignField" : "AccountId"
			}
		}
	],
	"ok" : 1
}

// incorrect
db.Account.explain().aggregate({$lookup:{"from" : "Opp", "as" : "Opp", "localField" : "Id", "foreignField" : "AccountId"}}, {$redact:"$$KEEP"}, {$match:{ID_18__c:"001A0000008XXXXXXX"}})
{
	"stages" : [
		{
			"$cursor" : {
				"query" : {
 
				},
				"queryPlanner" : {
					"plannerVersion" : 1,
					"namespace" : "source_staging.Account",
					"indexFilterSet" : false,
					"parsedQuery" : {
 
					},
					"winningPlan" : {
						"stage" : "COLLSCAN",
						"direction" : "forward"
					},
					"rejectedPlans" : [ ]
				}
			}
		},
		{
			"$match" : {
				"ID_18__c" : "001A0000008XXXXXXX"
			}
		},
		{
			"$lookup" : {
				"from" : "Opp",
				"as" : "Opp",
				"localField" : "Id",
				"foreignField" : "AccountId"
			}
		},
		{
			"$match" : {
				"ID_18__c" : "001A0000008XXXXXXX"
			}
		},
		{
			"$redact" : "$$KEEP"
		},
		{
			"$match" : {
				"ID_18__c" : "001A0000008XXXXXXX"
			}
		}
	],
	"ok" : 1
}

When the sequence $lookup is followed by $redact and then $match, even though $match gets moved ahead of $lookup, it's not sent to the query system.



 Comments   
Comment by Charlie Swanson [ 31/Mar/17 ]

So redbeard0531 is correct in identifying where an additional $match is generated, but that doesn't explain why there are 3 of them. The reason we get 3 $matches is because we actually optimize the pipeline twice: once just after parsing, then once again after adding the initial $cursor stage. This explains why there are 3 matches (one new one for every call to optimizePipeline()), and also why the $match doesn't get absorbed by the initial $cursor stage (we've already done query planning by then, and don't have a good mechanism for absorbing an additional filter or anything like that).

Comment by Mathias Stearn [ 23/Mar/17 ]

This is the problem: https://github.com/mongodb/mongo/blob/82b1674/src/mongo/db/pipeline/document_source_redact.cpp#L86-L88

Comment by Asya Kamsky [ 23/Mar/17 ]

In 3.5.4+ where $addFields is correctly swapped with $match I see the same issue with it when $redact is after it:

db.Account.explain().aggregate( {$addFields:{x:"$foo"}},{$redact:"$$KEEP"}, {$match:{ID_18__c:"001A0000008XXXXXXX"}})
{
	"stages" : [
		{
			"$cursor" : {
				"query" : {
 
				},
				"queryPlanner" : {
					"plannerVersion" : 1,
					"namespace" : "test.Account",
					"indexFilterSet" : false,
					"parsedQuery" : {
 
					},
					"winningPlan" : {
						"stage" : "EOF"
					},
					"rejectedPlans" : [ ]
				}
			}
		},
		{
			"$match" : {
				"ID_18__c" : "001A0000008XXXXXXX"
			}
		},
		{
			"$addFields" : {
				"x" : "$foo"
			}
		},
		{
			"$match" : {
				"ID_18__c" : "001A0000008XXXXXXX"
			}
		},
		{
			"$redact" : "$$KEEP"
		},
		{
			"$match" : {
				"ID_18__c" : "001A0000008XXXXXXX"
			}
		}
	],
	"ok" : 1,
	"operationTime" : Timestamp(0, 0)
}

Comment by Asya Kamsky [ 23/Mar/17 ]

It's not specific to $lookup - putting an "$unwind" into the same spot as "$lookup" will also cause the same result.

Generated at Thu Feb 08 04:18:11 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.