[SERVER-27536] In the case of plan ranking ties, prefer a plan using a partial index Created: 28/Dec/16  Updated: 06/Dec/22

Status: Backlog
Project: Core Server
Component/s: Querying
Affects Version/s: None
Fix Version/s: None

Type: Improvement Priority: Major - P3
Reporter: Asya Kamsky Assignee: Backlog - Query Optimization
Resolution: Unresolved Votes: 0
Labels: query-44-grooming
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Assigned Teams:
Query Optimization
Participants:

 Description   

db.temp.getIndexes()
[
	{
		"v" : 2,
		"key" : {
			"_id" : 1
		},
		"name" : "_id_",
		"ns" : "test.temp"
	},
	{
		"v" : 2,
		"key" : {
			"a" : 1,
			"b" : 1
		},
		"name" : "a_1_b_1",
		"ns" : "test.temp"
	},
	{
		"v" : 2,
		"key" : {
			"a" : 1,
			"c" : 1
		},
		"name" : "a_1_c_1",
		"ns" : "test.temp",
		"partialFilterExpression" : {
			"a" : 0
		}
	}
]

Here are queries and indexes they result in:

db.temp.explain().find({a:0},{_id:0,b:1})
{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "test.temp",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"a" : {
				"$eq" : 0
			}
		},
		"winningPlan" : {
			"stage" : "PROJECTION",
			"transformBy" : {
				"_id" : 0,
				"b" : 1
			},
			"inputStage" : {
				"stage" : "IXSCAN",
				"keyPattern" : {
					"a" : 1,
					"b" : 1
				},
				"indexName" : "a_1_b_1",
				"isMultiKey" : false,
				"multiKeyPaths" : {
					"a" : [ ],
					"b" : [ ]
				},
				"isUnique" : false,
				"isSparse" : false,
				"isPartial" : false,
				"indexVersion" : 2,
				"direction" : "forward",
				"indexBounds" : {
					"a" : [
						"[0.0, 0.0]"
					],
					"b" : [
						"[MinKey, MaxKey]"
					]
				}
			}
		},
		"rejectedPlans" : [
			{
				"stage" : "PROJECTION",
				"transformBy" : {
					"_id" : 0,
					"b" : 1
				},
				"inputStage" : {
					"stage" : "FETCH",
					"inputStage" : {
						"stage" : "IXSCAN",
						"keyPattern" : {
							"a" : 1,
							"c" : 1
						},
						"indexName" : "a_1_c_1",
						"isMultiKey" : false,
						"multiKeyPaths" : {
							"a" : [ ],
							"c" : [ ]
						},
						"isUnique" : false,
						"isSparse" : false,
						"isPartial" : true,
						"indexVersion" : 2,
						"direction" : "forward",
						"indexBounds" : {
							"a" : [
								"[0.0, 0.0]"
							],
							"c" : [
								"[MinKey, MaxKey]"
							]
						}
					}
				}
			}
		]
	},
	"serverInfo" : {
		"host" : "Asyas-MacBook-Pro-2.local",
		"port" : 27017,
		"version" : "3.4.1",
		"gitVersion" : "5e103c4f5583e2566a45d740225dc250baacfbd7"
	},
	"ok" : 1
}
test@127.0.0.1:27017(3.4.1) > db.temp.explain().find({a:0},{_id:0,c:1})
{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "test.temp",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"a" : {
				"$eq" : 0
			}
		},
		"winningPlan" : {
			"stage" : "PROJECTION",
			"transformBy" : {
				"_id" : 0,
				"c" : 1
			},
			"inputStage" : {
				"stage" : "IXSCAN",
				"keyPattern" : {
					"a" : 1,
					"c" : 1
				},
				"indexName" : "a_1_c_1",
				"isMultiKey" : false,
				"multiKeyPaths" : {
					"a" : [ ],
					"c" : [ ]
				},
				"isUnique" : false,
				"isSparse" : false,
				"isPartial" : true,
				"indexVersion" : 2,
				"direction" : "forward",
				"indexBounds" : {
					"a" : [
						"[0.0, 0.0]"
					],
					"c" : [
						"[MinKey, MaxKey]"
					]
				}
			}
		},
		"rejectedPlans" : [
			{
				"stage" : "PROJECTION",
				"transformBy" : {
					"_id" : 0,
					"c" : 1
				},
				"inputStage" : {
					"stage" : "FETCH",
					"inputStage" : {
						"stage" : "IXSCAN",
						"keyPattern" : {
							"a" : 1,
							"b" : 1
						},
						"indexName" : "a_1_b_1",
						"isMultiKey" : false,
						"multiKeyPaths" : {
							"a" : [ ],
							"b" : [ ]
						},
						"isUnique" : false,
						"isSparse" : false,
						"isPartial" : false,
						"indexVersion" : 2,
						"direction" : "forward",
						"indexBounds" : {
							"a" : [
								"[0.0, 0.0]"
							],
							"b" : [
								"[MinKey, MaxKey]"
							]
						}
					}
				}
			}
		]
	},
	"serverInfo" : {
		"host" : "Asyas-MacBook-Pro-2.local",
		"port" : 27017,
		"version" : "3.4.1",
		"gitVersion" : "5e103c4f5583e2566a45d740225dc250baacfbd7"
	},
	"ok" : 1
}
test@127.0.0.1:27017(3.4.1) > db.temp.explain().find({a:0},{_id:0,x:1,c:1})
{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "test.temp",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"a" : {
				"$eq" : 0
			}
		},
		"winningPlan" : {
			"stage" : "PROJECTION",
			"transformBy" : {
				"_id" : 0,
				"x" : 1,
				"c" : 1
			},
			"inputStage" : {
				"stage" : "FETCH",
				"inputStage" : {
					"stage" : "IXSCAN",
					"keyPattern" : {
						"a" : 1,
						"b" : 1
					},
					"indexName" : "a_1_b_1",
					"isMultiKey" : false,
					"multiKeyPaths" : {
						"a" : [ ],
						"b" : [ ]
					},
					"isUnique" : false,
					"isSparse" : false,
					"isPartial" : false,
					"indexVersion" : 2,
					"direction" : "forward",
					"indexBounds" : {
						"a" : [
							"[0.0, 0.0]"
						],
						"b" : [
							"[MinKey, MaxKey]"
						]
					}
				}
			}
		},
		"rejectedPlans" : [
			{
				"stage" : "PROJECTION",
				"transformBy" : {
					"_id" : 0,
					"x" : 1,
					"c" : 1
				},
				"inputStage" : {
					"stage" : "FETCH",
					"inputStage" : {
						"stage" : "IXSCAN",
						"keyPattern" : {
							"a" : 1,
							"c" : 1
						},
						"indexName" : "a_1_c_1",
						"isMultiKey" : false,
						"multiKeyPaths" : {
							"a" : [ ],
							"c" : [ ]
						},
						"isUnique" : false,
						"isSparse" : false,
						"isPartial" : true,
						"indexVersion" : 2,
						"direction" : "forward",
						"indexBounds" : {
							"a" : [
								"[0.0, 0.0]"
							],
							"c" : [
								"[MinKey, MaxKey]"
							]
						}
					}
				}
			}
		]
	},
	"serverInfo" : {
		"host" : "Asyas-MacBook-Pro-2.local",
		"port" : 27017,
		"version" : "3.4.1",
		"gitVersion" : "5e103c4f5583e2566a45d740225dc250baacfbd7"
	},
	"ok" : 1
}

I'm completely confused by the last one - it's uncovered compared to the one above but I don't see why it would use a:1,b:1 rather than a:1,c:1 which is "just as good" but is partial index with expression match and hence a smaller index and should be preferred.



 Comments   
Comment by David Storch [ 06/Jan/17 ]

Ah, I also see that partial index selection would allow this to be a covered index query.

That clearly seems to be an error - we should prefer a covered plan to non-covered, yes?

The plan ranker has tie-breaking logic which should cause us to always prefer a covered plan to a non-covered plan in the case of ties. If we are preferring a non-covered plan, all else being equal in terms of our estimate of plan cost, then this is definitively a bug. However, I don't see evidence of this occurring in the examples above. Specifically, in the last case, neither plan is covered (you can see this in the explain since both plans have a FETCH stage).

I expect smaller index to be more likely to fit in RAM.

We could consider adding a new tie-breaker that favors partial indexes in the case of ties, though I'd be hesitant to make this change without evidence that this will have a net positive effect on query optimization. Since we don't have a cost model, preferring a partial index would just be a guess about which index scan will have lower I/O costs.

Comment by Asya Kamsky [ 04/Jan/17 ]

edit doing more research - this may be related to comment in SERVER-26580 and if so I will either close this ticket or re-purpose it for part of that ticket.

Comment by Asya Kamsky [ 04/Jan/17 ]

Ah, I also see that partial index selection would allow this to be a covered index query.

That clearly seems to be an error - we should prefer a covered plan to non-covered, yes?

Comment by Asya Kamsky [ 04/Jan/17 ]

I expect smaller index to be more likely to fit in RAM.

I suppose I should do some quick estimates/measures, but in cases where a partial index exists and a full index exists, I would think it would be preferable to use partial index, but I guess I need more data to back that up.

Comment by Charlie Swanson [ 03/Jan/17 ]

asya why do you expect a smaller index to be faster? Both indexes being considered in the last case are equally selective. Did you mean that a partial index may be more likely to be in memory?

Right now we have no planning logic that would attempt to prefer a partial index for ranking purposes.

Generated at Thu Feb 08 04:15:26 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.