[SERVER-37780] Query index bounds are incorrect when _id used in compound index Created: 26/Oct/18  Updated: 29/Oct/18  Resolved: 29/Oct/18

Status: Closed
Project: Core Server
Component/s: Index Maintenance, Querying
Affects Version/s: 3.6.8
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Mike Cantrell Assignee: Danny Hatcher (Inactive)
Resolution: Done Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Operating System: ALL
Steps To Reproduce:

Import the data from the sample export

gunzip -c records.json.gz |mongoimport --db index-test --collection workspaceFile

Create indexes

    
db.workspaceFile.createIndex(
  {
            "_id" : 1, // <---- this seems to cause a problem, see executionStats below
            "workspaceId" : 1,
            "parent._id" : 1
  },
  {name:'testIndexA'}
);
db.workspaceFile.createIndex(
  {
            "workspaceId" : 1,
            "parent._id" : 1
  },
  {name:'testIndexB'}
);   

 

Define the query parameters

 
var workspaceId = "ws-3";
var fileId = ObjectId("5bd33d003bff4aac38456157");
var fileFamilyQuery = {
  "$and": [{
      "workspaceId": workspaceId,
      "$or": [{
          "_id": fileId,
      }, {
          "parent._id": fileId
      }]
  }]
};

 

Run query against testIndexA

db.workspaceFile.find(fileFamilyQuery).sort({"_id":-1}).hint("testIndexA").explain('executionStats')

Inspect the indexBounds and see that the parameters are not set and the number of keys examined is too high

  
"indexBounds" : {
	"_id" : [
		"[MaxKey, MinKey]"
	],
	"workspaceId" : [
		"[MaxKey, MinKey]"
	],
	"parent._id" : [
		"[MaxKey, MinKey]"
	]
},
"keysExamined" : 500005,

Run against testIndexB

 db.workspaceFile.find(fileFamilyQuery).sort({"_id":-1}).hint("testIndexB").explain('executionStats')

Inspect the indexBounds and see the parameters are partially at least partially set (but still missing parent._id)

"indexBounds" : {
  "workspaceId" : [
    "[\"ws-3\", \"ws-3\"]"
  ],
  "parent._id" : [
    "[MinKey, MaxKey]"
  ]
},
"keysExamined" : 2, 

 

 

Participants:

 Description   

When I include _id inside a compound index, the query doesn't appear to bind the proper values to the index bounds for the query plan. I'm unsure if this is a bug or something I'm missing. Related StackOverflow Question

I've saved some sample data to help reproduce for this issue available on Google Drive since it's too large for a Jira attachment.



 Comments   
Comment by Mike Cantrell [ 26/Oct/18 ]

That's very helpful! Thanks for the explanation. Please go ahead and close this and have a great weekend

Comment by Danny Hatcher (Inactive) [ 26/Oct/18 ]

Hello Mike,

Thank you for providing such detail; it has helped me understand where you are coming from. I believe the problem here is that the "hint" is overriding the optimal index selection.

When specifying a hint as part of the query process, the query will use that hinted index to satisfy the query. However, in cases with an $or clause, MongoDB can actually use multiple indexes to satisfy the multiple paths. If we run an explain against your data without hinting toward a specific index, we can see what I mean:

	"executionStats" : {
		"executionSuccess" : true,
		"nReturned" : 2,
		"executionTimeMillis" : 2,
		"totalKeysExamined" : 2,
		"totalDocsExamined" : 2,
		"executionStages" : {
			"stage" : "SORT",
			"nReturned" : 2,
			"executionTimeMillisEstimate" : 0,
			"works" : 8,
			"advanced" : 2,
			"needTime" : 5,
			"needYield" : 0,
			"saveState" : 0,
			"restoreState" : 0,
			"isEOF" : 1,
			"invalidates" : 0,
			"sortPattern" : {
				"_id" : -1
			},
			"memUsage" : 602,
			"memLimit" : 33554432,
			"inputStage" : {
				"stage" : "SORT_KEY_GENERATOR",
				"nReturned" : 2,
				"executionTimeMillisEstimate" : 0,
				"works" : 5,
				"advanced" : 2,
				"needTime" : 2,
				"needYield" : 0,
				"saveState" : 0,
				"restoreState" : 0,
				"isEOF" : 1,
				"invalidates" : 0,
				"inputStage" : {
					"stage" : "FETCH",
					"filter" : {
						"workspaceId" : {
							"$eq" : "ws-3"
						}
					},
					"nReturned" : 2,
					"executionTimeMillisEstimate" : 0,
					"works" : 4,
					"advanced" : 2,
					"needTime" : 1,
					"needYield" : 0,
					"saveState" : 0,
					"restoreState" : 0,
					"isEOF" : 1,
					"invalidates" : 0,
					"docsExamined" : 2,
					"alreadyHasObj" : 0,
					"inputStage" : {
						"stage" : "OR",
						"nReturned" : 2,
						"executionTimeMillisEstimate" : 0,
						"works" : 4,
						"advanced" : 2,
						"needTime" : 1,
						"needYield" : 0,
						"saveState" : 0,
						"restoreState" : 0,
						"isEOF" : 1,
						"invalidates" : 0,
						"dupsTested" : 2,
						"dupsDropped" : 0,
						"recordIdsForgotten" : 0,
						"inputStages" : [
							{
								"stage" : "IXSCAN",
								"nReturned" : 1,
								"executionTimeMillisEstimate" : 0,
								"works" : 2,
								"advanced" : 1,
								"needTime" : 0,
								"needYield" : 0,
								"saveState" : 0,
								"restoreState" : 0,
								"isEOF" : 1,
								"invalidates" : 0,
								"keyPattern" : {
									"workspaceId" : 1,
									"parent._id" : 1
								},
								"indexName" : "testIndexB",
								"isMultiKey" : false,
								"multiKeyPaths" : {
									"workspaceId" : [ ],
									"parent._id" : [ ]
								},
								"isUnique" : false,
								"isSparse" : false,
								"isPartial" : false,
								"indexVersion" : 2,
								"direction" : "forward",
								"indexBounds" : {
									"workspaceId" : [
										"[\"ws-3\", \"ws-3\"]"
									],
									"parent._id" : [
										"[ObjectId('5bd33d003bff4aac38456157'), ObjectId('5bd33d003bff4aac38456157')]"
									]
								},
								"keysExamined" : 1,
								"seeks" : 1,
								"dupsTested" : 0,
								"dupsDropped" : 0,
								"seenInvalidated" : 0
							},
							{
								"stage" : "IXSCAN",
								"nReturned" : 1,
								"executionTimeMillisEstimate" : 0,
								"works" : 2,
								"advanced" : 1,
								"needTime" : 0,
								"needYield" : 0,
								"saveState" : 0,
								"restoreState" : 0,
								"isEOF" : 1,
								"invalidates" : 0,
								"keyPattern" : {
									"_id" : 1
								},
								"indexName" : "_id_",
								"isMultiKey" : false,
								"multiKeyPaths" : {
									"_id" : [ ]
								},
								"isUnique" : true,
								"isSparse" : false,
								"isPartial" : false,
								"indexVersion" : 2,
								"direction" : "forward",
								"indexBounds" : {
									"_id" : [
										"[ObjectId('5bd33d003bff4aac38456157'), ObjectId('5bd33d003bff4aac38456157')]"
									]
								},
								"keysExamined" : 1,
								"seeks" : 1,
								"dupsTested" : 0,
								"dupsDropped" : 0,
								"seenInvalidated" : 0
							}
						]
					}
				}
			}
		}
	},

Two different indexes are used to satisfy different aspects of the $or clause. As you can see, the bounds are tightly defined and very few keys are examined to return the result set.

But in the above example, your testIndexA isn't being used at all. Let's say that you really wanted those specific keys to be used in a single index but still be performant. The following should help accomplish that goal:

db.workspaceFile.createIndex({"workspaceId":1, "parent._id" : 1, "_id" : 1}, {name:'testIndexC'})

With this index created, the following plan occurs:

"executionStats" : {
		"executionSuccess" : true,
		"nReturned" : 2,
		"executionTimeMillis" : 2,
		"totalKeysExamined" : 2,
		"totalDocsExamined" : 2,
		"executionStages" : {
			"stage" : "FETCH",
			"filter" : {
				"workspaceId" : {
					"$eq" : "ws-3"
				}
			},
			"nReturned" : 2,
			"executionTimeMillisEstimate" : 0,
			"works" : 8,
			"advanced" : 2,
			"needTime" : 4,
			"needYield" : 0,
			"saveState" : 0,
			"restoreState" : 0,
			"isEOF" : 1,
			"invalidates" : 0,
			"docsExamined" : 2,
			"alreadyHasObj" : 0,
			"inputStage" : {
				"stage" : "SORT_MERGE",
				"nReturned" : 2,
				"executionTimeMillisEstimate" : 0,
				"works" : 6,
				"advanced" : 2,
				"needTime" : 4,
				"needYield" : 0,
				"saveState" : 0,
				"restoreState" : 0,
				"isEOF" : 1,
				"invalidates" : 0,
				"sortPattern" : {
					"_id" : -1
				},
				"dupsTested" : 2,
				"dupsDropped" : 0,
				"inputStages" : [
					{
						"stage" : "IXSCAN",
						"nReturned" : 1,
						"executionTimeMillisEstimate" : 0,
						"works" : 2,
						"advanced" : 1,
						"needTime" : 0,
						"needYield" : 0,
						"saveState" : 0,
						"restoreState" : 0,
						"isEOF" : 1,
						"invalidates" : 0,
						"keyPattern" : {
							"workspaceId" : 1,
							"parent._id" : 1,
							"_id" : 1
						},
						"indexName" : "testIndexC",
						"isMultiKey" : false,
						"multiKeyPaths" : {
							"workspaceId" : [ ],
							"parent._id" : [ ],
							"_id" : [ ]
						},
						"isUnique" : false,
						"isSparse" : false,
						"isPartial" : false,
						"indexVersion" : 2,
						"direction" : "backward",
						"indexBounds" : {
							"workspaceId" : [
								"[\"ws-3\", \"ws-3\"]"
							],
							"parent._id" : [
								"[ObjectId('5bd33d003bff4aac38456157'), ObjectId('5bd33d003bff4aac38456157')]"
							],
							"_id" : [
								"[MaxKey, MinKey]"
							]
						},
						"keysExamined" : 1,
						"seeks" : 1,
						"dupsTested" : 0,
						"dupsDropped" : 0,
						"seenInvalidated" : 0
					},
					{
						"stage" : "IXSCAN",
						"nReturned" : 1,
						"executionTimeMillisEstimate" : 0,
						"works" : 2,
						"advanced" : 1,
						"needTime" : 0,
						"needYield" : 0,
						"saveState" : 0,
						"restoreState" : 0,
						"isEOF" : 1,
						"invalidates" : 0,
						"keyPattern" : {
							"_id" : 1
						},
						"indexName" : "_id_",
						"isMultiKey" : false,
						"multiKeyPaths" : {
							"_id" : [ ]
						},
						"isUnique" : true,
						"isSparse" : false,
						"isPartial" : false,
						"indexVersion" : 2,
						"direction" : "backward",
						"indexBounds" : {
							"_id" : [
								"[ObjectId('5bd33d003bff4aac38456157'), ObjectId('5bd33d003bff4aac38456157')]"
							]
						},
						"keysExamined" : 1,
						"seeks" : 1,
						"dupsTested" : 0,
						"dupsDropped" : 0,
						"seenInvalidated" : 0
					}
				]
			}
		}
	},

In this case, "testIndexC" is used to help fulfill the query. Because _id has been moved to the back of the index, the first two keys are the ones actually bound to find their matches for that part of the $or clause. When moving to the other side of the $or, the default _id index is used.

If you're having problems figuring out optimal index configurations, a good first step is simply moving keys around inside the index. As we've seen here, order definitely matters. Additionally, you can use db.setLogLevel(5, "query") to output the full query planner to the log.

Does all of the above make sense?

Thanks,

Danny

Generated at Thu Feb 08 04:47:00 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.