[SERVER-7354] Index with enbedded document Created: 15/Oct/12  Updated: 15/Feb/13  Resolved: 15/Oct/12

Status: Closed
Project: Core Server
Component/s: Index Maintenance, Performance
Affects Version/s: 2.2.0
Fix Version/s: None

Type: Improvement Priority: Major - P3
Reporter: Lucas Pouzac Assignee: Aaron Staple
Resolution: Done Votes: 0
Labels: indexing, performance
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified
Environment:

Ubuntu 12.04 LTS


Issue Links:
Related
related to SERVER-7509 Combine index ranges for dotted field... Closed
Participants:

 Description   

Explain function show a difference of index's usage with embedded document. nscanned is increase with enbedded document.

Example :

Embedded document

 
db.test.insert({"code" : "000001", doc : {"code1" : "000001", "type" : [ "1" ,"3" ]}});
db.test.insert({"code" : "000002", doc : {"code1" : "000001", "type" : [ "1" ]}});
db.test.insert({"code" : "000003", doc : {"code1" : "000001", "type" : [ "1", "2"] }});
db.test.insert({"code" : "000004", doc : {"code1" : "000001", "type" : [ "2" ]}});
db.test.insert({"code" : "000005", doc : {"code1" : "000001", "type" : [ ]}});
db.test.insert({"code" : "000006", doc : {"code1" : "000001", "type" : [ "3" ]}});
db.test.insert({"code" : "000007", doc : {"code1" : "000001" }});
db.test.insert({"code" : "000001", doc : {"code1" : "000002", "type" : [ "1" ,"3" ]}});
db.test.insert({"code" : "000002", doc : {"code1" : "000002", "type" : [ "1" ]}});
db.test.insert({"code" : "000003", doc : {"code1" : "000002", "type" : [ "1", "2"] }});
db.test.insert({"code" : "000004", doc : {"code1" : "000002", "type" : [ "2" ]}});
db.test.insert({"code" : "000005", doc : {"code1" : "000002", "type" : [ ]}});
db.test.insert({"code" : "000006", doc : {"code1" : "000002", "type" : [ "3" ]}});
db.test.insert({"code" : "000007", doc : {"code1" : "000002" }});
db.test.insert({"code" : "000001", doc : {"code1" : "000003", "type" : [ "1" ,"3" ]}});
db.test.insert({"code" : "000002", doc : {"code1" : "000003", "type" : [ "1" ]}});
db.test.insert({"code" : "000003", doc : {"code1" : "000003", "type" : [ "1", "2"] }});
db.test.insert({"code" : "000004", doc : {"code1" : "000003", "type" : [ "2" ]}});
db.test.insert({"code" : "000005", doc : {"code1" : "000003", "type" : [ ]}});
db.test.insert({"code" : "000006", doc : {"code1" : "000003", "type" : [ "3" ]}});
db.test.insert({"code" : "000007", doc : {"code1" : "000003" }});
 
db.test.ensureIndex( { "doc.code1": 1, "doc.type": -1, "code": -1 }, { unique: false, sparse: false, background: true, name: "index_test"} )
 
db.test.find( { "$or":[ { "doc.code1" : "000001" }, { "doc.code1" : "000003" } ], "doc.type" : "2" } ).explain()
{
	"clauses" : [
		{
			"cursor" : "BtreeCursor index_test",
			"isMultiKey" : true,
			"n" : 2,
			"nscannedObjects" : 9,
			"nscanned" : 9,
			"nscannedObjectsAllPlans" : 9,
			"nscannedAllPlans" : 9,
			"scanAndOrder" : false,
			"indexOnly" : false,
			"nYields" : 0,
			"nChunkSkips" : 0,
			"millis" : 0,
			"indexBounds" : {
				"doc.code1" : [
					[
						"000001",
						"000001"
					]
				],
				"doc.type" : [
					[
						{
							"$maxElement" : 1
						},
						{
							"$minElement" : 1
						}
					]
				],
				"code" : [
					[
						{
							"$maxElement" : 1
						},
						{
							"$minElement" : 1
						}
					]
				]
			}
		},
		{
			"cursor" : "BtreeCursor index_test",
			"isMultiKey" : true,
			"n" : 2,
			"nscannedObjects" : 9,
			"nscanned" : 9,
			"nscannedObjectsAllPlans" : 9,
			"nscannedAllPlans" : 9,
			"scanAndOrder" : false,
			"indexOnly" : false,
			"nYields" : 0,
			"nChunkSkips" : 0,
			"millis" : 0,
			"indexBounds" : {
				"doc.code1" : [
					[
						"000003",
						"000003"
					]
				],
				"doc.type" : [
					[
						{
							"$maxElement" : 1
						},
						{
							"$minElement" : 1
						}
					]
				],
				"code" : [
					[
						{
							"$maxElement" : 1
						},
						{
							"$minElement" : 1
						}
					]
				]
			}
		}
	],
	"n" : 4,
	"nscannedObjects" : 18,
	"nscanned" : 18,
	"nscannedObjectsAllPlans" : 18,
	"nscannedAllPlans" : 18,
	"millis" : 0,
	"server" : "xxx:27017"
}

Not embedded document

 
db.test2.insert({"code" : "000001", "code1" : "000001", "type" : [ "1" ,"3" ]});
db.test2.insert({"code" : "000002", "code1" : "000001", "type" : [ "1" ]});
db.test2.insert({"code" : "000003", "code1" : "000001", "type" : [ "1", "2"] });
db.test2.insert({"code" : "000004", "code1" : "000001", "type" : [ "2" ]});
db.test2.insert({"code" : "000005", "code1" : "000001", "type" : [ ]});
db.test2.insert({"code" : "000006", "code1" : "000001", "type" : [ "3" ]});
db.test2.insert({"code" : "000007", "code1" : "000001" });
db.test2.insert({"code" : "000001", "code1" : "000002", "type" : [ "1" ,"3" ]});
db.test2.insert({"code" : "000002", "code1" : "000002", "type" : [ "1" ]});
db.test2.insert({"code" : "000003", "code1" : "000002", "type" : [ "1", "2"] });
db.test2.insert({"code" : "000004", "code1" : "000002", "type" : [ "2" ]});
db.test2.insert({"code" : "000005", "code1" : "000002", "type" : [ ]});
db.test2.insert({"code" : "000006", "code1" : "000002", "type" : [ "3" ]});
db.test2.insert({"code" : "000007", "code1" : "000002" });
db.test2.insert({"code" : "000001", "code1" : "000003", "type" : [ "1" ,"3" ]});
db.test2.insert({"code" : "000002", "code1" : "000003", "type" : [ "1" ]});
db.test2.insert({"code" : "000003", "code1" : "000003", "type" : [ "1", "2"] });
db.test2.insert({"code" : "000004", "code1" : "000003", "type" : [ "2" ]});
db.test2.insert({"code" : "000005", "code1" : "000003", "type" : [ ]});
db.test2.insert({"code" : "000006", "code1" : "000003", "type" : [ "3" ]});
db.test2.insert({"code" : "000007", "code1" : "000003" });
 
db.test2.ensureIndex( { "code1": 1, "type": -1, "code": -1 }, { unique: false, sparse: false, background: true, name: "index_test2"} )
 
db.test2.find( { "$or":[ { "code1" : "000001" }, { "code1" : "000003" } ], "type" : "2" } ).explain()
{
	"clauses" : [
		{
			"cursor" : "BtreeCursor index_test2",
			"isMultiKey" : true,
			"n" : 2,
			"nscannedObjects" : 2,
			"nscanned" : 2,
			"nscannedObjectsAllPlans" : 2,
			"nscannedAllPlans" : 2,
			"scanAndOrder" : false,
			"indexOnly" : false,
			"nYields" : 0,
			"nChunkSkips" : 0,
			"millis" : 0,
			"indexBounds" : {
				"code1" : [
					[
						"000001",
						"000001"
					]
				],
				"type" : [
					[
						"2",
						"2"
					]
				],
				"code" : [
					[
						{
							"$maxElement" : 1
						},
						{
							"$minElement" : 1
						}
					]
				]
			}
		},
		{
			"cursor" : "BtreeCursor index_test2",
			"isMultiKey" : true,
			"n" : 2,
			"nscannedObjects" : 2,
			"nscanned" : 2,
			"nscannedObjectsAllPlans" : 2,
			"nscannedAllPlans" : 2,
			"scanAndOrder" : false,
			"indexOnly" : false,
			"nYields" : 0,
			"nChunkSkips" : 0,
			"millis" : 0,
			"indexBounds" : {
				"code1" : [
					[
						"000003",
						"000003"
					]
				],
				"type" : [
					[
						"2",
						"2"
					]
				],
				"code" : [
					[
						{
							"$maxElement" : 1
						},
						{
							"$minElement" : 1
						}
					]
				]
			}
		}
	],
	"n" : 4,
	"nscannedObjects" : 4,
	"nscanned" : 4,
	"nscannedObjectsAllPlans" : 4,
	"nscannedAllPlans" : 4,
	"millis" : 0,
	"server" : "xxx:27017"
}



 Comments   
Comment by Lucas Pouzac [ 30/Oct/12 ]

Ok, I open a new ticket : SERVER-7509

Comment by Aaron Staple [ 24/Oct/12 ]

Which ticket do you mean? If you'd like us to have a ticket for optimizing the dotted case you mentioned, I should probably create a new ticket for that. Does that work?

Comment by Lucas Pouzac [ 24/Oct/12 ]

Thanks for tour answer.

You could re-open this ticket please?

Comment by Aaron Staple [ 24/Oct/12 ]

Hi Lucas,

Per your question about the optimization not working when you nest your field an additional level deeper - The reason is that there are some cases where the SERVER-3104 optimization will not work correctly when there are dotted fields within a $elemMatch expression. For the time being, the optimization is prevented in all cases with dotted field names.

Comment by Lucas Pouzac [ 17/Oct/12 ]

Hi Aaron,

Below, the test with the corrections you've made. The data has been changed.
Functionally, queries with "$elemMatch" are correct while others are not because only 4 results must match.
Index works correctly on the latest version of mongodb.

db.test.insert({"code" : "000001", doc : [{"code1" : "000001", "type" : [ "1" ,"3" ]}]});
db.test.insert({"code" : "000002", doc : [{"code1" : "000001", "type" : [ "1" ]}]});
db.test.insert({"code" : "000003", doc : [{"code1" : "000001", "type" : [ "1", "2"] }]});
db.test.insert({"code" : "000004", doc : [{"code1" : "000001", "type" : [ "2" ]}, {"code1" : "000002", "type" : [ "3" ]} ]});
db.test.insert({"code" : "000005", doc : [{"code1" : "000001", "type" : [ ]}]});
db.test.insert({"code" : "000006", doc : [{"code1" : "000001", "type" : [ "3" ]}]});
db.test.insert({"code" : "000007", doc : [{"code1" : "000001" }]});
db.test.insert({"code" : "000001", doc : [{"code1" : "000002", "type" : [ "1" ,"3" ]}]});
db.test.insert({"code" : "000002", doc : [{"code1" : "000002", "type" : [ "1" ]}]});
db.test.insert({"code" : "000003", doc : [{"code1" : "000002", "type" : [ "1", "2"] }]});
db.test.insert({"code" : "000004", doc : [{"code1" : "000002", "type" : [ "2" ]}, {"code1" : "000001", "type" : [ "3" ]} ]});
db.test.insert({"code" : "000005", doc : [{"code1" : "000002", "type" : [ ]}]});
db.test.insert({"code" : "000006", doc : [{"code1" : "000002", "type" : [ "3" ]}]});
db.test.insert({"code" : "000007", doc : [{"code1" : "000002" }]});
db.test.insert({"code" : "000001", doc : [{"code1" : "000003", "type" : [ "1" ,"3" ]}]});
db.test.insert({"code" : "000002", doc : [{"code1" : "000003", "type" : [ "1" ]}]});
db.test.insert({"code" : "000003", doc : [{"code1" : "000003", "type" : [ "1", "2"] }]});
db.test.insert({"code" : "000004", doc : [{"code1" : "000003", "type" : [ "2" ]}]});
db.test.insert({"code" : "000005", doc : [{"code1" : "000003", "type" : [ ]}]});
db.test.insert({"code" : "000006", doc : [{"code1" : "000003", "type" : [ "3" ]}]});
db.test.insert({"code" : "000007", doc : [{"code1" : "000003" }]});
 
db.test.ensureIndex( { "doc.code1": 1, "doc.type": -1, "code": -1 }, { unique: false, sparse: false, background: true, name: "index_test"} )
 
db.test.find( { "$or":[ { "doc.code1" : "000001" }, { "doc.code1" : "000003" } ], "doc.type" : "2" } ).count()
5
db.test.find( { "doc.code1" : { $in:[ "000001", "000003" ] }, "doc.type" : "2" } ).count()
5
db.test.find( { "doc" : { "$elemMatch" : { "code1" : { "$in" :[ "000001", "000003" ] }, "type" : "2" } } } ).count()
4
 
db.test.find( { "doc" : { "$elemMatch" : { "code1" : { "$in" :[ "000001", "000003" ] }, "type" : "2" } } } ).explain()
{
	"cursor" : "BtreeCursor index_test multi",
	"isMultiKey" : true,
	"n" : 4,
	"nscannedObjects" : 4,
	"nscanned" : 6,
	"nscannedObjectsAllPlans" : 10,
	"nscannedAllPlans" : 12,
	"scanAndOrder" : false,
	"indexOnly" : false,
	"nYields" : 0,
	"nChunkSkips" : 0,
	"millis" : 0,
	"indexBounds" : {
		"doc.code1" : [
			[
				"000001",
				"000001"
			],
			[
				"000003",
				"000003"
			]
		],
		"doc.type" : [
			[
				"2",
				"2"
			]
		],
		"code" : [
			[
				{
					"$maxElement" : 1
				},
				{
					"$minElement" : 1
				}
			]
		]
	},
	"server" : "xxx:27017"
}

By cons, if I add a sub-level of the attribute "type" (which is my final document and can not be changed), the index is not working properly.

db.dropDatabase()
db.test.insert({"code" : "000001", doc : [ {"code1" : "000001", "type" : { "subtype" : [ "1" ,"3" ]}}]});
db.test.insert({"code" : "000002", doc : [ {"code1" : "000001", "type" : { "subtype" : [ "1" ]}}]});
db.test.insert({"code" : "000003", doc : [ {"code1" : "000001", "type" : { "subtype" : [ "1", "2"] }}]});
db.test.insert({"code" : "000004", doc : [ {"code1" : "000001", "type" : { "subtype" : [ "2" ]}}, {"code1" : "000002", "type" : { "subtype" : [ "3" ]}} ]});
db.test.insert({"code" : "000005", doc : [ {"code1" : "000001", "type" : { "subtype" : [ ]}}]});
db.test.insert({"code" : "000006", doc : [ {"code1" : "000001", "type" : { "subtype" : [ "3" ]}}]});
db.test.insert({"code" : "000007", doc : [ {"code1" : "000001" }]});
 
db.test.insert({"code" : "000001", doc : [ {"code1" : "000002", "type" : { "subtype" : [ "1" ,"3" ]}}]});
db.test.insert({"code" : "000002", doc : [ {"code1" : "000002", "type" : { "subtype" : [ "1" ]}}]});
db.test.insert({"code" : "000003", doc : [ {"code1" : "000002", "type" : { "subtype" : [ "1", "2"] }}]});
db.test.insert({"code" : "000004", doc : [ {"code1" : "000002", "type" : { "subtype" : [ "2" ]}}, {"code1" : "000001", "type" : { "subtype" : [ "3" ]}} ]});
db.test.insert({"code" : "000005", doc : [ {"code1" : "000002", "type" : { "subtype" : [ ]}}]});
db.test.insert({"code" : "000006", doc : [ {"code1" : "000002", "type" : { "subtype" : [ "3" ]}}]});
db.test.insert({"code" : "000007", doc : [ {"code1" : "000002" }]});
 
db.test.insert({"code" : "000001", doc : [ {"code1" : "000003", "type" : { "subtype" : [ "1" ,"3" ]}}]});
db.test.insert({"code" : "000002", doc : [ {"code1" : "000003", "type" : { "subtype" : [ "1" ]}}]});
db.test.insert({"code" : "000003", doc : [ {"code1" : "000003", "type" : { "subtype" : [ "1", "2"] }}]});
db.test.insert({"code" : "000004", doc : [ {"code1" : "000003", "type" : { "subtype" : [ "2" ]}}]});
db.test.insert({"code" : "000005", doc : [ {"code1" : "000003", "type" : { "subtype" : [ ]}}]});
db.test.insert({"code" : "000006", doc : [ {"code1" : "000003", "type" : { "subtype" : [ "3" ]}}]});
db.test.insert({"code" : "000007", doc : [ {"code1" : "000003" }]});
db.test.ensureIndex( { "doc.code1": 1, "doc.type.subtype": -1, "code": -1 }, { unique: false, sparse: false, background: true, name: "index_test"} )
 
db.test.find( { "doc" : { "$elemMatch" : { "code1" : { "$in" :[ "000001", "000003" ] }, "type.subtype" : "2" } } } ).explain()
{
	"cursor" : "BtreeCursor index_test multi",
	"isMultiKey" : true,
	"n" : 4,
	"nscannedObjects" : 19,
	"nscanned" : 20,
	"nscannedObjectsAllPlans" : 39,
	"nscannedAllPlans" : 40,
	"scanAndOrder" : false,
	"indexOnly" : false,
	"nYields" : 0,
	"nChunkSkips" : 0,
	"millis" : 0,
	"indexBounds" : {
		"doc.code1" : [
			[
				"000001",
				"000001"
			],
			[
				"000003",
				"000003"
			]
		],
		"doc.type.subtype" : [
			[
				{
					"$maxElement" : 1
				},
				{
					"$minElement" : 1
				}
			]
		],
		"code" : [
			[
				{
					"$maxElement" : 1
				},
				{
					"$minElement" : 1
				}
			]
		]
	},
	"server" : "xxx:27017"
}

Comment by Aaron Staple [ 16/Oct/12 ]

Hi Lucas,

Sorry, I misread the example with the difference between code and code1.

To get the benefit of the SERVER-3104 fix,you would need to change your schema slightly to this:

{"code" : "000001", doc : [

{"code1" : "000001", "type" : [ "1" ,"3" ]}

]}

and your query to this

{ doc: { $elemMatch: { code1:

{ $in:[ "000001", "000003" ] }

, type:"2" } } }

Also, the SERVER-3104 fix is only available in the nightly development build right now.

Comment by Lucas Pouzac [ 16/Oct/12 ]

Hi Aaron,

I try with 2.2.0 and the development version (mongodb-linux-x86_64-2012-10-15) and the result with $in operande is worse for all queries.

Embedded document

db.test.find( { "doc.code1" : { $in:[ "000001", "000003" ] }, "doc.type" : "2" } ).explain()
{
	"cursor" : "BtreeCursor index_test multi",
	"isMultiKey" : true,
	"n" : 4,
	"nscannedObjects" : 18,
	"nscanned" : 19,
	"nscannedObjectsAllPlans" : 37,
	"nscannedAllPlans" : 38,
	"scanAndOrder" : false,
	"indexOnly" : false,
	"nYields" : 0,
	"nChunkSkips" : 0,
	"millis" : 0,
	"indexBounds" : {
		"doc.code1" : [
			[
				"000001",
				"000001"
			],
			[
				"000003",
				"000003"
			]
		],
		"doc.type" : [
			[
				{
					"$maxElement" : 1
				},
				{
					"$minElement" : 1
				}
			]
		],
		"code" : [
			[
				{
					"$maxElement" : 1
				},
				{
					"$minElement" : 1
				}
			]
		]
	},
	"server" : "xxx:27017"
}

Not enbedded document

db.test2.find( { "code1" : { $in:[ "000001", "000003" ] }, "type" : "2" } ).explain()
{
	"cursor" : "BtreeCursor index_test2 multi",
	"isMultiKey" : true,
	"n" : 4,
	"nscannedObjects" : 4,
	"nscanned" : 6,
	"nscannedObjectsAllPlans" : 10,
	"nscannedAllPlans" : 12,
	"scanAndOrder" : false,
	"indexOnly" : false,
	"nYields" : 0,
	"nChunkSkips" : 0,
	"millis" : 0,
	"indexBounds" : {
		"code1" : [
			[
				"000001",
				"000001"
			],
			[
				"000003",
				"000003"
			]
		],
		"type" : [
			[
				"2",
				"2"
			]
		],
		"code" : [
			[
				{
					"$maxElement" : 1
				},
				{
					"$minElement" : 1
				}
			]
		]
	},
	"server" : "xxx:27017"
}

In resume

  Embedded $or Embedded $in Not embbeded $or Not embedded $in
n 4 4 4 4
nscannedObjects 18 18 4 4
nscanned 18 19 4 6
nscannedObjectsAllPlans 18 37 4 10
nscannedAllPlans 18 38 4 12

I open a discussion : https://groups.google.com/forum/?hl=fr&fromgroups=#!topic/mongodb-user/aruciRY9Czw

Comment by Aaron Staple [ 15/Oct/12 ]

Hi Lucas,

The indexing behavior here is described in SERVER-3104. You can improve the index bounds by using $in instead of $or.

{ "doc.code1" :

{ $in:[ "000001", "000003" ] }

, "doc.type" : "2" }

Generated at Thu Feb 08 03:14:17 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.