[SERVER-13205] Query access planner merges 2d leaves incorrectly inside OR Created: 14/Mar/14  Updated: 11/Jul/16  Resolved: 27/Mar/14

Status: Closed
Project: Core Server
Component/s: Geo, Querying
Affects Version/s: 2.6.0-rc1
Fix Version/s: 2.6.0-rc3

Type: Bug Priority: Major - P3
Reporter: Kristian Picon Assignee: David Storch
Resolution: Done Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Operating System: ALL
Participants:

 Description   

The $or query with 2d geo children (noted below) is missing documents from the result set:

> db.foo.insert([{loc:[10,10]},{loc:[15,15]},{loc:[20,20]}])
BulkWriteResult({
	"writeErrors" : [ ],
	"writeConcernErrors" : [ ],
	"nInserted" : 3,
	"nUpserted" : 0,
	"nMatched" : 0,
	"nModified" : 0,
	"nRemoved" : 0,
	"upserted" : [ ]
})
> db.foo.ensureIndex({loc:"2d"})
{
	"createdCollectionAutomatically" : false,
	"numIndexesBefore" : 1,
	"numIndexesAfter" : 2,
	"ok" : 1
}
> var circle10 = {loc:{$geoWithin:{$centerSphere:[[10,10],0.2]}}}
> var circle20 = {loc:{$geoWithin:{$centerSphere:[[20,20],0.2]}}}
> db.foo.find(circle10)
{ "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] }
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
> db.foo.find(circle20)
{ "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] }
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
> db.foo.find({$and:[circle10,circle20]})
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
> db.foo.find({$or:[circle10,circle20]}) // wrong! should return all 3 documents!
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
> db.foo.dropIndexes()
{
	"nIndexesWas" : 2,
	"msg" : "non-_id indexes dropped for collection",
	"ok" : 1
}
> db.foo.find({$or:[circle10,circle20]}) // that's better
{ "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] }
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
{ "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] }

The cause seems to be an issue with merging 2d leaves under an OR.

Full explain for the $or query:

> db.foo.find({$or:[circle10,circle20]}).explain(true)
{
	"cursor" : "GeoBrowse-circle",
	"isMultiKey" : false,
	"n" : 1,
	"nscannedObjects" : 2,
	"nscanned" : 2,
	"nscannedObjectsAllPlans" : 2,
	"nscannedAllPlans" : 2,
	"scanAndOrder" : false,
	"indexOnly" : false,
	"nYields" : 0,
	"nChunkSkips" : 0,
	"millis" : 1,
	"indexBounds" : {
		"loc" : [
			[
				[
					0,
					0
				],
				[
					22.5,
					22.5
				]
			],
			[
				[
					-5.625,
					-5.625
				],
				[
					0,
					0
				]
			],
			[
				[
					-5.625,
					0
				],
				[
					0,
					5.625
				]
			],
			[
				[
					-5.625,
					5.625
				],
				[
					0,
					11.25
				]
			],
			[
				[
					-5.625,
					11.25
				],
				[
					0,
					16.875
				]
			],
			[
				[
					-5.625,
					16.875
				],
				[
					0,
					22.5
				]
			],
			[
				[
					16.875,
					-5.625
				],
				[
					22.5,
					0
				]
			],
			[
				[
					11.25,
					-5.625
				],
				[
					16.875,
					0
				]
			],
			[
				[
					5.625,
					-5.625
				],
				[
					11.25,
					0
				]
			],
			[
				[
					0,
					-5.625
				],
				[
					5.625,
					0
				]
			]
		]
	},
	"allPlans" : [
		{
			"cursor" : "GeoBrowse-circle",
			"isMultiKey" : false,
			"n" : 1,
			"nscannedObjects" : 2,
			"nscanned" : 2,
			"scanAndOrder" : false,
			"indexOnly" : false,
			"nChunkSkips" : 0,
			"indexBounds" : {
				"loc" : [
					[
						[
							0,
							0
						],
						[
							22.5,
							22.5
						]
					],
					[
						[
							-5.625,
							-5.625
						],
						[
							0,
							0
						]
					],
					[
						[
							-5.625,
							0
						],
						[
							0,
							5.625
						]
					],
					[
						[
							-5.625,
							5.625
						],
						[
							0,
							11.25
						]
					],
					[
						[
							-5.625,
							11.25
						],
						[
							0,
							16.875
						]
					],
					[
						[
							-5.625,
							16.875
						],
						[
							0,
							22.5
						]
					],
					[
						[
							16.875,
							-5.625
						],
						[
							22.5,
							0
						]
					],
					[
						[
							11.25,
							-5.625
						],
						[
							16.875,
							0
						]
					],
					[
						[
							5.625,
							-5.625
						],
						[
							11.25,
							0
						]
					],
					[
						[
							0,
							-5.625
						],
						[
							5.625,
							0
						]
					]
				]
			}
		}
	],
	"server" : "Rassi-MacBook-Pro.local:27017",
	"filterSet" : false,
	"stats" : {
		"type" : "KEEP_MUTATIONS",
		"works" : 4,
		"yields" : 0,
		"unyields" : 0,
		"invalidates" : 0,
		"advanced" : 1,
		"needTime" : 2,
		"needFetch" : 0,
		"isEOF" : 1,
		"children" : [
			{
				"type" : "FETCH",
				"works" : 4,
				"yields" : 0,
				"unyields" : 0,
				"invalidates" : 0,
				"advanced" : 1,
				"needTime" : 2,
				"needFetch" : 0,
				"isEOF" : 1,
				"alreadyHasObj" : 2,
				"forcedFetches" : 0,
				"matchTested" : 1,
				"children" : [
					{
						"type" : "GEO_2D",
						"works" : 2,
						"yields" : 0,
						"unyields" : 0,
						"invalidates" : 0,
						"advanced" : 2,
						"needTime" : 0,
						"needFetch" : 0,
						"isEOF" : 1,
						"geometryType" : "circle",
						"field" : "loc",
						"boundsVerbose" : [
							"(0,0) -->> (22.5,22.5)",
							"(-5.625,-5.625) -->> (0,0)",
							"(-5.625,0) -->> (0,5.625)",
							"(-5.625,5.625) -->> (0,11.25)",
							"(-5.625,11.25) -->> (0,16.875)",
							"(-5.625,16.875) -->> (0,22.5)",
							"(16.875,-5.625) -->> (22.5,0)",
							"(11.25,-5.625) -->> (16.875,0)",
							"(5.625,-5.625) -->> (11.25,0)",
							"(0,-5.625) -->> (5.625,0)"
						],
						"children" : [ ]
					}
				]
			}
		]
	}
}
>

Verbose query log output for the $or query:

2014-03-26T21:26:03.490-0400 [conn1] [QLOG] Running query:
ns=test.foo limit=0 skip=0
Tree: $or
    GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
    GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
Sort: {}
Proj: {}
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning...
=============================
Options = INDEX_INTERSECTION KEEP_MUTATIONS
Canonical query:
ns=test.foo limit=0 skip=0
Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
Sort: {}
Proj: {}
=============================
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc'
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree:
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }  || First: 0 notFirst: full path: loc
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving:
[Node #0]: predicate
	first indices: [0]
	pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
	indexToAssign: 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree:
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }  || Selected Index #0 pos 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution:
FETCH
---fetched = 1
---sortedByDiskLoc = 0
---getSort = []
---Child:
------KEEP_MUTATIONS
---------filter:
                GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
---------fetched = 0
---------sortedByDiskLoc = 0
---------getSort = []
---------Child:
------------GEO_2D
---------------keyPattern = { loc: "2d" }
---------------fetched = 0
---------------sortedByDiskLoc = 0
---------------getSort = []
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions.
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] No cache data for subchild ns=test.foo limit=0 skip=0
Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
Sort: {}
Proj: {}
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning...
=============================
Options = INDEX_INTERSECTION KEEP_MUTATIONS
Canonical query:
ns=test.foo limit=0 skip=0
Tree: $or
    GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
    GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
Sort: {}
Proj: {}
=============================
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc'
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree:
$or
    GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }  || First: 0 notFirst: full path: loc
    GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }  || First: 0 notFirst: full path: loc
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 2
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 1
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving:
[Node #0]: predicate
	first indices: [0]
	pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
	indexToAssign: 0
[Node #1]: predicate
	first indices: [0]
	pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
	indexToAssign: 0
[Node #2]: ALL OF: [ 0 1 ]
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree:
$or
    GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }  || Selected Index #0 pos 0
    GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }  || Selected Index #0 pos 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution:
KEEP_MUTATIONS
---filter:
        $or
            GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
            GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
---fetched = 1
---sortedByDiskLoc = 0
---getSort = []
---Child:
------FETCH
---------filter:
                GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }  || Selected Index #0 pos 0
---------fetched = 1
---------sortedByDiskLoc = 0
---------getSort = []
---------Child:
------------GEO_2D
---------------keyPattern = { loc: "2d" }
---------------fetched = 0
---------------sortedByDiskLoc = 0
---------------getSort = []
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions.
2014-03-26T21:26:03.492-0400 [conn1] [QLOG] Not caching runner but returning 1 results.

Note that the solution from the verbose query log specifies only the index bounds for the [10,10] circle, and then adds a filter on top for the [20,20] circle. Instead, it should be applying a union of the bounds, and then adding the entire $or filter on top (needed since centerSphere bounds are inexact). I infer that the access planner is incorrectly applying AND merging logic when it should be applying OR merging logic.

Original report:

Summary: Wrong cursor used for geo $or

Description: When there is a 2d index present on a collection and an $or query is performed, the wrong cursor is sometimes selected. The result is that not all the relevant documents are actually returned. This was encountered when trying to $or a $geoWithin $centerSphere, and a $geoWithin $box. The database attempted to use the GeoBrowse-box cursor, giving incorrect results.

Steps To Reproduce: Make a collection with a 2d index and some datapoints, then run:

db.datapoints.find({
    "$or": [{
        "location": {
            "$geoWithin": {
                "$centerSphere": [
                    [11.77734375, 47.27922900257082], 0.27644030243339723
                ]
            }
        }
    }, {
        "location": {
            "$geoWithin": {
                "$box": [
                    [-90.87890625, 32.627420045483085],
                    [-66.26953125, 45.56450582712792]
                ]
            }
        }
    }]
}).explain()



 Comments   
Comment by Githook User [ 27/Mar/14 ]

Author:

{u'username': u'dstorch', u'name': u'David Storch', u'email': u'david.storch@10gen.com'}

Message: SERVER-13205 fix merging of special leaf nodes in access planner

(cherry picked from commit 719134aa7985c0a697f199fc78e323d04e3a65ad)
Branch: v2.6
https://github.com/mongodb/mongo/commit/8bb2e09856750c7e5dd40713bc2b6ed100db4355

Comment by Githook User [ 27/Mar/14 ]

Author:

{u'username': u'dstorch', u'name': u'David Storch', u'email': u'david.storch@10gen.com'}

Message: SERVER-13205 fix merging of special leaf nodes in access planner
Branch: master
https://github.com/mongodb/mongo/commit/719134aa7985c0a697f199fc78e323d04e3a65ad

Comment by J Rassi [ 27/Mar/14 ]

Changed ticket summary, replaced description.

Generated at Thu Feb 08 03:30:58 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.