The $or query with 2d geo children (noted below) is missing documents from the result set:
> db.foo.insert([{loc:[10,10]},{loc:[15,15]},{loc:[20,20]}])
BulkWriteResult({
"writeErrors" : [ ],
"writeConcernErrors" : [ ],
"nInserted" : 3,
"nUpserted" : 0,
"nMatched" : 0,
"nModified" : 0,
"nRemoved" : 0,
"upserted" : [ ]
})
> db.foo.ensureIndex({loc:"2d"})
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
> var circle10 = {loc:{$geoWithin:{$centerSphere:[[10,10],0.2]}}}
> var circle20 = {loc:{$geoWithin:{$centerSphere:[[20,20],0.2]}}}
> db.foo.find(circle10)
{ "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] }
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
> db.foo.find(circle20)
{ "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] }
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
> db.foo.find({$and:[circle10,circle20]})
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
> db.foo.find({$or:[circle10,circle20]}) // wrong! should return all 3 documents!
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
> db.foo.dropIndexes()
{
"nIndexesWas" : 2,
"msg" : "non-_id indexes dropped for collection",
"ok" : 1
}
> db.foo.find({$or:[circle10,circle20]}) // that's better
{ "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] }
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
{ "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] }
The cause seems to be an issue with merging 2d leaves under an OR.
Full explain for the $or query:
> db.foo.find({$or:[circle10,circle20]}).explain(true)
{
"cursor" : "GeoBrowse-circle",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 2,
"nscanned" : 2,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 1,
"indexBounds" : {
"loc" : [
[
[
0,
0
],
[
22.5,
22.5
]
],
[
[
-5.625,
-5.625
],
[
0,
0
]
],
[
[
-5.625,
0
],
[
0,
5.625
]
],
[
[
-5.625,
5.625
],
[
0,
11.25
]
],
[
[
-5.625,
11.25
],
[
0,
16.875
]
],
[
[
-5.625,
16.875
],
[
0,
22.5
]
],
[
[
16.875,
-5.625
],
[
22.5,
0
]
],
[
[
11.25,
-5.625
],
[
16.875,
0
]
],
[
[
5.625,
-5.625
],
[
11.25,
0
]
],
[
[
0,
-5.625
],
[
5.625,
0
]
]
]
},
"allPlans" : [
{
"cursor" : "GeoBrowse-circle",
"isMultiKey" : false,
"n" : 1,
"nscannedObjects" : 2,
"nscanned" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
"loc" : [
[
[
0,
0
],
[
22.5,
22.5
]
],
[
[
-5.625,
-5.625
],
[
0,
0
]
],
[
[
-5.625,
0
],
[
0,
5.625
]
],
[
[
-5.625,
5.625
],
[
0,
11.25
]
],
[
[
-5.625,
11.25
],
[
0,
16.875
]
],
[
[
-5.625,
16.875
],
[
0,
22.5
]
],
[
[
16.875,
-5.625
],
[
22.5,
0
]
],
[
[
11.25,
-5.625
],
[
16.875,
0
]
],
[
[
5.625,
-5.625
],
[
11.25,
0
]
],
[
[
0,
-5.625
],
[
5.625,
0
]
]
]
}
}
],
"server" : "Rassi-MacBook-Pro.local:27017",
"filterSet" : false,
"stats" : {
"type" : "KEEP_MUTATIONS",
"works" : 4,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 1,
"needTime" : 2,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "FETCH",
"works" : 4,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 1,
"needTime" : 2,
"needFetch" : 0,
"isEOF" : 1,
"alreadyHasObj" : 2,
"forcedFetches" : 0,
"matchTested" : 1,
"children" : [
{
"type" : "GEO_2D",
"works" : 2,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 2,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"geometryType" : "circle",
"field" : "loc",
"boundsVerbose" : [
"(0,0) -->> (22.5,22.5)",
"(-5.625,-5.625) -->> (0,0)",
"(-5.625,0) -->> (0,5.625)",
"(-5.625,5.625) -->> (0,11.25)",
"(-5.625,11.25) -->> (0,16.875)",
"(-5.625,16.875) -->> (0,22.5)",
"(16.875,-5.625) -->> (22.5,0)",
"(11.25,-5.625) -->> (16.875,0)",
"(5.625,-5.625) -->> (11.25,0)",
"(0,-5.625) -->> (5.625,0)"
],
"children" : [ ]
}
]
}
]
}
}
>
Verbose query log output for the $or query:
2014-03-26T21:26:03.490-0400 [conn1] [QLOG] Running query:
ns=test.foo limit=0 skip=0
Tree: $or
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
Sort: {}
Proj: {}
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning...
=============================
Options = INDEX_INTERSECTION KEEP_MUTATIONS
Canonical query:
ns=test.foo limit=0 skip=0
Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
Sort: {}
Proj: {}
=============================
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc'
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree:
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || First: 0 notFirst: full path: loc
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving:
[Node #0]: predicate
first indices: [0]
pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
indexToAssign: 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree:
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || Selected Index #0 pos 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution:
FETCH
---fetched = 1
---sortedByDiskLoc = 0
---getSort = []
---Child:
------KEEP_MUTATIONS
---------filter:
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
---------fetched = 0
---------sortedByDiskLoc = 0
---------getSort = []
---------Child:
------------GEO_2D
---------------keyPattern = { loc: "2d" }
---------------fetched = 0
---------------sortedByDiskLoc = 0
---------------getSort = []
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions.
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] No cache data for subchild ns=test.foo limit=0 skip=0
Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
Sort: {}
Proj: {}
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning...
=============================
Options = INDEX_INTERSECTION KEEP_MUTATIONS
Canonical query:
ns=test.foo limit=0 skip=0
Tree: $or
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
Sort: {}
Proj: {}
=============================
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc'
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree:
$or
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || First: 0 notFirst: full path: loc
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } || First: 0 notFirst: full path: loc
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 2
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 1
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving:
[Node #0]: predicate
first indices: [0]
pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
indexToAssign: 0
[Node #1]: predicate
first indices: [0]
pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
indexToAssign: 0
[Node #2]: ALL OF: [ 0 1 ]
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree:
$or
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || Selected Index #0 pos 0
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } || Selected Index #0 pos 0
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution:
KEEP_MUTATIONS
---filter:
$or
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
---fetched = 1
---sortedByDiskLoc = 0
---getSort = []
---Child:
------FETCH
---------filter:
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } || Selected Index #0 pos 0
---------fetched = 1
---------sortedByDiskLoc = 0
---------getSort = []
---------Child:
------------GEO_2D
---------------keyPattern = { loc: "2d" }
---------------fetched = 0
---------------sortedByDiskLoc = 0
---------------getSort = []
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions.
2014-03-26T21:26:03.492-0400 [conn1] [QLOG] Not caching runner but returning 1 results.
Note that the solution from the verbose query log specifies only the index bounds for the [10,10] circle, and then adds a filter on top for the [20,20] circle. Instead, it should be applying a union of the bounds, and then adding the entire $or filter on top (needed since centerSphere bounds are inexact). I infer that the access planner is incorrectly applying AND merging logic when it should be applying OR merging logic.
Original report:
Summary: Wrong cursor used for geo $or
Description: When there is a 2d index present on a collection and an $or query is performed, the wrong cursor is sometimes selected. The result is that not all the relevant documents are actually returned. This was encountered when trying to $or a $geoWithin $centerSphere, and a $geoWithin $box. The database attempted to use the GeoBrowse-box cursor, giving incorrect results.
Steps To Reproduce: Make a collection with a 2d index and some datapoints, then run:
db.datapoints.find({ "$or": [{ "location": { "$geoWithin": { "$centerSphere": [ [11.77734375, 47.27922900257082], 0.27644030243339723 ] } } }, { "location": { "$geoWithin": { "$box": [ [-90.87890625, 32.627420045483085], [-66.26953125, 45.56450582712792] ] } } }] }).explain()