Description
The $or query with 2d geo children (noted below) is missing documents from the result set:
> db.foo.insert([{loc:[10,10]},{loc:[15,15]},{loc:[20,20]}])
|
BulkWriteResult({
|
"writeErrors" : [ ], |
"writeConcernErrors" : [ ], |
"nInserted" : 3, |
"nUpserted" : 0, |
"nMatched" : 0, |
"nModified" : 0, |
"nRemoved" : 0, |
"upserted" : [ ] |
})
|
> db.foo.ensureIndex({loc:"2d"}) |
{
|
"createdCollectionAutomatically" : false, |
"numIndexesBefore" : 1, |
"numIndexesAfter" : 2, |
"ok" : 1 |
}
|
> var circle10 = {loc:{$geoWithin:{$centerSphere:[[10,10],0.2]}}} |
> var circle20 = {loc:{$geoWithin:{$centerSphere:[[20,20],0.2]}}} |
> db.foo.find(circle10)
|
{ "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] } |
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } |
> db.foo.find(circle20)
|
{ "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] } |
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } |
> db.foo.find({$and:[circle10,circle20]})
|
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } |
> db.foo.find({$or:[circle10,circle20]}) // wrong! should return all 3 documents! |
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } |
> db.foo.dropIndexes()
|
{
|
"nIndexesWas" : 2, |
"msg" : "non-_id indexes dropped for collection", |
"ok" : 1 |
}
|
> db.foo.find({$or:[circle10,circle20]}) // that's better |
{ "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] } |
{ "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } |
{ "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] } |
The cause seems to be an issue with merging 2d leaves under an OR.
Full explain for the $or query:
> db.foo.find({$or:[circle10,circle20]}).explain(true)
|
{
|
"cursor" : "GeoBrowse-circle",
|
"isMultiKey" : false,
|
"n" : 1,
|
"nscannedObjects" : 2,
|
"nscanned" : 2,
|
"nscannedObjectsAllPlans" : 2,
|
"nscannedAllPlans" : 2,
|
"scanAndOrder" : false,
|
"indexOnly" : false,
|
"nYields" : 0,
|
"nChunkSkips" : 0,
|
"millis" : 1,
|
"indexBounds" : {
|
"loc" : [
|
[
|
[
|
0,
|
0
|
],
|
[
|
22.5,
|
22.5
|
]
|
],
|
[
|
[
|
-5.625,
|
-5.625
|
],
|
[
|
0,
|
0
|
]
|
],
|
[
|
[
|
-5.625,
|
0
|
],
|
[
|
0,
|
5.625
|
]
|
],
|
[
|
[
|
-5.625,
|
5.625
|
],
|
[
|
0,
|
11.25
|
]
|
],
|
[
|
[
|
-5.625,
|
11.25
|
],
|
[
|
0,
|
16.875
|
]
|
],
|
[
|
[
|
-5.625,
|
16.875
|
],
|
[
|
0,
|
22.5
|
]
|
],
|
[
|
[
|
16.875,
|
-5.625
|
],
|
[
|
22.5,
|
0
|
]
|
],
|
[
|
[
|
11.25,
|
-5.625
|
],
|
[
|
16.875,
|
0
|
]
|
],
|
[
|
[
|
5.625,
|
-5.625
|
],
|
[
|
11.25,
|
0
|
]
|
],
|
[
|
[
|
0,
|
-5.625
|
],
|
[
|
5.625,
|
0
|
]
|
]
|
]
|
},
|
"allPlans" : [
|
{
|
"cursor" : "GeoBrowse-circle",
|
"isMultiKey" : false,
|
"n" : 1,
|
"nscannedObjects" : 2,
|
"nscanned" : 2,
|
"scanAndOrder" : false,
|
"indexOnly" : false,
|
"nChunkSkips" : 0,
|
"indexBounds" : {
|
"loc" : [
|
[
|
[
|
0,
|
0
|
],
|
[
|
22.5,
|
22.5
|
]
|
],
|
[
|
[
|
-5.625,
|
-5.625
|
],
|
[
|
0,
|
0
|
]
|
],
|
[
|
[
|
-5.625,
|
0
|
],
|
[
|
0,
|
5.625
|
]
|
],
|
[
|
[
|
-5.625,
|
5.625
|
],
|
[
|
0,
|
11.25
|
]
|
],
|
[
|
[
|
-5.625,
|
11.25
|
],
|
[
|
0,
|
16.875
|
]
|
],
|
[
|
[
|
-5.625,
|
16.875
|
],
|
[
|
0,
|
22.5
|
]
|
],
|
[
|
[
|
16.875,
|
-5.625
|
],
|
[
|
22.5,
|
0
|
]
|
],
|
[
|
[
|
11.25,
|
-5.625
|
],
|
[
|
16.875,
|
0
|
]
|
],
|
[
|
[
|
5.625,
|
-5.625
|
],
|
[
|
11.25,
|
0
|
]
|
],
|
[
|
[
|
0,
|
-5.625
|
],
|
[
|
5.625,
|
0
|
]
|
]
|
]
|
}
|
}
|
],
|
"server" : "Rassi-MacBook-Pro.local:27017",
|
"filterSet" : false,
|
"stats" : {
|
"type" : "KEEP_MUTATIONS",
|
"works" : 4,
|
"yields" : 0,
|
"unyields" : 0,
|
"invalidates" : 0,
|
"advanced" : 1,
|
"needTime" : 2,
|
"needFetch" : 0,
|
"isEOF" : 1,
|
"children" : [
|
{
|
"type" : "FETCH",
|
"works" : 4,
|
"yields" : 0,
|
"unyields" : 0,
|
"invalidates" : 0,
|
"advanced" : 1,
|
"needTime" : 2,
|
"needFetch" : 0,
|
"isEOF" : 1,
|
"alreadyHasObj" : 2,
|
"forcedFetches" : 0,
|
"matchTested" : 1,
|
"children" : [
|
{
|
"type" : "GEO_2D",
|
"works" : 2,
|
"yields" : 0,
|
"unyields" : 0,
|
"invalidates" : 0,
|
"advanced" : 2,
|
"needTime" : 0,
|
"needFetch" : 0,
|
"isEOF" : 1,
|
"geometryType" : "circle",
|
"field" : "loc",
|
"boundsVerbose" : [
|
"(0,0) -->> (22.5,22.5)",
|
"(-5.625,-5.625) -->> (0,0)",
|
"(-5.625,0) -->> (0,5.625)",
|
"(-5.625,5.625) -->> (0,11.25)",
|
"(-5.625,11.25) -->> (0,16.875)",
|
"(-5.625,16.875) -->> (0,22.5)",
|
"(16.875,-5.625) -->> (22.5,0)",
|
"(11.25,-5.625) -->> (16.875,0)",
|
"(5.625,-5.625) -->> (11.25,0)",
|
"(0,-5.625) -->> (5.625,0)"
|
],
|
"children" : [ ]
|
}
|
]
|
}
|
]
|
}
|
}
|
>
|
Verbose query log output for the $or query:
2014-03-26T21:26:03.490-0400 [conn1] [QLOG] Running query:
|
ns=test.foo limit=0 skip=0
|
Tree: $or
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
|
Sort: {}
|
Proj: {}
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning...
|
=============================
|
Options = INDEX_INTERSECTION KEEP_MUTATIONS
|
Canonical query:
|
ns=test.foo limit=0 skip=0
|
Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
|
Sort: {}
|
Proj: {}
|
=============================
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" }
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc'
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree:
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || First: 0 notFirst: full path: loc
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving:
|
[Node #0]: predicate
|
first indices: [0]
|
pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
|
indexToAssign: 0
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree:
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || Selected Index #0 pos 0
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution:
|
FETCH
|
---fetched = 1
|
---sortedByDiskLoc = 0
|
---getSort = []
|
---Child:
|
------KEEP_MUTATIONS
|
---------filter:
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
|
---------fetched = 0
|
---------sortedByDiskLoc = 0
|
---------getSort = []
|
---------Child:
|
------------GEO_2D
|
---------------keyPattern = { loc: "2d" }
|
---------------fetched = 0
|
---------------sortedByDiskLoc = 0
|
---------------getSort = []
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions.
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] No cache data for subchild ns=test.foo limit=0 skip=0
|
Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
|
Sort: {}
|
Proj: {}
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning...
|
=============================
|
Options = INDEX_INTERSECTION KEEP_MUTATIONS
|
Canonical query:
|
ns=test.foo limit=0 skip=0
|
Tree: $or
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
|
Sort: {}
|
Proj: {}
|
=============================
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" }
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc'
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree:
|
$or
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || First: 0 notFirst: full path: loc
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } || First: 0 notFirst: full path: loc
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 2
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 1
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving:
|
[Node #0]: predicate
|
first indices: [0]
|
pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
|
indexToAssign: 0
|
[Node #1]: predicate
|
first indices: [0]
|
pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
|
indexToAssign: 0
|
[Node #2]: ALL OF: [ 0 1 ]
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree:
|
$or
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || Selected Index #0 pos 0
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } || Selected Index #0 pos 0
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution:
|
KEEP_MUTATIONS
|
---filter:
|
$or
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
|
---fetched = 1
|
---sortedByDiskLoc = 0
|
---getSort = []
|
---Child:
|
------FETCH
|
---------filter:
|
GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } || Selected Index #0 pos 0
|
---------fetched = 1
|
---------sortedByDiskLoc = 0
|
---------getSort = []
|
---------Child:
|
------------GEO_2D
|
---------------keyPattern = { loc: "2d" }
|
---------------fetched = 0
|
---------------sortedByDiskLoc = 0
|
---------------getSort = []
|
2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions.
|
2014-03-26T21:26:03.492-0400 [conn1] [QLOG] Not caching runner but returning 1 results.
|
Note that the solution from the verbose query log specifies only the index bounds for the [10,10] circle, and then adds a filter on top for the [20,20] circle. Instead, it should be applying a union of the bounds, and then adding the entire $or filter on top (needed since centerSphere bounds are inexact). I infer that the access planner is incorrectly applying AND merging logic when it should be applying OR merging logic.
Original report:
Summary: Wrong cursor used for geo $or
Description: When there is a 2d index present on a collection and an $or query is performed, the wrong cursor is sometimes selected. The result is that not all the relevant documents are actually returned. This was encountered when trying to $or a $geoWithin $centerSphere, and a $geoWithin $box. The database attempted to use the GeoBrowse-box cursor, giving incorrect results.
Steps To Reproduce: Make a collection with a 2d index and some datapoints, then run:
db.datapoints.find({"$or": [{"location": {"$geoWithin": {"$centerSphere": [[11.77734375, 47.27922900257082], 0.27644030243339723]}}}, {"location": {"$geoWithin": {"$box": [[-90.87890625, 32.627420045483085],[-66.26953125, 45.56450582712792]]}}}]}).explain()