[SERVER-19832] Compound 2dsphere index does unnecessary object scans when a non-anchored regex is given for the second field Created: 07/Aug/15  Updated: 28/Dec/23

Status: Backlog
Project: Core Server
Component/s: Geo, Querying
Affects Version/s: 3.0.2
Fix Version/s: None

Type: Improvement Priority: Major - P3
Reporter: Alexander Komyagin Assignee: Backlog - Query Integration
Resolution: Unresolved Votes: 0
Labels: qi-geo, query-44-grooming
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Assigned Teams:
Query Integration
Participants:

 Description   

Standalone 3.0.2 (MMAP)

Sample data set:

a = -73.97
b = 40.77
 
for(i=0;i<100;i++) {
x = Math.random()
db.tow_destinations.insert(
   {
      address : {
     location : { type: "Point", coordinates: [ a+x, b-x ] }
      },
      upperCaseName:"LUKOIL GAS STATION"
   }
)
}
 
db.tow_destinations.insert(
   {
      address : {
     location : { type: "Point", coordinates: [ a, b ] }
      },
      upperCaseName:"LUKOIL GAS SUPPLY"
   }
)

Index:

db.tow_destinations.ensureIndex({ 'address.location': "2dsphere", upperCaseName: 1 })

Query:

> db.tow_destinations.find({ 'address.location': { $near: { $geometry: { type: "Point", coordinates: [ a,b ] }, $maxDistance: 321868.0 } }, upperCaseName:/LUKOIL GAS SU/})
{ "_id" : ObjectId("55c4f32533b480f8f169bdc9"), "address" : { "location" : { "type" : "Point", "coordinates" : [ -73.97, 40.77 ] } }, "upperCaseName" : "LUKOIL GAS SUPPLY" }

Scanned all objects that matched the geo search (108) to return 1:

> db.system.profile.find().sort({$natural:-1}).limit(2)
{ "op" : "query", "ns" : "test.tow_destinations", "query" : { "address.location" : { "$near" : { "$geometry" : { "type" : "Point", "coordinates" : [ -73.97, 40.77 ] }, "$maxDistance" : 321868 } }, "upperCaseName" : /LUKOIL GAS SU/ }, "ntoreturn" : 0, "ntoskip" : 0, "nscanned" : 273, "nscannedObjects" : 108, "keyUpdates" : 0, "writeConflicts" : 0, "numYield" : 2, "locks" : { "Global" : { "acquireCount" : { "r" : NumberLong(3) } }, "MMAPV1Journal" : { "acquireCount" : { "r" : NumberLong(3) } }, "Database" : { "acquireCount" : { "r" : NumberLong(3) } }, "Collection" : { "acquireCount" : { "R" : NumberLong(3) } } }, "nreturned" : 1, "responseLength" : 164, "millis" : 4, "execStats" : { "summary" : "GEO_NEAR_2DSPHERE { address.location: \"2dsphere\", upperCaseName: 1.0 }" }, "ts" : ISODate("2015-08-07T19:26:18.760Z"), "client" : "127.0.0.1", "allUsers" : [ ], "user" : "" }
{ "op" : "query", "ns" : "test.system.profile", "query" : { "query" : {  }, "orderby" : { "$natural" : -1 } }, "cursorid" : 36288866986, "ntoreturn" : 2, "ntoskip" : 0, "nscanned" : 0, "nscannedObjects" : 2, "keyUpdates" : 0, "writeConflicts" : 0, "numYield" : 0, "locks" : { "Global" : { "acquireCount" : { "r" : NumberLong(1) } }, "MMAPV1Journal" : { "acquireCount" : { "r" : NumberLong(1) } }, "Database" : { "acquireCount" : { "r" : NumberLong(1) } }, "Collection" : { "acquireCount" : { "R" : NumberLong(1) } } }, "nreturned" : 2, "responseLength" : 1561, "millis" : 0, "execStats" : { "stage" : "COLLSCAN", "filter" : { "$and" : [ ] }, "nReturned" : 2, "executionTimeMillisEstimate" : 0, "works" : 3, "advanced" : 2, "needTime" : 1, "needFetch" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 0, "invalidates" : 0, "direction" : "backward", "docsExamined" : 2 }, "ts" : ISODate("2015-08-07T19:26:07.773Z"), "client" : "127.0.0.1", "allUsers" : [ ], "user" : "" }

Apparently, query with an anchored regex works as expected:

> db.tow_destinations.find({ 'address.location': { $near: { $geometry: { type: "Point", coordinates: [ a,b ] }, $maxDistance: 321868.0 } }, upperCaseName:/^LUKOIL GAS SU/})
{ "_id" : ObjectId("55c4f32533b480f8f169bdc9"), "address" : { "location" : { "type" : "Point", "coordinates" : [ -73.97, 40.77 ] } }, "upperCaseName" : "LUKOIL GAS SUPPLY" }

It scanned only 1 object:

> db.system.profile.find().sort({$natural:-1}).limit(2)
{ "op" : "query", "ns" : "test.tow_destinations", "query" : { "address.location" : { "$near" : { "$geometry" : { "type" : "Point", "coordinates" : [ -73.97, 40.77 ] }, "$maxDistance" : 321868 } }, "upperCaseName" : /^LUKOIL GAS SU/ }, "ntoreturn" : 0, "ntoskip" : 0, "nscanned" : 230, "nscannedObjects" : 1, "keyUpdates" : 0, "writeConflicts" : 0, "numYield" : 1, "locks" : { "Global" : { "acquireCount" : { "r" : NumberLong(2) } }, "MMAPV1Journal" : { "acquireCount" : { "r" : NumberLong(2) } }, "Database" : { "acquireCount" : { "r" : NumberLong(2) } }, "Collection" : { "acquireCount" : { "R" : NumberLong(2) } } }, "nreturned" : 1, "responseLength" : 164, "millis" : 1, "execStats" : { "summary" : "GEO_NEAR_2DSPHERE { address.location: \"2dsphere\", upperCaseName: 1.0 }" }, "ts" : ISODate("2015-08-07T19:26:05.609Z"), "client" : "127.0.0.1", "allUsers" : [ ], "user" : "" }
{ "op" : "query", "ns" : "test.system.profile", "query" : { "query" : {  }, "orderby" : { "$natural" : -1 } }, "cursorid" : 35326384387, "ntoreturn" : 2, "ntoskip" : 0, "nscanned" : 0, "nscannedObjects" : 2, "keyUpdates" : 0, "writeConflicts" : 0, "numYield" : 0, "locks" : { "Global" : { "acquireCount" : { "r" : NumberLong(1) } }, "MMAPV1Journal" : { "acquireCount" : { "r" : NumberLong(1) } }, "Database" : { "acquireCount" : { "r" : NumberLong(1) } }, "Collection" : { "acquireCount" : { "R" : NumberLong(1) } } }, "nreturned" : 2, "responseLength" : 2097, "millis" : 0, "execStats" : { "stage" : "COLLSCAN", "filter" : { "$and" : [ ] }, "nReturned" : 2, "executionTimeMillisEstimate" : 0, "works" : 3, "advanced" : 2, "needTime" : 1, "needFetch" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 0, "invalidates" : 0, "direction" : "backward", "docsExamined" : 2 }, "ts" : ISODate("2015-08-07T19:20:16.669Z"), "client" : "127.0.0.1", "allUsers" : [ ], "user" : "" }


Generated at Thu Feb 08 03:52:13 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.