Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-19832

Compound 2dsphere index does unnecessary object scans when a non-anchored regex is given for the second field

    • Query Integration

      Standalone 3.0.2 (MMAP)

      Sample data set:

      a = -73.97
      b = 40.77
      
      for(i=0;i<100;i++) {
      x = Math.random()
      db.tow_destinations.insert(
         {
            address : {
           location : { type: "Point", coordinates: [ a+x, b-x ] }
            },
            upperCaseName:"LUKOIL GAS STATION"
         }
      )
      }
      
      db.tow_destinations.insert(
         {
            address : {
           location : { type: "Point", coordinates: [ a, b ] }
            },
            upperCaseName:"LUKOIL GAS SUPPLY"
         }
      )
      

      Index:

      db.tow_destinations.ensureIndex({ 'address.location': "2dsphere", upperCaseName: 1 })
      

      Query:

      > db.tow_destinations.find({ 'address.location': { $near: { $geometry: { type: "Point", coordinates: [ a,b ] }, $maxDistance: 321868.0 } }, upperCaseName:/LUKOIL GAS SU/})
      { "_id" : ObjectId("55c4f32533b480f8f169bdc9"), "address" : { "location" : { "type" : "Point", "coordinates" : [ -73.97, 40.77 ] } }, "upperCaseName" : "LUKOIL GAS SUPPLY" }
      

      Scanned all objects that matched the geo search (108) to return 1:

      > db.system.profile.find().sort({$natural:-1}).limit(2)
      { "op" : "query", "ns" : "test.tow_destinations", "query" : { "address.location" : { "$near" : { "$geometry" : { "type" : "Point", "coordinates" : [ -73.97, 40.77 ] }, "$maxDistance" : 321868 } }, "upperCaseName" : /LUKOIL GAS SU/ }, "ntoreturn" : 0, "ntoskip" : 0, "nscanned" : 273, "nscannedObjects" : 108, "keyUpdates" : 0, "writeConflicts" : 0, "numYield" : 2, "locks" : { "Global" : { "acquireCount" : { "r" : NumberLong(3) } }, "MMAPV1Journal" : { "acquireCount" : { "r" : NumberLong(3) } }, "Database" : { "acquireCount" : { "r" : NumberLong(3) } }, "Collection" : { "acquireCount" : { "R" : NumberLong(3) } } }, "nreturned" : 1, "responseLength" : 164, "millis" : 4, "execStats" : { "summary" : "GEO_NEAR_2DSPHERE { address.location: \"2dsphere\", upperCaseName: 1.0 }" }, "ts" : ISODate("2015-08-07T19:26:18.760Z"), "client" : "127.0.0.1", "allUsers" : [ ], "user" : "" }
      { "op" : "query", "ns" : "test.system.profile", "query" : { "query" : {  }, "orderby" : { "$natural" : -1 } }, "cursorid" : 36288866986, "ntoreturn" : 2, "ntoskip" : 0, "nscanned" : 0, "nscannedObjects" : 2, "keyUpdates" : 0, "writeConflicts" : 0, "numYield" : 0, "locks" : { "Global" : { "acquireCount" : { "r" : NumberLong(1) } }, "MMAPV1Journal" : { "acquireCount" : { "r" : NumberLong(1) } }, "Database" : { "acquireCount" : { "r" : NumberLong(1) } }, "Collection" : { "acquireCount" : { "R" : NumberLong(1) } } }, "nreturned" : 2, "responseLength" : 1561, "millis" : 0, "execStats" : { "stage" : "COLLSCAN", "filter" : { "$and" : [ ] }, "nReturned" : 2, "executionTimeMillisEstimate" : 0, "works" : 3, "advanced" : 2, "needTime" : 1, "needFetch" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 0, "invalidates" : 0, "direction" : "backward", "docsExamined" : 2 }, "ts" : ISODate("2015-08-07T19:26:07.773Z"), "client" : "127.0.0.1", "allUsers" : [ ], "user" : "" }
      

      Apparently, query with an anchored regex works as expected:

      > db.tow_destinations.find({ 'address.location': { $near: { $geometry: { type: "Point", coordinates: [ a,b ] }, $maxDistance: 321868.0 } }, upperCaseName:/^LUKOIL GAS SU/})
      { "_id" : ObjectId("55c4f32533b480f8f169bdc9"), "address" : { "location" : { "type" : "Point", "coordinates" : [ -73.97, 40.77 ] } }, "upperCaseName" : "LUKOIL GAS SUPPLY" }
      

      It scanned only 1 object:

      > db.system.profile.find().sort({$natural:-1}).limit(2)
      { "op" : "query", "ns" : "test.tow_destinations", "query" : { "address.location" : { "$near" : { "$geometry" : { "type" : "Point", "coordinates" : [ -73.97, 40.77 ] }, "$maxDistance" : 321868 } }, "upperCaseName" : /^LUKOIL GAS SU/ }, "ntoreturn" : 0, "ntoskip" : 0, "nscanned" : 230, "nscannedObjects" : 1, "keyUpdates" : 0, "writeConflicts" : 0, "numYield" : 1, "locks" : { "Global" : { "acquireCount" : { "r" : NumberLong(2) } }, "MMAPV1Journal" : { "acquireCount" : { "r" : NumberLong(2) } }, "Database" : { "acquireCount" : { "r" : NumberLong(2) } }, "Collection" : { "acquireCount" : { "R" : NumberLong(2) } } }, "nreturned" : 1, "responseLength" : 164, "millis" : 1, "execStats" : { "summary" : "GEO_NEAR_2DSPHERE { address.location: \"2dsphere\", upperCaseName: 1.0 }" }, "ts" : ISODate("2015-08-07T19:26:05.609Z"), "client" : "127.0.0.1", "allUsers" : [ ], "user" : "" }
      { "op" : "query", "ns" : "test.system.profile", "query" : { "query" : {  }, "orderby" : { "$natural" : -1 } }, "cursorid" : 35326384387, "ntoreturn" : 2, "ntoskip" : 0, "nscanned" : 0, "nscannedObjects" : 2, "keyUpdates" : 0, "writeConflicts" : 0, "numYield" : 0, "locks" : { "Global" : { "acquireCount" : { "r" : NumberLong(1) } }, "MMAPV1Journal" : { "acquireCount" : { "r" : NumberLong(1) } }, "Database" : { "acquireCount" : { "r" : NumberLong(1) } }, "Collection" : { "acquireCount" : { "R" : NumberLong(1) } } }, "nreturned" : 2, "responseLength" : 2097, "millis" : 0, "execStats" : { "stage" : "COLLSCAN", "filter" : { "$and" : [ ] }, "nReturned" : 2, "executionTimeMillisEstimate" : 0, "works" : 3, "advanced" : 2, "needTime" : 1, "needFetch" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 0, "invalidates" : 0, "direction" : "backward", "docsExamined" : 2 }, "ts" : ISODate("2015-08-07T19:20:16.669Z"), "client" : "127.0.0.1", "allUsers" : [ ], "user" : "" }
      

            Assignee:
            backlog-query-integration [DO NOT USE] Backlog - Query Integration
            Reporter:
            alex.komyagin@mongodb.com Alexander Komyagin (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            7 Start watching this issue

              Created:
              Updated: