Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-15696

$regex, $in and $sort with index returns too many results

    • Type: Icon: Bug Bug
    • Resolution: Done
    • Priority: Icon: Major - P3 Major - P3
    • 2.6.6, 2.8.0-rc0
    • Affects Version/s: 2.6.4, 2.6.5, 2.7.7
    • Component/s: Querying
    • None
    • ALL
    • Hide

      My index is :

      {
                      "v" : 1,
                      "key" : {
                              "channelId" : 1,
                              "searchField" : 1
                      },
                      "name" : "channelId_1_searchField_1",
                      "ns" : "search.searchSeriesOnChannel1"
      }
      

      Here's a working query hinting not to use the index which returns 2 results :

      db.searchSeriesOnChannel1.find({$and : [{ "searchField" : { "$regex" : "_dr" , "$options" : "i"}, "channelId" : { "$in" : [ "CTVTo" , "CTVHT"]}}]}).sort({searchField:1}).hint({$natural:1}).explain()
      {
              "cursor" : "BasicCursor",
              "isMultiKey" : false,
              "n" : 2,
              "nscannedObjects" : 21743,
              "nscanned" : 21743,
              "nscannedObjectsAllPlans" : 21743,
              "nscannedAllPlans" : 21743,
              "scanAndOrder" : true,
              "indexOnly" : false,
              "nYields" : 169,
              "nChunkSkips" : 0,
              "millis" : 59,
              "server" : "fonsemongo:27017",
              "filterSet" : false
      }
      

      Using the index, it returns 62 elements because it discards the searchField $regex filter.

      db.searchSeriesOnChannel1.find({$and : [{ "searchField" : { "$regex" : "_dr" , "$options" : "i"}, "channelId" : { "$in" : [ "CTVTo" , "CTVHT"]}}]}).sort({searchField:1}).explain()
      {
              "clauses" : [
                      {
                              "cursor" : "BtreeCursor channelId_1_searchField_1",
                              "isMultiKey" : false,
                              "n" : 62,
                              "nscannedObjects" : 62,
                              "nscanned" : 62,
                              "scanAndOrder" : false,
                              "indexOnly" : false,
                              "nChunkSkips" : 0,
                              "indexBounds" : {
                                      "channelId" : [
                                              [
                                                      "CTVHT",
                                                      "CTVHT"
                                              ]
                                      ],
                                      "searchField" : [
                                              [
                                                      "",
                                                      {
      
                                                      }
                                              ],
                                              [
                                                      /_dr/i,
                                                      /_dr/i
                                              ]
                                      ]
                              }
                      },
                      {
                              "cursor" : "BtreeCursor channelId_1_searchField_1",
                              "isMultiKey" : false,
                              "n" : 0,
                              "nscannedObjects" : 0,
                              "nscanned" : 0,
                              "scanAndOrder" : false,
                              "indexOnly" : false,
                              "nChunkSkips" : 0,
                              "indexBounds" : {
                                      "channelId" : [
                                              [
                                                      "CTVTo",
                                                      "CTVTo"
                                              ]
                                      ],
                                      "searchField" : [
                                              [
                                                      "",
                                                      {
      
                                                      }
                                              ],
                                              [
                                                      /_dr/i,
                                                      /_dr/i
                                              ]
                                      ]
                              }
                      }
              ],
              "cursor" : "QueryOptimizerCursor",
              "n" : 62,
              "nscannedObjects" : 62,
              "nscanned" : 62,
              "nscannedObjectsAllPlans" : 62,
              "nscannedAllPlans" : 62,
              "scanAndOrder" : false,
              "nYields" : 1,
              "nChunkSkips" : 0,
              "millis" : 0,
              "server" : "fonsemongo:27017",
              "filterSet" : false
      }
      
      Show
      My index is : { "v" : 1, "key" : { "channelId" : 1, "searchField" : 1 }, "name" : "channelId_1_searchField_1" , "ns" : "search.searchSeriesOnChannel1" } Here's a working query hinting not to use the index which returns 2 results : db.searchSeriesOnChannel1.find({$and : [{ "searchField" : { "$regex" : "_dr" , "$options" : "i" }, "channelId" : { "$ in " : [ "CTVTo" , "CTVHT" ]}}]}).sort({searchField:1}).hint({$natural:1}).explain() { "cursor" : "BasicCursor" , "isMultiKey" : false , "n" : 2, "nscannedObjects" : 21743, "nscanned" : 21743, "nscannedObjectsAllPlans" : 21743, "nscannedAllPlans" : 21743, "scanAndOrder" : true , "indexOnly" : false , "nYields" : 169, "nChunkSkips" : 0, "millis" : 59, "server" : "fonsemongo:27017" , "filterSet" : false } Using the index, it returns 62 elements because it discards the searchField $regex filter. db.searchSeriesOnChannel1.find({$and : [{ "searchField" : { "$regex" : "_dr" , "$options" : "i" }, "channelId" : { "$ in " : [ "CTVTo" , "CTVHT" ]}}]}).sort({searchField:1}).explain() { "clauses" : [ { "cursor" : "BtreeCursor channelId_1_searchField_1" , "isMultiKey" : false , "n" : 62, "nscannedObjects" : 62, "nscanned" : 62, "scanAndOrder" : false , "indexOnly" : false , "nChunkSkips" : 0, "indexBounds" : { "channelId" : [ [ "CTVHT" , "CTVHT" ] ], "searchField" : [ [ "", { } ], [ /_dr/i, /_dr/i ] ] } }, { "cursor" : "BtreeCursor channelId_1_searchField_1" , "isMultiKey" : false , "n" : 0, "nscannedObjects" : 0, "nscanned" : 0, "scanAndOrder" : false , "indexOnly" : false , "nChunkSkips" : 0, "indexBounds" : { "channelId" : [ [ "CTVTo" , "CTVTo" ] ], "searchField" : [ [ "", { } ], [ /_dr/i, /_dr/i ] ] } } ], "cursor" : "QueryOptimizerCursor" , "n" : 62, "nscannedObjects" : 62, "nscanned" : 62, "nscannedObjectsAllPlans" : 62, "nscannedAllPlans" : 62, "scanAndOrder" : false , "nYields" : 1, "nChunkSkips" : 0, "millis" : 0, "server" : "fonsemongo:27017" , "filterSet" : false }

      The combination of a $regex, $in and $sort using an index returns too many results. The sort is done on the field filtered with a $regex and that filter is completely discarded. We get 62 results with the index and 2 results without it.

            Assignee:
            david.storch@mongodb.com David Storch
            Reporter:
            simonlg Simon Lavigne-Giroux
            Votes:
            1 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: