Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-29805

Case insensitive regex search does not take advantage of collation index

    XMLWordPrintableJSON

Details

    • Icon: Bug Bug
    • Resolution: Works as Designed
    • Icon: Major - P3 Major - P3
    • None
    • 3.4.4
    • Index Maintenance
    • None
    • ALL
    • Hide

      > db.createCollection("bigbug2",{collation:{locale:"en",strength:2}});
      { "ok" : 1 }
      > db.bigbug2.insert({a:"bob"});
      WriteResult({ "nInserted" : 1 })
      > db.bigbug2.insert({a:"james"});
      WriteResult({ "nInserted" : 1 })
      > db.bigbug2.createIndex({a:1});
      {
      	"createdCollectionAutomatically" : false,
      	"numIndexesBefore" : 1,
      	"numIndexesAfter" : 2,
      	"ok" : 1
      }
      > db.bigbug2.find({a:/^bo/i}).explain("executionStats");
      {
      	"queryPlanner" : {
      		"plannerVersion" : 1,
      		"namespace" : "testing.bigbug2",
      		"indexFilterSet" : false,
      		"parsedQuery" : {
      			"a" : {
      				"$regex" : "^bo",
      				"$options" : "i"
      			}
      		},
      		"collation" : {
      			"locale" : "en",
      			"caseLevel" : false,
      			"caseFirst" : "off",
      			"strength" : 2,
      			"numericOrdering" : false,
      			"alternate" : "non-ignorable",
      			"maxVariable" : "punct",
      			"normalization" : false,
      			"backwards" : false,
      			"version" : "57.1"
      		},
      		"winningPlan" : {
      			"stage" : "FETCH",
      			"filter" : {
      				"a" : {
      					"$regex" : "^bo",
      					"$options" : "i"
      				}
      			},
      			"inputStage" : {
      				"stage" : "IXSCAN",
      				"keyPattern" : {
      					"a" : 1
      				},
      				"indexName" : "a_1",
      				"collation" : {
      					"locale" : "en",
      					"caseLevel" : false,
      					"caseFirst" : "off",
      					"strength" : 2,
      					"numericOrdering" : false,
      					"alternate" : "non-ignorable",
      					"maxVariable" : "punct",
      					"normalization" : false,
      					"backwards" : false,
      					"version" : "57.1"
      				},
      				"isMultiKey" : false,
      				"multiKeyPaths" : {
      					"a" : [ ]
      				},
      				"isUnique" : false,
      				"isSparse" : false,
      				"isPartial" : false,
      				"indexVersion" : 2,
      				"direction" : "forward",
      				"indexBounds" : {
      					"a" : [
      						"[\"\", {})",
      						"[/^bo/i, /^bo/i]"
      					]
      				}
      			}
      		},
      		"rejectedPlans" : [ ]
      	},
      	"executionStats" : {
      		"executionSuccess" : true,
      		"nReturned" : 1,
      		"executionTimeMillis" : 1,
      		"totalKeysExamined" : 2,
      		"totalDocsExamined" : 2,
      		"executionStages" : {
      			"stage" : "FETCH",
      			"filter" : {
      				"a" : {
      					"$regex" : "^bo",
      					"$options" : "i"
      				}
      			},
      			"nReturned" : 1,
      			"executionTimeMillisEstimate" : 0,
      			"works" : 3,
      			"advanced" : 1,
      			"needTime" : 1,
      			"needYield" : 0,
      			"saveState" : 0,
      			"restoreState" : 0,
      			"isEOF" : 1,
      			"invalidates" : 0,
      			"docsExamined" : 2,
      			"alreadyHasObj" : 0,
      			"inputStage" : {
      				"stage" : "IXSCAN",
      				"nReturned" : 2,
      				"executionTimeMillisEstimate" : 0,
      				"works" : 3,
      				"advanced" : 2,
      				"needTime" : 0,
      				"needYield" : 0,
      				"saveState" : 0,
      				"restoreState" : 0,
      				"isEOF" : 1,
      				"invalidates" : 0,
      				"keyPattern" : {
      					"a" : 1
      				},
      				"indexName" : "a_1",
      				"collation" : {
      					"locale" : "en",
      					"caseLevel" : false,
      					"caseFirst" : "off",
      					"strength" : 2,
      					"numericOrdering" : false,
      					"alternate" : "non-ignorable",
      					"maxVariable" : "punct",
      					"normalization" : false,
      					"backwards" : false,
      					"version" : "57.1"
      				},
      				"isMultiKey" : false,
      				"multiKeyPaths" : {
      					"a" : [ ]
      				},
      				"isUnique" : false,
      				"isSparse" : false,
      				"isPartial" : false,
      				"indexVersion" : 2,
      				"direction" : "forward",
      				"indexBounds" : {
      					"a" : [
      						"[\"\", {})",
      						"[/^bo/i, /^bo/i]"
      					]
      				},
      				"keysExamined" : 2,
      				"seeks" : 1,
      				"dupsTested" : 0,
      				"dupsDropped" : 0,
      				"seenInvalidated" : 0
      			}
      		}
      	},
      	"serverInfo" : {
      		"host" : "alwaysbcoding",
      		"port" : 27017,
      		"version" : "3.4.4",
      		"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
      	},
      	"ok" : 1
      }
      

      Show
      > db.createCollection("bigbug2",{collation:{locale:"en",strength:2}}); { "ok" : 1 } > db.bigbug2.insert({a:"bob"}); WriteResult({ "nInserted" : 1 }) > db.bigbug2.insert({a:"james"}); WriteResult({ "nInserted" : 1 }) > db.bigbug2.createIndex({a:1}); { "createdCollectionAutomatically" : false, "numIndexesBefore" : 1, "numIndexesAfter" : 2, "ok" : 1 } > db.bigbug2.find({a:/^bo/i}).explain("executionStats"); { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "testing.bigbug2", "indexFilterSet" : false, "parsedQuery" : { "a" : { "$regex" : "^bo", "$options" : "i" } }, "collation" : { "locale" : "en", "caseLevel" : false, "caseFirst" : "off", "strength" : 2, "numericOrdering" : false, "alternate" : "non-ignorable", "maxVariable" : "punct", "normalization" : false, "backwards" : false, "version" : "57.1" }, "winningPlan" : { "stage" : "FETCH", "filter" : { "a" : { "$regex" : "^bo", "$options" : "i" } }, "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "a" : 1 }, "indexName" : "a_1", "collation" : { "locale" : "en", "caseLevel" : false, "caseFirst" : "off", "strength" : 2, "numericOrdering" : false, "alternate" : "non-ignorable", "maxVariable" : "punct", "normalization" : false, "backwards" : false, "version" : "57.1" }, "isMultiKey" : false, "multiKeyPaths" : { "a" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "a" : [ "[\"\", {})", "[/^bo/i, /^bo/i]" ] } } }, "rejectedPlans" : [ ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 1, "executionTimeMillis" : 1, "totalKeysExamined" : 2, "totalDocsExamined" : 2, "executionStages" : { "stage" : "FETCH", "filter" : { "a" : { "$regex" : "^bo", "$options" : "i" } }, "nReturned" : 1, "executionTimeMillisEstimate" : 0, "works" : 3, "advanced" : 1, "needTime" : 1, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "docsExamined" : 2, "alreadyHasObj" : 0, "inputStage" : { "stage" : "IXSCAN", "nReturned" : 2, "executionTimeMillisEstimate" : 0, "works" : 3, "advanced" : 2, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "keyPattern" : { "a" : 1 }, "indexName" : "a_1", "collation" : { "locale" : "en", "caseLevel" : false, "caseFirst" : "off", "strength" : 2, "numericOrdering" : false, "alternate" : "non-ignorable", "maxVariable" : "punct", "normalization" : false, "backwards" : false, "version" : "57.1" }, "isMultiKey" : false, "multiKeyPaths" : { "a" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "a" : [ "[\"\", {})", "[/^bo/i, /^bo/i]" ] }, "keysExamined" : 2, "seeks" : 1, "dupsTested" : 0, "dupsDropped" : 0, "seenInvalidated" : 0 } } }, "serverInfo" : { "host" : "alwaysbcoding", "port" : 27017, "version" : "3.4.4", "gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd" }, "ok" : 1 }

    Description

      Original Summary

      Incorrect bounds on regex search with collation index

      Original Description

      When an index is created with a collation strength of 1 or 2 (case insensitive), bounds are not correctly set when running a regex query like this /^search/i. In the following I search a simple collection like so:

      {a:/^bo/i}

      . When I run an explain on it, it has incorrect bounds added "[\"\", {})". This causes the index stage to be completely useless, as it selects everything in the whole index, at least in my tests! In the demo below, you can note that the input IXSCAN stage returns 2 results, when it should only return one.

      "indexBounds" : {
      "a" : [
      "[\"\", {})",
      "[/^bo/i, /^bo/i]"
      ]
      },

      Attachments

        Activity

          People

            kelsey.schubert@mongodb.com Kelsey Schubert
            dane@turbobuilt.com dane truelson
            Votes:
            0 Vote for this issue
            Watchers:
            9 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: