Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-29805

Case insensitive regex search does not take advantage of collation index

    • Type: Icon: Bug Bug
    • Resolution: Works as Designed
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: 3.4.4
    • Component/s: Index Maintenance
    • Labels:
      None
    • ALL
    • Hide
      > db.createCollection("bigbug2",{collation:{locale:"en",strength:2}});
      { "ok" : 1 }
      > db.bigbug2.insert({a:"bob"});
      WriteResult({ "nInserted" : 1 })
      > db.bigbug2.insert({a:"james"});
      WriteResult({ "nInserted" : 1 })
      > db.bigbug2.createIndex({a:1});
      {
      	"createdCollectionAutomatically" : false,
      	"numIndexesBefore" : 1,
      	"numIndexesAfter" : 2,
      	"ok" : 1
      }
      > db.bigbug2.find({a:/^bo/i}).explain("executionStats");
      {
      	"queryPlanner" : {
      		"plannerVersion" : 1,
      		"namespace" : "testing.bigbug2",
      		"indexFilterSet" : false,
      		"parsedQuery" : {
      			"a" : {
      				"$regex" : "^bo",
      				"$options" : "i"
      			}
      		},
      		"collation" : {
      			"locale" : "en",
      			"caseLevel" : false,
      			"caseFirst" : "off",
      			"strength" : 2,
      			"numericOrdering" : false,
      			"alternate" : "non-ignorable",
      			"maxVariable" : "punct",
      			"normalization" : false,
      			"backwards" : false,
      			"version" : "57.1"
      		},
      		"winningPlan" : {
      			"stage" : "FETCH",
      			"filter" : {
      				"a" : {
      					"$regex" : "^bo",
      					"$options" : "i"
      				}
      			},
      			"inputStage" : {
      				"stage" : "IXSCAN",
      				"keyPattern" : {
      					"a" : 1
      				},
      				"indexName" : "a_1",
      				"collation" : {
      					"locale" : "en",
      					"caseLevel" : false,
      					"caseFirst" : "off",
      					"strength" : 2,
      					"numericOrdering" : false,
      					"alternate" : "non-ignorable",
      					"maxVariable" : "punct",
      					"normalization" : false,
      					"backwards" : false,
      					"version" : "57.1"
      				},
      				"isMultiKey" : false,
      				"multiKeyPaths" : {
      					"a" : [ ]
      				},
      				"isUnique" : false,
      				"isSparse" : false,
      				"isPartial" : false,
      				"indexVersion" : 2,
      				"direction" : "forward",
      				"indexBounds" : {
      					"a" : [
      						"[\"\", {})",
      						"[/^bo/i, /^bo/i]"
      					]
      				}
      			}
      		},
      		"rejectedPlans" : [ ]
      	},
      	"executionStats" : {
      		"executionSuccess" : true,
      		"nReturned" : 1,
      		"executionTimeMillis" : 1,
      		"totalKeysExamined" : 2,
      		"totalDocsExamined" : 2,
      		"executionStages" : {
      			"stage" : "FETCH",
      			"filter" : {
      				"a" : {
      					"$regex" : "^bo",
      					"$options" : "i"
      				}
      			},
      			"nReturned" : 1,
      			"executionTimeMillisEstimate" : 0,
      			"works" : 3,
      			"advanced" : 1,
      			"needTime" : 1,
      			"needYield" : 0,
      			"saveState" : 0,
      			"restoreState" : 0,
      			"isEOF" : 1,
      			"invalidates" : 0,
      			"docsExamined" : 2,
      			"alreadyHasObj" : 0,
      			"inputStage" : {
      				"stage" : "IXSCAN",
      				"nReturned" : 2,
      				"executionTimeMillisEstimate" : 0,
      				"works" : 3,
      				"advanced" : 2,
      				"needTime" : 0,
      				"needYield" : 0,
      				"saveState" : 0,
      				"restoreState" : 0,
      				"isEOF" : 1,
      				"invalidates" : 0,
      				"keyPattern" : {
      					"a" : 1
      				},
      				"indexName" : "a_1",
      				"collation" : {
      					"locale" : "en",
      					"caseLevel" : false,
      					"caseFirst" : "off",
      					"strength" : 2,
      					"numericOrdering" : false,
      					"alternate" : "non-ignorable",
      					"maxVariable" : "punct",
      					"normalization" : false,
      					"backwards" : false,
      					"version" : "57.1"
      				},
      				"isMultiKey" : false,
      				"multiKeyPaths" : {
      					"a" : [ ]
      				},
      				"isUnique" : false,
      				"isSparse" : false,
      				"isPartial" : false,
      				"indexVersion" : 2,
      				"direction" : "forward",
      				"indexBounds" : {
      					"a" : [
      						"[\"\", {})",
      						"[/^bo/i, /^bo/i]"
      					]
      				},
      				"keysExamined" : 2,
      				"seeks" : 1,
      				"dupsTested" : 0,
      				"dupsDropped" : 0,
      				"seenInvalidated" : 0
      			}
      		}
      	},
      	"serverInfo" : {
      		"host" : "alwaysbcoding",
      		"port" : 27017,
      		"version" : "3.4.4",
      		"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
      	},
      	"ok" : 1
      }
      
      Show
      > db.createCollection("bigbug2",{collation:{locale:"en",strength:2}}); { "ok" : 1 } > db.bigbug2.insert({a:"bob"}); WriteResult({ "nInserted" : 1 }) > db.bigbug2.insert({a:"james"}); WriteResult({ "nInserted" : 1 }) > db.bigbug2.createIndex({a:1}); { "createdCollectionAutomatically" : false, "numIndexesBefore" : 1, "numIndexesAfter" : 2, "ok" : 1 } > db.bigbug2.find({a:/^bo/i}).explain("executionStats"); { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "testing.bigbug2", "indexFilterSet" : false, "parsedQuery" : { "a" : { "$regex" : "^bo", "$options" : "i" } }, "collation" : { "locale" : "en", "caseLevel" : false, "caseFirst" : "off", "strength" : 2, "numericOrdering" : false, "alternate" : "non-ignorable", "maxVariable" : "punct", "normalization" : false, "backwards" : false, "version" : "57.1" }, "winningPlan" : { "stage" : "FETCH", "filter" : { "a" : { "$regex" : "^bo", "$options" : "i" } }, "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "a" : 1 }, "indexName" : "a_1", "collation" : { "locale" : "en", "caseLevel" : false, "caseFirst" : "off", "strength" : 2, "numericOrdering" : false, "alternate" : "non-ignorable", "maxVariable" : "punct", "normalization" : false, "backwards" : false, "version" : "57.1" }, "isMultiKey" : false, "multiKeyPaths" : { "a" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "a" : [ "[\"\", {})", "[/^bo/i, /^bo/i]" ] } } }, "rejectedPlans" : [ ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 1, "executionTimeMillis" : 1, "totalKeysExamined" : 2, "totalDocsExamined" : 2, "executionStages" : { "stage" : "FETCH", "filter" : { "a" : { "$regex" : "^bo", "$options" : "i" } }, "nReturned" : 1, "executionTimeMillisEstimate" : 0, "works" : 3, "advanced" : 1, "needTime" : 1, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "docsExamined" : 2, "alreadyHasObj" : 0, "inputStage" : { "stage" : "IXSCAN", "nReturned" : 2, "executionTimeMillisEstimate" : 0, "works" : 3, "advanced" : 2, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "invalidates" : 0, "keyPattern" : { "a" : 1 }, "indexName" : "a_1", "collation" : { "locale" : "en", "caseLevel" : false, "caseFirst" : "off", "strength" : 2, "numericOrdering" : false, "alternate" : "non-ignorable", "maxVariable" : "punct", "normalization" : false, "backwards" : false, "version" : "57.1" }, "isMultiKey" : false, "multiKeyPaths" : { "a" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "a" : [ "[\"\", {})", "[/^bo/i, /^bo/i]" ] }, "keysExamined" : 2, "seeks" : 1, "dupsTested" : 0, "dupsDropped" : 0, "seenInvalidated" : 0 } } }, "serverInfo" : { "host" : "alwaysbcoding", "port" : 27017, "version" : "3.4.4", "gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd" }, "ok" : 1 }

      Original Summary

      Incorrect bounds on regex search with collation index

      Original Description

      When an index is created with a collation strength of 1 or 2 (case insensitive), bounds are not correctly set when running a regex query like this /^search/i. In the following I search a simple collection like so:

      {a:/^bo/i}

      . When I run an explain on it, it has incorrect bounds added "[\"\", {})". This causes the index stage to be completely useless, as it selects everything in the whole index, at least in my tests! In the demo below, you can note that the input IXSCAN stage returns 2 results, when it should only return one.

      "indexBounds" : {
      "a" : [
      "[\"\", {})",
      "[/^bo/i, /^bo/i]"
      ]
      },

            Assignee:
            kelsey.schubert@mongodb.com Kelsey Schubert
            Reporter:
            dane@turbobuilt.com dane truelson
            Votes:
            0 Vote for this issue
            Watchers:
            9 Start watching this issue

              Created:
              Updated:
              Resolved: