Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-48993

explodeForSort can produce incorrect query plan

    • Fully Compatible
    • ALL
    • v4.4, v4.2, v4.0, v3.6
    • Hide
      function assertSortOrderWithCollationIndexBF17891() {
          const coll = db.bf17891;
          coll.drop();
          assert.commandWorked(db.createCollection(coll.getName()));
      
          // Create an index with a non-simple collation.
          assert.commandWorked(
              coll.createIndex({a: 1, b: 1}, {collation: {locale: "en_US", strength: 1}}));
      
          // Insert some documents into the collection. The value of 'a' is the same non-string value for
          // each document, while 'b' is a string.
          assert.commandWorked(
              coll.insert([{a: 0, b: "ccc"}, {a: 0, b: "ddd"}, {a: 0, b: "CCC"}, {a: 0, b: "DDD"}]));
      
          // Run a point-query on the first field of the compound index with a sort on the second field.
          const indexSortedResults = coll.find({a: 0}, {_id: 0}).sort({b: 1}).toArray();
      
          // Repeat the query but this time force a COLLSCAN.
          const collScanSortedResults =
              coll.find({a: 0}, {_id: 0}).sort({b: 1}).hint({$natural: 1}).toArray();
      
          // Print out the explain results for this query. It will be observed that the plan is a
          // SORT_MERGE-IXSCAN using the {a: 1, b: 1} index, despite the fact that this index has a
          // non-simple collation. The {a: 1, b: 1} index is eligible to answer the query because the
          // predicate 'a' is a non-string value. We then run through some checks to see whether we can
          // avoid performing an in-memory sort, eventually calling QueryPlannerAnalysis::explodeForSort.
          // But this method fails to notice that the index's collation should make it ineligible to
          // provide a sort for this query, and so we end up producing a SORT_MERGE plan that will
          // incorrectly sort the output according to the collation.
          printjson(coll.find({a: 0}).sort({b: 1}).explain());
      
          // The documents should be returned in ascending order, but instead are sorted according to the
          // case-insensitive index. The COLLSCAN produces the correct ordering of results.
          assert.docEq(indexSortedResults, collScanSortedResults);
      }
      
      Show
      function assertSortOrderWithCollationIndexBF17891() { const coll = db.bf17891; coll.drop(); assert .commandWorked(db.createCollection(coll.getName())); // Create an index with a non-simple collation. assert .commandWorked( coll.createIndex({a: 1, b: 1}, {collation: {locale: "en_US" , strength: 1}})); // Insert some documents into the collection. The value of 'a' is the same non-string value for // each document, while 'b' is a string. assert .commandWorked( coll.insert([{a: 0, b: "ccc" }, {a: 0, b: "ddd" }, {a: 0, b: "CCC" }, {a: 0, b: "DDD" }])); // Run a point-query on the first field of the compound index with a sort on the second field. const indexSortedResults = coll.find({a: 0}, {_id: 0}).sort({b: 1}).toArray(); // Repeat the query but this time force a COLLSCAN. const collScanSortedResults = coll.find({a: 0}, {_id: 0}).sort({b: 1}).hint({$natural: 1}).toArray(); // Print out the explain results for this query. It will be observed that the plan is a // SORT_MERGE-IXSCAN using the {a: 1, b: 1} index, despite the fact that this index has a // non-simple collation. The {a: 1, b: 1} index is eligible to answer the query because the // predicate 'a' is a non-string value. We then run through some checks to see whether we can // avoid performing an in-memory sort, eventually calling QueryPlannerAnalysis::explodeForSort. // But this method fails to notice that the index's collation should make it ineligible to // provide a sort for this query, and so we end up producing a SORT_MERGE plan that will // incorrectly sort the output according to the collation. printjson(coll.find({a: 0}).sort({b: 1}).explain()); // The documents should be returned in ascending order, but instead are sorted according to the // case -insensitive index. The COLLSCAN produces the correct ordering of results. assert .docEq(indexSortedResults, collScanSortedResults); }
    • Query 2020-06-29, Query 2020-07-13, Query 2020-07-27
    • 8

      Given a compound index with a non-simple collation, a point-query on a prefix field together with a sort on a suffix field can result in a IXSCAN query plan that incorrectly sorts according to the collation. This issue has existed at least since version 3.6.

      The attached reproduction script describes the problem. A query which does not specify a collation (or which explicitly specifies a different collation) may still use an index which has a non-matching collation if the query predicates are non-string values.  We then call QueryPlannerAnalysis::analyzeSort to determine whether we need to perform an in-memory sort. As part of this process we call QueryPlannerAnalysis::explodeForSort to see whether the plan can be rewritten as a SORT_MERGE. Unfortunately, this method does not enforce the constraint that an index with a different collation than the query cannot provide a sort.   

      Incorrect execution plan example:

       db.c.find({a: 1}).sort({b: 1}).explain()
      {
      	"queryPlanner" : {
      		"plannerVersion" : 1,
      		"namespace" : "test.c",
      		"indexFilterSet" : false,
      		"parsedQuery" : {
      			"a" : {
      				"$eq" : 1
      			}
      		},
      		"queryHash" : "E97AE8CA",
      		"planCacheKey" : "35AFA63E",
      		"winningPlan" : {
      			"stage" : "FETCH",
      			"inputStage" : {
      				"stage" : "SORT_MERGE",
      				"sortPattern" : {
      					"b" : 1
      				},
      				"inputStage" : {
      					"stage" : "IXSCAN",
      					"keyPattern" : {
      						"a" : 1,
      						"b" : 1
      					},
      					"indexName" : "a_1_b_1",
      					"collation" : {
      						"locale" : "en",
      						"caseLevel" : false,
      						"caseFirst" : "off",
      						"strength" : 1,
      						"numericOrdering" : false,
      						"alternate" : "non-ignorable",
      						"maxVariable" : "punct",
      						"normalization" : false,
      						"backwards" : false,
      						"version" : "57.1"
      					},
      					"isMultiKey" : false,
      					"multiKeyPaths" : {
      						"a" : [ ],
      						"b" : [ ]
      					},
      					"isUnique" : false,
      					"isSparse" : false,
      					"isPartial" : false,
      					"indexVersion" : 2,
      					"direction" : "forward",
      					"indexBounds" : {
      						"a" : [
      							"[1.0, 1.0]"
      						],
      						"b" : [
      							"[MinKey, MaxKey]"
      						]
      					}
      				}
      			}
      		},
      		"rejectedPlans" : [ ]
      	},
      	"serverInfo" : {
      		"host" : "redshirt",
      		"port" : 27017,
      		"version" : "0.0.0",
      		"gitVersion" : "unknown"
      	},
      	"ok" : 1
      }
      

       Incorrect output example. Note that the results are sorted on case-insensitive value of field "b".

      > db.c.find({a: 1}).sort({b: 1})
      { "_id" : ObjectId("5eecc3ecb4eefa8f13223f2c"), "a" : 1, "b" : "aa" }
      { "_id" : ObjectId("5eecc3f3b4eefa8f13223f2d"), "a" : 1, "b" : "AA" }
      { "_id" : ObjectId("5eecc3f7b4eefa8f13223f2e"), "a" : 1, "b" : "BB" }
      { "_id" : ObjectId("5eecc3f9b4eefa8f13223f2f"), "a" : 1, "b" : "bb" }
      

       

            Assignee:
            mindaugas.malinauskas@mongodb.com Mindaugas Malinauskas
            Reporter:
            mindaugas.malinauskas@mongodb.com Mindaugas Malinauskas
            Votes:
            0 Vote for this issue
            Watchers:
            10 Start watching this issue

              Created:
              Updated:
              Resolved: