Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-56865

explodeForSort logic potentially suboptimal when multikey field(s) follow sort fields in the index

    • Type: Icon: Task Task
    • Resolution: Unresolved
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: 4.2.14, 4.4.6
    • Component/s: None
    • Query Optimization

      Consider the query:

      db.foo.find({e:{$in:[1,2]}, r:{$gt:0}}).sort({s:1})

      If following the ESR Guidance, one would create the following index:

      db.foo.createIndex({e:1, s:1, r:1}) 

      This works fine and produces a SORT_MERGE plan when the index is not multikey:

      > db.foo.find({e:{$in:[1,2]}, r:{$gt:0}}).sort({s:1}).explain().queryPlanner.winningPlan.queryPlan
      {
      	"stage" : "FETCH",
      	"planNodeId" : 4,
      	"inputStage" : {
      		"stage" : "SORT_MERGE",
      		"planNodeId" : 3,
      		"sortPattern" : {
      			"s" : 1
      		},
      		"inputStages" : [
      			{
      				"stage" : "IXSCAN",
      				"planNodeId" : 1,
      				"keyPattern" : {
      					"e" : 1,
      					"s" : 1,
      					"r" : 1
      				},
      				"indexName" : "e_1_s_1_r_1",
      				"isMultiKey" : false,
      				"multiKeyPaths" : {
      					"e" : [ ],
      					"s" : [ ],
      					"r" : [ ]
      				},
      				"isUnique" : false,
      				"isSparse" : false,
      				"isPartial" : false,
      				"indexVersion" : 2,
      				"direction" : "forward",
      				"indexBounds" : {
      					"e" : [
      						"[1.0, 1.0]"
      					],
      					"s" : [
      						"[MinKey, MaxKey]"
      					],
      					"r" : [
      						"(0.0, inf.0]"
      					]
      				}
      			},
      			{
      				"stage" : "IXSCAN",
      				"planNodeId" : 2,
      				"keyPattern" : {
      					"e" : 1,
      					"s" : 1,
      					"r" : 1
      				},
      				"indexName" : "e_1_s_1_r_1",
      				"isMultiKey" : false,
      				"multiKeyPaths" : {
      					"e" : [ ],
      					"s" : [ ],
      					"r" : [ ]
      				},
      				"isUnique" : false,
      				"isSparse" : false,
      				"isPartial" : false,
      				"indexVersion" : 2,
      				"direction" : "forward",
      				"indexBounds" : {
      					"e" : [
      						"[2.0, 2.0]"
      					],
      					"s" : [
      						"[MinKey, MaxKey]"
      					],
      					"r" : [
      						"(0.0, inf.0]"
      					]
      				}
      			}
      		]
      	}
      } 

      Making the index multikey on the trailing range predicate converts the plan into one that does NOT utilize the SORT_MERGE stage and instead performs a blocking SORT:

      > db.foo.insert({r:[1,2,3]})
      WriteResult({ "nInserted" : 1 })
      > db.foo.find({e:{$in:[1,2]}, r:{$gt:0}}).sort({s:1}).explain().queryPlanner.winningPlan.queryPlan
      {
      	"stage" : "FETCH",
      	"planNodeId" : 3,
      	"inputStage" : {
      		"stage" : "SORT",
      		"planNodeId" : 2,
      		"sortPattern" : {
      			"s" : 1
      		},
      		"memLimit" : 104857600,
      		"type" : "default",
      		"inputStage" : {
      			"stage" : "IXSCAN",
      			"planNodeId" : 1,
      			"keyPattern" : {
      				"e" : 1,
      				"s" : 1,
      				"r" : 1
      			},
      			"indexName" : "e_1_s_1_r_1",
      			"isMultiKey" : true,
      			"multiKeyPaths" : {
      				"e" : [ ],
      				"s" : [ ],
      				"r" : [
      					"r"
      				]
      			},
      			"isUnique" : false,
      			"isSparse" : false,
      			"isPartial" : false,
      			"indexVersion" : 2,
      			"direction" : "forward",
      			"indexBounds" : {
      				"e" : [
      					"[1.0, 1.0]",
      					"[2.0, 2.0]"
      				],
      				"s" : [
      					"[MinKey, MaxKey]"
      				],
      				"r" : [
      					"(0.0, inf.0]"
      				]
      			}
      		}
      	}
      } 

      This behavior does not occur when the array/multikey field is one of the prefixed equality conditions:

      > db.foo.remove({})
      WriteResult({ "nRemoved" : 1 })
      > db.foo.reIndex()
      {
      	...
      	"ok" : 1
      } 
      > db.foo.insert({e:[1,2,3]})
      WriteResult({ "nInserted" : 1 })
      > db.foo.find({e:{$in:[1,2]}, r:{$gt:0}}).sort({s:1}).explain().queryPlanner.winningPlan.queryPlan
      {
      	"stage" : "FETCH",
      	"planNodeId" : 4,
      	"inputStage" : {
      		"stage" : "SORT_MERGE",
      		"planNodeId" : 3,
      		"sortPattern" : {
      			"s" : 1
      		},
      		"inputStages" : [
      			{
      				"stage" : "IXSCAN",
      				"planNodeId" : 1,
      				"keyPattern" : {
      					"e" : 1,
      					"s" : 1,
      					"r" : 1
      				},
      				"indexName" : "e_1_s_1_r_1",
      				"isMultiKey" : true,
      				"multiKeyPaths" : {
      					"e" : [
      						"e"
      					],
      					"s" : [ ],
      					"r" : [ ]
      				},
      				"isUnique" : false,
      				"isSparse" : false,
      				"isPartial" : false,
      				"indexVersion" : 2,
      				"direction" : "forward",
      				"indexBounds" : {
      					"e" : [
      						"[1.0, 1.0]"
      					],
      					"s" : [
      						"[MinKey, MaxKey]"
      					],
      					"r" : [
      						"(0.0, inf.0]"
      					]
      				}
      			},
      			{
      				"stage" : "IXSCAN",
      				"planNodeId" : 2,
      				"keyPattern" : {
      					"e" : 1,
      					"s" : 1,
      					"r" : 1
      				},
      				"indexName" : "e_1_s_1_r_1",
      				"isMultiKey" : true,
      				"multiKeyPaths" : {
      					"e" : [
      						"e"
      					],
      					"s" : [ ],
      					"r" : [ ]
      				},
      				"isUnique" : false,
      				"isSparse" : false,
      				"isPartial" : false,
      				"indexVersion" : 2,
      				"direction" : "forward",
      				"indexBounds" : {
      					"e" : [
      						"[2.0, 2.0]"
      					],
      					"s" : [
      						"[MinKey, MaxKey]"
      					],
      					"r" : [
      						"(0.0, inf.0]"
      					]
      				}
      			}
      		]
      	}
      } 

      The problem appears to be related to how we examine the index key patterns to determine if they provide the desired functionality.  More specifically, it looks like there is an assumption made when identifying portion of the index that provides the sort and checking it for multikeyness.  The associated comments in the code say:

      // The rest of the fields define the sort order we could obtain by exploding
      // the bounds.
      
          // One of the indexed fields providing the sort is multikey. It is not correct for a
          // field with multikey components to provide a sort, so bail out.

      The assumption that all of the remaining fields are relevant to the sort is not necessarily correct.  In the example above, it is only the s field in the index that is associated with the sort.  The r field after that is causing the code to exit as it is a multikey field, but since it is NOT part of the sort the fact that it is an array should not violate any constraints on sorting.

      Investigated using "version" : "4.5.0-7162-g7581854".

            Assignee:
            backlog-query-optimization [DO NOT USE] Backlog - Query Optimization
            Reporter:
            christopher.harris@mongodb.com Chris Harris
            Votes:
            0 Vote for this issue
            Watchers:
            15 Start watching this issue

              Created:
              Updated: