Details
-
Improvement
-
Resolution: Unresolved
-
Minor - P4
-
None
-
4.4.3
-
None
-
Query Execution
Description
When performing aggregation with $sort+$match+$group+$first pipeline, some unexpected plan is being chosen. I would expect to see DISTINCT_SCAN and small totalDocsExamined amount in explain, but having SORT_KEY_GENERATOR and large totalDocsExamined amount.
Given:
mongos> sh.status()
|
— Sharding Status —
|
...
|
shards:
|
{ "_id" : "rs0", "host" : "rs0/127.0.0.1:28100", "state" : 1 } |
{ "_id" : "rs1", "host" : "rs1/127.0.0.1:28101", "state" : 1 } |
mongos> sh.shardCollection("test.addresses", { zipcode: "hashed" }) |
|
|
mongos> db.addresses.createIndex({zipcode: 1, house: 1}) |
|
|
mongos> db.addresses.insert([
|
{zipcode: "111", house: 1}, |
{zipcode: "111", house: 2}, |
{zipcode: "111", house: 3}]) |
mongos> db.addresses.insert([
|
{zipcode: "444", house: 1}, |
{zipcode: "444", house: 2}, |
{zipcode: "444", house: 3}]) |
mongos> db.addresses.getShardDistribution()
|
Shard rs0 at rs0/127.0.0.1:28100 |
data : 162B docs : 3 chunks : 2 |
Shard rs1 at rs1/127.0.0.1:28101 |
data : 162B docs : 3 chunks : 2 |
So we have 3 documents on each shard with 2 distinct shard keys. Given that we could perform such aggregation:
mongos> db.addresses.explain("executionStats").aggregate([{$sort: {zipcode: 1, house: 1}}, {$match: {zipcode: {$in: ["111", "444"]}}}, {$group: {_id: "$zipcode", first: {$first: "$$ROOT"}}}]) |
{
|
"serverInfo" : { |
"host" : "5ae1a7bb5de9", |
"port" : 27017, |
"version" : "4.4.3", |
"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13" |
},
|
"mergeType" : "mongos", |
"splitPipeline" : { |
"shardsPart" : [ |
{
|
"$match" : { |
"zipcode" : { |
"$in" : [ |
"111", |
"444" |
]
|
}
|
}
|
},
|
{
|
"$sort" : { |
"sortKey" : { |
"zipcode" : 1, |
"house" : 1 |
}
|
}
|
}
|
],
|
"mergerPart" : [ |
{
|
"$group" : { |
"_id" : "$zipcode", |
"first" : { |
"$first" : "$$ROOT" |
}
|
}
|
}
|
]
|
},
|
"shards" : { |
"rs1" : { |
"host" : "127.0.0.1:28101", |
"queryPlanner" : { |
"plannerVersion" : 1, |
"namespace" : "test.addresses", |
"indexFilterSet" : false, |
"parsedQuery" : { |
"zipcode" : { |
"$in" : [ |
"111", |
"444" |
]
|
}
|
},
|
"optimizedPipeline" : true, |
"winningPlan" : { |
"stage" : "FETCH", |
"inputStage" : { |
"stage" : "SORT_KEY_GENERATOR", |
"inputStage" : { |
"stage" : "SHARDING_FILTER", |
"inputStage" : { |
"stage" : "IXSCAN", |
"keyPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"indexName" : "zipcode_1_house_1", |
"isMultiKey" : false, |
"multiKeyPaths" : { |
"zipcode" : [ ], |
"house" : [ ] |
},
|
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[\"111\", \"111\"]", |
"[\"444\", \"444\"]" |
],
|
"house" : [ |
"[MinKey, MaxKey]" |
]
|
}
|
}
|
}
|
}
|
},
|
"rejectedPlans" : [ |
{
|
"stage" : "SORT", |
"sortPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"memLimit" : 104857600, |
"type" : "simple", |
"inputStage" : { |
"stage" : "SHARDING_FILTER", |
"inputStage" : { |
"stage" : "FETCH", |
"filter" : { |
"zipcode" : { |
"$in" : [ |
"111", |
"444" |
]
|
}
|
},
|
"inputStage" : { |
"stage" : "IXSCAN", |
"keyPattern" : { |
"zipcode" : "hashed" |
},
|
"indexName" : "zipcode_hashed", |
"isMultiKey" : false, |
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[-1348867638842708814, -1348867638842708814]", |
"[4824476026935005375, 4824476026935005375]" |
]
|
}
|
}
|
}
|
}
|
}
|
]
|
},
|
"executionStats" : { |
"executionSuccess" : true, |
"nReturned" : 3, |
"executionTimeMillis" : 0, |
"totalKeysExamined" : 3, |
"totalDocsExamined" : 3, |
"executionStages" : { |
"stage" : "FETCH", |
"nReturned" : 3, |
"executionTimeMillisEstimate" : 0, |
"works" : 5, |
"advanced" : 3, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 0, |
"restoreState" : 0, |
"isEOF" : 1, |
"docsExamined" : 3, |
"alreadyHasObj" : 0, |
"inputStage" : { |
"stage" : "SORT_KEY_GENERATOR", |
"nReturned" : 3, |
"executionTimeMillisEstimate" : 0, |
"works" : 4, |
"advanced" : 3, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 0, |
"restoreState" : 0, |
"isEOF" : 1, |
"inputStage" : { |
"stage" : "SHARDING_FILTER", |
"nReturned" : 3, |
"executionTimeMillisEstimate" : 0, |
"works" : 4, |
"advanced" : 3, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 0, |
"restoreState" : 0, |
"isEOF" : 1, |
"chunkSkips" : 0, |
"inputStage" : { |
"stage" : "IXSCAN", |
"nReturned" : 3, |
"executionTimeMillisEstimate" : 0, |
"works" : 4, |
"advanced" : 3, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 0, |
"restoreState" : 0, |
"isEOF" : 1, |
"keyPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"indexName" : "zipcode_1_house_1", |
"isMultiKey" : false, |
"multiKeyPaths" : { |
"zipcode" : [ ], |
"house" : [ ] |
},
|
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[\"111\", \"111\"]", |
"[\"444\", \"444\"]" |
],
|
"house" : [ |
"[MinKey, MaxKey]" |
]
|
},
|
"keysExamined" : 3, |
"seeks" : 1, |
"dupsTested" : 0, |
"dupsDropped" : 0 |
}
|
}
|
}
|
}
|
}
|
},
|
"rs0" : { |
"host" : "127.0.0.1:28100", |
"queryPlanner" : { |
"plannerVersion" : 1, |
"namespace" : "test.addresses", |
"indexFilterSet" : false, |
"parsedQuery" : { |
"zipcode" : { |
"$in" : [ |
"111", |
"444" |
]
|
}
|
},
|
"optimizedPipeline" : true, |
"winningPlan" : { |
"stage" : "FETCH", |
"inputStage" : { |
"stage" : "SORT_KEY_GENERATOR", |
"inputStage" : { |
"stage" : "SHARDING_FILTER", |
"inputStage" : { |
"stage" : "IXSCAN", |
"keyPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"indexName" : "zipcode_1_house_1", |
"isMultiKey" : false, |
"multiKeyPaths" : { |
"zipcode" : [ ], |
"house" : [ ] |
},
|
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[\"111\", \"111\"]", |
"[\"444\", \"444\"]" |
],
|
"house" : [ |
"[MinKey, MaxKey]" |
]
|
}
|
}
|
}
|
}
|
},
|
"rejectedPlans" : [ |
{
|
"stage" : "SORT", |
"sortPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"memLimit" : 104857600, |
"type" : "simple", |
"inputStage" : { |
"stage" : "SHARDING_FILTER", |
"inputStage" : { |
"stage" : "FETCH", |
"filter" : { |
"zipcode" : { |
"$in" : [ |
"111", |
"444" |
]
|
}
|
},
|
"inputStage" : { |
"stage" : "IXSCAN", |
"keyPattern" : { |
"zipcode" : "hashed" |
},
|
"indexName" : "zipcode_hashed", |
"isMultiKey" : false, |
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[-1348867638842708814, -1348867638842708814]", |
"[4824476026935005375, 4824476026935005375]" |
]
|
}
|
}
|
}
|
}
|
}
|
]
|
},
|
"executionStats" : { |
"executionSuccess" : true, |
"nReturned" : 3, |
"executionTimeMillis" : 0, |
"totalKeysExamined" : 3, |
"totalDocsExamined" : 3, |
"executionStages" : { |
"stage" : "FETCH", |
"nReturned" : 3, |
"executionTimeMillisEstimate" : 0, |
"works" : 5, |
"advanced" : 3, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 0, |
"restoreState" : 0, |
"isEOF" : 1, |
"docsExamined" : 3, |
"alreadyHasObj" : 0, |
"inputStage" : { |
"stage" : "SORT_KEY_GENERATOR", |
"nReturned" : 3, |
"executionTimeMillisEstimate" : 0, |
"works" : 4, |
"advanced" : 3, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 0, |
"restoreState" : 0, |
"isEOF" : 1, |
"inputStage" : { |
"stage" : "SHARDING_FILTER", |
"nReturned" : 3, |
"executionTimeMillisEstimate" : 0, |
"works" : 4, |
"advanced" : 3, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 0, |
"restoreState" : 0, |
"isEOF" : 1, |
"chunkSkips" : 0, |
"inputStage" : { |
"stage" : "IXSCAN", |
"nReturned" : 3, |
"executionTimeMillisEstimate" : 0, |
"works" : 4, |
"advanced" : 3, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 0, |
"restoreState" : 0, |
"isEOF" : 1, |
"keyPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"indexName" : "zipcode_1_house_1", |
"isMultiKey" : false, |
"multiKeyPaths" : { |
"zipcode" : [ ], |
"house" : [ ] |
},
|
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[\"111\", \"111\"]", |
"[\"444\", \"444\"]" |
],
|
"house" : [ |
"[MinKey, MaxKey]" |
]
|
},
|
"keysExamined" : 3, |
"seeks" : 1, |
"dupsTested" : 0, |
"dupsDropped" : 0 |
}
|
}
|
}
|
}
|
}
|
}
|
},
|
"ok" : 1, |
"operationTime" : Timestamp(1615826160, 1), |
"$clusterTime" : { |
"clusterTime" : Timestamp(1615826160, 1), |
"signature" : { |
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), |
"keyId" : NumberLong(0) |
}
|
}
|
}
|
As we can see in totalDocsExamined all 3 documents is being fetched on each shard, and no DISTINCT_SCAN stage.
But if we remove $sort stage, we can see expected behavior:
mongos> db.addresses.explain("executionStats").aggregate([{$match: {zipcode: {$in: ["111", "444"]}}}, {$group: {_id: "$zipcode", first: {$first: "$$ROOT"}}}]) |
{
|
"serverInfo" : { |
"host" : "5ae1a7bb5de9", |
"port" : 27017, |
"version" : "4.4.3", |
"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13" |
},
|
"mergeType" : "mongos", |
"splitPipeline" : { |
"shardsPart" : [ |
{
|
"$match" : { |
"zipcode" : { |
"$in" : [ |
"111", |
"444" |
]
|
}
|
}
|
},
|
{
|
"$group" : { |
"_id" : "$zipcode", |
"first" : { |
"$first" : "$$ROOT" |
}
|
}
|
}
|
],
|
"mergerPart" : [ |
{
|
"$group" : { |
"_id" : "$$ROOT._id", |
"first" : { |
"$first" : "$$ROOT.first" |
},
|
"$doingMerge" : true |
}
|
}
|
]
|
},
|
"shards" : { |
"rs0" : { |
"host" : "127.0.0.1:28100", |
"stages" : [ |
{
|
"$cursor" : { |
"queryPlanner" : { |
"plannerVersion" : 1, |
"namespace" : "test.addresses", |
"indexFilterSet" : false, |
"parsedQuery" : { |
"zipcode" : { |
"$in" : [ |
"111", |
"444" |
]
|
}
|
},
|
"queryHash" : "A9C86C49", |
"planCacheKey" : "E7443539", |
"winningPlan" : { |
"stage" : "FETCH", |
"inputStage" : { |
"stage" : "DISTINCT_SCAN", |
"keyPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"indexName" : "zipcode_1_house_1", |
"isMultiKey" : false, |
"multiKeyPaths" : { |
"zipcode" : [ ], |
"house" : [ ] |
},
|
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[\"111\", \"111\"]", |
"[\"444\", \"444\"]" |
],
|
"house" : [ |
"[MinKey, MaxKey]" |
]
|
}
|
}
|
},
|
"rejectedPlans" : [ ] |
},
|
"executionStats" : { |
"executionSuccess" : true, |
"nReturned" : 1, |
"executionTimeMillis" : 0, |
"totalKeysExamined" : 1, |
"totalDocsExamined" : 1, |
"executionStages" : { |
"stage" : "FETCH", |
"nReturned" : 1, |
"executionTimeMillisEstimate" : 0, |
"works" : 2, |
"advanced" : 1, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 1, |
"restoreState" : 1, |
"isEOF" : 1, |
"docsExamined" : 1, |
"alreadyHasObj" : 0, |
"inputStage" : { |
"stage" : "DISTINCT_SCAN", |
"nReturned" : 1, |
"executionTimeMillisEstimate" : 0, |
"works" : 2, |
"advanced" : 1, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 1, |
"restoreState" : 1, |
"isEOF" : 1, |
"keyPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"indexName" : "zipcode_1_house_1", |
"isMultiKey" : false, |
"multiKeyPaths" : { |
"zipcode" : [ ], |
"house" : [ ] |
},
|
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[\"111\", \"111\"]", |
"[\"444\", \"444\"]" |
],
|
"house" : [ |
"[MinKey, MaxKey]" |
]
|
},
|
"keysExamined" : 1 |
}
|
}
|
}
|
},
|
"nReturned" : NumberLong(1), |
"executionTimeMillisEstimate" : NumberLong(0) |
},
|
{
|
"$groupByDistinctScan" : { |
"newRoot" : { |
"_id" : "$zipcode", |
"first" : "$$ROOT" |
}
|
},
|
"nReturned" : NumberLong(1), |
"executionTimeMillisEstimate" : NumberLong(0) |
}
|
]
|
},
|
"rs1" : { |
"host" : "127.0.0.1:28101", |
"stages" : [ |
{
|
"$cursor" : { |
"queryPlanner" : { |
"plannerVersion" : 1, |
"namespace" : "test.addresses", |
"indexFilterSet" : false, |
"parsedQuery" : { |
"zipcode" : { |
"$in" : [ |
"111", |
"444" |
]
|
}
|
},
|
"queryHash" : "A9C86C49", |
"planCacheKey" : "E7443539", |
"winningPlan" : { |
"stage" : "FETCH", |
"inputStage" : { |
"stage" : "DISTINCT_SCAN", |
"keyPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"indexName" : "zipcode_1_house_1", |
"isMultiKey" : false, |
"multiKeyPaths" : { |
"zipcode" : [ ], |
"house" : [ ] |
},
|
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[\"111\", \"111\"]", |
"[\"444\", \"444\"]" |
],
|
"house" : [ |
"[MinKey, MaxKey]" |
]
|
}
|
}
|
},
|
"rejectedPlans" : [ ] |
},
|
"executionStats" : { |
"executionSuccess" : true, |
"nReturned" : 1, |
"executionTimeMillis" : 0, |
"totalKeysExamined" : 1, |
"totalDocsExamined" : 1, |
"executionStages" : { |
"stage" : "FETCH", |
"nReturned" : 1, |
"executionTimeMillisEstimate" : 0, |
"works" : 2, |
"advanced" : 1, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 1, |
"restoreState" : 1, |
"isEOF" : 1, |
"docsExamined" : 1, |
"alreadyHasObj" : 0, |
"inputStage" : { |
"stage" : "DISTINCT_SCAN", |
"nReturned" : 1, |
"executionTimeMillisEstimate" : 0, |
"works" : 2, |
"advanced" : 1, |
"needTime" : 0, |
"needYield" : 0, |
"saveState" : 1, |
"restoreState" : 1, |
"isEOF" : 1, |
"keyPattern" : { |
"zipcode" : 1, |
"house" : 1 |
},
|
"indexName" : "zipcode_1_house_1", |
"isMultiKey" : false, |
"multiKeyPaths" : { |
"zipcode" : [ ], |
"house" : [ ] |
},
|
"isUnique" : false, |
"isSparse" : false, |
"isPartial" : false, |
"indexVersion" : 2, |
"direction" : "forward", |
"indexBounds" : { |
"zipcode" : [ |
"[\"111\", \"111\"]", |
"[\"444\", \"444\"]" |
],
|
"house" : [ |
"[MinKey, MaxKey]" |
]
|
},
|
"keysExamined" : 1 |
}
|
}
|
}
|
},
|
"nReturned" : NumberLong(1), |
"executionTimeMillisEstimate" : NumberLong(0) |
},
|
{
|
"$groupByDistinctScan" : { |
"newRoot" : { |
"_id" : "$zipcode", |
"first" : "$$ROOT" |
}
|
},
|
"nReturned" : NumberLong(1), |
"executionTimeMillisEstimate" : NumberLong(0) |
}
|
]
|
}
|
},
|
"ok" : 1, |
"operationTime" : Timestamp(1615826288, 1), |
"$clusterTime" : { |
"clusterTime" : Timestamp(1615826288, 1), |
"signature" : { |
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), |
"keyId" : NumberLong(0) |
}
|
}
|
}
|
Without sorting this aggregation works well, but I need some defined order here.
Attachments
Issue Links
- related to
-
SERVER-42160 $group stages that use a DISTINCT_SCAN do not use a SHARDING_FILTER on sharded collections
-
- Backlog
-
-
SERVER-5477 when sharded, no need to merge groups if $group _id is the shard key or original document _id
-
- Backlog
-