-
Type: Improvement
-
Resolution: Unresolved
-
Priority: Minor - P4
-
None
-
Affects Version/s: 4.4.3
-
Component/s: Distributed Query Execution
-
None
-
Query Execution
When performing aggregation with $sort+$match+$group+$first pipeline, some unexpected plan is being chosen. I would expect to see DISTINCT_SCAN and small totalDocsExamined amount in explain, but having SORT_KEY_GENERATOR and large totalDocsExamined amount.
Given:
mongos> sh.status() — Sharding Status — ... shards: { "_id" : "rs0", "host" : "rs0/127.0.0.1:28100", "state" : 1 } { "_id" : "rs1", "host" : "rs1/127.0.0.1:28101", "state" : 1 } mongos> sh.shardCollection("test.addresses", { zipcode: "hashed" }) mongos> db.addresses.createIndex({zipcode: 1, house: 1}) mongos> db.addresses.insert([ {zipcode: "111", house: 1}, {zipcode: "111", house: 2}, {zipcode: "111", house: 3}]) mongos> db.addresses.insert([ {zipcode: "444", house: 1}, {zipcode: "444", house: 2}, {zipcode: "444", house: 3}]) mongos> db.addresses.getShardDistribution() Shard rs0 at rs0/127.0.0.1:28100 data : 162B docs : 3 chunks : 2 Shard rs1 at rs1/127.0.0.1:28101 data : 162B docs : 3 chunks : 2
So we have 3 documents on each shard with 2 distinct shard keys. Given that we could perform such aggregation:
mongos> db.addresses.explain("executionStats").aggregate([{$sort: {zipcode: 1, house: 1}}, {$match: {zipcode: {$in: ["111", "444"]}}}, {$group: {_id: "$zipcode", first: {$first: "$$ROOT"}}}]) { "serverInfo" : { "host" : "5ae1a7bb5de9", "port" : 27017, "version" : "4.4.3", "gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13" }, "mergeType" : "mongos", "splitPipeline" : { "shardsPart" : [ { "$match" : { "zipcode" : { "$in" : [ "111", "444" ] } } }, { "$sort" : { "sortKey" : { "zipcode" : 1, "house" : 1 } } } ], "mergerPart" : [ { "$group" : { "_id" : "$zipcode", "first" : { "$first" : "$$ROOT" } } } ] }, "shards" : { "rs1" : { "host" : "127.0.0.1:28101", "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.addresses", "indexFilterSet" : false, "parsedQuery" : { "zipcode" : { "$in" : [ "111", "444" ] } }, "optimizedPipeline" : true, "winningPlan" : { "stage" : "FETCH", "inputStage" : { "stage" : "SORT_KEY_GENERATOR", "inputStage" : { "stage" : "SHARDING_FILTER", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "zipcode" : 1, "house" : 1 }, "indexName" : "zipcode_1_house_1", "isMultiKey" : false, "multiKeyPaths" : { "zipcode" : [ ], "house" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[\"111\", \"111\"]", "[\"444\", \"444\"]" ], "house" : [ "[MinKey, MaxKey]" ] } } } } }, "rejectedPlans" : [ { "stage" : "SORT", "sortPattern" : { "zipcode" : 1, "house" : 1 }, "memLimit" : 104857600, "type" : "simple", "inputStage" : { "stage" : "SHARDING_FILTER", "inputStage" : { "stage" : "FETCH", "filter" : { "zipcode" : { "$in" : [ "111", "444" ] } }, "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "zipcode" : "hashed" }, "indexName" : "zipcode_hashed", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[-1348867638842708814, -1348867638842708814]", "[4824476026935005375, 4824476026935005375]" ] } } } } } ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 3, "executionTimeMillis" : 0, "totalKeysExamined" : 3, "totalDocsExamined" : 3, "executionStages" : { "stage" : "FETCH", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 5, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "docsExamined" : 3, "alreadyHasObj" : 0, "inputStage" : { "stage" : "SORT_KEY_GENERATOR", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "inputStage" : { "stage" : "SHARDING_FILTER", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "chunkSkips" : 0, "inputStage" : { "stage" : "IXSCAN", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "keyPattern" : { "zipcode" : 1, "house" : 1 }, "indexName" : "zipcode_1_house_1", "isMultiKey" : false, "multiKeyPaths" : { "zipcode" : [ ], "house" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[\"111\", \"111\"]", "[\"444\", \"444\"]" ], "house" : [ "[MinKey, MaxKey]" ] }, "keysExamined" : 3, "seeks" : 1, "dupsTested" : 0, "dupsDropped" : 0 } } } } } }, "rs0" : { "host" : "127.0.0.1:28100", "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.addresses", "indexFilterSet" : false, "parsedQuery" : { "zipcode" : { "$in" : [ "111", "444" ] } }, "optimizedPipeline" : true, "winningPlan" : { "stage" : "FETCH", "inputStage" : { "stage" : "SORT_KEY_GENERATOR", "inputStage" : { "stage" : "SHARDING_FILTER", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "zipcode" : 1, "house" : 1 }, "indexName" : "zipcode_1_house_1", "isMultiKey" : false, "multiKeyPaths" : { "zipcode" : [ ], "house" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[\"111\", \"111\"]", "[\"444\", \"444\"]" ], "house" : [ "[MinKey, MaxKey]" ] } } } } }, "rejectedPlans" : [ { "stage" : "SORT", "sortPattern" : { "zipcode" : 1, "house" : 1 }, "memLimit" : 104857600, "type" : "simple", "inputStage" : { "stage" : "SHARDING_FILTER", "inputStage" : { "stage" : "FETCH", "filter" : { "zipcode" : { "$in" : [ "111", "444" ] } }, "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "zipcode" : "hashed" }, "indexName" : "zipcode_hashed", "isMultiKey" : false, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[-1348867638842708814, -1348867638842708814]", "[4824476026935005375, 4824476026935005375]" ] } } } } } ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 3, "executionTimeMillis" : 0, "totalKeysExamined" : 3, "totalDocsExamined" : 3, "executionStages" : { "stage" : "FETCH", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 5, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "docsExamined" : 3, "alreadyHasObj" : 0, "inputStage" : { "stage" : "SORT_KEY_GENERATOR", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "inputStage" : { "stage" : "SHARDING_FILTER", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "chunkSkips" : 0, "inputStage" : { "stage" : "IXSCAN", "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "keyPattern" : { "zipcode" : 1, "house" : 1 }, "indexName" : "zipcode_1_house_1", "isMultiKey" : false, "multiKeyPaths" : { "zipcode" : [ ], "house" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[\"111\", \"111\"]", "[\"444\", \"444\"]" ], "house" : [ "[MinKey, MaxKey]" ] }, "keysExamined" : 3, "seeks" : 1, "dupsTested" : 0, "dupsDropped" : 0 } } } } } } }, "ok" : 1, "operationTime" : Timestamp(1615826160, 1), "$clusterTime" : { "clusterTime" : Timestamp(1615826160, 1), "signature" : { "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), "keyId" : NumberLong(0) } } }
As we can see in totalDocsExamined all 3 documents is being fetched on each shard, and no DISTINCT_SCAN stage.
But if we remove $sort stage, we can see expected behavior:
mongos> db.addresses.explain("executionStats").aggregate([{$match: {zipcode: {$in: ["111", "444"]}}}, {$group: {_id: "$zipcode", first: {$first: "$$ROOT"}}}]) { "serverInfo" : { "host" : "5ae1a7bb5de9", "port" : 27017, "version" : "4.4.3", "gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13" }, "mergeType" : "mongos", "splitPipeline" : { "shardsPart" : [ { "$match" : { "zipcode" : { "$in" : [ "111", "444" ] } } }, { "$group" : { "_id" : "$zipcode", "first" : { "$first" : "$$ROOT" } } } ], "mergerPart" : [ { "$group" : { "_id" : "$$ROOT._id", "first" : { "$first" : "$$ROOT.first" }, "$doingMerge" : true } } ] }, "shards" : { "rs0" : { "host" : "127.0.0.1:28100", "stages" : [ { "$cursor" : { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.addresses", "indexFilterSet" : false, "parsedQuery" : { "zipcode" : { "$in" : [ "111", "444" ] } }, "queryHash" : "A9C86C49", "planCacheKey" : "E7443539", "winningPlan" : { "stage" : "FETCH", "inputStage" : { "stage" : "DISTINCT_SCAN", "keyPattern" : { "zipcode" : 1, "house" : 1 }, "indexName" : "zipcode_1_house_1", "isMultiKey" : false, "multiKeyPaths" : { "zipcode" : [ ], "house" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[\"111\", \"111\"]", "[\"444\", \"444\"]" ], "house" : [ "[MinKey, MaxKey]" ] } } }, "rejectedPlans" : [ ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 1, "executionTimeMillis" : 0, "totalKeysExamined" : 1, "totalDocsExamined" : 1, "executionStages" : { "stage" : "FETCH", "nReturned" : 1, "executionTimeMillisEstimate" : 0, "works" : 2, "advanced" : 1, "needTime" : 0, "needYield" : 0, "saveState" : 1, "restoreState" : 1, "isEOF" : 1, "docsExamined" : 1, "alreadyHasObj" : 0, "inputStage" : { "stage" : "DISTINCT_SCAN", "nReturned" : 1, "executionTimeMillisEstimate" : 0, "works" : 2, "advanced" : 1, "needTime" : 0, "needYield" : 0, "saveState" : 1, "restoreState" : 1, "isEOF" : 1, "keyPattern" : { "zipcode" : 1, "house" : 1 }, "indexName" : "zipcode_1_house_1", "isMultiKey" : false, "multiKeyPaths" : { "zipcode" : [ ], "house" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[\"111\", \"111\"]", "[\"444\", \"444\"]" ], "house" : [ "[MinKey, MaxKey]" ] }, "keysExamined" : 1 } } } }, "nReturned" : NumberLong(1), "executionTimeMillisEstimate" : NumberLong(0) }, { "$groupByDistinctScan" : { "newRoot" : { "_id" : "$zipcode", "first" : "$$ROOT" } }, "nReturned" : NumberLong(1), "executionTimeMillisEstimate" : NumberLong(0) } ] }, "rs1" : { "host" : "127.0.0.1:28101", "stages" : [ { "$cursor" : { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.addresses", "indexFilterSet" : false, "parsedQuery" : { "zipcode" : { "$in" : [ "111", "444" ] } }, "queryHash" : "A9C86C49", "planCacheKey" : "E7443539", "winningPlan" : { "stage" : "FETCH", "inputStage" : { "stage" : "DISTINCT_SCAN", "keyPattern" : { "zipcode" : 1, "house" : 1 }, "indexName" : "zipcode_1_house_1", "isMultiKey" : false, "multiKeyPaths" : { "zipcode" : [ ], "house" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[\"111\", \"111\"]", "[\"444\", \"444\"]" ], "house" : [ "[MinKey, MaxKey]" ] } } }, "rejectedPlans" : [ ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 1, "executionTimeMillis" : 0, "totalKeysExamined" : 1, "totalDocsExamined" : 1, "executionStages" : { "stage" : "FETCH", "nReturned" : 1, "executionTimeMillisEstimate" : 0, "works" : 2, "advanced" : 1, "needTime" : 0, "needYield" : 0, "saveState" : 1, "restoreState" : 1, "isEOF" : 1, "docsExamined" : 1, "alreadyHasObj" : 0, "inputStage" : { "stage" : "DISTINCT_SCAN", "nReturned" : 1, "executionTimeMillisEstimate" : 0, "works" : 2, "advanced" : 1, "needTime" : 0, "needYield" : 0, "saveState" : 1, "restoreState" : 1, "isEOF" : 1, "keyPattern" : { "zipcode" : 1, "house" : 1 }, "indexName" : "zipcode_1_house_1", "isMultiKey" : false, "multiKeyPaths" : { "zipcode" : [ ], "house" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "zipcode" : [ "[\"111\", \"111\"]", "[\"444\", \"444\"]" ], "house" : [ "[MinKey, MaxKey]" ] }, "keysExamined" : 1 } } } }, "nReturned" : NumberLong(1), "executionTimeMillisEstimate" : NumberLong(0) }, { "$groupByDistinctScan" : { "newRoot" : { "_id" : "$zipcode", "first" : "$$ROOT" } }, "nReturned" : NumberLong(1), "executionTimeMillisEstimate" : NumberLong(0) } ] } }, "ok" : 1, "operationTime" : Timestamp(1615826288, 1), "$clusterTime" : { "clusterTime" : Timestamp(1615826288, 1), "signature" : { "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), "keyId" : NumberLong(0) } } }
Without sorting this aggregation works well, but I need some defined order here.
- is related to
-
SERVER-96116 Extend shard filtering agg distinct scan tests to include $sort + $match + $first cases
- Closed
- related to
-
SERVER-42160 $group stages that use a DISTINCT_SCAN do not use a SHARDING_FILTER on sharded collections
- Backlog
-
SERVER-5477 when sharded, no need to merge groups if $group _id is the shard key or original document _id
- Backlog