|
When performing aggregation with $sort+$match+$group+$first pipeline, some unexpected plan is being chosen. I would expect to see DISTINCT_SCAN and small totalDocsExamined amount in explain, but having SORT_KEY_GENERATOR and large totalDocsExamined amount.
Given:
mongos> sh.status()
|
— Sharding Status —
|
...
|
shards:
|
{ "_id" : "rs0", "host" : "rs0/127.0.0.1:28100", "state" : 1 }
|
{ "_id" : "rs1", "host" : "rs1/127.0.0.1:28101", "state" : 1 }
|
mongos> sh.shardCollection("test.addresses", { zipcode: "hashed" })
|
|
mongos> db.addresses.createIndex({zipcode: 1, house: 1})
|
|
mongos> db.addresses.insert([
|
{zipcode: "111", house: 1},
|
{zipcode: "111", house: 2},
|
{zipcode: "111", house: 3}])
|
mongos> db.addresses.insert([
|
{zipcode: "444", house: 1},
|
{zipcode: "444", house: 2},
|
{zipcode: "444", house: 3}])
|
mongos> db.addresses.getShardDistribution()
|
Shard rs0 at rs0/127.0.0.1:28100
|
data : 162B docs : 3 chunks : 2
|
Shard rs1 at rs1/127.0.0.1:28101
|
data : 162B docs : 3 chunks : 2
|
So we have 3 documents on each shard with 2 distinct shard keys. Given that we could perform such aggregation:
mongos> db.addresses.explain("executionStats").aggregate([{$sort: {zipcode: 1, house: 1}}, {$match: {zipcode: {$in: ["111", "444"]}}}, {$group: {_id: "$zipcode", first: {$first: "$$ROOT"}}}])
|
{
|
"serverInfo" : {
|
"host" : "5ae1a7bb5de9",
|
"port" : 27017,
|
"version" : "4.4.3",
|
"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
|
},
|
"mergeType" : "mongos",
|
"splitPipeline" : {
|
"shardsPart" : [
|
{
|
"$match" : {
|
"zipcode" : {
|
"$in" : [
|
"111",
|
"444"
|
]
|
}
|
}
|
},
|
{
|
"$sort" : {
|
"sortKey" : {
|
"zipcode" : 1,
|
"house" : 1
|
}
|
}
|
}
|
],
|
"mergerPart" : [
|
{
|
"$group" : {
|
"_id" : "$zipcode",
|
"first" : {
|
"$first" : "$$ROOT"
|
}
|
}
|
}
|
]
|
},
|
"shards" : {
|
"rs1" : {
|
"host" : "127.0.0.1:28101",
|
"queryPlanner" : {
|
"plannerVersion" : 1,
|
"namespace" : "test.addresses",
|
"indexFilterSet" : false,
|
"parsedQuery" : {
|
"zipcode" : {
|
"$in" : [
|
"111",
|
"444"
|
]
|
}
|
},
|
"optimizedPipeline" : true,
|
"winningPlan" : {
|
"stage" : "FETCH",
|
"inputStage" : {
|
"stage" : "SORT_KEY_GENERATOR",
|
"inputStage" : {
|
"stage" : "SHARDING_FILTER",
|
"inputStage" : {
|
"stage" : "IXSCAN",
|
"keyPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"indexName" : "zipcode_1_house_1",
|
"isMultiKey" : false,
|
"multiKeyPaths" : {
|
"zipcode" : [ ],
|
"house" : [ ]
|
},
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[\"111\", \"111\"]",
|
"[\"444\", \"444\"]"
|
],
|
"house" : [
|
"[MinKey, MaxKey]"
|
]
|
}
|
}
|
}
|
}
|
},
|
"rejectedPlans" : [
|
{
|
"stage" : "SORT",
|
"sortPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"memLimit" : 104857600,
|
"type" : "simple",
|
"inputStage" : {
|
"stage" : "SHARDING_FILTER",
|
"inputStage" : {
|
"stage" : "FETCH",
|
"filter" : {
|
"zipcode" : {
|
"$in" : [
|
"111",
|
"444"
|
]
|
}
|
},
|
"inputStage" : {
|
"stage" : "IXSCAN",
|
"keyPattern" : {
|
"zipcode" : "hashed"
|
},
|
"indexName" : "zipcode_hashed",
|
"isMultiKey" : false,
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[-1348867638842708814, -1348867638842708814]",
|
"[4824476026935005375, 4824476026935005375]"
|
]
|
}
|
}
|
}
|
}
|
}
|
]
|
},
|
"executionStats" : {
|
"executionSuccess" : true,
|
"nReturned" : 3,
|
"executionTimeMillis" : 0,
|
"totalKeysExamined" : 3,
|
"totalDocsExamined" : 3,
|
"executionStages" : {
|
"stage" : "FETCH",
|
"nReturned" : 3,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 5,
|
"advanced" : 3,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 0,
|
"restoreState" : 0,
|
"isEOF" : 1,
|
"docsExamined" : 3,
|
"alreadyHasObj" : 0,
|
"inputStage" : {
|
"stage" : "SORT_KEY_GENERATOR",
|
"nReturned" : 3,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 4,
|
"advanced" : 3,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 0,
|
"restoreState" : 0,
|
"isEOF" : 1,
|
"inputStage" : {
|
"stage" : "SHARDING_FILTER",
|
"nReturned" : 3,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 4,
|
"advanced" : 3,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 0,
|
"restoreState" : 0,
|
"isEOF" : 1,
|
"chunkSkips" : 0,
|
"inputStage" : {
|
"stage" : "IXSCAN",
|
"nReturned" : 3,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 4,
|
"advanced" : 3,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 0,
|
"restoreState" : 0,
|
"isEOF" : 1,
|
"keyPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"indexName" : "zipcode_1_house_1",
|
"isMultiKey" : false,
|
"multiKeyPaths" : {
|
"zipcode" : [ ],
|
"house" : [ ]
|
},
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[\"111\", \"111\"]",
|
"[\"444\", \"444\"]"
|
],
|
"house" : [
|
"[MinKey, MaxKey]"
|
]
|
},
|
"keysExamined" : 3,
|
"seeks" : 1,
|
"dupsTested" : 0,
|
"dupsDropped" : 0
|
}
|
}
|
}
|
}
|
}
|
},
|
"rs0" : {
|
"host" : "127.0.0.1:28100",
|
"queryPlanner" : {
|
"plannerVersion" : 1,
|
"namespace" : "test.addresses",
|
"indexFilterSet" : false,
|
"parsedQuery" : {
|
"zipcode" : {
|
"$in" : [
|
"111",
|
"444"
|
]
|
}
|
},
|
"optimizedPipeline" : true,
|
"winningPlan" : {
|
"stage" : "FETCH",
|
"inputStage" : {
|
"stage" : "SORT_KEY_GENERATOR",
|
"inputStage" : {
|
"stage" : "SHARDING_FILTER",
|
"inputStage" : {
|
"stage" : "IXSCAN",
|
"keyPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"indexName" : "zipcode_1_house_1",
|
"isMultiKey" : false,
|
"multiKeyPaths" : {
|
"zipcode" : [ ],
|
"house" : [ ]
|
},
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[\"111\", \"111\"]",
|
"[\"444\", \"444\"]"
|
],
|
"house" : [
|
"[MinKey, MaxKey]"
|
]
|
}
|
}
|
}
|
}
|
},
|
"rejectedPlans" : [
|
{
|
"stage" : "SORT",
|
"sortPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"memLimit" : 104857600,
|
"type" : "simple",
|
"inputStage" : {
|
"stage" : "SHARDING_FILTER",
|
"inputStage" : {
|
"stage" : "FETCH",
|
"filter" : {
|
"zipcode" : {
|
"$in" : [
|
"111",
|
"444"
|
]
|
}
|
},
|
"inputStage" : {
|
"stage" : "IXSCAN",
|
"keyPattern" : {
|
"zipcode" : "hashed"
|
},
|
"indexName" : "zipcode_hashed",
|
"isMultiKey" : false,
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[-1348867638842708814, -1348867638842708814]",
|
"[4824476026935005375, 4824476026935005375]"
|
]
|
}
|
}
|
}
|
}
|
}
|
]
|
},
|
"executionStats" : {
|
"executionSuccess" : true,
|
"nReturned" : 3,
|
"executionTimeMillis" : 0,
|
"totalKeysExamined" : 3,
|
"totalDocsExamined" : 3,
|
"executionStages" : {
|
"stage" : "FETCH",
|
"nReturned" : 3,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 5,
|
"advanced" : 3,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 0,
|
"restoreState" : 0,
|
"isEOF" : 1,
|
"docsExamined" : 3,
|
"alreadyHasObj" : 0,
|
"inputStage" : {
|
"stage" : "SORT_KEY_GENERATOR",
|
"nReturned" : 3,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 4,
|
"advanced" : 3,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 0,
|
"restoreState" : 0,
|
"isEOF" : 1,
|
"inputStage" : {
|
"stage" : "SHARDING_FILTER",
|
"nReturned" : 3,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 4,
|
"advanced" : 3,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 0,
|
"restoreState" : 0,
|
"isEOF" : 1,
|
"chunkSkips" : 0,
|
"inputStage" : {
|
"stage" : "IXSCAN",
|
"nReturned" : 3,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 4,
|
"advanced" : 3,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 0,
|
"restoreState" : 0,
|
"isEOF" : 1,
|
"keyPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"indexName" : "zipcode_1_house_1",
|
"isMultiKey" : false,
|
"multiKeyPaths" : {
|
"zipcode" : [ ],
|
"house" : [ ]
|
},
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[\"111\", \"111\"]",
|
"[\"444\", \"444\"]"
|
],
|
"house" : [
|
"[MinKey, MaxKey]"
|
]
|
},
|
"keysExamined" : 3,
|
"seeks" : 1,
|
"dupsTested" : 0,
|
"dupsDropped" : 0
|
}
|
}
|
}
|
}
|
}
|
}
|
},
|
"ok" : 1,
|
"operationTime" : Timestamp(1615826160, 1),
|
"$clusterTime" : {
|
"clusterTime" : Timestamp(1615826160, 1),
|
"signature" : {
|
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
|
"keyId" : NumberLong(0)
|
}
|
}
|
}
|
As we can see in totalDocsExamined all 3 documents is being fetched on each shard, and no DISTINCT_SCAN stage.
But if we remove $sort stage, we can see expected behavior:
mongos> db.addresses.explain("executionStats").aggregate([{$match: {zipcode: {$in: ["111", "444"]}}}, {$group: {_id: "$zipcode", first: {$first: "$$ROOT"}}}])
|
{
|
"serverInfo" : {
|
"host" : "5ae1a7bb5de9",
|
"port" : 27017,
|
"version" : "4.4.3",
|
"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
|
},
|
"mergeType" : "mongos",
|
"splitPipeline" : {
|
"shardsPart" : [
|
{
|
"$match" : {
|
"zipcode" : {
|
"$in" : [
|
"111",
|
"444"
|
]
|
}
|
}
|
},
|
{
|
"$group" : {
|
"_id" : "$zipcode",
|
"first" : {
|
"$first" : "$$ROOT"
|
}
|
}
|
}
|
],
|
"mergerPart" : [
|
{
|
"$group" : {
|
"_id" : "$$ROOT._id",
|
"first" : {
|
"$first" : "$$ROOT.first"
|
},
|
"$doingMerge" : true
|
}
|
}
|
]
|
},
|
"shards" : {
|
"rs0" : {
|
"host" : "127.0.0.1:28100",
|
"stages" : [
|
{
|
"$cursor" : {
|
"queryPlanner" : {
|
"plannerVersion" : 1,
|
"namespace" : "test.addresses",
|
"indexFilterSet" : false,
|
"parsedQuery" : {
|
"zipcode" : {
|
"$in" : [
|
"111",
|
"444"
|
]
|
}
|
},
|
"queryHash" : "A9C86C49",
|
"planCacheKey" : "E7443539",
|
"winningPlan" : {
|
"stage" : "FETCH",
|
"inputStage" : {
|
"stage" : "DISTINCT_SCAN",
|
"keyPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"indexName" : "zipcode_1_house_1",
|
"isMultiKey" : false,
|
"multiKeyPaths" : {
|
"zipcode" : [ ],
|
"house" : [ ]
|
},
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[\"111\", \"111\"]",
|
"[\"444\", \"444\"]"
|
],
|
"house" : [
|
"[MinKey, MaxKey]"
|
]
|
}
|
}
|
},
|
"rejectedPlans" : [ ]
|
},
|
"executionStats" : {
|
"executionSuccess" : true,
|
"nReturned" : 1,
|
"executionTimeMillis" : 0,
|
"totalKeysExamined" : 1,
|
"totalDocsExamined" : 1,
|
"executionStages" : {
|
"stage" : "FETCH",
|
"nReturned" : 1,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 2,
|
"advanced" : 1,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 1,
|
"restoreState" : 1,
|
"isEOF" : 1,
|
"docsExamined" : 1,
|
"alreadyHasObj" : 0,
|
"inputStage" : {
|
"stage" : "DISTINCT_SCAN",
|
"nReturned" : 1,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 2,
|
"advanced" : 1,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 1,
|
"restoreState" : 1,
|
"isEOF" : 1,
|
"keyPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"indexName" : "zipcode_1_house_1",
|
"isMultiKey" : false,
|
"multiKeyPaths" : {
|
"zipcode" : [ ],
|
"house" : [ ]
|
},
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[\"111\", \"111\"]",
|
"[\"444\", \"444\"]"
|
],
|
"house" : [
|
"[MinKey, MaxKey]"
|
]
|
},
|
"keysExamined" : 1
|
}
|
}
|
}
|
},
|
"nReturned" : NumberLong(1),
|
"executionTimeMillisEstimate" : NumberLong(0)
|
},
|
{
|
"$groupByDistinctScan" : {
|
"newRoot" : {
|
"_id" : "$zipcode",
|
"first" : "$$ROOT"
|
}
|
},
|
"nReturned" : NumberLong(1),
|
"executionTimeMillisEstimate" : NumberLong(0)
|
}
|
]
|
},
|
"rs1" : {
|
"host" : "127.0.0.1:28101",
|
"stages" : [
|
{
|
"$cursor" : {
|
"queryPlanner" : {
|
"plannerVersion" : 1,
|
"namespace" : "test.addresses",
|
"indexFilterSet" : false,
|
"parsedQuery" : {
|
"zipcode" : {
|
"$in" : [
|
"111",
|
"444"
|
]
|
}
|
},
|
"queryHash" : "A9C86C49",
|
"planCacheKey" : "E7443539",
|
"winningPlan" : {
|
"stage" : "FETCH",
|
"inputStage" : {
|
"stage" : "DISTINCT_SCAN",
|
"keyPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"indexName" : "zipcode_1_house_1",
|
"isMultiKey" : false,
|
"multiKeyPaths" : {
|
"zipcode" : [ ],
|
"house" : [ ]
|
},
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[\"111\", \"111\"]",
|
"[\"444\", \"444\"]"
|
],
|
"house" : [
|
"[MinKey, MaxKey]"
|
]
|
}
|
}
|
},
|
"rejectedPlans" : [ ]
|
},
|
"executionStats" : {
|
"executionSuccess" : true,
|
"nReturned" : 1,
|
"executionTimeMillis" : 0,
|
"totalKeysExamined" : 1,
|
"totalDocsExamined" : 1,
|
"executionStages" : {
|
"stage" : "FETCH",
|
"nReturned" : 1,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 2,
|
"advanced" : 1,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 1,
|
"restoreState" : 1,
|
"isEOF" : 1,
|
"docsExamined" : 1,
|
"alreadyHasObj" : 0,
|
"inputStage" : {
|
"stage" : "DISTINCT_SCAN",
|
"nReturned" : 1,
|
"executionTimeMillisEstimate" : 0,
|
"works" : 2,
|
"advanced" : 1,
|
"needTime" : 0,
|
"needYield" : 0,
|
"saveState" : 1,
|
"restoreState" : 1,
|
"isEOF" : 1,
|
"keyPattern" : {
|
"zipcode" : 1,
|
"house" : 1
|
},
|
"indexName" : "zipcode_1_house_1",
|
"isMultiKey" : false,
|
"multiKeyPaths" : {
|
"zipcode" : [ ],
|
"house" : [ ]
|
},
|
"isUnique" : false,
|
"isSparse" : false,
|
"isPartial" : false,
|
"indexVersion" : 2,
|
"direction" : "forward",
|
"indexBounds" : {
|
"zipcode" : [
|
"[\"111\", \"111\"]",
|
"[\"444\", \"444\"]"
|
],
|
"house" : [
|
"[MinKey, MaxKey]"
|
]
|
},
|
"keysExamined" : 1
|
}
|
}
|
}
|
},
|
"nReturned" : NumberLong(1),
|
"executionTimeMillisEstimate" : NumberLong(0)
|
},
|
{
|
"$groupByDistinctScan" : {
|
"newRoot" : {
|
"_id" : "$zipcode",
|
"first" : "$$ROOT"
|
}
|
},
|
"nReturned" : NumberLong(1),
|
"executionTimeMillisEstimate" : NumberLong(0)
|
}
|
]
|
}
|
},
|
"ok" : 1,
|
"operationTime" : Timestamp(1615826288, 1),
|
"$clusterTime" : {
|
"clusterTime" : Timestamp(1615826288, 1),
|
"signature" : {
|
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
|
"keyId" : NumberLong(0)
|
}
|
}
|
}
|
Without sorting this aggregation works well, but I need some defined order here.
|