|
Note that for query execution plans that fully stream the data, pushing a $limit further down in the plan tree doesn't actually change the efficiency of the plan or how much data is examined. However, the $cursor stage of the pipeline always loads a full batch of data before unspooling this data to subsequent stages. Therefore, there is a benefit to pushing the $limit below the $cursor stage. That work is in progress, and is tracked by SERVER-36723. I would say that our inability to push $limit down is the primary factor at play here, so I am resolving this ticket as a duplicate of SERVER-36723.
There are a few other issues to consider, however. The first is that we currently never move a $limit before a $lookup. This will continue to be true after the fix for SERVER-36723 is merged. The ability to perform this optimization is tracked by SERVER-31072. I tested a patch locally which contains both the fix for SERVER-36723 and a draft of the fix for SERVER-31072. This shows that the example query is optimized so that the limit is pushed down beneath the $cursor stage. In the explain output from this test, you'll notice that the LIMIT stage listed inside the $cursor.queryPlanner field:
{
|
"stages" : [
|
{
|
"$cursor" : {
|
"query" : {
|
|
},
|
"fields" : {
|
"currentDate" : 1,
|
"currently_playing" : 1,
|
"name" : 1,
|
"status" : 1,
|
"_id" : 1
|
},
|
"queryPlanner" : {
|
"plannerVersion" : 1,
|
"namespace" : "test.c",
|
"indexFilterSet" : false,
|
"parsedQuery" : {
|
|
},
|
"queryHash" : "8B3D4AB8",
|
"planCacheKey" : "8B3D4AB8",
|
"winningPlan" : {
|
"stage" : "LIMIT", // The $limit was pushed down to here!
|
"limitAmount" : 410,
|
"inputStage" : {
|
"stage" : "COLLSCAN",
|
"direction" : "forward"
|
}
|
},
|
"rejectedPlans" : [ ]
|
}
|
}
|
},
|
{
|
"$addFields" : {
|
"currentDate" : "$$NOW"
|
}
|
},
|
{
|
"$addFields" : {
|
"currentDate" : {
|
"$subtract" : [
|
"$currentDate",
|
{
|
"$const" : 300000
|
}
|
]
|
}
|
}
|
},
|
{
|
"$addFields" : {
|
"currentDate" : {
|
"$dateToString" : {
|
"date" : "$currentDate",
|
"format" : {
|
"$const" : "%Y-%m-%dT%H:%M:%S.%L"
|
},
|
"timezone" : {
|
"$const" : "America/Atikokan"
|
}
|
}
|
}
|
}
|
},
|
{
|
"$lookup" : {
|
"from" : "d",
|
"as" : "status",
|
"let" : {
|
"name" : "$_id",
|
"currentDate" : "$currentDate"
|
},
|
"pipeline" : [
|
{
|
"$match" : {
|
"$expr" : {
|
"$and" : [
|
{
|
"$gte" : [
|
"$current_datetime",
|
"$$currentDate"
|
]
|
},
|
{
|
"$in" : [
|
"$$name",
|
"$player_list.name"
|
]
|
}
|
]
|
}
|
}
|
},
|
{
|
"$project" : {
|
"game_name" : 1,
|
"player_list" : {
|
"$filter" : {
|
"input" : "$player_list",
|
"as" : "player",
|
"cond" : {
|
"$eq" : [
|
"$$player.name",
|
"$$name"
|
]
|
}
|
}
|
}
|
}
|
},
|
{
|
"$limit" : 1
|
}
|
]
|
}
|
},
|
{
|
"$skip" : NumberLong(400)
|
},
|
{
|
"$addFields" : {
|
"status" : {
|
"$arrayElemAt" : [
|
"$status",
|
{
|
"$const" : 0
|
}
|
]
|
}
|
}
|
},
|
{
|
"$addFields" : {
|
"currently_playing" : "$status.game_name",
|
"status" : {
|
"$arrayElemAt" : [
|
"$status.player_list",
|
{
|
"$const" : 0
|
}
|
]
|
}
|
}
|
},
|
{
|
"$project" : {
|
"_id" : true,
|
"name" : true,
|
"isOnline" : {
|
"$cond" : [
|
{
|
"$not" : [
|
"$status"
|
]
|
},
|
{
|
"$const" : false
|
},
|
"$status.display_online_status"
|
]
|
},
|
"currently_playing" : {
|
"$cond" : [
|
{
|
"$not" : [
|
"$status.display_online_status"
|
]
|
},
|
{
|
"$const" : null
|
},
|
"$currently_playing"
|
]
|
}
|
}
|
}
|
],
|
"ok" : 1
|
}
|
There is also an open ticket tracking the ability to push a $skip stage below the $cursor stage: SERVER-40909. And another about swapping pushing skip below projection: SERVER-14663.
edumanasses@gmail.com, hopefully this explanation makes sense, but let me know if you have any follow-up questions and I'd be happy to answer them. Also, please feel free to watch and vote for the other related tickets: SERVER-31072, SERVER-40909, and SERVER-14663.
|
|
Hi edumanasses@gmail.com,
I'll pass the ticket along to the query team and they'll consider it.
Kind regards,
Carl
|
|
I assumed MongoDB was doing some optimization under the hood, that behavior makes sense. But since there is a performance hit I was wondering if the implementation could optimize the pipeline differently by placing the skip and limit stage at the beginning of the pipeline, or before any expensive stages, regardless of the query preceding those stages so that no expensive stages like lookup have to be processed on all documents before they can be limited and skipped. I did some tests with the same query but with skip and limit placed at the beginning of the pipeline and I got a better performance compared to the default optimization.
|
|
Hi edumanasses@gmail.com,
Thanks for sending those over.
Here is what's happening: the pipeline optimizes the $addFields first, which swaps with the $skip. Then when we optimize the $skip and it is no longer followed by the $limit, so it can't be combined.
Pipeline before optimization: (1) $lookup, (2) $addFields, (3) $skip, (4) $limit
Pipeline after optimization: (1) $lookup, (2) $skip, (3) $addFields, (4) $limit
(Note: in the explain output, the first three $addFields preceding $lookup are not important for the optimizations we are talking about.)
One work around could be to add $skip and $limit at the top when creating the view; however, you will be stuck with them in the view.
Given the above explanation, we don't think there's a bug in MongoDB. Please let me know if you have any questions about my explanation or if you still think there is a bug here.
Kind regards,
Carl
|
|
Here's the query:
db.statusView.find({}).skip(400).limit(10).explain('executionStats')
|
And here's the full explain result:
{
|
"stages" : [
|
{
|
"$cursor" : {
|
"query" : {
|
|
},
|
"fields" : {
|
"currentDate" : 1,
|
"currently_playing" : 1,
|
"name" : 1,
|
"status" : 1,
|
"_id" : 1
|
},
|
"queryPlanner" : {
|
"plannerVersion" : 1,
|
"namespace" : "brokenlens.users",
|
"indexFilterSet" : false,
|
"parsedQuery" : {
|
|
},
|
"queryHash" : "8B3D4AB8",
|
"planCacheKey" : "8B3D4AB8",
|
"winningPlan" : {
|
"stage" : "COLLSCAN",
|
"direction" : "forward"
|
},
|
"rejectedPlans" : [ ]
|
},
|
"executionStats" : {
|
"executionSuccess" : true,
|
"nReturned" : 10979,
|
"executionTimeMillis" : 27019,
|
"totalKeysExamined" : 0,
|
"totalDocsExamined" : 10979,
|
"executionStages" : {
|
"stage" : "COLLSCAN",
|
"nReturned" : 10979,
|
"executionTimeMillisEstimate" : 2,
|
"works" : 10980,
|
"advanced" : 10979,
|
"needTime" : 1,
|
"needYield" : 0,
|
"saveState" : 87,
|
"restoreState" : 86,
|
"isEOF" : 0,
|
"direction" : "forward",
|
"docsExamined" : 10979
|
}
|
}
|
}
|
},
|
{
|
"$addFields" : {
|
"currentDate" : "$$NOW"
|
}
|
},
|
{
|
"$addFields" : {
|
"currentDate" : {
|
"$subtract" : [
|
"$currentDate",
|
{
|
"$const" : 300000
|
}
|
]
|
}
|
}
|
},
|
{
|
"$addFields" : {
|
"currentDate" : {
|
"$dateToString" : {
|
"date" : "$currentDate",
|
"format" : {
|
"$const" : "%Y-%m-%dT%H:%M:%S.%L"
|
},
|
"timezone" : {
|
"$const" : "America/Atikokan"
|
}
|
}
|
}
|
}
|
},
|
{
|
"$lookup" : {
|
"from" : "network_status",
|
"as" : "status",
|
"let" : {
|
"name" : "$_id",
|
"currentDate" : "$currentDate"
|
},
|
"pipeline" : [
|
{
|
"$match" : {
|
"$expr" : {
|
"$and" : [
|
{
|
"$gte" : [
|
"$current_datetime",
|
"$$currentDate"
|
]
|
},
|
{
|
"$in" : [
|
"$$name",
|
"$player_list.name"
|
]
|
}
|
]
|
}
|
}
|
},
|
{
|
"$project" : {
|
"game_name" : 1,
|
"player_list" : {
|
"$filter" : {
|
"input" : "$player_list",
|
"as" : "player",
|
"cond" : {
|
"$eq" : [
|
"$$player.name",
|
"$$name"
|
]
|
}
|
}
|
}
|
}
|
},
|
{
|
"$limit" : 1
|
}
|
]
|
}
|
},
|
{
|
"$skip" : NumberLong(400)
|
},
|
{
|
"$addFields" : {
|
"status" : {
|
"$arrayElemAt" : [
|
"$status",
|
{
|
"$const" : 0
|
}
|
]
|
}
|
}
|
},
|
{
|
"$addFields" : {
|
"currently_playing" : "$status.game_name",
|
"status" : {
|
"$arrayElemAt" : [
|
"$status.player_list",
|
{
|
"$const" : 0
|
}
|
]
|
}
|
}
|
},
|
{
|
"$project" : {
|
"_id" : true,
|
"name" : true,
|
"isOnline" : {
|
"$cond" : [
|
{
|
"$not" : [
|
"$status"
|
]
|
},
|
{
|
"$const" : false
|
},
|
"$status.display_online_status"
|
]
|
},
|
"currently_playing" : {
|
"$cond" : [
|
{
|
"$not" : [
|
"$status.display_online_status"
|
]
|
},
|
{
|
"$const" : null
|
},
|
"$currently_playing"
|
]
|
}
|
}
|
},
|
{
|
"$limit" : NumberLong(10)
|
}
|
],
|
"ok" : 1,
|
"$clusterTime" : {
|
"clusterTime" : Timestamp(1568723216, 3),
|
"signature" : {
|
"hash" : BinData(0,"opbpb3ySCPlqndwPzQDLmK5JpgI="),
|
"keyId" : NumberLong("6691006031473934337")
|
}
|
},
|
"operationTime" : Timestamp(1568723216, 3)
|
}
|
|
|
Hi edumanasses@gmail.com,
Thanks for the report!
I have tested a similar query on a collection and on a view and I have noticed that $skip and $limit are placed at the same position in the stages; therefore, suggesting that a query behaves the same way on a collection and on a view.
To investigate further, please provide the original query and the full explain output for the view and for the collection.
Kind regards,
Carl
|
[
|
{
|
"$addFields" : {
|
"currentDate" : "$$NOW"
|
}
|
},
|
{
|
"$addFields" : {
|
"currentDate" : {
|
"$subtract" : [
|
"$currentDate",
|
{
|
"$const" : 300000
|
}
|
]
|
}
|
}
|
},
|
{
|
"$addFields" : {
|
"currentDate" : {
|
"$dateToString" : {
|
"date" : "$currentDate",
|
"format" : {
|
"$const" : "%Y-%m-%dT%H:%M:%S.%L"
|
},
|
"timezone" : {
|
"$const" : "America/Atikokan"
|
}
|
}
|
}
|
}
|
},
|
{
|
"$lookup" : {
|
"from" : "network_status",
|
"as" : "status",
|
"let" : {
|
"name" : "$_id",
|
"currentDate" : "$currentDate"
|
},
|
"pipeline" : [
|
{
|
"$match" : {
|
"$expr" : {
|
"$and" : [
|
{
|
"$gte" : [
|
"$current_datetime",
|
"$$currentDate"
|
]
|
},
|
{
|
"$in" : [
|
"$$name",
|
"$player_list.name"
|
]
|
}
|
]
|
}
|
}
|
},
|
{
|
"$project" : {
|
"game_name" : 1,
|
"player_list" : {
|
"$filter" : {
|
"input" : "$player_list",
|
"as" : "player",
|
"cond" : {
|
"$eq" : [
|
"$$player.name",
|
"$$name"
|
]
|
}
|
}
|
}
|
}
|
},
|
{
|
"$limit" : 1
|
}
|
]
|
}
|
},
|
{
|
"$skip" : NumberLong(400) // cursor.skip(400) ended up here
|
},
|
{
|
"$addFields" : {
|
"status" : {
|
"$arrayElemAt" : [
|
"$status",
|
{
|
"$const" : 0
|
}
|
]
|
}
|
}
|
},
|
{
|
"$addFields" : {
|
"currently_playing" : "$status.game_name",
|
"status" : {
|
"$arrayElemAt" : [
|
"$status.player_list",
|
{
|
"$const" : 0
|
}
|
]
|
}
|
}
|
},
|
{
|
"$project" : {
|
"_id" : true,
|
"name" : true,
|
"isOnline" : {
|
"$cond" : [
|
{
|
"$not" : [
|
"$status"
|
]
|
},
|
{
|
"$const" : false
|
},
|
"$status.display_online_status"
|
]
|
},
|
"currently_playing" : {
|
"$cond" : [
|
{
|
"$not" : [
|
"$status.display_online_status"
|
]
|
},
|
{
|
"$const" : null
|
},
|
"$currently_playing"
|
]
|
}
|
}
|
},
|
{
|
"$limit" : NumberLong(10) // cursor.limit(10) ended up here
|
}
|
]
|
Here is the result of explain showing that cursor.limit() and cursor.skip() are being placed at different stages instead of being the first stages of the pipeline and thus making the query expensive because all documents are being examined and passed through the lookup stage and only then skip and limit are applied.
|
Generated at Thu Feb 08 05:02:48 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.