Loading...

XML

Word

Printable

JSON

Type: New Feature
Resolution: Unresolved
Priority: Major - P3
Fix Version/s: None
Affects Version/s: None
Component/s: Querying
Labels:
None

Assigned Teams:

Query Optimization
CAR Domain/s:
None

Aha! Reference:
None
Tracking Level:
None
Risk Status:
None
Exec Notes:
None
Goal Name(s):
None
Goal Link:
None

Consider the following script.

db.test.insert({_id : {a : 1, b : 1}})
db.test.insert({_id : {a : 1, b : 2}})
db.test.insert({_id : {a : 2, b : 1}})
db.test.insert({_id : {a : 2, b : 2}})
db.test.insert({_id : {a : 3, b : 1}})
db.test.insert({_id : {a : 3, b : 2}})
db.test.insert({_id : {a : 4, b : 1}})
db.test.insert({_id : {a : 4, b : 2}})
db.test.find({"$and" : [{_id : {"$gte" : {a : 2} }}, {_id : {"$lte" : {a : 3, b : MaxKey()} }}, {"_id.b" : 1}]})

==>

{ "_id" : { "a" : 2, "b" : 1 } }
{ "_id" : { "a" : 3, "b" : 1 } }

So far so good. Yet, doing explain() on that reveals the following:

{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "test.test",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "$and" : [
                {
                    "_id.b" : {
                        "$eq" : 1
                    }
                },
                {
                    "_id" : {
                        "$lte" : {
                            "a" : 3,
                            "b" : { "$maxKey" : 1 }
                        }
                    }
                },
                {
                    "_id" : {
                        "$gte" : {
                            "a" : 2
                        }
                    }
                }
            ]
        },
        "winningPlan" : {
            "stage" : "FETCH",
            "filter" : {
                "$and" : [
                    {
                        "_id" : {
                            "$lte" : {
                                "a" : 3,
                                "b" : { "$maxKey" : 1 }
                            }
                        }
                    },
                    {
                        "_id" : {
                            "$gte" : {
                                "a" : 2
                            }
                        }
                    },
                    {
                        "_id.b" : {
                            "$eq" : 1
                        }
                    }
                ]
            },
            "inputStage" : {
                "stage" : "IXSCAN",
                "keyPattern" : {
                    "_id" : 1
                },
                "indexName" : "_id_",
                "isMultiKey" : false,
                "isUnique" : true,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 1,
                "direction" : "forward",
                "indexBounds" : {
                    "_id" : [
                        "[{ a: 2.0 }, { a: 3.0, b: MaxKey }]"
                    ]
                }
            }
        },
        "rejectedPlans" : [ ]
    },
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 2,
        "executionTimeMillis" : 0,
        "totalKeysExamined" : 4,
        "totalDocsExamined" : 4,
        "executionStages" : {
            "stage" : "FETCH",
            "filter" : {
                "$and" : [
                    {
                        "_id" : {
                            "$lte" : {
                                "a" : 3,
                                "b" : { "$maxKey" : 1 }
                            }
                        }
                    },
                    {
                        "_id" : {
                            "$gte" : {
                                "a" : 2
                            }
                        }
                    },
                    {
                        "_id.b" : {
                            "$eq" : 1
                        }
                    }
                ]
            },
            "nReturned" : 2,
            "executionTimeMillisEstimate" : 0,
            "works" : 5,
            "advanced" : 2,
            "needTime" : 2,
            "needYield" : 0,
            "saveState" : 0,
            "restoreState" : 0,
            "isEOF" : 1,
            "invalidates" : 0,
            "docsExamined" : 4,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 4,
                "executionTimeMillisEstimate" : 0,
                "works" : 5,
                "advanced" : 4,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 0,
                "restoreState" : 0,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "_id" : 1
                },
                "indexName" : "_id_",
                "isMultiKey" : false,
                "isUnique" : true,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 1,
                "direction" : "forward",
                "indexBounds" : {
                    "_id" : [
                        "[{ a: 2.0 }, { a: 3.0, b: MaxKey }]"
                    ]
                },
                "keysExamined" : 4,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0
            }
        },
        "allPlansExecution" : [ ]
    },
    "serverInfo" : {
        "host" : "***",
        "port" : 27017,
        "version" : "3.2.1",
        "gitVersion" : "a14d55980c2cdc565d4704a7e3ad37e4e535c1b2"
    },
    "ok" : 1
}

I may be misinterpreting the output, but it seems to me that the filter

{"_id.b" : 1}

is applied AFTER the full document has been retrieved, NOT when scanning the index. I am led to this conclusion by seeing the filter spec in the FETCH stage and seeing

            "nReturned" : 2,
            "docsExamined" : 4,

in its stats.

So, basically, when the index is on a sub-document, and the query filter includes the sub-document's fields, the server does not seem to understand that it could use the index to evaluate the filter; it goes to the collection instead. That can be very costly.

Some background as to why this is important. The _id index is mandatory. It is therefore desirable to make it useful. It is also non-configurable, so one pretty much has to make _id a sub-document except in simplest cases.

Assignee:: [DO NOT USE] Backlog - Query Optimization
Reporter:: Viacheslav Usov
Participants:: [DO NOT USE] Backlog - Query Optimization, Kelsey Schubert, Viacheslav Usov
Votes:: 0 Vote for this issue
Watchers:: 8 Start watching this issue

Created:: Feb 03 2016 03:35:36 PM UTC
Updated:: Dec 06 2022 04:34:29 AM UTC

Details

Description

Attachments

Activity

People

Dates