[SERVER-20913] DateTime index ignored / horrible performance Created: 14/Oct/15  Updated: 26/Nov/15  Resolved: 14/Oct/15

Status: Closed
Project: Core Server
Component/s: Querying
Affects Version/s: 2.2.0, 3.0.6
Fix Version/s: None

Type: Bug Priority: Minor - P4
Reporter: Rob Janssen Assignee: David Storch
Resolution: Duplicate Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Issue Links:
Duplicate
duplicates SERVER-16042 Optimise $all/$and to select smallest... Closed
Related
is related to DOCS-6372 Query Plans page is out of date Closed
Operating System: ALL
Participants:

 Description   

We have recently moved from a (single instance) MongoDB server 2.2.0 on Linux to a (single instance) MongoDB server 3.0.6 on Windows for a specific project. We have a collection that contains ~1.3 million documents.

The following query to find messages for a specific 24h period performs perfectly fine on 2.2.0:

db.MyCollection.find({ query: { 
    $and: [ 
        { "Messages.Message.Header.TimeStamp": { $gte: ISODate("2015-10-12T22:00:00Z") }},
        { "Messages.Message.Header.TimeStamp": { $lt: ISODate("2015-10-13T22:00:00Z") }}
    ] 
}, $explain: 1});

Explain:

{
  "cursor": "BtreeCursor Messages.Message.Header.TimeStamp_1",
  "isMultiKey": true,
  "n": 7854,
  "nscannedObjects": 7854,
  "nscanned": 7854,
  "nscannedObjectsAllPlans": 7854,
  "nscannedAllPlans": 7854,
  "scanAndOrder": false,
  "indexOnly": false,
  "nYields": 0,
  "nChunkSkips": 0,
  "millis": 31,
  "indexBounds": {
    "Messages.Message.Header.TimeStamp": [
      [
        "2015-10-12T22:00:00Z",
        "292278995-01--2147483647T07:12:56.808Z"
      ]
    ]
  },
  "allPlans": [
    {
      "cursor": "BtreeCursor Messages.Message.Header.TimeStamp_1",
      "n": 7854,
      "nscannedObjects": 7854,
      "nscanned": 7854,
      "indexBounds": {
        "Messages.Message.Header.TimeStamp": [
          [
            "2015-10-12T22:00:00Z",
            "292278995-01--2147483647T07:12:56.808Z"
          ]
        ]
      }
    }
  ],
  "oldPlan": {
    "cursor": "BtreeCursor Messages.Message.Header.TimeStamp_1",
    "indexBounds": {
      "Messages.Message.Header.TimeStamp": [
        [
          "2015-10-12T22:00:00Z",
          "292278995-01--2147483647T07:12:56.808Z"
        ]
      ]
    }
  },
  "server": "myserver:27017"
}

(Though I don't know what's going on with the weird

""292278995-01--2147483647T07:12:56.808Z"

value...?)

We copied the collection to the new 3.0 (windows) server. And ran the same query; this took over 9 minutes to complete. I have re-created the index but that didn't help.

The explain on this (win, 3.0.6) host looks like this:

{
  "queryPlanner": {
    "plannerVersion": 1,
    "namespace": "MyDB.MyCollection",
    "indexFilterSet": false,
    "parsedQuery": {
      "$and": [
        {
          "Messages.Message.Header.TimeStamp": {
            "$lt": "2015-10-13T22:00:00Z"
          }
        },
        {
          "Messages.Message.Header.TimeStamp": {
            "$gte": "2015-10-12T22:00:00Z"
          }
        }
      ]
    },
    "winningPlan": {
      "stage": "KEEP_MUTATIONS",
      "inputStage": {
        "stage": "FETCH",
        "filter": {
          "Messages.Message.Header.TimeStamp": {
            "$gte": "2015-10-12T22:00:00Z"
          }
        },
        "inputStage": {
          "stage": "IXSCAN",
          "keyPattern": {
            "Messages.Message.Header.TimeStamp": 1
          },
          "indexName": "Messages.Message.Header.TimeStamp_1",
          "isMultiKey": true,
          "direction": "forward",
          "indexBounds": {
            "Messages.Message.Header.TimeStamp": [
              "(true, new Date(1444773600000))"
            ]
          }
        }
      }
    },
    "rejectedPlans": []
  },
  "executionStats": {
    "executionSuccess": true,
    "nReturned": 8033,
    "executionTimeMillis": 118350,
    "totalKeysExamined": 1383287,
    "totalDocsExamined": 1383199,
    "executionStages": {
      "stage": "KEEP_MUTATIONS",
      "nReturned": 8033,
      "executionTimeMillisEstimate": 112792,
      "works": 1383353,
      "advanced": 8033,
      "needTime": 1375254,
      "needFetch": 65,
      "saveState": 11048,
      "restoreState": 11048,
      "isEOF": 1,
      "invalidates": 0,
      "inputStage": {
        "stage": "FETCH",
        "filter": {
          "Messages.Message.Header.TimeStamp": {
            "$gte": "2015-10-12T22:00:00Z"
          }
        },
        "nReturned": 8033,
        "executionTimeMillisEstimate": 112652,
        "works": 1383353,
        "advanced": 8033,
        "needTime": 1375254,
        "needFetch": 65,
        "saveState": 11048,
        "restoreState": 11048,
        "isEOF": 1,
        "invalidates": 0,
        "docsExamined": 1383199,
        "alreadyHasObj": 0,
        "inputStage": {
          "stage": "IXSCAN",
          "nReturned": 1383199,
          "executionTimeMillisEstimate": 104092,
          "works": 1383287,
          "advanced": 1383199,
          "needTime": 88,
          "needFetch": 0,
          "saveState": 11048,
          "restoreState": 11048,
          "isEOF": 1,
          "invalidates": 0,
          "keyPattern": {
            "Messages.Message.Header.TimeStamp": 1
          },
          "indexName": "Messages.Message.Header.TimeStamp_1",
          "isMultiKey": true,
          "direction": "forward",
          "indexBounds": {
            "Messages.Message.Header.TimeStamp": [
              "(true, new Date(1444773600000))"
            ]
          },
          "keysExamined": 1383287,
          "dupsTested": 1383287,
          "dupsDropped": 88,
          "seenInvalidated": 0,
          "matchTested": 0
        }
      }
    },
    "allPlansExecution": []
  },
  "serverInfo": {
    "host": "myserver2",
    "port": 27017,
    "version": "3.0.6",
    "gitVersion": "1ef45a23a4c5e3480ac919b28afcba3c615488f2"
  }
}

If I change the query to 'greaterorequal than date X' instead of 'greaterorequal than date X and less than date Y' the query performs fine:

db.Conversations.find({ query: { 
        "Messages.Message.Header.TimeStamp": { $gte: ISODate("2015-10-12T22:00:00Z") }
}, $explain: 1});

Explain:

{
  "queryPlanner": {
    "plannerVersion": 1,
    "namespace": "MyDB.MyCollection",
    "indexFilterSet": false,
    "parsedQuery": {
      "Messages.Message.Header.TimeStamp": {
        "$gte": "2015-10-12T22:00:00Z"
      }
    },
    "winningPlan": {
      "stage": "FETCH",
      "inputStage": {
        "stage": "IXSCAN",
        "keyPattern": {
          "Messages.Message.Header.TimeStamp": 1
        },
        "indexName": "Messages.Message.Header.TimeStamp_1",
        "isMultiKey": true,
        "direction": "forward",
        "indexBounds": {
          "Messages.Message.Header.TimeStamp": [
            "[new Date(1444687200000), new Date(9223372036854775807)]"
          ]
        }
      }
    },
    "rejectedPlans": []
  },
  "executionStats": {
    "executionSuccess": true,
    "nReturned": 11081,
    "executionTimeMillis": 359,
    "totalKeysExamined": 11081,
    "totalDocsExamined": 11081,
    "executionStages": {
      "stage": "FETCH",
      "nReturned": 11081,
      "executionTimeMillisEstimate": 330,
      "works": 11631,
      "advanced": 11081,
      "needTime": 0,
      "needFetch": 549,
      "saveState": 558,
      "restoreState": 558,
      "isEOF": 1,
      "invalidates": 0,
      "docsExamined": 11081,
      "alreadyHasObj": 0,
      "inputStage": {
        "stage": "IXSCAN",
        "nReturned": 11081,
        "executionTimeMillisEstimate": 270,
        "works": 11081,
        "advanced": 11081,
        "needTime": 0,
        "needFetch": 0,
        "saveState": 558,
        "restoreState": 558,
        "isEOF": 1,
        "invalidates": 0,
        "keyPattern": {
          "Messages.Message.Header.TimeStamp": 1
        },
        "indexName": "Messages.Message.Header.TimeStamp_1",
        "isMultiKey": true,
        "direction": "forward",
        "indexBounds": {
          "Messages.Message.Header.TimeStamp": [
            "[new Date(1444687200000), new Date(9223372036854775807)]"
          ]
        },
        "keysExamined": 11081,
        "dupsTested": 11081,
        "dupsDropped": 0,
        "seenInvalidated": 0,
        "matchTested": 0
      }
    },
    "allPlansExecution": []
  },
  "serverInfo": {
    "host": "myserver2",
    "port": 27017,
    "version": "3.0.6",
    "gitVersion": "1ef45a23a4c5e3480ac919b28afcba3c615488f2"
  }
}

It seems that a range query "X => date1 X $and X < date2" somehow trips the queryplanner into not using the index whereas "X >= date1" is fine.

If there's anything I can to to help or provide more information then please let me know.



 Comments   
Comment by Asya Kamsky [ 26/Nov/15 ]

robiii please note that if your original query was intended to use the same timestamp to match both conditions, i.e.

{     "Messages.Message.Header.TimeStamp": { 
        $gte: ISODate("2015-10-12T22:00:00Z"),
        $lt: ISODate("2015-10-13T22:00:00Z")
 }}

expresses that you are looking for timestamp values between the two dates (as opposed to documents which have one timestamp satisfy one condition and the other satisfy the other condition) then the correct way to query for those is with $elemMatch. That expresses that the same array element must satisfy all query conditions, if the array is "messages" then query would be:

{     "Messages":{$elemMatch:{"Message.Header.TimeStamp": { 
        $gte: ISODate("2015-10-12T22:00:00Z"),
        $lt: ISODate("2015-10-13T22:00:00Z")
 }} } }

Leaving this comment in case others come across similar problem and realize they should use $elemMatch (which can properly constrain index bounds of the query on both sides).

Comment by David Storch [ 14/Oct/15 ]

robiii, glad I could help. We do have a page with basic documentation on MongoDB query optimization:

http://docs.mongodb.org/manual/core/query-plans/

While tracking down this page, however, I discovered that it is a bit out of date and has some minor accuracies in its description of planning in 2.6.x and 3.0.x versions. I filed DOCS-6372 to improve the documentation.

Comment by Rob Janssen [ 14/Oct/15 ]

Hi David!

Thanks for the explanation; it's really appreciated.
I guess I / we have been lucky until now indeed. And I consider myself / ourselves even more lucky that the day I encountered the problem a new release was available fixing this exact problem. What are the odds? So, anyway, I'll be off to get a lottery ticket

I tried adding hints etc. but this didn't help. I understand now why because of your explanation which is, again, very much appreciated.


EDIT

After the fix, the engine instead will generate both candidate plans. It then ranks them against one another and chooses the best plan using MongoDB's regular plan selection mechanism!

I wouldn't mind, maybe, a little more info on how the ranking works and how the best plan is selected but only if it's not too much of a bother. Maybe a pointer to the documentation if this can be found anywhere?


Consider this issue solved and feel free to close it as resolved Thanks again for the awesome support!

Comment by David Storch [ 14/Oct/15 ]

Hi robiii,

Thanks very much for filing this report. As you have discovered on your own, the problem you described was fixed in 3.0.7 under SERVER-16042. As such, I am resolving this ticket as a duplicate. Please read on for more of the technical details.

The issue has to do with index bounds selection for a multikey index. (A multikey index is an index which may have multiple index keys pointing to the same document in the collection, i.e. an indexed array field.) If there are multiple predicates over an array field, the server is only allowed to use one of these predicates to construct the index bounds. Consider the example of a collection containing the document {a: [1, 2, 3]} with index {a: 1}. The user issues the following query:

db.foo.find({a: {$lte: 1, $gte: 3}});

The document matches the query because array element "1" matches the predicate "a <= 1" and array element 3 matches the predicate "a >= 3".

The system must answer this query either by

  1. scanning index range [-Inf, 1], fetching the corresponding documents from the collection, and filtering out documents that don't also have an array element greater than or equal to 3; or,
  2. scanning index range [3, Inf], fetching the corresponding documents from the collection, and filtering out documents that don't also have an array element less than or equal to 1.

The problem query that you reported is essentially identical in structure to this toy example query. In your case, however, there is a large performance gap between the two plans. Imagine, for example, that the scan [3, Inf] is vastly more selective than the scan [-Inf, 1]. On version 2.2.0, you were getting lucky, since the system happened to choose the selective [3, Inf] plan. On version 3.0.6, you were getting equally unlucky, since the system happened to choose the unselective [-Inf, 1] plan. (If you're curious why this changed, it's because of a large query system refactor first released in version 2.6.0. Part of this refactor involves sorting the parsed query tree, which changes which predicate we happen to select to build the multikey index bounds.)

In 3.0.7, the problem is fixed by SERVER-16042. Prior to this fix, the query engine would arbitrarily choose one of the two available multikey index scan plans. After the fix, the engine instead will generate both candidate plans. It then ranks them against one another and chooses the best plan using MongoDB's regular plan selection mechanism!

I hope this explanation clears things up. Feel free to reach out with any further questions or concerns.

Best,
Dave

Comment by Rob Janssen [ 14/Oct/15 ]

NO WAY!

I just noticed that 3.0.7 was released yesterday (we installed 3.0.6 <24h ago, which was at the time the latest production version on mongodb.org). I installed 3.0.7 and guess what? It's fixed (or at least: seems to be; the query performs fine now)!

I have glanced over the issues fixed in 3.0.7 and from what I found so far it seems that SERVER-16042 has something to do with it (not sure though).

For those interested, the explain now looks like this:

{
  "queryPlanner": {
    "plannerVersion": 1,
    "namespace": "MyDB.MyCollection",
    "indexFilterSet": false,
    "parsedQuery": {
      "$and": [
        {
          "Messages.Message.Header.TimeStamp": {
            "$lt": "2015-10-13T22:00:00Z"
          }
        },
        {
          "Messages.Message.Header.TimeStamp": {
            "$gte": "2015-10-12T22:00:00Z"
          }
        }
      ]
    },
    "winningPlan": {
      "stage": "KEEP_MUTATIONS",
      "inputStage": {
        "stage": "FETCH",
        "filter": {
          "Messages.Message.Header.TimeStamp": {
            "$lt": "2015-10-13T22:00:00Z"
          }
        },
        "inputStage": {
          "stage": "IXSCAN",
          "keyPattern": {
            "Messages.Message.Header.TimeStamp": 1
          },
          "indexName": "Messages.Message.Header.TimeStamp_1",
          "isMultiKey": true,
          "direction": "forward",
          "indexBounds": {
            "Messages.Message.Header.TimeStamp": [
              "[new Date(1444687200000), new Date(9223372036854775807)]"
            ]
          }
        }
      }
    },
    "rejectedPlans": [
      {
        "stage": "KEEP_MUTATIONS",
        "inputStage": {
          "stage": "FETCH",
          "filter": {
            "Messages.Message.Header.TimeStamp": {
              "$gte": "2015-10-12T22:00:00Z"
            }
          },
          "inputStage": {
            "stage": "IXSCAN",
            "keyPattern": {
              "Messages.Message.Header.TimeStamp": 1
            },
            "indexName": "Messages.Message.Header.TimeStamp_1",
            "isMultiKey": true,
            "direction": "forward",
            "indexBounds": {
              "Messages.Message.Header.TimeStamp": [
                "(true, new Date(1444773600000))"
              ]
            }
          }
        }
      }
    ]
  },
  "executionStats": {
    "executionSuccess": true,
    "nReturned": 8067,
    "executionTimeMillis": 342,
    "totalKeysExamined": 12294,
    "totalDocsExamined": 12294,
    "executionStages": {
      "stage": "KEEP_MUTATIONS",
      "nReturned": 8067,
      "executionTimeMillisEstimate": 310,
      "works": 12301,
      "advanced": 8067,
      "needTime": 4227,
      "needFetch": 6,
      "saveState": 102,
      "restoreState": 102,
      "isEOF": 1,
      "invalidates": 0,
      "inputStage": {
        "stage": "FETCH",
        "filter": {
          "Messages.Message.Header.TimeStamp": {
            "$lt": "2015-10-13T22:00:00Z"
          }
        },
        "nReturned": 8067,
        "executionTimeMillisEstimate": 310,
        "works": 12301,
        "advanced": 8067,
        "needTime": 4227,
        "needFetch": 6,
        "saveState": 102,
        "restoreState": 102,
        "isEOF": 1,
        "invalidates": 0,
        "docsExamined": 12294,
        "alreadyHasObj": 0,
        "inputStage": {
          "stage": "IXSCAN",
          "nReturned": 12294,
          "executionTimeMillisEstimate": 220,
          "works": 12294,
          "advanced": 12294,
          "needTime": 0,
          "needFetch": 0,
          "saveState": 102,
          "restoreState": 102,
          "isEOF": 1,
          "invalidates": 0,
          "keyPattern": {
            "Messages.Message.Header.TimeStamp": 1
          },
          "indexName": "Messages.Message.Header.TimeStamp_1",
          "isMultiKey": true,
          "direction": "forward",
          "indexBounds": {
            "Messages.Message.Header.TimeStamp": [
              "[new Date(1444687200000), new Date(9223372036854775807)]"
            ]
          },
          "keysExamined": 12294,
          "dupsTested": 12294,
          "dupsDropped": 0,
          "seenInvalidated": 0,
          "matchTested": 0
        }
      }
    },
    "allPlansExecution": [
      {
        "nReturned": 0,
        "executionTimeMillisEstimate": 0,
        "totalKeysExamined": 101,
        "totalDocsExamined": 101,
        "executionStages": {
          "stage": "KEEP_MUTATIONS",
          "nReturned": 0,
          "executionTimeMillisEstimate": 0,
          "works": 101,
          "advanced": 0,
          "needTime": 101,
          "needFetch": 0,
          "saveState": 102,
          "restoreState": 102,
          "isEOF": 0,
          "invalidates": 0,
          "inputStage": {
            "stage": "FETCH",
            "filter": {
              "Messages.Message.Header.TimeStamp": {
                "$gte": "2015-10-12T22:00:00Z"
              }
            },
            "nReturned": 0,
            "executionTimeMillisEstimate": 0,
            "works": 101,
            "advanced": 0,
            "needTime": 101,
            "needFetch": 0,
            "saveState": 102,
            "restoreState": 102,
            "isEOF": 0,
            "invalidates": 0,
            "docsExamined": 101,
            "alreadyHasObj": 0,
            "inputStage": {
              "stage": "IXSCAN",
              "nReturned": 101,
              "executionTimeMillisEstimate": 0,
              "works": 101,
              "advanced": 101,
              "needTime": 0,
              "needFetch": 0,
              "saveState": 102,
              "restoreState": 102,
              "isEOF": 0,
              "invalidates": 0,
              "keyPattern": {
                "Messages.Message.Header.TimeStamp": 1
              },
              "indexName": "Messages.Message.Header.TimeStamp_1",
              "isMultiKey": true,
              "direction": "forward",
              "indexBounds": {
                "Messages.Message.Header.TimeStamp": [
                  "(true, new Date(1444773600000))"
                ]
              },
              "keysExamined": 101,
              "dupsTested": 101,
              "dupsDropped": 0,
              "seenInvalidated": 0,
              "matchTested": 0
            }
          }
        }
      },
      {
        "nReturned": 101,
        "executionTimeMillisEstimate": 10,
        "totalKeysExamined": 101,
        "totalDocsExamined": 101,
        "executionStages": {
          "stage": "KEEP_MUTATIONS",
          "nReturned": 101,
          "executionTimeMillisEstimate": 10,
          "works": 101,
          "advanced": 101,
          "needTime": 0,
          "needFetch": 0,
          "saveState": 1,
          "restoreState": 1,
          "isEOF": 0,
          "invalidates": 0,
          "inputStage": {
            "stage": "FETCH",
            "filter": {
              "Messages.Message.Header.TimeStamp": {
                "$lt": "2015-10-13T22:00:00Z"
              }
            },
            "nReturned": 101,
            "executionTimeMillisEstimate": 10,
            "works": 101,
            "advanced": 101,
            "needTime": 0,
            "needFetch": 0,
            "saveState": 1,
            "restoreState": 1,
            "isEOF": 0,
            "invalidates": 0,
            "docsExamined": 101,
            "alreadyHasObj": 0,
            "inputStage": {
              "stage": "IXSCAN",
              "nReturned": 101,
              "executionTimeMillisEstimate": 10,
              "works": 101,
              "advanced": 101,
              "needTime": 0,
              "needFetch": 0,
              "saveState": 1,
              "restoreState": 1,
              "isEOF": 0,
              "invalidates": 0,
              "keyPattern": {
                "Messages.Message.Header.TimeStamp": 1
              },
              "indexName": "Messages.Message.Header.TimeStamp_1",
              "isMultiKey": true,
              "direction": "forward",
              "indexBounds": {
                "Messages.Message.Header.TimeStamp": [
                  "[new Date(1444687200000), new Date(9223372036854775807)]"
                ]
              },
              "keysExamined": 101,
              "dupsTested": 101,
              "dupsDropped": 0,
              "seenInvalidated": 0,
              "matchTested": 0
            }
          }
        }
      }
    ]
  },
  "serverInfo": {
    "host": "myserver2",
    "port": 27017,
    "version": "3.0.7",
    "gitVersion": "6ce7cbe8c6b899552dadd907604559806aa2e9bd"
  }
}

Also please note that, in this comment but also previous comments and/or the issue itself, that wherever you read "2015-10-...00Z" you should actually read ISODate("2015-10...00Z"); I had this removed because my formatter doesn't like it

Comment by Rob Janssen [ 14/Oct/15 ]

Oh, also this doesn't improve/change the situation:

db.MyCollection.find({ query: { 
    "Messages.Message.Header.TimeStamp": { 
        $gte: ISODate("2015-10-12T22:00:00Z"),
        $lt: ISODate("2015-10-13T22:00:00Z")
    }
}, $explain: 1});

instead of:

db.MyCollection.find({ query: { 
    $and: [ 
        { "Messages.Message.Header.TimeStamp": { $gte: ISODate("2015-10-12T22:00:00Z") }},
        { "Messages.Message.Header.TimeStamp": { $lt: ISODate("2015-10-13T22:00:00Z") }}
    ] 
}, $explain: 1});

Generated at Thu Feb 08 03:55:40 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.