Query Plan Cache Issue with Index Selection on High Disparity Index Values

XMLWordPrintableJSON

    • Type: Bug
    • Resolution: Won't Do
    • Priority: Major - P3
    • None
    • Affects Version/s: 4.4.13, 4.4.29
    • Component/s: None
    • None
    • ALL
    • Hide

      Data Model Creation:

      • Create a collection test.data and add an index on {{{} { "indexField10" : 1 }

        {}}}.

      • Insert 100,000 documents with the following structure:

       

      // code placeholder
      func insertData(collection *mongo.Collection, numRecords int) {    for i := 0; i < numRecords; i++ {        record := Record{            ID:          fmt.Sprintf("%d", i),            IndexField1: fmt.Sprintf("indexValue1_%d", i),            IndexField2: fmt.Sprintf("indexValue2_%d", i),            IndexField3: fmt.Sprintf("indexValue3_%d", i),            IndexField4: fmt.Sprintf("indexValue4_%d", i),            IndexField5: fmt.Sprintf("indexValue5_%d", i),            IndexField6: fmt.Sprintf("indexValue6_%d", i),            IndexField7: fmt.Sprintf("indexValue7_%d", i),            IndexField8: fmt.Sprintf("indexValue8_%d", i),            IndexField9: fmt.Sprintf("indexValue9_%d", i),        }        if i < int(float32(numRecords)*0.5) {            record.IndexField10 = 0        } else if i >= int(float32(numRecords)*0.5) && i < int(float32(numRecords)*0.6) {            record.IndexField10 = 1        } else if i >= int(float32(numRecords)*0.6) && i < int(float32(numRecords)*0.7) {            record.IndexField10 = 2        } else if i >= int(float32(numRecords)*0.7) && i < int(float32(numRecords)*0.8) {            record.IndexField10 = 3        } else if i >= int(float32(numRecords)*0.8) && i < int(float32(numRecords)*0.9) {            record.IndexField10 = 4        } else {            record.IndexField10 = int64(i)        }...} 

      then execute command with skip and without skip, it hit the same plan cache and not update

      // code placeholder
      rs:PRIMARY> use test
      rs:PRIMARY> db.data.getPlanCache().clear() 
      rs:PRIMARY> db.data.find({"indexField10" : NumberLong(0)}, {"_id" : 1, "indexField1" : 1, "indexField2" : 1, "indexField3" : 1, "indexField10": 1}).sort({"_id": 1}).skip(49000).limit(1000)
      rs:PRIMARY> db.data.find({"indexField10" : NumberLong(0)}, {"_id" : 1, "indexField1" : 1, "indexField2" : 1, "indexField3" : 1, "indexField10": 1}).sort({"_id": 1}).skip(49000).limit(1000)
      rs:PRIMARY> db.data.aggregate([ {$planCacheStats: {}}, { $match: { planCacheKey: "B23FE125" } } ])
      // the cache plan is active
      rs:PRIMARY> db.data.find({"indexField10" : NumberLong(1)}, {"_id" : 1, "indexField1" : 1, "indexField2" : 1, "indexField3" : 1, "indexField10": 1}).sort({"_id": 1}).skip(9000).limit(1000)
      rs:PRIMARY> db.data.find({"indexField10" : NumberLong(1)}, {"_id" : 1, "indexField1" : 1, "indexField2" : 1, "indexField3" : 1, "indexField10": 1}).sort({"_id": 1}).limit(1000)
      rs:PRIMARY> db.data.find({"indexField10" : NumberLong(93333)}, {"_id" : 1, "indexField1" : 1, "indexField2" : 1, "indexField3" : 1, "indexField10": 1}).sort({"_id": 1}).limit(1000)
      // the "error" cachedplan is still active
      rs:PRIMARY> db.data.aggregate([ {$planCacheStats: {}}, { $match: { planCacheKey: "B23FE125" } } ])

      and the last query became slow query because of the hit cached plan

      // code placeholder
      rs:PRIMARY> db.system.profile.find().sort({$natural: -1}).limit(1)
      { "op" : "query", "ns" : "test.data", "command" : { "find" : "data", "filter" : { "indexField10" : NumberLong(93333) }, "limit" : 1000, "singleBatch" : false, "sort" : { "_id" : 1 }, "projection" : { "_id" : 1, "indexField1" : 1, "indexField2" : 1, "indexField3" : 1, "indexField10" : 1 }, "lsid" : { "id" : UUID("823a752a-2482-4e64-8256-2b45c0ddffb1") }, "$clusterTime" : { "clusterTime" : Timestamp(1752461315, 1), "signature" : { "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), "keyId" : NumberLong(0) } }, "$db" : "test" }, "keysExamined" : 100000, "docsExamined" : 100000, "cursorExhausted" : true, "numYield" : 100, "nreturned" : 1, "queryHash" : "2AE01972", "planCacheKey" : "B23FE125", "locks" : { "FeatureCompatibilityVersion" : { "acquireCount" : { "r" : NumberLong(102) } }, "ReplicationStateTransition" : { "acquireCount" : { "w" : NumberLong(102) } }, "Global" : { "acquireCount" : { "r" : NumberLong(102) } }, "Database" : { "acquireCount" : { "r" : NumberLong(101) } }, "Collection" : { "acquireCount" : { "r" : NumberLong(101) } }, "Mutex" : { "acquireCount" : { "r" : NumberLong(1) } } }, "flowControl" : {  }, "storage" : {  }, "responseLength" : 375, "protocol" : "op_msg", "millis" : 179, "planSummary" : "IXSCAN { _id: 1 }", "execStats" : { "stage" : "CACHED_PLAN", "nReturned" : 1, "executionTimeMillisEstimate" : 179, "works" : 2, "advanced" : 1, "needTime" : 0, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "inputStage" : { "stage" : "LIMIT", "nReturned" : 1, "executionTimeMillisEstimate" : 27, "works" : 100001, "advanced" : 1, "needTime" : 99999, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "limitAmount" : 1000, "inputStage" : { "stage" : "PROJECTION_SIMPLE", "nReturned" : 1, "executionTimeMillisEstimate" : 27, "works" : 100001, "advanced" : 1, "needTime" : 99999, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "transformBy" : {  }, "inputStage" : { "stage" : "FETCH", "filter" : { "indexField10" : { "$eq" : NumberLong(93333) } }, "nReturned" : 1, "executionTimeMillisEstimate" : 26, "works" : 100001, "advanced" : 1, "needTime" : 99999, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "docsExamined" : 100000, "alreadyHasObj" : 0, "inputStage" : { "stage" : "IXSCAN", "nReturned" : 100000, "executionTimeMillisEstimate" : 8, "works" : 100001, "advanced" : 100000, "needTime" : 0, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "keyPattern" : { "_id" : 1 }, "indexName" : "_id_", "isMultiKey" : false, "multiKeyPaths" : { "_id" : [ ] }, "isUnique" : true, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "_id" : [ "[MinKey, MaxKey]" ] }, "keysExamined" : 100000, "seeks" : 1, "dupsTested" : 0, "dupsDropped" : 0 } } } } }, "ts" : ISODate("2025-07-14T02:48:54.422Z"), "client" : "127.0.0.1", "appName" : "MongoDB Shell", "allUsers" : [ ], "user" : "" } 

       

      Show
      Data Model Creation : Create a collection  test.data  and add an index on {{{} { "indexField10" : 1 } {}}}. Insert 100,000 documents with the following structure:   // code placeholder func insertData(collection *mongo.Collection, numRecords  int ) {     for  i := 0; i < numRecords; i++ {        record := Record{            ID:          fmt.Sprintf( "%d" , i),            IndexField1: fmt.Sprintf( "indexValue1_%d" , i),            IndexField2: fmt.Sprintf( "indexValue2_%d" , i),            IndexField3: fmt.Sprintf( "indexValue3_%d" , i),            IndexField4: fmt.Sprintf( "indexValue4_%d" , i),            IndexField5: fmt.Sprintf( "indexValue5_%d" , i),            IndexField6: fmt.Sprintf( "indexValue6_%d" , i),            IndexField7: fmt.Sprintf( "indexValue7_%d" , i),            IndexField8: fmt.Sprintf( "indexValue8_%d" , i),            IndexField9: fmt.Sprintf( "indexValue9_%d" , i),        }         if  i <  int (float32(numRecords)*0.5) {            record.IndexField10 = 0        }  else   if  i >=  int (float32(numRecords)*0.5) && i <  int (float32(numRecords)*0.6) {            record.IndexField10 = 1        }  else   if  i >=  int (float32(numRecords)*0.6) && i <  int (float32(numRecords)*0.7) {            record.IndexField10 = 2        }  else   if  i >=  int (float32(numRecords)*0.7) && i <  int (float32(numRecords)*0.8) {            record.IndexField10 = 3        }  else   if  i >=  int (float32(numRecords)*0.8) && i <  int (float32(numRecords)*0.9) {            record.IndexField10 = 4        }  else  {            record.IndexField10 = int64(i)        }...} then execute command with skip and without skip, it hit the same plan cache and not update // code placeholder rs:PRIMARY> use test rs:PRIMARY> db.data.getPlanCache().clear() rs:PRIMARY> db.data.find({ "indexField10"  : NumberLong(0)}, { "_id"  : 1,  "indexField1"  : 1,  "indexField2"  : 1,  "indexField3"  : 1,  "indexField10" : 1}).sort({ "_id" : 1}).skip(49000).limit(1000) rs:PRIMARY> db.data.find({ "indexField10"  : NumberLong(0)}, { "_id"  : 1,  "indexField1"  : 1,  "indexField2"  : 1,  "indexField3"  : 1,  "indexField10" : 1}).sort({ "_id" : 1}).skip(49000).limit(1000) rs:PRIMARY> db.data.aggregate([ {$planCacheStats: {}}, { $match: { planCacheKey:  "B23FE125"  } } ]) // the cache plan is active rs:PRIMARY> db.data.find({ "indexField10"  : NumberLong(1)}, { "_id"  : 1,  "indexField1"  : 1,  "indexField2"  : 1,  "indexField3"  : 1,  "indexField10" : 1}).sort({ "_id" : 1}).skip(9000).limit(1000) rs:PRIMARY> db.data.find({ "indexField10"  : NumberLong(1)}, { "_id"  : 1,  "indexField1"  : 1,  "indexField2"  : 1,  "indexField3"  : 1,  "indexField10" : 1}).sort({ "_id" : 1}).limit(1000) rs:PRIMARY> db.data.find({ "indexField10"  : NumberLong(93333)}, { "_id"  : 1,  "indexField1"  : 1,  "indexField2"  : 1,  "indexField3"  : 1,  "indexField10" : 1}).sort({ "_id" : 1}).limit(1000) // the "error" cachedplan is still active rs:PRIMARY> db.data.aggregate([ {$planCacheStats: {}}, { $match: { planCacheKey:  "B23FE125"  } } ]) and the last query became slow query because of the hit cached plan // code placeholder rs:PRIMARY> db.system.profile.find().sort({$natural: -1}).limit(1) { "op" : "query" , "ns" : "test.data" , "command" : { "find" : "data" , "filter" : { "indexField10" : NumberLong(93333) }, "limit" : 1000, "singleBatch" : false , "sort" : { "_id" : 1 }, "projection" : { "_id" : 1, "indexField1" : 1, "indexField2" : 1, "indexField3" : 1, "indexField10" : 1 }, "lsid" : { "id" : UUID( "823a752a-2482-4e64-8256-2b45c0ddffb1" ) }, "$clusterTime" : { "clusterTime" : Timestamp(1752461315, 1), "signature" : { "hash" : BinData(0, "AAAAAAAAAAAAAAAAAAAAAAAAAAA=" ), "keyId" : NumberLong(0) } }, "$db" : "test" }, "keysExamined" : 100000, "docsExamined" : 100000, "cursorExhausted" : true , "numYield" : 100, "nreturned" : 1, "queryHash" : "2AE01972" , "planCacheKey" : "B23FE125" , "locks" : { "FeatureCompatibilityVersion" : { "acquireCount" : { "r" : NumberLong(102) } }, "ReplicationStateTransition" : { "acquireCount" : { "w" : NumberLong(102) } }, "Global" : { "acquireCount" : { "r" : NumberLong(102) } }, "Database" : { "acquireCount" : { "r" : NumberLong(101) } }, "Collection" : { "acquireCount" : { "r" : NumberLong(101) } }, "Mutex" : { "acquireCount" : { "r" : NumberLong(1) } } }, "flowControl" : {  }, "storage" : {  }, "responseLength" : 375, "protocol" : "op_msg" , "millis" : 179, "planSummary" : "IXSCAN { _id: 1 }" , "execStats" : { "stage" : "CACHED_PLAN" , "nReturned" : 1, "executionTimeMillisEstimate" : 179, "works" : 2, "advanced" : 1, "needTime" : 0, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "inputStage" : { "stage" : "LIMIT" , "nReturned" : 1, "executionTimeMillisEstimate" : 27, "works" : 100001, "advanced" : 1, "needTime" : 99999, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "limitAmount" : 1000, "inputStage" : { "stage" : "PROJECTION_SIMPLE" , "nReturned" : 1, "executionTimeMillisEstimate" : 27, "works" : 100001, "advanced" : 1, "needTime" : 99999, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "transformBy" : {  }, "inputStage" : { "stage" : "FETCH" , "filter" : { "indexField10" : { "$eq" : NumberLong(93333) } }, "nReturned" : 1, "executionTimeMillisEstimate" : 26, "works" : 100001, "advanced" : 1, "needTime" : 99999, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "docsExamined" : 100000, "alreadyHasObj" : 0, "inputStage" : { "stage" : "IXSCAN" , "nReturned" : 100000, "executionTimeMillisEstimate" : 8, "works" : 100001, "advanced" : 100000, "needTime" : 0, "needYield" : 0, "saveState" : 100, "restoreState" : 100, "isEOF" : 1, "keyPattern" : { "_id" : 1 }, "indexName" : "_id_" , "isMultiKey" : false , "multiKeyPaths" : { "_id" : [ ] }, "isUnique" : true , "isSparse" : false , "isPartial" : false , "indexVersion" : 2, "direction" : "forward" , "indexBounds" : { "_id" : [ "[MinKey, MaxKey]" ] }, "keysExamined" : 100000, "seeks" : 1, "dupsTested" : 0, "dupsDropped" : 0 } } } } }, "ts" : ISODate( "2025-07-14T02:48:54.422Z" ), "client" : "127.0.0.1" , "appName" : "MongoDB Shell" , "allUsers" : [ ], "user" : "" }  
    • None
    • None
    • None
    • None
    • None
    • None
    • None

      The issue we observe is that regardless of whether skip is included or not, both queries generate the same queryHash and planCacheKey. However, the cached plan that incorporates a skip condition could lead to very inefficient query plans when the query value has a high disparity (e.g., {{{}

      {"indexField10": NumberLong(93333)}

      {}}}) without a skip condition, which retrieves only a single document.

      Expected Behavior:

      Both methods of creation can successfully update the query plan cache:

      1. Creating the query plan cache using commands with filtering conditions, projections, skip, and limit, and then continuing to query with these conditions.
      2. Creating the query plan cache using commands with filtering conditions, projections, and limit, and then continuing to query with these conditions.

      So we expect the query planner to intelligently update the query plan in the cache, selecting the most efficient index based on the distinctiveness of the queried value. When querying disparate values, a plan utilizing the index on indexField10 should be employed.

      Thank you for your attention to this matter. I look forward to your response.

            Assignee:
            Kenan Ali
            Reporter:
            stephen yang
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: