[SERVER-37220] MongoDB slow facet query using multiple group by Created: 20/Sep/18  Updated: 20/Sep/18  Resolved: 20/Sep/18

Status: Closed
Project: Core Server
Component/s: Aggregation Framework
Affects Version/s: 4.0.1
Fix Version/s: None

Type: Question Priority: Critical - P2
Reporter: Jitender Kumar Singla Assignee: Nick Brewer
Resolution: Done Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Participants:

 Description   

I am working on a project where we will have almost 5 million documents in a collection. And each document's size will be around 18571 bytes having 120 to 150 fields.

I have to return my response in less than 1 second. And mongo query will perform almost 10 to 15 group by in faceted query on maximum 3,00,000 documents.

This is my first time handling this much of data where I have to return responses in real time.

I have implemented indexes and they reduced response time to 5 to 6 seconds but I still need it in less then 1 second.

*Below is sample query:*

db.sample.aggregation(
"$match":{
"$and":[

{"is_new": <true/false>}

,

{"brand":<some-brand>}

,
{"year":{"$gte":<some-year>,"$lte":<some-year>}},
{"seller_id":{"$in":[<array-of-seller-ids-may-have-40,000-seller-ids>]}}
]
},
{
"$facet":{
"data":[{
"$project":

{ "_id":"_id", "brand":"$brand_name", "model":"$model_name", <will have almost 20 keys with lookup> }

}],
"count":[{"$group":{"_id":"$_id"}},

{"$count":"vin_count"}

],
"price":[{"$bucketAuto":{"groupBy":"$price", "buckets":1}}],
<will have 12-15 group by>
}
}
)

*Below is sample document:*

{
"_id" : "KNDMC5C11J6394584",
"brand_id" : 22,
"brand_name" : "XYZ",
"abc_id" : 1234567890,
"city" : "Gurgaon, IN",
"fluctuation" : 18,
"created_at" : ISODate("2018-08-17T06:08:12.940Z"),
"release_data" : "2018-06-29",
"seller_name" : "Seller name",
"seller_price" : 34890,
"seller_rating" : 4,
"seller_zip" : "12550",
"feature1" : "ABC",
"feature2" : 3300,
"feature3" : "AB",
"expected_price" : -1,
"exterior_color" : "Unknown",
"registered_dealer" : true,
"registered_brand" : "ABC",
"fluctuation_rate" : 20.700000000000003,
"fluctuation_type" : 2,
"fluc_type_name" : "Something",
"has_patents" : false,
"tested_frequency" : 24,
"interior_color" : "---",
"is_certified" : false,
"is_certified_iso" : false,
"is_featured" : false,
"is_new" : true,
"is_certified_bhel" : false,
"location" :

{ "type" : "Point", "coordinates" : [ -24.08180236816406, 31.507198333740234 ] }

,
"max_input" : 8,
"feature4" : 3,
"feature5" : 206,
"feature6" : "Something",
"monthly_payment" : 649,
"msrp" : 34890,
"feature7" : false,
"seller_id" : 123567890,
"product_family_name" : "abc",
"product_id" : 15,
"product_name" : "Something",
"reflection" : "Something",
"fluc_id" : 2312,
"fluc_name" : "something something (abc) ac",
"updated_at" : ISODate("2018-09-11T17:59:36.889Z"),
"product_damage_category" : "None",
"year" : 2018,
"damage_check" : "-",
"team_size" : "-",
"Technology" :

{ "camera_unit" : true }

}

*Below is the explain output*

{
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [

{ "is_new" : true }

,
{
"year" :

{ "$gte" : 2018, "$lte" : 2018 }

},
{
"sp_id" :

{ "$in" : [<list of 40,000 seller ids>] }

}
]
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test_collection.col",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"is_new" :

{ "$eq" : true }

},
{
"year" :

{ "$lte" : 2018 }

},
{
"year" :

{ "$gte" : 2018 }

},
{
"sp_id" :

{ "$in" : [<list of 40,000 seller ids>] }

}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" :

{ "is_new" : 1, "year" : 1, "sp_id" : 1 }

,
"indexName" : "is_new_1_year_1_sp_id_1",
"isMultiKey" : false,
"multiKeyPaths" :

{ "is_new" : [ ], "year" : [ ], "sp_id" : [ ] }

,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" :

{ "is_new" : [ "[true, true]" ], "year" : [ "[2018.0, 2018.0]" ], "sp_id" : [ "[47590.0, 47590.0]", "[48333.0, 48333.0]", "[51333.0, 51333.0]", <range of 40,000 seller_ids> ] }

}
},
"rejectedPlans" : [ ]
}
}
},
{
"$facet" : {
"data" : [
{
"$project" :

{ "_id" : "$_id", "brand_name" : "$brand_name", "feature1" : "$feature1", "feature2" : "$feature2", "feature3" : "$feature3", "feature4" : "$feature4", "feature5" : "$feature5", "feature6" : "$feature6", "feature7" : "$feature7", "feature8" : "$feature8", "feature9" : "$feature9", "feature10" : "$feature10", "feature11" : "$feature11", "feature12" : "$feature12", "feature13" : "$feature13", "feature14" : "$feature14", "feature15" : "$feature15", "feature16" : "$feature16", "feature17" : "$feature17", "feature18" : "$feature18", "feature19" : "$feature19", "feature20" : "$feature20" }

}
],
"count" : [
{
"$group" :

{ "_id" : "$_id" }

},
{
"$group" : {
"_id" :

{ "$const" : null }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$project" :

{ "_id" : false, "count" : true }

}
],
"feature1" : [
{
"$match" : {
"feature1" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature1",
"name" :

{ "$first" : "$feature1" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "count" : -1 }

}
}
],
"feature2" : [
{
"$match" : {
"feature2" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature2",
"name" :

{ "$first" : "$feature2" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "count" : -1 }

}
}
],
"feature3" : [
{
"$match" : {
"feature3" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature3",
"name" :

{ "$first" : "$feature3" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "count" : -1 }

}
}
],
"feature4" : [
{
"$match" : {
"feature4" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature4",
"name" :

{ "$first" : "$feature4" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "count" : -1 }

}
}
],
"feature5" : [
{
"$match" : {
"feature5" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature5",
"name" :

{ "$first" : "$fuel" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "count" : -1 }

}
}
],
"feature6" : [
{
"$match" : {
"feature6" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature6",
"name" :

{ "$first" : "$feature6" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "count" : -1 }

}
}
],
"feature7" : [
{
"$match" : {
"feature7" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature7",
"name" :

{ "$first" : "$feature7" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "count" : -1 }

}
}
],
"feature8" : [
{
"$match" : {
"feature8" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature8",
"name" :

{ "$first" : "$feature8" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "count" : -1 }

}
}
],
"feature9" : [
{
"$match" : {
"feature9" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature9",
"name" :

{ "$first" : "$feature9" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "count" : -1 }

}
}
],
"feature10" : [
{
"$match" : {
"feature10" :

{ "$exists" : true }

}
},
{
"$group" : {
"_id" : "$feature10",
"name" :

{ "$first" : "$feature10" }

,
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
},
{
"$sort" : {
"sortKey" :

{ "_id" : -1 }

}
}
],
"feature11" : [
{
"$match" : {
"feature11" :

{ "$exists" : true }

}
},
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
}
}
],
"feature12" : [
{
"$bucketAuto" : {
"groupBy" : "$feature11",
"buckets" : 1,
"output" : {
"count" : {
"$sum" :

{ "$const" : 1 }

}
}
}
}
]
}
}
],
"ok" : 1
}
If this information is not complete for the solution. I will provide more.

I am stuck on this from last 1 month.

Any help would be appreciated.



 Comments   
Comment by Nick Brewer [ 20/Sep/18 ]

jitender.collegedunia I suggest taking a look at the following resources related to query optimization:

Additionally, while it's a bit old, this blog post is an excellent resource for information on optimizing compound indexes.

Please note that SERVER project is for reporting bugs or feature suggestions for the MongoDB server. For MongoDB-related support discussion please post on the mongodb-user group or Stack Overflow with the mongodb tag. A question like this involving more discussion would be best posted on the mongodb-user group.

-Nick

Generated at Thu Feb 08 04:45:22 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.