/*
 * Use case: this is used to group and calculate based on the presence of fields.
 *
 * Other notes: the internal_rules and internal_site_exists functions that are
 * defined within the scope are being used as an extremely complicated way of
 * determining whether a field is included in an overall summary:
 *
 *     if (!(internal_exists("Photos", rules[length].Fields))) {
 *         dict[mapValues[i].Sites[j].Name].PhotosExists += mapValues[i].Sites[j].PhotosExists;
 *     }
 *
 * If the "site" (ex: Google, Bing, Facebook ... etc) has a "rule" for Photos,
 * it is excluded from the overall summary, so those values are not counted
 * when summarizing for those sites. We may not need it, though.
 *
 * Another example of the same kind of work we have to do:
 *
 * MapReduce code:
 */

// This was originally the map/reduce script that was refactored to aggregateReviewReportCountBySegment
// This example is only "Company" level, but "Country, State, City" level were identical except with additional
// params in the emit() phase

/**
 * Legacy map/reduce implementation, kept for reference.
 *
 * Emits one value per (UserId, ReviewEnabled, DateStr) key for every review,
 * carrying a single-site array of score-bucket counters, then merges the
 * counters per site name in the reduce phase and writes to `tempCollection`.
 *
 * Relies on names provided by the surrounding shell environment:
 * `db`, `getVersion`, `reviewNPSMapReduceFinalize` and (inside map) `emit`.
 *
 * @param {string} reviewCollection name of the collection holding the reviews
 * @param {string} tempCollection   name of the collection receiving the output
 */
function legacyReviewReportCountMapReduce(reviewCollection, tempCollection) {
    // Nothing to do for an empty source collection.
    if (db.getCollection(reviewCollection).count() == 0) {
        return;
    }

    db.getCollection(reviewCollection).mapReduce(
        // ---- map ----
        function () {
            // Reviews without any user are attributed to the placeholder user 0.
            if (!this.UserId) this.UserId = [0];

            for (var j = 0; j < this.UserId.length; j++) {
                emit({
                    'UserId': this.UserId[j],
                    'ReviewEnabled': this.Reports == null ? null : this.Reports.Reviews,
                    'DateStr': internal_formatDate(this.ReviewTimeMapped)
                }, {
                    Sites: [{
                        'SiteName': this.SiteName,
                        // Exactly one bucket is 1 for a score in [0, 5]; all are 0 otherwise.
                        'ZeroCount': this.Score == 0 ? 1 : 0,
                        'OneCount': this.Score > 0 && this.Score <= 1 ? 1 : 0,
                        'TwoCount': this.Score > 1 && this.Score <= 2 ? 1 : 0,
                        'ThreeCount': this.Score > 2 && this.Score <= 3 ? 1 : 0,
                        'FourCount': this.Score > 3 && this.Score <= 4 ? 1 : 0,
                        'FiveCount': this.Score > 4 && this.Score <= 5 ? 1 : 0,
                    }]
                });
            }
        },
        // ---- reduce ----
        function (mapKey, mapValues) {
            var reducedDocument = { Sites: [] };
            var allSites = {};

            // Sum the bucket counters of every emitted site entry, keyed by site name.
            for (var i = 0; i < mapValues.length; i++) {
                for (var j = 0; j < mapValues[i].Sites.length; j++) {
                    var site = mapValues[i].Sites[j];
                    var merged = allSites[site.SiteName];
                    if (merged) {
                        merged.ZeroCount += site.ZeroCount;
                        merged.OneCount += site.OneCount;
                        merged.TwoCount += site.TwoCount;
                        merged.ThreeCount += site.ThreeCount;
                        merged.FourCount += site.FourCount;
                        merged.FiveCount += site.FiveCount;
                    } else {
                        allSites[site.SiteName] = site;
                    }
                }
            }

            // Sites whose counters are all zero are left out of the reduced document.
            for (var field in allSites) {
                if (allSites[field].ZeroCount == 0 &&
                    allSites[field].OneCount == 0 &&
                    allSites[field].TwoCount == 0 &&
                    allSites[field].ThreeCount == 0 &&
                    allSites[field].FourCount == 0 &&
                    allSites[field].FiveCount == 0)
                    continue;
                reducedDocument.Sites.push(allSites[field]);
            }

            return reducedDocument;
        },
        {
            out: tempCollection,
            query: { Status: 0, InsertVersionId: { $lte: getVersion() } },
            sort: { UserId: 1 },
            finalize: reviewNPSMapReduceFinalize,
            scope: {
                // zero pad value
                internal_pad: function (n, width, z) {
                    z = z || '0';
                    n = n + '';
                    return n.length >= width ? n : new Array(width - n.length + 1).join(z) + n;
                },
                // Formats a date as e.g. "Sun Jan 01 2017" using its UTC fields; null stays null.
                internal_formatDate: function (date) {
                    if (date == null) return null;
                    var days = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'];
                    var months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
                    return days[date.getUTCDay()] + " " + months[date.getUTCMonth()] + " " + internal_pad(date.getUTCDate(), 2) + " " + date.getUTCFullYear();
                }
            }
        });
}

// ===========================================================================================================================================
// Above code is converted into Pipeline:

/**
 * Aggregation-pipeline replacement for the legacy map/reduce script above.
 *
 * Builds a $match / $sort / $project / [$unwind] / $group / $group / $project / $out
 * pipeline over `sourceCollectionName`, runs it, and then rewrites every document of
 * `targetCollectionName` through `updateReviewReportSummaryCountWithAdditionalInfo`.
 *
 * Relies on names provided by the surrounding shell environment: `db`, `getVersion`,
 * `EXEC_TIMED`, `DBQuery` and `updateReviewReportSummaryCountWithAdditionalInfo`.
 *
 * @param {string}   sourceCollectionName collection holding the reviews
 * @param {string}   targetCollectionName collection receiving the summaries ($out target)
 * @param {string[]} [segmentOptions]     extra field names to group by (eg: ["Country"]);
 *                                        omitted/null is treated as an empty list.
 *                                        Exactly ["_LocationId"] switches the pipeline
 *                                        from User-centric to Location-centric.
 */
function aggregateReviewReportCountBySegment(sourceCollectionName, targetCollectionName, segmentOptions) {
    // Tolerate a missing argument instead of failing on `.length` below.
    var segments = segmentOptions || [];

    // NOTE
    // if you pass in segmentOptions = ["_LocationId"] the pipeline will
    // change to Location-centric vs. User-centric
    var locationCentricPipeline = segments.length === 1 && segments[0] === "_LocationId";

    var coll = db.getCollection(sourceCollectionName);

    // Builds the "$range" fragment for the bucket low < Score <= high.
    function scoreBucket(label, low, high) {
        return { $cond: [{ $and: [{ $gt: ["$Score", low] }, { $lte: ["$Score", high] }] }, label, ""] };
    }

    // Builds an accumulator counting the documents whose "$range" equals label.
    function countRange(label) {
        return { $sum: { $cond: [{ $eq: ["$range", label] }, 1, 0] } };
    }

    var MATCH_STAGE = { Status: 0, InsertVersionId: { $lte: getVersion() } };
    var SORT_STAGE = { UserId: 1 };

    var PROJECTION_STAGE_1 = {
        _id: 0,
        UserId: "$UserId",
        SiteName: "$SiteName",
        Score: "$Score",
        date: "$ReviewTimeMapped",
        // FIXME: do we even need this flag anymore?
        ReviewEnabled: "$Reports.Reviews",
        // Exactly one fragment is non-empty for a score in [0, 5], so the
        // concatenation yields the bucket name (or "" when out of range).
        "range": {
            $concat: [
                // ZeroCount means "Score is exactly 0", as in the legacy map phase.
                // A half-open [0, 1) test here would overlap OneCount for fractional
                // scores and produce a range string that matches no bucket.
                { $cond: [{ $eq: ["$Score", 0] }, "ZeroCount", ""] },
                scoreBucket("OneCount", 0, 1),
                scoreBucket("TwoCount", 1, 2),
                scoreBucket("ThreeCount", 2, 3),
                scoreBucket("FourCount", 3, 4),
                scoreBucket("FiveCount", 4, 5)
            ]
        }
    };

    var GROUP_STAGE_1 = {
        _id: {
            DateStr: { $dateToString: { date: "$date", format: "%Y-%m-%d" } },
            SiteName: "$SiteName",
            UserId: "$UserId",
            ReviewEnabled: "$ReviewEnabled"
        },
        ZeroCount: countRange("ZeroCount"),
        OneCount: countRange("OneCount"),
        TwoCount: countRange("TwoCount"),
        ThreeCount: countRange("ThreeCount"),
        FourCount: countRange("FourCount"),
        FiveCount: countRange("FiveCount")
    };

    var GROUP_STAGE_2 = {
        _id: {
            DateStr: "$_id.DateStr",
            UserId: "$_id.UserId",
            ReviewEnabled: "$_id.ReviewEnabled"
        },
        Sites: {
            $push: {
                SiteName: "$_id.SiteName",
                ZeroCount: "$ZeroCount",
                OneCount: "$OneCount",
                TwoCount: "$TwoCount",
                ThreeCount: "$ThreeCount",
                FourCount: "$FourCount",
                FiveCount: "$FiveCount"
            }
        }
    };

    var PROJECTION_STAGE_2 = {
        _id: 0,
        UserId: "$_id.UserId",
        ReviewEnabled: "$_id.ReviewEnabled",
        DateStr: "$_id.DateStr",
        // FIXME: in 3.6 we can use $dateFromString
        // ReviewDate: { $cond: { if: { $eq: [ true, true] }, then: new Date("$_id.DateStr"), else: 0 } },
        Sites: "$Sites"
    };

    // Merge segment details into aggregation stages.
    // This allows a single script to handle all potential permutations
    if (segments.length > 0) {
        if (locationCentricPipeline) {
            // Swap every UserId reference for _LocationId.
            delete PROJECTION_STAGE_1["UserId"];
            delete PROJECTION_STAGE_2["UserId"];
            delete GROUP_STAGE_1["_id"]["UserId"];
            delete GROUP_STAGE_2["_id"]["UserId"];
            delete SORT_STAGE["UserId"];

            PROJECTION_STAGE_1["_LocationId"] = "$_LocationId";
            PROJECTION_STAGE_2["_LocationId"] = "$_id._LocationId";
            GROUP_STAGE_1["_id"]["_LocationId"] = "$_LocationId";
            GROUP_STAGE_2["_id"]["_LocationId"] = "$_id._LocationId";
            SORT_STAGE["_LocationId"] = 1;
        } else {
            // Apply the segment rules to the various stages in the pipeline
            for (var i = 0; i < segments.length; i++) {
                var segment = segments[i];
                // eg: Country: "$Country"
                PROJECTION_STAGE_1[segment] = "$" + segment;
                GROUP_STAGE_1["_id"][segment] = "$" + segment;
                GROUP_STAGE_2["_id"][segment] = "$_id." + segment;
                PROJECTION_STAGE_2[segment] = "$_id." + segment;
            }
        }
    }

    var pipeline = [];
    pipeline.push({ $match: MATCH_STAGE });
    pipeline.push({ $sort: SORT_STAGE });

    // Emit documents that contain the ZeroCount, OneCount ... summaries of the Scores
    pipeline.push({ $project: PROJECTION_STAGE_1 });

    // Since we need to summarize by user, unwind those records (unless this is a location-centric aggregation)
    // NOTE(review): the legacy map phase attributed reviews without a UserId to user 0;
    // a plain $unwind emits nothing for them. Confirm whether that difference is intended.
    if (!locationCentricPipeline) {
        pipeline.push({ $unwind: "$UserId" });
    }

    // Group the documents based on Date/User/Site and collect all counts together
    pipeline.push({ $group: GROUP_STAGE_1 });

    // Group again to get all summarized documents by site into a Sites array
    pipeline.push({ $group: GROUP_STAGE_2 });

    // Finally emit a document formatted for consumption
    pipeline.push({ $project: PROJECTION_STAGE_2 });
    pipeline.push({ $out: targetCollectionName });

    // STEP 2 - Aggregate for inputs and store
    EXEC_TIMED("Executing Aggregation", function () {
        coll.aggregate(pipeline, { allowDiskUse: true });
    });

    // STEP 3 - Calculate rNPS, Average and decorate documents for the final reports
    EXEC_TIMED("Updating Aggregated Counts", function () {
        var BATCH_SIZE = 25000;
        var pending = 0;
        var targetCollection = db.getCollection(targetCollectionName);
        var bulk = targetCollection.initializeUnorderedBulkOp();

        targetCollection.find().addOption(DBQuery.Option.noTimeout).forEach(function (doc) {
            // update the document with RNPS, Average, ReviewDate and DateStr formatting
            bulk.find({ _id: doc._id }).upsert().replaceOne(updateReviewReportSummaryCountWithAdditionalInfo(doc));
            pending++;

            // Flush in batches so a single bulk never grows unbounded.
            if (pending === BATCH_SIZE) {
                bulk.execute();
                bulk = targetCollection.initializeUnorderedBulkOp();
                pending = 0;
            }
        });

        // Executing a bulk with no queued operations raises an error in the shell,
        // so only flush when something is left over (the collection may be empty
        // or hold an exact multiple of BATCH_SIZE documents).
        if (pending > 0) {
            bulk.execute();
        }
    });
}