|
Here's another example. Suppose I want to aggregate the sales and the profit per region from documents with the following structure:
[
  {
    _id : 1,
    region: "north",
    finance: [
      {
        "k": "sales",
        "v": {
          "description": "sales over the year",
          v: 234000,
        }
      },
      {
        "k": "profit",
        "v": {
          "description": "money made from sales",
          v: 123123,
        }
      }
    ]
  },
  {
    _id : 2,
    region: "west",
    finance: [
      {
        "k": "sales",
        "v": {
          "description": "sales over the year",
          v: 234000,
        }
      },
      {
        "k": "profit",
        "v": {
          "description": "money made from sales",
          v: 123123,
        }
      }
    ]
  }
]
|
|
The code needs to do the following:
- Iterate over the 'finance' array, find the sales and profit entries, and save them for aggregation
- Count how many entries there are for each region
- Sum up the sales and profit for each region
/**
 * Map step: emits one record per document, keyed by the document's region.
 *
 * Called with `this` bound to the current document. The emitted value has the
 * shape { sales, profit, count }, which is the same shape the matching
 * reduce() function accumulates and returns.
 *
 * `emit` is not defined here; it is expected to be supplied by the
 * map/reduce runtime.
 */
var map = function () {
  // This becomes the second parameter to 'emit()'. 'count' starts at 1
  // because each call handles exactly one document.
  var ret = { sales: 0.0, profit: 0.0, count: 1 };

  // A document without a 'finance' array still counts as one entry for its
  // region; it just contributes zero sales and zero profit.
  var entries = Array.isArray(this.finance) ? this.finance : [];

  // Iterate over the 'finance[]' array and save the matching values
  entries.forEach(function (entry) {
    if (entry.k === "sales") ret.sales = entry.v.v;
    if (entry.k === "profit") ret.profit = entry.v.v;
  });

  emit(this.region, ret);
};
|
|
/**
 * Reduce step: sums the sales, profit and count of every value emitted for
 * one key.
 *
 * @param {*} key - The key passed as the first parameter to 'emit()'; unused here.
 * @param {Array<{sales: number, profit: number, count: number}>} values -
 *   Values emitted for this key. Each element has the structure passed as the
 *   second parameter to 'emit()'.
 * @returns {{sales: number, profit: number, count: number}} The totals, in
 *   the same structure as a single element of 'values'.
 */
var reduce = function (key, values) {
  // This structure exactly matches the second parameter to 'emit()'.
  // 'count' starts at 0 here because every incoming value already carries
  // its own count.
  var ret = { sales: 0.0, profit: 0.0, count: 0 };

  // Accumulate the sums over every entry in 'values[]'
  values.forEach(function (v) {
    ret.sales += v.sales;
    ret.profit += v.profit;
    ret.count += v.count;
  });

  // The structure returned exactly matches the structure passed
  // as the second parameter to 'emit()'
  return ret;
};
|
//
// Run map/reduce.
// SOURCE stands for the collection being aggregated (for example
// db.source_collection). An output target is passed explicitly; with
// { inline: 1 } the results come back in 'res' rather than being written
// to a collection.
//
var res = SOURCE.mapReduce(map, reduce, { out: { inline: 1 } });
|
|
|
It is EXTREMELY important to note the following:
- The second parameter to the 'reduce()' function ('values') is an array of some object type
- The return value of the 'reduce()' function must be the EXACT same type as a SINGLE element of the 'values' array
- The 'reduce()' function must be coded to iterate over an array of objects in the 'values' array
- The second parameter to the 'emit()' function must be the EXACT same type as the return value of the 'reduce()'
- The first parameter to the 'emit()' function becomes the unique key for the 'reduce()' function. All documents output with the same unique key will eventually be reduced down to a single output document.
- The first parameter to the 'emit()' function can be an arbitrarily complex document
Here is a template for a map/reduce job in JavaScript. It counts page views per URL per day:
/**
 * Map step: emits { count: 1 } for the current document, keyed by its URL
 * and the calendar day (UTC) on which the page was seen.
 *
 * Called with `this` bound to the current document, which must have a 'url'
 * field and a 'date' field holding a Date.
 *
 * `emit` is not defined here; it is expected to be supplied by the
 * map/reduce runtime.
 */
var map = function () {
  var seen = this.date;

  // Truncate the timestamp to midnight UTC so that every view on the same
  // calendar date shares one key. getDay() would return only the day of the
  // week (0-6) and merge views from different dates.
  var day = new Date(Date.UTC(seen.getUTCFullYear(), seen.getUTCMonth(), seen.getUTCDate()));

  // Note that this page was seen on this day:
  emit({ url: this.url, day: day }, { count: 1 });
};
|
|
/**
 * Reduce step: adds up the 'count' of every value emitted for one key.
 *
 * @param {*} key - The key passed as the first parameter to 'emit()'; unused here.
 * @param {Array<{count: number}>} values - Values emitted for this key.
 * @returns {{count: number}} The total, in the same structure as the elements
 *   of 'values[]' and the second parameter to 'emit()'.
 */
var reduce = function (key, values) {
  var count = 0;

  // Iterate over the 'values[]' array, adding each document's count to the
  // local total
  values.forEach(function (v) {
    count += v.count;
  });

  // Return value matches the structure of the 'values[]' array and the
  // second parameter to 'emit()'
  return { count: count };
};
|
|
//
// Run the map/reduce job.
// Results are written to the "dest_collection" collection; 'res' holds the
// value returned by mapReduce().
//
var res = db.source_collection.mapReduce(map, reduce, {
  out: "dest_collection",
  verbose: true
});
|
|
|
|