Show
1) In the mongo shell, paste the following to define a JS function that generates a test document with n fields
/**
 * Build one test document: a randomly chosen `lang` code followed by `n`
 * random string fields named fld_0 .. fld_<n-1>.
 *
 * @param {number} n - number of fld_* fields to generate
 * @returns {Object} the generated document
 */
function TestDoc (n) {
  var languages = ['da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'nl', 'pt', 'ro', 'ru', 'sv', 'tr'];
  // The multiplier is 13, so the index ranges over 0..12 and the last
  // entry ('tr') is never picked.
  var langIndex = Math.floor(Math.random() * 13);
  var result = { lang: languages[langIndex] };
  var fieldNo = 0;
  while (fieldNo < n) {
    // Drop the leading "0." of the base-34 rendering to get a random string.
    result['fld_' + fieldNo] = Math.random().toString(34).slice(2);
    fieldNo += 1;
  }
  return result;
}
2) Define a function that inserts nDocs documents, each with nFields fields, into a collection
/**
 * Insert `nDocs` generated test documents into the collection `colName`.
 * Each document is produced by TestDoc(nFields) and gets its loop index
 * (0 .. nDocs-1) as `_id`.
 *
 * @param {string} colName - name of the target collection on `db`
 * @param {number} nDocs - number of documents to insert
 * @param {number} nFields - number of fields passed to TestDoc
 */
function InsertTestDocs (colName, nDocs, nFields) {
  // Loop bound is the nDocs parameter (the original referenced an
  // undefined name here, which threw a ReferenceError).
  for (var i = 0; i < nDocs; i++) {
    // Declared locally so no global `doc` is leaked into the shell session.
    var doc = TestDoc(nFields);
    doc['_id'] = i;
    db[colName].insert(doc);
  }
}
3) insert 1000000 test documents
InsertTestDocs("tmp_col", 1000000, 100)
4) create index on 'lang' field
// Index the lang field of tmp_col (key spec { lang: 1 }).
db["tmp_col"].ensureIndex({ lang: 1 });
5) Run a mapReduce job that simply counts the documents for each distinct value of the lang field
// Emit 1 per document keyed by lang and sum per key; output is returned inline.
db.runCommand({
  mapreduce: "tmp_col",
  map: function () {
    emit(this.lang, 1);
  },
  reduce: function (key, values) {
    return Array.sum(values);
  },
  out: { inline: 1 }
});
6) you get results of the following form
"timeMillis" : 116705,
"counts" : {
"input" : 1000000,
"emit" : 1000000,
"reduce" : 65000,
"output" : 13
},
"ok" : 1
7) Run the same mapReduce job, but this time specify a sort
// Same job as the unsorted run, with sort: { lang: 1 } added.
db.runCommand({
  mapreduce: "tmp_col",
  map: function () {
    emit(this.lang, 1);
  },
  reduce: function (key, values) {
    return Array.sum(values);
  },
  sort: { lang: 1 },
  out: { inline: 1 }
});
8) You get the following results
"timeMillis" : 1478708,
"counts" : {
"input" : 1000000,
"emit" : 1000000,
"reduce" : 8474,
"output" : 13
},
"ok" : 1
Notice that with the sort option the job now takes 1478708 ms, instead of the 116705 ms it took when run without the sort option (that is, roughly 12.7X slower)