Show
1) In the mongo shell, paste the following to define a JS function that generates a test document with n random fields
/**
 * Build one random test document.
 *
 * The document gets a 'lang' field holding a randomly chosen language code,
 * plus n fields named 'fld_0' .. 'fld_<n-1>', each holding a random
 * base-34 string.
 *
 * @param {number} n - number of 'fld_*' fields to generate
 * @returns {Object} the generated document
 */
function TestDoc (n) {
  var languages = [ 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'nl', 'pt', 'ro', 'ru', 'sv', 'tr' ];

  // NOTE: the multiplier is 13 while the list has 14 entries, so the last
  // entry ('tr') is never picked and exactly 13 distinct values are produced.
  var pick = Math.floor(Math.random() * 13);
  var result = { lang: languages[pick] };

  var idx = 0;
  while (idx < n) {
    // Drop the leading "0." of the base-34 rendering of the random number.
    result[ 'fld_' + idx ] = Math.random().toString(34).slice(2);
    idx += 1;
  }

  return result;
}
2) Define a function that inserts nDocs documents, each with nFields random fields, into a collection
/**
 * Insert nDocs generated test documents into the collection named colName.
 *
 * Each document is produced by TestDoc(nFields) and gets its loop index
 * (0 .. nDocs-1) as its '_id'.
 *
 * @param {string} colName - name of the target collection on the current db
 * @param {number} nDocs - how many documents to insert
 * @param {number} nFields - number of random fields per document
 */
function InsertTestDocs (colName, nDocs, nFields) {
  for (var i = 0; i < nDocs; i++) {
    // Declared locally so the loop does not leak a global 'doc'.
    var doc = TestDoc(nFields);
    doc[ '_id' ] = i;
    db[colName].insert(doc);
  }
}
3) insert 1000000 test documents
// Populate tmp_col with 1000000 documents of 100 random fields each.
// The function is defined as InsertTestDocs (capital I); JS names are case-sensitive.
InsertTestDocs( "tmp_col" , 1000000, 100)
4) create index on 'lang' field
// Build an index on the 'lang' field of tmp_col (key spec {lang: 1}).
db.tmp_col.ensureIndex({lang: 1})
5) Run a mapReduce job that simply counts the documents for each distinct value of the lang field
// Count documents per 'lang' value; results are returned inline.
db.runCommand({
  mapreduce: "tmp_col",
  // Emit a count of 1 for each document, keyed by its language.
  map: function () {
    emit(this.lang, 1);
  },
  // Add up the emitted counts for one key.
  reduce: function (key, values) {
    return Array.sum(values);
  },
  out: { inline: 1 }
})
6) you get results of the following form
"timeMillis" : 116705,
"counts" : {
"input" : 1000000,
"emit" : 1000000,
"reduce" : 65000,
"output" : 13
},
"ok" : 1
7) Run the same mapReduce job, but this time also specify a sort on the lang field
// Same per-'lang' count as before, with an added sort on the 'lang' key.
db.runCommand({
  mapreduce: "tmp_col",
  // Emit a count of 1 for each document, keyed by its language.
  map: function () {
    emit(this.lang, 1);
  },
  // Add up the emitted counts for one key.
  reduce: function (key, values) {
    return Array.sum(values);
  },
  sort: { lang: 1 },
  out: { inline: 1 }
})
8) You get the following results
"timeMillis" : 1478708,
"counts" : {
"input" : 1000000,
"emit" : 1000000,
"reduce" : 8474,
"output" : 13
},
"ok" : 1
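Notice that with the sort option the job now takes 1478708 ms, instead of the 116705 ms it took when run without the sort option (that is, more than 10X slower), even though the number of reduce calls dropped from 65000 to 8474.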