-
Type: Bug
-
Resolution: Done
-
Priority: Major - P3
-
Affects Version/s: 1.6.2
-
Component/s: Index Maintenance
-
None
-
Environment: Ubuntu 10.0.4 on EC2
-
Minor Change
-
ALL
I've been investigating some unusual numbers in my map-reduce results
and made an interesting discovery. If the map-reduce query is on an
indexed array field, and more than one value in the array matches the
query, the document is mapped more than once. A simple example is below:
// make sure the collection is empty
> db.example.drop()
true
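// (document literal lost in extraction; any array with two values >= 0 reproduces the issue)
> db.example.save({ arr : [ 1, 2 ] })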
// just aggregate by _id
> map = function()
function () {
emit(this._id, 1);
}
// count the values
> reduce = function(k,vals)
function (k, vals) {
var sum = 0;
for (var i in vals) { sum += vals[i]; }
return sum;
}
// the first M/R finds the document once & produces the correct count
> res = db.example.mapReduce(map,reduce, { query : {} })
{
"result" : "tmp.mr.mapreduce_1286336126_70",
"timeMillis" : 16,
"counts" : { "input" : 1, "emit" : 1, "output" : 1 },
"ok" : 1,
}
> db[res.result].find()
// the second query matches the array without an index, and still
// produces the expected results
> res = db.example.mapReduce(map,reduce, { query : { arr: {$gte:0} } })
{
"result" : "tmp.mr.mapreduce_1286336141_71",
"timeMillis" : 12,
"counts" : { "input" : 1, "emit" : 1, "output" : 1 },
"ok" : 1,
}
> db[res.result].find()
// now index on the array and run the exact same M/R - note that it
// now has 2 inputs & 2 emits, and the count has doubled
> db.example.ensureIndex({ arr : 1 })
> res = db.example.mapReduce(map,reduce, { query : { arr: {$gte:0} } })
{
"result" : "tmp.mr.mapreduce_1286336171_72",
"timeMillis" : 15,
"counts" : { "input" : 2, "emit" : 2, "output" : 1 },
"ok" : 1,
}
> db[res.result].find()
This seems bad - is this expected behavior?