Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-1904

map-reduce produces different results for an indexed query

    XMLWordPrintableJSON

Details

    • Icon: Bug Bug
    • Resolution: Done
    • Icon: Major - P3 Major - P3
    • 1.7.3
    • 1.6.2
    • Index Maintenance
    • None
    • Ubuntu 10.0.4 on EC2
    • Minor Change
    • ALL

    Description

      I've been investigating some unusual numbers in my map-reduce results
      and made an interesting discovery. If the map-reduce query is on an
      indexed array field, and more than one value in the array matches the
      query, the document is mapped more than once. A simple example is below:
      // make sure the collection is empty
      > db.example.drop()
      true
      > db.example.save({ arr : [1, 2] })

      // just aggregate by _id
      > map = function() { emit(this._id, 1) }

      function () {
          emit(this._id, 1);
      }

      // count the values
      > reduce = function(k,vals) {
      ... var sum=0;
      ... for(var i in vals) sum += vals[i];
      ... return sum;
      ... }


      function (k, vals) {
          var sum = 0;
          for (var i in vals) {
              sum += vals[i];
          }
          return sum;
      }

      // the first M/R finds the document once & produces the correct count
      > res = db.example.mapReduce(map,reduce, { query : {} })

      {
        "result" : "tmp.mr.mapreduce_1286336126_70",
        "timeMillis" : 16,
        "counts" : { "input" : 1, "emit" : 1, "output" : 1 },
        "ok" : 1,
      }
      > db[res.result].find()

      { "_id" : ObjectId("4cabee4ac0f7095167a5ab62"), "value" : 1 }

      // the second query matches the array without an index, and still
      // produces expected results
      > res = db.example.mapReduce(map,reduce, { query : { arr: {$gte:0} } })

      {
        "result" : "tmp.mr.mapreduce_1286336141_71",
        "timeMillis" : 12,
        "counts" : { "input" : 1, "emit" : 1, "output" : 1 },
        "ok" : 1,
      }
      > db[res.result].find()

      { "_id" : ObjectId("4cabee4ac0f7095167a5ab62"), "value" : 1 }

      // now index on the array and run the exact same M/R - note that it
      // now has 2 inputs & 2 emits, and the count has doubled
      > db.example.ensureIndex({arr:1})
      > res = db.example.mapReduce(map,reduce, { query : { arr: {$gte:0} } })

      {
        "result" : "tmp.mr.mapreduce_1286336171_72",
        "timeMillis" : 15,
        "counts" : { "input" : 2, "emit" : 2, "output" : 1 },
        "ok" : 1,
      }
      > db[res.result].find()

      { "_id" : ObjectId("4cabee4ac0f7095167a5ab62"), "value" : 2 }

      This seems bad - is this expected behavior?

      Attachments

        Activity

          People

            eliot Eliot Horowitz (Inactive)
            iragsdale Ian Ragsdale
            Votes:
            1 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: