Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-22060

Sharded mapReduce output option {replace: "coll", sharded: true} can create an invalid sharded collection

    XMLWordPrintable

Details

    • Bug
    • Status: Closed
    • Major - P3
    • Resolution: Duplicate
    • None
    • None
    • Sharding
    • None
    • ALL
    • Hide

      // Repro script for the mongo shell test harness: runs a mapReduce whose output option is
      // {replace: "output", sharded: true} against an already-sharded "output" collection and
      // then counts the documents visible through mongos.
      (function() {
          'use strict';

          var cluster = new ShardingTest({shards: 2});
          var testDB = cluster.s.getDB("test");
          var sourceColl = testDB.input;
          var targetColl = testDB.output;
          var targetNs = targetColl.getFullName();

          assert.commandWorked(testDB.adminCommand({enableSharding: testDB.getName()}));
          // The result of movePrimary is intentionally left unchecked, as in the original repro.
          testDB.adminCommand({movePrimary: testDB.getName(), to: "shard0000"});

          // Input documents: two distinct values of 'a', so the mapReduce emits two keys.
          [{a: 1, b: 5}, {a: 1, b: 6}, {a: 2, b: 7}, {a: 2, b: 8}].forEach(function(doc) {
              assert.writeOK(sourceColl.insert(doc));
          });

          // Build the pre-existing output collection: sharded on {c: 1}, split at {c: 0}, with
          // the upper chunk moved so that each shard owns one chunk.
          assert.commandWorked(targetColl.ensureIndex({c: 1}));
          [{c: -1}, {c: 1}].forEach(function(doc) {
              assert.writeOK(targetColl.insert(doc));
          });
          assert.commandWorked(testDB.adminCommand({shardCollection: targetNs, key: {c: 1}}));
          assert.commandWorked(testDB.adminCommand({split: targetNs, middle: {c: 0}}));
          assert.commandWorked(
              testDB.adminCommand({moveChunk: targetNs, find: {c: 1}, to: "shard0001"}));

          // Emits one (a, b) pair per input document.
          var mapFn = function() {
              emit(this.a, this.b);
          };

          // Adds up every value emitted for a key.
          var reduceFn = function(key, values) {
              return values.reduce(function(total, value) {
                  return total + value;
              }, 0);
          };

          // After this command the collection is still sharded by {c: 1}, yet its documents do
          // not contain the shard key.
          testDB.runCommand({
              mapReduce: sourceColl.getName(),
              map: mapFn,
              reduce: reduceFn,
              out: {replace: "output", sharded: true}
          });

          // One output document is expected per distinct emitted key.
          assert.eq(2, targetColl.find().itcount());
      })();
      

      Show
      // Repro script for the mongo shell test harness. It was previously collapsed onto a single
      // line, where each '//' comment swallowed the rest of the script (including the closing
      // '})();'); the line structure is restored here so the script can be pasted and run.
      (function() {
          'use strict';

          var st = new ShardingTest({shards: 2});
          var sdb = st.s.getDB("test");
          var inputColl = sdb.input;
          var outputColl = sdb.output;

          assert.commandWorked(sdb.adminCommand({enableSharding: sdb.getName()}));
          // The result of movePrimary is intentionally left unchecked.
          sdb.adminCommand({movePrimary: sdb.getName(), to: "shard0000"});

          // Input documents: two distinct values of 'a', so the mapReduce emits two keys.
          assert.writeOK(inputColl.insert({a: 1, b: 5}));
          assert.writeOK(inputColl.insert({a: 1, b: 6}));
          assert.writeOK(inputColl.insert({a: 2, b: 7}));
          assert.writeOK(inputColl.insert({a: 2, b: 8}));

          // Create a sharded collection with a chunk on each shard.
          assert.commandWorked(outputColl.ensureIndex({c: 1}));
          assert.writeOK(outputColl.insert({c: -1}));
          assert.writeOK(outputColl.insert({c: 1}));
          assert.commandWorked(sdb.adminCommand({
              shardCollection: outputColl.getFullName(),
              key: {c: 1}
          }));
          assert.commandWorked(sdb.adminCommand({split: outputColl.getFullName(), middle: {c: 0}}));
          assert.commandWorked(sdb.adminCommand({
              moveChunk: outputColl.getFullName(),
              find: {c: 1},
              to: "shard0001"
          }));

          // This creates a collection sharded by {c: 1}, whose documents do not contain the shard key.
          sdb.runCommand({
              mapReduce: inputColl.getName(),
              // Emits one (a, b) pair per input document.
              map: function() {
                  emit(this.a, this.b);
              },
              // Adds up every value emitted for a key.
              reduce: function(key, values) {
                  var sum = 0;
                  for (var i = 0; i < values.length; i++) {
                      sum += values[i];
                  }
                  return sum;
              },
              out: {replace: "output", sharded: true}
          });

          // One output document is expected per distinct emitted key.
          assert.eq(2, outputColl.find().itcount());
      })();

    Description

      Suppose you have a collection "coll" sharded by some non-_id shard key, {shardKey: 1}. Users are allowed to run a mapReduce operation with the output option {replace: "coll", sharded: true}. This means that "coll" should be replaced by the output of the mapReduce, and that the new collection should be sharded.

      New sharded collections created by mapReduce are sharded by {_id: 1}, an assumption which is made in several places in the code. During the replace, however, the collection is never re-sharded by {_id: 1}. Instead, the sharding metadata continues to show that the shard key is {shardKey: 1}.

      The documents in the mapReduce output collection are of the form

      {_id: <key>, value: <value>}
      

      These documents are missing the shard key! The collection at this point is broken and queries against it can return incorrect results. See the repro steps for details.

      Attachments

        Issue Links

          Activity

            People

              backlog-server-sharding Backlog - Sharding Team
              david.storch@mongodb.com David Storch
              Votes:
              0 Vote for this issue
              Watchers:
              5 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: