cluster mapReduce should always call shardCollection for sharded output, since its CatalogCache may be stale

XMLWordPrintableJSON

    • Type: Bug
    • Resolution: Won't Fix
    • Priority: Major - P3
    • None
    • Affects Version/s: 3.6.0-rc3
    • Component/s: Sharding
    • None
    • Sharding
    • ALL
    • Hide
      /**
       * Test that mongos always ensures the sharded output collection of a mapReduce is written to the
       * sharding catalog (even if the mongos was stale and believed the sharded output collection
       * already existed, but the collection had been dropped from another mongos).
       *
       * Outline:
       *   1. Start a cluster with two mongos routers, one config server and one single-node shard.
       *   2. Load 100 * 512 documents into the input collection through the first mongos.
       *   3. Shard the output collection through the first mongos and run a find so that this mongos
       *      has routing information for it.
       *   4. Drop the output collection through the second mongos; the first mongos is now stale.
       *   5. Run a mapReduce with sharded output through the stale mongos and verify that the output
       *      collection has a live entry in config.collections afterwards.
       */
      (function() {
          "use strict";

          const dbName = "test";
          const inputCollName = "inputColl";
          const outputCollName = "outputColl";
          const outputNs = dbName + "." + outputCollName;

          const st = new ShardingTest({mongos: 2, config: 1, shards: 1, rs: {nodes: 1}});

          // Both routers talk to the same cluster; only st.s0 will be left with stale routing info.
          const staleMongos = st.s0;
          const freshMongos = st.s1;

          jsTest.log("Insert some data into the input collection");
          const bulk =
              staleMongos.getDB(dbName).getCollection(inputCollName).initializeUnorderedBulkOp();
          for (let j = 0; j < 100; j++) {
              for (let i = 0; i < 512; i++) {
                  bulk.insert({j: j, i: i});
              }
          }
          assert.writeOK(bulk.execute());

          jsTest.log("Use the stale mongos to initially shard the output collection.");
          assert.commandWorked(staleMongos.adminCommand({enableSharding: dbName}));
          assert.commandWorked(staleMongos.adminCommand({shardCollection: outputNs, key: {_id: 1}}));

          jsTest.log("Force the stale mongos to refresh its routing table cache for the output coll");
          assert.commandWorked(staleMongos.getDB(dbName).runCommand({find: outputCollName}));

          // The drop goes through the other router, so staleMongos is not told about it.
          jsTest.log("Drop the sharded output collection from the fresh mongos.");
          assert.commandWorked(freshMongos.getDB(dbName).runCommand({drop: outputCollName}));

          jsTest.log("Run a mapReduce with the input coll and sharded output coll from the staleMongos.");
          // Counts the input documents per distinct value of 'i'.
          const map = function() {
              emit(this.i, 1);
          };
          const reduce = function(key, values) {
              return Array.sum(values);
          };
          // This is the command under test: it must succeed even though staleMongos still believes
          // that the output collection exists and is sharded.
          assert.commandWorked(staleMongos.getDB(dbName).runCommand({
              mapReduce: inputCollName,
              map: map,
              reduce: reduce,
              out: {replace: outputCollName, sharded: true}
          }));

          jsTest.log("Check that the output collection was re-sharded by the stale mongos.");
          // Exclude entries marked as dropped: on server versions that keep a tombstone document in
          // config.collections after a drop, matching on _id alone would succeed even if the
          // collection had never been re-sharded.
          const resArr = staleMongos.getDB("config")
                             .getCollection("collections")
                             .find({_id: outputNs, dropped: {$ne: true}})
                             .toArray();
          assert.eq(1,
                    resArr.length,
                    "expected exactly one live config.collections entry for " + outputNs);

          st.stop();
      })();
      
      Show
      /**
       * Test that a mapReduce with sharded output, issued through a mongos whose routing cache is
       * stale, still leaves the output collection recorded in the sharding catalog. The cache is
       * made stale by sharding the output collection through one mongos and then dropping it
       * through another.
       */
      (function() {
          "use strict";

          const dbName = "test";
          const inputCollName = "inputColl";
          const outputCollName = "outputColl";
          const outputNs = dbName + "." + outputCollName;

          const st = new ShardingTest({mongos: 2, config: 1, shards: 1, rs: {nodes: 1}});
          const staleMongos = st.s0;
          const freshMongos = st.s1;

          // Database handles are reused below instead of calling getDB() for each command.
          const staleDB = staleMongos.getDB(dbName);
          const freshDB = freshMongos.getDB(dbName);

          jsTest.log("Insert some data into the input collection");
          const bulkOp = staleDB.getCollection(inputCollName).initializeUnorderedBulkOp();
          for (let outer = 0; outer < 100; outer++) {
              for (let inner = 0; inner < 512; inner++) {
                  bulkOp.insert({j: outer, i: inner});
              }
          }
          assert.writeOK(bulkOp.execute());

          jsTest.log("Use the stale mongos to initially shard the output collection.");
          assert.commandWorked(staleMongos.adminCommand({enableSharding: dbName}));
          assert.commandWorked(staleMongos.adminCommand({shardCollection: outputNs, key: {_id: 1}}));

          jsTest.log("Force the stale mongos to refresh its routing table cache for the output coll");
          assert.commandWorked(staleDB.runCommand({find: outputCollName}));

          jsTest.log("Drop the sharded output collection from the fresh mongos.");
          assert.commandWorked(freshDB.runCommand({drop: outputCollName}));

          jsTest.log("Run a mapReduce with the input coll and sharded output coll from the staleMongos.");
          // The map and reduce functions are kept as plain function expressions: map relies on
          // 'this' being bound to the current document.
          const map = function() {
              emit(this.i, 1);
          };
          const reduce = function(key, values) {
              return Array.sum(values);
          };
          const mapReduceCmd = {
              mapReduce: inputCollName,
              map: map,
              reduce: reduce,
              out: {replace: outputCollName, sharded: true}
          };
          assert.commandWorked(staleDB.runCommand(mapReduceCmd));

          jsTest.log("Check that the output collection was re-sharded by the stale mongos.");
          const catalogColl = staleMongos.getDB("config").getCollection("collections");
          const catalogEntries = catalogColl.find({_id: outputNs}).toArray();
          assert.eq(1, catalogEntries.length);

          st.stop();
      })();
    • None
    • 3
    • None
    • None
    • None
    • None
    • None
    • None

      For mapReduce with sharded output, mongos gets the output collection's entry from its CatalogCache, and if the entry indicates the collection is already sharded, mongos does not call shardCollection.

      Because that cache may be stale (for example, when the collection was dropped through another mongos), the stale mongos skips shardCollection for a collection that no longer exists, and the mapReduce fails with NamespaceNotFound (see the repro script).

      Note: since we are deprecating output to new sharded collections, we probably don't need to do anything to fix this.

            Assignee:
            [DO NOT USE] Backlog - Sharding Team
            Reporter:
            Esha Maharishi (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

              Created:
              Updated:
              Resolved: