Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-18175

Chunk manager references a removed shard if it was tagged

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Major - P3
    • Resolution: Duplicate
    • Affects Version/s: 3.0.2, 3.1.0
    • Fix Version/s: None
    • Component/s: Sharding
    • Labels:
    • Operating System:
      ALL
    • Steps To Reproduce:
      Hide

      3 mongod nodes
      1 mongod config server
      1 mongos
      Connect mongo to mongos

      ns="test.users";
      // Add shards and move primary
      sh.addShard(<ipaddr:port>);
      sh.addShard(<ipaddr:port>);
      db.adminCommand({movePrimary: "test", to: "shard0000"});
      // Add shard tags
      sh.addShardTag("shard0000", "East");
      sh.addShardTag("shard0001", "West");
      sh.enableSharding("test");
      // Shard collections users
      sh.shardCollection(ns, {a: 1});
      // Pre-split in middle
      sh.splitAt(ns, {a: 50000});
      db.users.ensureIndex({a: 1});
      sh.status();
      // Populate collection, with shard key a having value 0-99999
      function getRandomInt(digits) {
          return Math.floor(Math.random()*10000000)%Math.pow(10,digits);
      }
      var largeStr = new Array(1000).join('x');
      for (var i=0; i<10; i++) {
          for (var j=0; j<1000; j++) {
              var doc = {a: getRandomInt(5), b: largeStr};
              var bulk = db.users.initializeUnorderedBulkOp();
              bulk.insert(doc);
              var res = bulk.execute();
          }
          print((i+1)*j,doc.a);
      }
      // Add another shard node
      sh.addShard(<ipaddr:port>);
      // Add shard tag West to shard0002
      sh.addShardTag("shard0002", "West");
      // Remove shard0001
      db.adminCommand({removeShard: "shard0001"});
      db.adminCommand({removeShard: "shard0001"});
      // Add shard range tags
      sh.addTagRange(ns, { a: 0 }, { a: 49999 }, "East");
      sh.addTagRange(ns, { a: 50000 }, { a: 99999 }, "West");
      

      Show
      3 mongod nodes 1 mongod config server 1 mongos Connect mongo to mongos ns="test.users"; // Add shards and move primary sh.addShard(<ipaddr:port>); sh.addShard(<ipaddr:port>); db.adminCommand({movePrimary: "test", to: "shard0000"}); // Add shard tags sh.addShardTag("shard0000", "East"); sh.addShardTag("shard0001", "West"); sh.enableSharding("test"); // Shard collections users sh.shardCollection(ns, {a: 1}); // Pre-split in middle sh.splitAt(ns, {a: 50000}); db.users.ensureIndex({a: 1}); sh.status(); // Populate collection, with shard key a having value 0-99999 function getRandomInt(digits) { return Math.floor(Math.random()*10000000)%Math.pow(10,digits); } var largeStr = new Array(1000).join('x'); for (var i=0; i<10; i++) { for (var j=0; j<1000; j++) { var doc = {a: getRandomInt(5), b: largeStr}; var bulk = db.users.initializeUnorderedBulkOp(); bulk.insert(doc); var res = bulk.execute(); } print((i+1)*j,doc.a); } // Add another shard node sh.addShard(<ipaddr:port>); // Add shard tag West to shard0002 sh.addShardTag("shard0002", "West"); // Remove shard0001 db.adminCommand({removeShard: "shard0001"}); db.adminCommand({removeShard: "shard0001"}); // Add shard range tags sh.addTagRange(ns, { a: 0 }, { a: 49999 }, "East"); sh.addTagRange(ns, { a: 50000 }, { a: 99999 }, "West");
    • Case:

      Description

      The chunk manager still references a removed shard when the shards are tagged and tag ranges are attached to the collection. This can be seen in the mongos log file:

      2015-04-22T13:23:55.858-0400 I SHARDING [Balancer] distributed lock 'balancer/CAPJA-CentOS6-2:27017:1429712957:1804289383' acquired, ts : 5537d92be357234b47f80f7e
      2015-04-22T13:23:55.882-0400 I -        [Balancer] Assertion: 13129:can't find shard for: shard0001
      2015-04-22T13:23:55.888-0400 I CONTROL  [Balancer]
       0xa0c179 0x9b5811 0x998f8f 0x99903c 0x92cdf6 0x87e693 0x87ee0b 0x8e2287 0x8e3266 0x8645b8 0x8663f0 0x99b844 0xa5c883 0x352c0079d1 0x352bce88fd
      ----- BEGIN BACKTRACE -----
      {"backtrace":[{"b":"400000","o":"60C179"},{"b":"400000","o":"5B5811"},{"b":"400000","o":"598F8F"},{"b":"400000","o":"59903C"},{"b":"400000","o":"52CDF6"},{"b":"400000","o":"47E693"},{"b":"400000","o":"47EE0B"},{"b":"400000","o":"4E2287"},{"b":"400000","o":"4E3266"},{"b":"400000","o":"4645B8"},{"b":"400000","o":"4663F0"},{"b":"400000","o":"59B844"},{"b":"400000","o":"65C883"},{"b":"352C000000","o":"79D1"},{"b":"352BC00000","o":"E88FD"}],"processInfo":{ "mongodbVersion" : "3.1.0", "gitVersion" : "7d15cd965ccb3ad684d8ae4e4f09d5b1e9394552", "uname" : { "sysname" : "Linux", "release" : "2.6.32-504.8.1.el6.x86_64", "version" : "#1 SMP Wed Jan 28 21:11:36 UTC 2015", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "A62869EDFD2AEB8FF0632ED6752EEC30DE597C05" }, { "b" : "7FFFCC2FF000", "elfType" : 3, "buildId" : "71A96B79419FE40FF1706D59ED06D6DBE37C8E97" }, { "path" : "/usr/lib64/libssl.so.10", "elfType" : 3, "buildId" : "DAF114120DA5C9DBEB1E5A704CE83ACB9B8B7B54" }, { "path" : "/usr/lib64/libcrypto.so.10", "elfType" : 3, "buildId" : "F523EAC46D068A8E0869CF93BCD84B414937993A" }, { "path" : "/lib64/librt.so.1", "elfType" : 3, "buildId" : "583411D8786F86A1D6B8741C502831E6122445A7" }, { "path" : "/lib64/libdl.so.2", "elfType" : 3, "buildId" : "454F8FC6CC6502C6401E5F9E221564D80665D277" }, { "path" : "/usr/lib64/libstdc++.so.6", "elfType" : 3, "buildId" : "F07F2E7CF4BFB393CC9BBE8CDC6463652E14DB07" }, { "path" : "/lib64/libm.so.6", "elfType" : 3, "buildId" : "7D8E9374F4A4EA38A7C1E763F32240EA113E4208" }, { "path" : "/lib64/libgcc_s.so.1", "elfType" : 3, "buildId" : "246C3BAB0AB093AFD59D34C8CBF29E786DE4BE97" }, { "path" : "/lib64/libpthread.so.0", "elfType" : 3, "buildId" : "B8DFF8E53D9F2B80C3C382E83EC17C828B536A39" }, { "path" : "/lib64/libc.so.6", "elfType" : 3, "buildId" : "E4EAB3C200B7D8444FF95AB01F6466924A6A5F5F" }, { "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "6F8E59B70E469F3A924A268911FF8FD0C37E7460" }, { "path" : 
"/lib64/libgssapi_krb5.so.2", "elfType" : 3, "buildId" : "B7F7FF323B3A4A12310A6285412F01ACE8C74E47" }, { "path" : "/lib64/libkrb5.so.3", "elfType" : 3, "buildId" : "7920917F74AFAD0B8CB197CABBE472AF39D94C34" }, { "path" : "/lib64/libcom_err.so.2", "elfType" : 3, "buildId" : "8CE28F280150E62296240E70ECAC64E4A57AB826" }, { "path" : "/lib64/libk5crypto.so.3", "elfType" : 3, "buildId" : "05733977F4E41652B86070B27A0CFC2C1EA7719D" }, { "path" : "/lib64/libz.so.1", "elfType" : 3, "buildId" : "5FA8E5038EC04A774AF72A9BB62DC86E1049C4D6" }, { "path" : "/lib64/libkrb5support.so.0", "elfType" : 3, "buildId" : "C8D01C2839F6950988CE32B4266A8F89C521ACB0" }, { "path" : "/lib64/libkeyutils.so.1", "elfType" : 3, "buildId" : "AF374BAFB7F5B139A0B431D3F06D82014AFF3251" }, { "path" : "/lib64/libresolv.so.2", "elfType" : 3, "buildId" : "F8B68F301C19BF06AF56B4B06E0A69F89D2C1F8D" }, { "path" : "/lib64/libselinux.so.1", "elfType" : 3, "buildId" : "E6798A06BEE17CF102BBA44FD512FF8B805CEAF1" } ] }}
       mongos(_ZN5mongo15printStackTraceERSo+0x29) [0xa0c179]
       mongos(_ZN5mongo10logContextEPKc+0xE1) [0x9b5811]
       mongos(_ZN5mongo11msgassertedEiPKc+0xAF) [0x998f8f]
       mongos(+0x59903C) [0x99903c]
       mongos(_ZN5mongo5Shard5resetERKSs+0x1C6) [0x92cdf6]
       mongos(_ZN5mongo12ChunkManager5_loadERKSsRSt3mapINS_7BSONObjEN5boost10shared_ptrIKNS_5ChunkEEENS_10BSONObjCmpESaISt4pairIKS4_S9_EEERSt3setINS_5ShardESt4lessISI_ESaISI_EERS3_ISsNS_12ChunkVersionESJ_ISsESaISB_IS1_SO_EEEPKS0_+0x5F3) [0x87e693]
       mongos(_ZN5mongo12ChunkManager18loadExistingRangesERKSsPKS0_+0x16B) [0x87ee0b]
       mongos(_ZN5mongo8DBConfig15getChunkManagerERKSsbb+0x617) [0x8e2287]
       mongos(_ZN5mongo8DBConfig23getChunkManagerIfExistsERKSsbb+0x56) [0x8e3266]
       mongos(_ZN5mongo8Balancer15_doBalanceRoundERNS_12DBClientBaseEPSt6vectorIN5boost10shared_ptrINS_11MigrateInfoEEESaIS7_EE+0x1038) [0x8645b8]
       mongos(_ZN5mongo8Balancer3runEv+0x9F0) [0x8663f0]
       mongos(_ZN5mongo13BackgroundJob7jobBodyEv+0x124) [0x99b844]
       mongos(+0x65C883) [0xa5c883]
       libpthread.so.0(+0x79D1) [0x352c0079d1]
       libc.so.6(clone+0x6D) [0x352bce88fd]
      -----  END BACKTRACE  -----
      2015-04-22T13:23:55.888-0400 W SHARDING [Balancer] chunk manager not found for test.users :: caused by :: 13129 can't find shard for: shard0001
      2015-04-22T13:23:55.888-0400 W SHARDING [Balancer] could not load chunks to balance test.users collection
      2015-04-22T13:23:55.921-0400 I SHARDING [Balancer] distributed lock 'balancer/CAPJA-CentOS6-2:27017:1429712957:1804289383' unlocked.
      

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              backlog-server-sharding Backlog - Sharding Team
              Reporter:
              jonathan.abrahams Jonathan Abrahams
              Participants:
              Votes:
              1 Vote for this issue
              Watchers:
              10 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved: