[SERVER-18175] Chunk manager references removed shard, if it was tagged Created: 22/Apr/15  Updated: 06/Dec/22  Resolved: 20/Sep/19

Status: Closed
Project: Core Server
Component/s: Sharding
Affects Version/s: 3.0.2, 3.1.0
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Jonathan Abrahams Assignee: [DO NOT USE] Backlog - Sharding Team
Resolution: Duplicate Votes: 1
Labels: 32qa
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Issue Links:
Duplicate
duplicates SERVER-32871 ReplicaSetMonitorRemoved and ShardNot... Closed
Related
Assigned Teams:
Sharding
Operating System: ALL
Steps To Reproduce:

3 mongod nodes
1 mongod config server
1 mongos
Connect mongo to mongos

// Namespace (database.collection) passed to the sharding helpers below
ns="test.users";
// Add the first two shards and make shard0000 the primary shard for "test".
// Each <ipaddr:port> is a placeholder for a shard's real host:port.
sh.addShard(<ipaddr:port>);
sh.addShard(<ipaddr:port>);
db.adminCommand({movePrimary: "test", to: "shard0000"});
// Tag the two shards: shard0000 as "East", shard0001 as "West"
sh.addShardTag("shard0000", "East");
sh.addShardTag("shard0001", "West");
sh.enableSharding("test");
// Shard the users collection on key {a: 1}
sh.shardCollection(ns, {a: 1});
// Pre-split at a = 50000, the middle of the 0-99999 shard key range
sh.splitAt(ns, {a: 50000});
db.users.ensureIndex({a: 1});
// Print the cluster state before loading data
sh.status();
// Populate collection, with shard key a having value 0-99999
/**
 * Returns a pseudo-random non-negative integer with at most `digits`
 * decimal digits.
 *
 * A random integer in [0, 9999999] is drawn and then reduced modulo
 * 10^digits.
 *
 * @param {number} digits - Maximum number of decimal digits in the result.
 * @returns {number} Integer in the range [0, 10^digits - 1].
 */
function getRandomInt(digits) {
    // Draw first: a 7-digit (at most) random integer.
    var sevenDigitDraw = Math.floor(Math.random() * 10000000);
    // Keep only the lowest `digits` decimal digits.
    var modulus = Math.pow(10, digits);
    return sevenDigitDraw % modulus;
}
// Filler payload: joining 1000 empty slots with 'x' yields a 999-character string
var largeStr = new Array(1000).join('x');
// Insert 10 batches of 1000 documents each (10,000 documents in total)
for (var i=0; i<10; i++) {
    for (var j=0; j<1000; j++) {
        // Shard key `a` is a random value of at most 5 digits; `b` is the filler payload
        var doc = {a: getRandomInt(5), b: largeStr};
        // A new unordered bulk op is created and executed for every document,
        // so each bulk operation contains exactly one insert
        var bulk = db.users.initializeUnorderedBulkOp();
        bulk.insert(doc);
        var res = bulk.execute();
    }
    // Progress output: j is 1000 here, so this prints the running document
    // count followed by the shard key of the last document inserted
    print((i+1)*j,doc.a);
}
// Add a third shard (<ipaddr:port> is a placeholder for its real host:port)
sh.addShard(<ipaddr:port>);
// Add shard tag West to shard0002
sh.addShardTag("shard0002", "West");
// Remove shard0001, which still carries the "West" tag.
// NOTE(review): the command is issued twice; presumably the second call is
// there to check or advance draining -- confirm against the removeShard docs.
db.adminCommand({removeShard: "shard0001"});
db.adminCommand({removeShard: "shard0001"});
// Add tag ranges: the lower half of the key space to "East", the upper half to "West"
sh.addTagRange(ns, { a: 0 }, { a: 49999 }, "East");
sh.addTagRange(ns, { a: 50000 }, { a: 99999 }, "West");

Participants:
Case:

 Description   

The chunk manager still references a removed shard when the shards are tagged and tag ranges are attached. This can be seen in the mongos log file:

2015-04-22T13:23:55.858-0400 I SHARDING [Balancer] distributed lock 'balancer/CAPJA-CentOS6-2:27017:1429712957:1804289383' acquired, ts : 5537d92be357234b47f80f7e
2015-04-22T13:23:55.882-0400 I -        [Balancer] Assertion: 13129:can't find shard for: shard0001
2015-04-22T13:23:55.888-0400 I CONTROL  [Balancer]
 0xa0c179 0x9b5811 0x998f8f 0x99903c 0x92cdf6 0x87e693 0x87ee0b 0x8e2287 0x8e3266 0x8645b8 0x8663f0 0x99b844 0xa5c883 0x352c0079d1 0x352bce88fd
----- BEGIN BACKTRACE -----
{"backtrace":[{"b":"400000","o":"60C179"},{"b":"400000","o":"5B5811"},{"b":"400000","o":"598F8F"},{"b":"400000","o":"59903C"},{"b":"400000","o":"52CDF6"},{"b":"400000","o":"47E693"},{"b":"400000","o":"47EE0B"},{"b":"400000","o":"4E2287"},{"b":"400000","o":"4E3266"},{"b":"400000","o":"4645B8"},{"b":"400000","o":"4663F0"},{"b":"400000","o":"59B844"},{"b":"400000","o":"65C883"},{"b":"352C000000","o":"79D1"},{"b":"352BC00000","o":"E88FD"}],"processInfo":{ "mongodbVersion" : "3.1.0", "gitVersion" : "7d15cd965ccb3ad684d8ae4e4f09d5b1e9394552", "uname" : { "sysname" : "Linux", "release" : "2.6.32-504.8.1.el6.x86_64", "version" : "#1 SMP Wed Jan 28 21:11:36 UTC 2015", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "A62869EDFD2AEB8FF0632ED6752EEC30DE597C05" }, { "b" : "7FFFCC2FF000", "elfType" : 3, "buildId" : "71A96B79419FE40FF1706D59ED06D6DBE37C8E97" }, { "path" : "/usr/lib64/libssl.so.10", "elfType" : 3, "buildId" : "DAF114120DA5C9DBEB1E5A704CE83ACB9B8B7B54" }, { "path" : "/usr/lib64/libcrypto.so.10", "elfType" : 3, "buildId" : "F523EAC46D068A8E0869CF93BCD84B414937993A" }, { "path" : "/lib64/librt.so.1", "elfType" : 3, "buildId" : "583411D8786F86A1D6B8741C502831E6122445A7" }, { "path" : "/lib64/libdl.so.2", "elfType" : 3, "buildId" : "454F8FC6CC6502C6401E5F9E221564D80665D277" }, { "path" : "/usr/lib64/libstdc++.so.6", "elfType" : 3, "buildId" : "F07F2E7CF4BFB393CC9BBE8CDC6463652E14DB07" }, { "path" : "/lib64/libm.so.6", "elfType" : 3, "buildId" : "7D8E9374F4A4EA38A7C1E763F32240EA113E4208" }, { "path" : "/lib64/libgcc_s.so.1", "elfType" : 3, "buildId" : "246C3BAB0AB093AFD59D34C8CBF29E786DE4BE97" }, { "path" : "/lib64/libpthread.so.0", "elfType" : 3, "buildId" : "B8DFF8E53D9F2B80C3C382E83EC17C828B536A39" }, { "path" : "/lib64/libc.so.6", "elfType" : 3, "buildId" : "E4EAB3C200B7D8444FF95AB01F6466924A6A5F5F" }, { "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "6F8E59B70E469F3A924A268911FF8FD0C37E7460" }, { "path" : 
"/lib64/libgssapi_krb5.so.2", "elfType" : 3, "buildId" : "B7F7FF323B3A4A12310A6285412F01ACE8C74E47" }, { "path" : "/lib64/libkrb5.so.3", "elfType" : 3, "buildId" : "7920917F74AFAD0B8CB197CABBE472AF39D94C34" }, { "path" : "/lib64/libcom_err.so.2", "elfType" : 3, "buildId" : "8CE28F280150E62296240E70ECAC64E4A57AB826" }, { "path" : "/lib64/libk5crypto.so.3", "elfType" : 3, "buildId" : "05733977F4E41652B86070B27A0CFC2C1EA7719D" }, { "path" : "/lib64/libz.so.1", "elfType" : 3, "buildId" : "5FA8E5038EC04A774AF72A9BB62DC86E1049C4D6" }, { "path" : "/lib64/libkrb5support.so.0", "elfType" : 3, "buildId" : "C8D01C2839F6950988CE32B4266A8F89C521ACB0" }, { "path" : "/lib64/libkeyutils.so.1", "elfType" : 3, "buildId" : "AF374BAFB7F5B139A0B431D3F06D82014AFF3251" }, { "path" : "/lib64/libresolv.so.2", "elfType" : 3, "buildId" : "F8B68F301C19BF06AF56B4B06E0A69F89D2C1F8D" }, { "path" : "/lib64/libselinux.so.1", "elfType" : 3, "buildId" : "E6798A06BEE17CF102BBA44FD512FF8B805CEAF1" } ] }}
 mongos(_ZN5mongo15printStackTraceERSo+0x29) [0xa0c179]
 mongos(_ZN5mongo10logContextEPKc+0xE1) [0x9b5811]
 mongos(_ZN5mongo11msgassertedEiPKc+0xAF) [0x998f8f]
 mongos(+0x59903C) [0x99903c]
 mongos(_ZN5mongo5Shard5resetERKSs+0x1C6) [0x92cdf6]
 mongos(_ZN5mongo12ChunkManager5_loadERKSsRSt3mapINS_7BSONObjEN5boost10shared_ptrIKNS_5ChunkEEENS_10BSONObjCmpESaISt4pairIKS4_S9_EEERSt3setINS_5ShardESt4lessISI_ESaISI_EERS3_ISsNS_12ChunkVersionESJ_ISsESaISB_IS1_SO_EEEPKS0_+0x5F3) [0x87e693]
 mongos(_ZN5mongo12ChunkManager18loadExistingRangesERKSsPKS0_+0x16B) [0x87ee0b]
 mongos(_ZN5mongo8DBConfig15getChunkManagerERKSsbb+0x617) [0x8e2287]
 mongos(_ZN5mongo8DBConfig23getChunkManagerIfExistsERKSsbb+0x56) [0x8e3266]
 mongos(_ZN5mongo8Balancer15_doBalanceRoundERNS_12DBClientBaseEPSt6vectorIN5boost10shared_ptrINS_11MigrateInfoEEESaIS7_EE+0x1038) [0x8645b8]
 mongos(_ZN5mongo8Balancer3runEv+0x9F0) [0x8663f0]
 mongos(_ZN5mongo13BackgroundJob7jobBodyEv+0x124) [0x99b844]
 mongos(+0x65C883) [0xa5c883]
 libpthread.so.0(+0x79D1) [0x352c0079d1]
 libc.so.6(clone+0x6D) [0x352bce88fd]
-----  END BACKTRACE  -----
2015-04-22T13:23:55.888-0400 W SHARDING [Balancer] chunk manager not found for test.users :: caused by :: 13129 can't find shard for: shard0001
2015-04-22T13:23:55.888-0400 W SHARDING [Balancer] could not load chunks to balance test.users collection
2015-04-22T13:23:55.921-0400 I SHARDING [Balancer] distributed lock 'balancer/CAPJA-CentOS6-2:27017:1429712957:1804289383' unlocked.



 Comments   
Comment by Rob Reid [ 11/Jun/15 ]

I was able to work around this by restarting the mongos instances.

Generated at Thu Feb 08 03:46:47 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.