In testing a scenario where a large collection is sharded, the collection ends up with too many jumbo chunks, which prevents the chunks from being placed on the proper shard nodes (which are designated using sh.addTagRange).
Schema
- {lname: <string>, fname: <string>, addr: <string>, ssn: <string>, zip: <int>}
- Note - doesn't matter if zip is int or string type
- Indexes:
- {ssn: 1}
- {lname: 1, fname: 1}
- {lname: 1, zip: 1}
- {zip: 1}
- Shard key: {zip:1}
Steps
- On a standalone (or replica set) node, add 10 million documents using this basic script:
// Data-generation script for the mongo shell: fills db.users with random
// person records ({ssn, lname, fname, addr, zip}, all strings).
//
// Relies on the shell globals `db`, `print` and `tojson`, and on the arrays
// `streets`, `streetTypes`, `firstNames` and `lastNames`, which are not defined
// in this snippet and must exist before it runs.

// NOTE(review): 50000 batches x 1000 documents = 50 million documents, while
// the surrounding description mentions 10 million - confirm the intended total.
var BATCH_COUNT = 50000;
var BATCH_SIZE = 1000;

/**
 * Left-pads a non-negative number with zeros to `size` characters.
 * Values whose string form is already `size` characters or longer are
 * returned unchanged (as a string).
 */
function pad(num, size) {
  var text = num.toString();
  if (text.length >= size) {
    return text;
  }
  // 10^size + num is "1" followed by the zero-padded number; drop the "1".
  return (Math.pow(10, size) + Math.floor(num)).toString().substring(1);
}

/**
 * Returns a random non-negative integer with at most `digits` decimal digits.
 * The underlying random value is below 10,000,000, so `digits` above 7 gives
 * no additional range.
 */
function getRandomInt(digits) {
  return Math.floor(Math.random() * 10000000) % Math.pow(10, digits);
}

/** Returns a random element of `list`. */
function pickRandom(list) {
  return list[Math.floor(Math.random() * 10000000) % list.length];
}

/** Returns a random SSN-shaped string, "ddd-dd-dddd". */
function getSsn() {
  return pad(getRandomInt(3), 3) + "-" + pad(getRandomInt(2), 2) + "-" + pad(getRandomInt(4), 4);
}

/** Returns a random street address: "<number> <street> <street type>". */
function getAddr() {
  return getRandomInt(3) + " " + pickRandom(streets) + " " + pickRandom(streetTypes);
}

/** Returns a random 5-character, zero-padded zip code string. */
function getZip() {
  return pad(getRandomInt(5), 5);
}

/** Returns a random first name from `firstNames`. */
function getFname() {
  return pickRandom(firstNames);
}

/** Returns a random last name from `lastNames`. */
function getLname() {
  return pickRandom(lastNames);
}

// Resume from the number of complete batches already present, so the script
// can be restarted after an interruption.
var cnt = db.users.count();
var x = Math.floor(cnt / BATCH_SIZE);

for (var i = x; i < BATCH_COUNT; i++) {
  // One bulk operation per batch: queue every document of the batch, then
  // send them together with a single execute().
  var bulk = db.users.initializeUnorderedBulkOp();
  var doc;
  for (var j = 0; j < BATCH_SIZE; j++) {
    doc = {ssn: getSsn(), lname: getLname(), fname: getFname(), addr: getAddr(), zip: getZip()};
    bulk.insert(doc);
  }
  bulk.execute();
  // Progress output: running document total and the last document of the batch.
  print((i + 1) * BATCH_SIZE, tojson(doc));
}
- Add 2 more mongod processes and shard the replica set
// Cluster setup, run from a mongo shell connected to a mongos.
// Replace each "<ip:port>" placeholder with the address of one shard.
sh.addShard("<ip:port>");
sh.addShard("<ip:port>");
sh.addShard("<ip:port>");

// Shard the users collection on zip.
sh.enableSharding("test");
sh.shardCollection("test.users", {zip: 1});
// NOTE(review): this uses the shard name "shard0000" while the tags below use
// "rs0"/"rs1"/"rs2" - the names must match those reported by sh.status(); confirm.
db.adminCommand({ movePrimary : "test", to : "shard0000" });
sh.status();

// Shard Tags
sh.addShardTag("rs0", "East");
sh.addShardTag("rs1", "Central");
sh.addShardTag("rs2", "West");

// Tag ranges include the lower bound and exclude the upper bound, so each
// range ends where the next one begins; otherwise zips "39999", "79999" and
// "99999" would belong to no tag. MaxKey closes the last range so that "99999"
// is covered as well.
sh.addTagRange("test.users", { zip: "00000" }, { zip: "40000" }, "East");
sh.addTagRange("test.users", { zip: "40000" }, { zip: "80000" }, "Central");
sh.addTagRange("test.users", { zip: "80000" }, { zip: MaxKey }, "West");
As the chunks are auto-split, most will be marked as jumbo and will not be able to move to their tagged shard. Manual splitting still works on these chunks.