In testing a scenario where a large collection is sharded, the collection ends up with too many jumbo chunks, which prevent the chunks from being placed on the proper shard nodes (which are designated using sh.addTagRange).
- {lname: <string>, fname: <string>, addr: <string>, ssn: <string>, zip: <int>}
- Note - doesn't matter if zip is int or string type
- Indexes:
- {ssn: 1}
- {lname: 1, fname: 1}
- {lname: 1, zip: 1}
- {zip: 1}
- Shard key: {zip:1}
- On standalone (or replica set) node add 10 million documents using this basic query:
// Mongo shell script: fills db.users with randomly generated person documents.
// The arrays streets, streetTypes, firstNames and lastNames must already be
// defined in the shell session; they are not created here.

// Number of documents sent to the server per bulk operation.
var BATCH_SIZE = 1000;
// Number of batches the collection should contain when the script finishes.
// NOTE(review): 50000 batches x 1000 docs = 50 million documents; lower this
// to 10000 if only 10 million documents are wanted.
var TOTAL_BATCHES = 50000;

/**
 * Left-pads a number with zeros to a fixed width.
 * @param {number} num  value to format (floored when padding is applied)
 * @param {number} size desired minimum number of characters
 * @returns {string} num as a string; returned unchanged when it is already
 *                   at least `size` characters long
 */
function pad(num, size) {
    var text = num.toString();
    if (text.length >= size) {
        return text;
    }
    // 10^size + num yields "1" followed by the zero-padded digits; drop the "1".
    return (Math.pow(10, size) + Math.floor(num)).toString().substring(1);
}

/**
 * Returns a random non-negative integer with at most `digits` decimal digits.
 * The random source is scaled by 10^7, so this is only meaningful for
 * digits <= 7.
 * @param {number} digits maximum number of decimal digits
 * @returns {number} integer in [0, 10^digits)
 */
function getRandomInt(digits) {
    return Math.floor(Math.random() * 10000000) % Math.pow(10, digits);
}

/**
 * Picks a random element from a non-empty array.
 * @param {Array} list candidates to choose from
 * @returns {*} one element of list
 */
function pickRandom(list) {
    return list[Math.floor(Math.random() * 10000000) % list.length];
}

/** @returns {string} random value formatted as "ddd-dd-dddd" */
function getSsn() {
    return pad(getRandomInt(3), 3) + "-" + pad(getRandomInt(2), 2) + "-" + pad(getRandomInt(4), 4);
}

/** @returns {string} "<number> <street> <street type>" built from the streets/streetTypes arrays */
function getAddr() {
    return getRandomInt(3) + " " + pickRandom(streets) + " " + pickRandom(streetTypes);
}

/** @returns {string} random five-character, zero-padded zip code */
function getZip() {
    return pad(getRandomInt(5), 5);
}

/** @returns {string} random entry of the firstNames array */
function getFname() {
    return pickRandom(firstNames);
}

/** @returns {string} random entry of the lastNames array */
function getLname() {
    return pickRandom(lastNames);
}

// Resume from the number of complete batches already present so the script
// can be re-run after an interruption.
var cnt = db.users.count();
var startBatch = Math.floor(cnt / BATCH_SIZE);

for (var i = startBatch; i < TOTAL_BATCHES; i++) {
    // One bulk operation per batch: all BATCH_SIZE inserts are queued and then
    // sent with a single execute() instead of one bulk operation per document.
    var bulk = db.users.initializeUnorderedBulkOp();
    var doc;
    for (var j = 0; j < BATCH_SIZE; j++) {
        doc = {ssn: getSsn(), lname: getLname(), fname: getFname(), addr: getAddr(), zip: getZip()};
        bulk.insert(doc);
    }
    bulk.execute();
    // Progress line: target document count so far and the last generated document.
    print((i + 1) * BATCH_SIZE, tojson(doc));
}
- Add 2 more mongod processes and shard the replica set
// Run from a mongos: register the shards, shard test.users on zip, and pin
// zip ranges to shards with tags.
sh.addShard("<ip:port>");
sh.addShard("<ip:port>");
sh.addShard("<ip:port>");

sh.enableSharding("test");
sh.shardCollection("test.users", {zip: 1});
// NOTE(review): this targets "shard0000" while the tags below target
// "rs0".."rs2" - confirm the actual shard names in the sh.status() output.
db.adminCommand({ movePrimary : "test", to : "shard0000" });
sh.status();

// Shard Tags
sh.addShardTag("rs0", "East");
sh.addShardTag("rs1", "Central");
sh.addShardTag("rs2", "West");

// Tag ranges include the lower bound and exclude the upper bound, so each
// range ends where the next one starts; otherwise zips such as "39999" and
// "79999" would belong to no tag. MaxKey closes the last range so "99999"
// is covered as well.
sh.addTagRange("test.users", { zip: "00000" }, { zip: "40000" }, "East");
sh.addTagRange("test.users", { zip: "40000" }, { zip: "80000" }, "Central");
sh.addTagRange("test.users", { zip: "80000" }, { zip: MaxKey }, "West");
As the chunks are auto-split, most are marked as jumbo and cannot be moved to their tagged shard. Manual splitting still works on these chunks.