Currently, setFCV to 4.4 on a shard iterates over each collection and, for each one, builds a vector of its orphaned ranges. It then inserts range deletion tasks for the orphaned ranges one at a time.
On clusters with collections with a huge number of chunks, inserting the range deletion tasks takes a long time (e.g., ~15 minutes per shard for a collection with 100k chunks).
We should see whether batching the inserts (grouping the range deletion task inserts into batches rather than inserting them one document at a time) speeds this up.
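For reference, here is a rough sketch of what a batched version could look like, assuming the task documents end up in config.rangeDeletions via a DBDirectClient. The names makeRangeDeletionTaskDoc, kBatchSize, and insertRangeDeletionTasksInBatches are placeholders for illustration, not existing server code:

// Rough sketch only: insert the range deletion task documents in batches rather
// than one insert per orphaned range. makeRangeDeletionTaskDoc() and kBatchSize
// are hypothetical names; the real code builds the task documents elsewhere.
void insertRangeDeletionTasksInBatches(OperationContext* opCtx,
                                       const std::vector<ChunkRange>& orphanedRanges) {
    const size_t kBatchSize = 1000;  // arbitrary starting point; worth tuning

    DBDirectClient client(opCtx);
    std::vector<BSONObj> batch;
    batch.reserve(kBatchSize);

    for (const auto& range : orphanedRanges) {
        batch.push_back(makeRangeDeletionTaskDoc(range));
        if (batch.size() == kBatchSize) {
            // One insert command per batch instead of one per range.
            client.insert(NamespaceString::kRangeDeletionNamespace.ns(), batch);
            batch.clear();
        }
    }
    if (!batch.empty()) {
        client.insert(NamespaceString::kRangeDeletionNamespace.ns(), batch);
    }
}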
This script creates a cluster with 100k chunks:
(function() {
'use strict';

// Skip checking orphans because it takes a long time when there are so many chunks. Also, we
// don't actually insert any data, so there can't be orphans.
TestData.skipCheckOrphans = true;

const dbName = "db1";
const collName = "foo";
const ns = dbName + "." + collName;

const st = new ShardingTest({
    shards: 2,
    config: 1,
    other: {
        // Uncomment to run this test with a specific binary version.
        // mongosOptions: {binVersion: "4.4"},
        // shardOptions: {binVersion: "4.4"},
        // configOptions: {binVersion: "4.4"},
    }
});

jsTest.log("Set FCV to 4.2 since we want to test upgrading the FCV to 4.4");
assert.commandWorked(st.s.adminCommand({setFeatureCompatibilityVersion: "4.2"}));

jsTest.log("Create a database.");
// enableSharding creates the database.
assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
st.ensurePrimaryShard(dbName, st.shard0.shardName);

jsTest.log("Shard a collection with a huge number of initial chunks");
const NUM_CHUNKS = 100000;
assert.commandWorked(st.s.adminCommand(
    {shardCollection: ns, key: {x: "hashed"}, numInitialChunks: NUM_CHUNKS}));
assert.gt(st.s.getDB("config").chunks.count(), NUM_CHUNKS - 1);

jsTest.log("Set FCV to 4.4");
assert.commandWorked(st.s.adminCommand({setFeatureCompatibilityVersion: "4.4"}));

st.stop();
})();
For this script to work, we'll have to disable the limit on numInitialChunks in the server:
diff --git a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
index 68af077751..9f37b7da94 100644
--- a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
+++ b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
@@ -107,7 +107,7 @@ void validateAndDeduceFullRequestOptions(OperationContext* opCtx,
// Cannot have more than 8192 initial chunks per shard. Setting a maximum of 1,000,000
// chunks in total to limit the amount of memory this command consumes so there is less
// danger of an OOM error.
- const int maxNumInitialChunksForShards = numShards * 8192;
+ /*const int maxNumInitialChunksForShards = numShards * 8192;
const int maxNumInitialChunksTotal = 1000 * 1000; // Arbitrary limit to memory consumption
int numChunks = request->getNumInitialChunks();
uassert(ErrorCodes::InvalidOptions,
@@ -116,7 +116,7 @@ void validateAndDeduceFullRequestOptions(OperationContext* opCtx,
<< maxNumInitialChunksTotal,
numChunks >= 0 && numChunks <= maxNumInitialChunksForShards &&
numChunks <= maxNumInitialChunksTotal);
-
+ */
// Retrieve the collection metadata in order to verify that it is legal to shard this
// collection.
BSONObj res;
We also want to distribute the chunks round-robin between the shards so that each shard has many unowned ranges:
diff --git a/src/mongo/db/s/config/initial_split_policy.cpp b/src/mongo/db/s/config/initial_split_policy.cpp
index 1af9ceb57f..dfb054f4fa 100644
--- a/src/mongo/db/s/config/initial_split_policy.cpp
+++ b/src/mongo/db/s/config/initial_split_policy.cpp
@@ -193,7 +193,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::generateShardColle
// shards, and we need to be sure that at least one chunk is placed on the primary shard
const ShardId shardId = (i == 0 && finalSplitPoints.size() + 1 < allShardIds.size())
? databasePrimaryShardId
- : allShardIds[(i / numContiguousChunksPerShard) % allShardIds.size()];
+ : allShardIds[(i / 1 /*numContiguousChunksPerShard*/) % allShardIds.size()];
appendChunk(nss, min, max, &version, validAfter, shardId, &chunks);
}
Here's an example of timing how long a section of code takes:
// Requires "mongo/util/timer.h" for Timer and "mongo/logv2/log.h" for LOGV2.
void myFunc() {
    Timer t;

    // ... do something time-consuming ...

    auto micros = t.micros();
    LOGV2(123456, "How long it took", "micros"_attr = micros);
}