Description
Currently, setFCV to 4.4 on a shard iterates over each collection and, for each one, builds a vector of its orphaned ranges. It then inserts a range deletion task for each orphaned range, one document at a time.
On clusters whose collections have a huge number of chunks, inserting the range deletion tasks takes a long time (e.g., ~15 minutes per shard for a collection with 100k chunks).
We should see whether batching the inserts (grouping the range deletion task documents into batches and issuing one insert per batch, rather than one insert per document) speeds this up.
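A minimal sketch of what a batched version could look like, assuming the 4.4 RangeDeletionTask IDL type, DBDirectClient, and the NamespaceString::kRangeDeletionNamespace (config.rangeDeletions) constant; the helper name and batch size are made up for illustration and do not exist in the server today:

// Hypothetical helper: group the range deletion task documents into batches and issue one
// insert command per batch instead of one insert per document.
void insertRangeDeletionTasksInBatches(OperationContext* opCtx,
                                       const std::vector<RangeDeletionTask>& tasks) {
    // Arbitrary batch size for the sketch; the right value would need measuring.
    constexpr size_t kBatchSize = 1000;

    DBDirectClient client(opCtx);
    std::vector<BSONObj> batch;
    batch.reserve(kBatchSize);

    for (const auto& task : tasks) {
        batch.push_back(task.toBSON());
        if (batch.size() == kBatchSize) {
            // One insert command for the whole batch.
            client.insert(NamespaceString::kRangeDeletionNamespace.ns(), batch);
            batch.clear();
        }
    }
    if (!batch.empty()) {
        client.insert(NamespaceString::kRangeDeletionNamespace.ns(), batch);
    }
}

Whether a fixed batch size like this or a single bulk insert of all the documents works better is something the timing approach at the end of this description could tell us.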
This script creates a cluster with 100k chunks:
(function() {
    'use strict';

    // Skip checking orphans because it takes a long time when there are so many chunks. Also we don't
    // actually insert any data so there can't be orphans.
    TestData.skipCheckOrphans = true;

    const dbName = "db1";
    const collName = "foo";
    const ns = dbName + "." + collName;

    const st = new ShardingTest({
        shards: 2,
        config: 1,
        other: {
            // How to run this test with a specific binary version.
            // mongosOptions: {binVersion: "4.4"},
            // shardOptions: {binVersion: "4.4"},
            // configOptions: {binVersion: "4.4"},
        }
    });

    jsTest.log("Set FCV to 4.2 since we want to test upgrading the FCV to 4.4");
    assert.commandWorked(st.s.adminCommand({ setFeatureCompatibilityVersion: "4.2" }));

    jsTest.log("Create a database.");
    // enableSharding creates the databases.
    assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
    st.ensurePrimaryShard(dbName, st.shard0.shardName);

    jsTest.log("Shard a collection with a huge number of initial chunks");
    const NUM_CHUNKS = 100000;
    assert.commandWorked(st.s.adminCommand({ shardCollection: ns, key: {x: "hashed"}, numInitialChunks: NUM_CHUNKS }));
    assert.gt(st.s.getDB("config").chunks.count(), NUM_CHUNKS - 1);

    jsTest.log("Set FCV to 4.4");
    assert.commandWorked(st.s.adminCommand({ setFeatureCompatibilityVersion: "4.4" }));

    st.stop();
})();
For this script to work, we'll have to disable the limit on numInitialChunks in the server:
diff --git a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
index 68af077751..9f37b7da94 100644
--- a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
+++ b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
@@ -107,7 +107,7 @@ void validateAndDeduceFullRequestOptions(OperationContext* opCtx,
     // Cannot have more than 8192 initial chunks per shard. Setting a maximum of 1,000,000
     // chunks in total to limit the amount of memory this command consumes so there is less
     // danger of an OOM error.
-    const int maxNumInitialChunksForShards = numShards * 8192;
+    /*const int maxNumInitialChunksForShards = numShards * 8192;
     const int maxNumInitialChunksTotal = 1000 * 1000;  // Arbitrary limit to memory consumption
     int numChunks = request->getNumInitialChunks();
     uassert(ErrorCodes::InvalidOptions,
@@ -116,7 +116,7 @@ void validateAndDeduceFullRequestOptions(OperationContext* opCtx,
                 << maxNumInitialChunksTotal,
             numChunks >= 0 && numChunks <= maxNumInitialChunksForShards &&
                 numChunks <= maxNumInitialChunksTotal);
-
+    */
     // Retrieve the collection metadata in order to verify that it is legal to shard this
     // collection.
     BSONObj res;
We also want to distribute the chunks round-robin between the shards so that each shard has many unowned ranges:
diff --git a/src/mongo/db/s/config/initial_split_policy.cpp b/src/mongo/db/s/config/initial_split_policy.cpp
index 1af9ceb57f..dfb054f4fa 100644
--- a/src/mongo/db/s/config/initial_split_policy.cpp
+++ b/src/mongo/db/s/config/initial_split_policy.cpp
@@ -193,7 +193,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::generateShardColle
         // shards, and we need to be sure that at least one chunk is placed on the primary shard
         const ShardId shardId = (i == 0 && finalSplitPoints.size() + 1 < allShardIds.size())
             ? databasePrimaryShardId
-            : allShardIds[(i / numContiguousChunksPerShard) % allShardIds.size()];
+            : allShardIds[(i / 1 /*numContiguousChunksPerShard*/) % allShardIds.size()];

         appendChunk(nss, min, max, &version, validAfter, shardId, &chunks);
     }
Here's an example of timing how long a section of code takes:
// Timer is from "mongo/util/timer.h"; LOGV2 is from "mongo/logv2/log.h".
void myFunc() {
    Timer t;

    // do something time-consuming

    auto micros = t.micros();

    LOGV2(123456, "How long it took", "micros"_attr = micros);
}
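To compare the one-at-a-time and batched approaches, the same pattern could be wrapped around the section of the setFCV upgrade path that inserts the range deletion tasks. A sketch, reusing the hypothetical batched helper from earlier in this description (the function name and log ID are placeholders, like the one above):

void insertRangeDeletionTasksTimed(OperationContext* opCtx,
                                   const std::vector<RangeDeletionTask>& tasks) {
    Timer t;

    // Either the existing one-document-at-a-time loop or the batched helper sketched above.
    insertRangeDeletionTasksInBatches(opCtx, tasks);

    LOGV2(123457,
          "Inserted range deletion tasks for orphaned ranges during FCV upgrade",
          "numTasks"_attr = tasks.size(),
          "micros"_attr = t.micros());
}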