<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 06:19:32 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-71627] Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks</title>
                <link>https://jira.mongodb.org/browse/SERVER-71627</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;Refreshing routing info happens under a lot of circumstances on mongos &amp;amp; mongod, e.g. splitting &amp;amp; moving chunks &amp;amp; shard version Check(when routing requests for read/write queries), etc. Efficiency of refreshing is crucial to MongoDB sharded cluster&#8217;s core functionalities.&lt;br/&gt;
In production clusters, the number of chunks grows rapidly as data keeps flowing in, resulting in longer refresh durations during which all client requests are blocked. Although the client queries are simple and the system load (CPU, MEM, IO) is low, client requests experience high latency and jitter during the route refresh. For example, in a cluster with 1 million chunks, it&#8217;d take seconds to do the refresh, severely blocking all client queries. &lt;/p&gt;</description>
                <environment></environment>
        <key id="2195305">SERVER-71627</key>
            <summary>Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks</summary>
                <type id="4" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14710&amp;avatarType=issuetype">Improvement</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="13201">Fixed</resolution>
                                        <assignee username="tommaso.tocci@mongodb.com">Tommaso Tocci</assignee>
                                    <reporter username="1147952115@qq.com">y yz</reporter>
                        <labels>
                            <label>balancer-round-perf</label>
                            <label>chunkmap-improvements</label>
                    </labels>
                <created>Sat, 26 Nov 2022 08:11:53 +0000</created>
                <updated>Mon, 18 Dec 2023 09:11:33 +0000</updated>
                            <resolved>Tue, 11 Jul 2023 20:27:49 +0000</resolved>
                                                    <fixVersion>7.1.0-rc0</fixVersion>
                    <fixVersion>4.2.25</fixVersion>
                    <fixVersion>7.0.1</fixVersion>
                    <fixVersion>6.0.10</fixVersion>
                    <fixVersion>5.0.21</fixVersion>
                    <fixVersion>4.4.25</fixVersion>
                                                        <votes>1</votes>
                                    <watches>38</watches>
                                                                                                                <comments>
                            <comment id="5646750" author="xgen-internal-githook" created="Mon, 21 Aug 2023 14:02:21 +0000"  >&lt;p&gt;Author: &lt;/p&gt;
{&apos;name&apos;: &apos;Tommaso Tocci&apos;, &apos;email&apos;: &apos;tommaso.tocci@mongodb.com&apos;, &apos;username&apos;: &apos;toto-dev&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-71627&quot; title=&quot;Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-71627&quot;&gt;&lt;del&gt;SERVER-71627&lt;/del&gt;&lt;/a&gt; Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&lt;br/&gt;
Branch: v6.0&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/00ae536924d887841bc5decd691ed46c92b1f7a6&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/00ae536924d887841bc5decd691ed46c92b1f7a6&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="5646644" author="xgen-internal-githook" created="Mon, 21 Aug 2023 13:34:47 +0000"  >&lt;p&gt;Author: &lt;/p&gt;
{&apos;name&apos;: &apos;Tommaso Tocci&apos;, &apos;email&apos;: &apos;tommaso.tocci@mongodb.com&apos;, &apos;username&apos;: &apos;toto-dev&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-71627&quot; title=&quot;Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-71627&quot;&gt;&lt;del&gt;SERVER-71627&lt;/del&gt;&lt;/a&gt; Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&lt;br/&gt;
Branch: v4.4&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/24c9b7498f68eef681d6154bbbda73d3d4f6d9b9&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/24c9b7498f68eef681d6154bbbda73d3d4f6d9b9&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="5646247" author="xgen-internal-githook" created="Mon, 21 Aug 2023 10:35:47 +0000"  >&lt;p&gt;Author: &lt;/p&gt;
{&apos;name&apos;: &apos;Tommaso Tocci&apos;, &apos;email&apos;: &apos;tommaso.tocci@mongodb.com&apos;, &apos;username&apos;: &apos;toto-dev&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-71627&quot; title=&quot;Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-71627&quot;&gt;&lt;del&gt;SERVER-71627&lt;/del&gt;&lt;/a&gt; Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&lt;br/&gt;
Branch: v5.0&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/1bfdeba2f93bf65327a73badd3a50f5b856e27c1&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/1bfdeba2f93bf65327a73badd3a50f5b856e27c1&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="5641505" author="xgen-internal-githook" created="Thu, 17 Aug 2023 20:12:39 +0000"  >&lt;p&gt;Author: &lt;/p&gt;
{&apos;name&apos;: &apos;Tommaso Tocci&apos;, &apos;email&apos;: &apos;tommaso.tocci@mongodb.com&apos;, &apos;username&apos;: &apos;toto-dev&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-71627&quot; title=&quot;Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-71627&quot;&gt;&lt;del&gt;SERVER-71627&lt;/del&gt;&lt;/a&gt; Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&lt;br/&gt;
Branch: v7.0&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/58a9e97df59b7ed86821555d86097f224886eb71&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/58a9e97df59b7ed86821555d86097f224886eb71&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="5635300" author="xgen-internal-githook" created="Tue, 15 Aug 2023 15:57:29 +0000"  >&lt;p&gt;Author: &lt;/p&gt;
{&apos;name&apos;: &apos;Tommaso Tocci&apos;, &apos;email&apos;: &apos;tommaso.tocci@mongodb.com&apos;, &apos;username&apos;: &apos;toto-dev&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-71627&quot; title=&quot;Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-71627&quot;&gt;&lt;del&gt;SERVER-71627&lt;/del&gt;&lt;/a&gt; Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&lt;/p&gt;

&lt;p&gt;BACKPORT-16609&lt;br/&gt;
Branch: v4.2&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/aed3561d4cf3463e78801ef73fbf563919b1397c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/aed3561d4cf3463e78801ef73fbf563919b1397c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="5558692" author="xgen-internal-githook" created="Tue, 11 Jul 2023 20:19:37 +0000"  >&lt;p&gt;Author: &lt;/p&gt;
{&apos;name&apos;: &apos;Tommaso Tocci&apos;, &apos;email&apos;: &apos;tommaso.tocci@mongodb.com&apos;, &apos;username&apos;: &apos;toto-dev&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-71627&quot; title=&quot;Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-71627&quot;&gt;&lt;del&gt;SERVER-71627&lt;/del&gt;&lt;/a&gt; Refreshed cached collection route info will severely block all client request when a cluster with 1 million chunks&lt;/p&gt;

&lt;p&gt;Many thanks to yangyazhou, demonyang, ycycyyc, pengzhenyi2015, wujunyuyuyu, lakeleisu and jerrygxx for the original idea on how to significantly improve performance of updating the routing table.&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/502ee4edd68cf833bd5b2b5f98c4538a6d9ce6eb&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/502ee4edd68cf833bd5b2b5f98c4538a6d9ce6eb&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="5558174" author="tommaso.tocci" created="Tue, 11 Jul 2023 17:58:44 +0000"  >&lt;p&gt;Hi &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=1147952115%40qq.com&quot; class=&quot;user-hover&quot; rel=&quot;1147952115@qq.com&quot;&gt;1147952115@qq.com&lt;/a&gt; !&lt;/p&gt;

&lt;p&gt;I would like to thank you, and the entire Tencent MongoDB team, again for the detailed report and the code change proposal. We really appreciate the big effort.&lt;/p&gt;

&lt;p&gt;We reviewed and tested thoroughly the code you submitted, and we really like the idea of Two-Dimensional Sorting &amp;amp; Search. The performance improvements for incremental refreshes are excellent.&lt;br/&gt;
On the other hand, we found some issues with the proposed implementation. In particular, we discovered:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;A correctness bug in &lt;tt&gt;ChunkMap::forEachOverlappingChunk&lt;/tt&gt; that keeps processing chunks even if they are not overlapping with the given range. In particular, &lt;a href=&quot;https://github.com/mongodb/mongo/pull/1506/files#diff-11dcc8869266005d0155da0dc20195a94584e7a5151acdcb2d6accc8a0722a02R185&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;the problem is caused by this break&lt;/a&gt; that is only interrupting the inner for-loop but not the outer one.&lt;/li&gt;
	&lt;li&gt;A performance regression in &lt;tt&gt;ChunkMap::_findIntersectingChunk&lt;/tt&gt; caused by a &lt;a href=&quot;https://github.com/mongodb/mongo/pull/1506/files#diff-1d1b637cfa171660984e1644202f541857db8cd9b6db57528146f31b8bd4538dR641&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;vector copy&lt;/a&gt; that can be avoided. This regression is particularly relevant because this function is on the hot path executed by most data queries.&lt;/li&gt;
	&lt;li&gt;A correctness bug in &lt;tt&gt;ChunkMap::makeUpdated&lt;/tt&gt;. The algorithm is producing a wrong chunk map in case updated chunks contain merged chunks that span across chunk vectors boundaries.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Thus, we ended up changing the code quite a bit to fix the bugs and the performance regression. In particular, we entirely rewrote the merging algorithm to support all possible chunk operations.&lt;/p&gt;</comment>
                            <comment id="5192828" author="1147952115@qq.com" created="Mon, 13 Feb 2023 13:36:22 +0000"  >&lt;p&gt;hi, &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=matt.panton%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;matt.panton@mongodb.com&quot;&gt;matt.panton@mongodb.com&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;It&apos;s a great honor to get your approval;&#160; we have signed the agreement and emailed it to you.&lt;/p&gt;

&lt;p&gt;thanks.&lt;/p&gt;</comment>
                            <comment id="5184208" author="JIRAUSER1270641" created="Thu, 9 Feb 2023 19:41:48 +0000"  >&lt;p&gt;Hello &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=1147952115%40qq.com&quot; class=&quot;user-hover&quot; rel=&quot;1147952115@qq.com&quot;&gt;1147952115@qq.com&lt;/a&gt;!&lt;/p&gt;

&lt;p&gt;I am Matt, a Product Manager at MongoDB. After reviewing your pull request, we want to include the optimization in future releases of MongoDB!&lt;/p&gt;

&lt;p&gt;Before we can include the optimization in a future release, we need you and all of the members of your team that worked on this optimization to sign the &lt;a href=&quot;https://www.mongodb.com/legal/contributor-agreement&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;Contributor Agreement&lt;/a&gt;. You can email the signed agreement(s) to me at &lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;mailto:matt.panton@mongodb.com&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;matt.panton@mongodb.com&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.mongodb.org/images/icons/mail_small.gif&quot; height=&quot;12&quot; width=&quot;13&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&#160;&lt;/p&gt;

&lt;p&gt;After filling out and returning the signed contributor agreement, MongoDB can begin merging the code.&lt;/p&gt;

&lt;p&gt;Thank you for your contribution! I am thrilled to see the future performance benefits this optimization will deliver to you, your organization, and customers worldwide.&lt;/p&gt;</comment>
                            <comment id="5009026" author="JIRAUSER1265262" created="Sat, 26 Nov 2022 20:06:45 +0000"  >&lt;p&gt;Thank you for your incredibly detailed summary with supporting documentation!&#160;&lt;/p&gt;

&lt;p&gt;I&apos;m going to pass this on to the relevant team to look into.&lt;/p&gt;</comment>
                            <comment id="5008653" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:47:35 +0000"  >&lt;p&gt;&lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/attachment/418134/418134_MongoDB+Routing++Info+Refresh+Optimization.pdf&quot; title=&quot;MongoDB Routing  Info Refresh Optimization.pdf attached to SERVER-71627&quot;&gt;MongoDB Routing  Info Refresh Optimization.pdf&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.mongodb.org/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;</comment>
                            <comment id="5008650" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:45:54 +0000"  >&lt;p&gt;&quot;MongoDB routing refresh mechanism limitation analysis&quot; and &quot;Proposed MongoDB Incremental Routing Info Refresh Method: Two-Dimensional Sorting &amp;amp; Search&quot; please refer to the attached PDF(MongoDB Routing &#160;Info Refresh Optimization.pdf).&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;The optimization code push: &lt;a href=&quot;https://github.com/mongodb/mongo/pull/1506&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/pull/1506&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="5008648" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:38:04 +0000"  >&lt;p&gt;The Tencent MongoDB team has solved the problem thoroughly with Two-Dimensional Sorting &amp;amp; Search. After optimization, refresh latency remained at 2ms regardless of the data size of the sharded cluster.&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Performance comparison before and after optimization&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;div class=&apos;table-wrap&apos;&gt;
&lt;table class=&apos;confluenceTable&apos;&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&lt;b&gt;MongoDB Version&lt;/b&gt;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&lt;b&gt;Total Data Size(TB)&lt;/b&gt;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&lt;b&gt;Total Chunk Number(M)&lt;/b&gt;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&lt;b&gt;Elapsed Time of queries(ms)&lt;/b&gt;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&lt;b&gt;Elapsed Time after optimization (ms)&lt;/b&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;3.6&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;80&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;4.5&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;4500&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;4.0&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;1.2&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;0.25&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;300&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;4.2&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;25&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;1.5&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;1200&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;5.0&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;30&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;910&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;5.0&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;80&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;5&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2600&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;


&lt;p&gt;After optimization, refreshing incremental routing info&#8217;s time cost is around 2ms, and most of the elapsed time is spent on retrieving changed chunks from the Config Server, while generating the new ChunkMap only takes a very short period (&amp;lt; 1ms)&lt;/p&gt;

&lt;p&gt;&#160; &#160; &#160; Note: here, the 5.0 version&apos;s shard key is an int id (ranging from 0 to 100,000,000), so it takes less time.&#160; In fact, the shard key in production is more complex, so it takes more time than with an id shard key.&lt;/p&gt;

&lt;p&gt;&#160; &#160; &#160; The optimization code address: &lt;a href=&quot;https://github.com/mongodb/mongo/pull/1506&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/pull/1506&lt;/a&gt;&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Logs before and after optimization(5M chunk size)&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Logs before optimization:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;{&quot;t&quot;: \{&quot;$date&quot;: &quot;2022-10-17T11: 15: 56.209+08: 00&quot;}
&lt;p&gt;,&quot;s&quot;: &quot;I&quot;,&#160;&#160;&quot;c&quot;: &quot;SH_REFR&quot;,&#160;&#160;&quot;id&quot;: 4619901,&#160;&quot;ctx&quot;: &quot;CatalogCache-3&quot;,&quot;msg&quot;: &quot;Refreshed&#160;cached&#160;collection&quot;,&quot;attr&quot;: {&quot;namespace&quot;: &quot;test.test2&quot;,&quot;lookupSinceVersion&quot;: {&quot;0&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 49188,&quot;i&quot;: 1}},&quot;1&quot;: {&quot;$oid&quot;: &quot;626a663821072b82d9059209&quot;},&quot;2&quot;:&lt;br/&gt;
Unknown macro: {&quot;$timestamp&quot;}&lt;br/&gt;
,&quot;newVersion&quot;: {&quot;chunkVersion&quot;: {&quot;0&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 49189,&quot;i&quot;: 1}},&quot;1&quot;: {&quot;$oid&quot;: &quot;626a663821072b82d9059209&quot;},&quot;2&quot;:&lt;br/&gt;
Unknown macro: {&quot;$timestamp&quot;}&lt;br/&gt;
,&quot;forcedRefreshSequenceNum&quot;: 15,&quot;epochDisambiguatingSequenceNum&quot;: 17},&quot;timeInStore&quot;: {&quot;chunkVersion&quot;: {&quot;0&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 49189,&quot;i&quot;: 1}},&quot;1&quot;: {&quot;$oid&quot;: &quot;626a663821072b82d9059209&quot;},&quot;2&quot;:&lt;br/&gt;
Unknown macro: {&quot;$timestamp&quot;}&lt;br/&gt;
,&quot;forcedRefreshSequenceNum&quot;: 15,&quot;epochDisambiguatingSequenceNum&quot;: 16}&lt;b&gt;,&quot;durationMillis&quot;: 2442}}&lt;/b&gt;&#160;&#160;&lt;/p&gt;&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;Logs after optimization:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;{&quot;t&quot;: \{&quot;$date&quot;: &quot;2022-10-17T15: 40: 01.742+08: 00&quot;}
&lt;p&gt;,&quot;s&quot;: &quot;I&quot;,&#160;&#160;&quot;c&quot;: &quot;SH_REFR&quot;,&#160;&#160;&quot;id&quot;: 4619901,&#160;&quot;ctx&quot;: &quot;CatalogCache-6&quot;,&quot;msg&quot;: &quot;Refreshed&#160;cached&#160;collection&quot;,&quot;attr&quot;: {&quot;namespace&quot;: &quot;test.test2&quot;,&quot;lookupSinceVersion&quot;: {&quot;0&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 49185,&quot;i&quot;: 1}},&quot;1&quot;: {&quot;$oid&quot;: &quot;626a663821072b82d9059209&quot;},&quot;2&quot;:&lt;br/&gt;
Unknown macro: {&quot;$timestamp&quot;}&lt;br/&gt;
,&quot;newVersion&quot;: {&quot;chunkVersion&quot;: {&quot;0&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 49186,&quot;i&quot;: 1}},&quot;1&quot;: {&quot;$oid&quot;: &quot;626a663821072b82d9059209&quot;},&quot;2&quot;:&lt;br/&gt;
Unknown macro: {&quot;$timestamp&quot;}&lt;br/&gt;
,&quot;forcedRefreshSequenceNum&quot;: 27,&quot;epochDisambiguatingSequenceNum&quot;: 29},&quot;timeInStore&quot;: {&quot;chunkVersion&quot;: {&quot;0&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 49186,&quot;i&quot;: 1}},&quot;1&quot;: {&quot;$oid&quot;: &quot;626a663821072b82d9059209&quot;},&quot;2&quot;:&lt;br/&gt;
Unknown macro: {&quot;$timestamp&quot;}&lt;br/&gt;
,&quot;forcedRefreshSequenceNum&quot;: 27,&quot;epochDisambiguatingSequenceNum&quot;: 28},&lt;b&gt;&quot;durationMillis&quot;: 2}}&lt;/b&gt;&#160;&#160;&lt;/p&gt;&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;&lt;b&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/attachment/418133/418133_image-2022-11-26-16-34-25-167.png&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="5008630" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:15:57 +0000"  >&lt;p&gt;I&apos;m terribly sorry, this is an improvement, but the Jira platform filed it under a fault type.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;I am from tencent cloud mongodb team, Our company is a partner with yours.&lt;/p&gt;

&lt;p&gt;In the past several years, the Tencent MongoDB team has noticed unusually slow queries on sharded clusters out of the blue while there&#8217;s no sign of any system resource shortage (CPU, RAM, I/O, etc). Looking further into this symptom, the team figured out that retrieving incremental routing info takes a lot of time when the total chunk number exceeds a certain threshold on sharded clusters. For instance, a sharded cluster with 250k chunks requires around 300ms to refresh routing info; for larger clusters, like a cluster with 1 million chunks, it&#8217;d take seconds to do the refresh, severely blocking all client queries.&lt;/p&gt;

&lt;p&gt;Other than that, refreshing routing info can consume more CPU: when a cluster has multiple sharded collections, CPU jitter is more obvious when their routing info is refreshed at the same time. This increases the cost of business development and limits the distributed functionality.&lt;/p&gt;

&lt;p&gt;Below are some of the cases we found in production &amp;amp; testing environment.&lt;/p&gt;
&lt;h1&gt;&lt;a name=&quot;1.%C2%A0%C2%A0Background&quot;&gt;&lt;/a&gt;1.&#160; &#160;Background&lt;/h1&gt;
&lt;h2&gt;&lt;a name=&quot;Case1%3Av4.0ProductClusterwith250kchunks&quot;&gt;&lt;/a&gt;Case 1: v4.0 Product Cluster with 250k chunks&lt;/h2&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Cluster Info&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;&lt;b&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/attachment/418132/418132_image-2022-11-26-16-23-01-172.png&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;

&lt;p&gt;Data size: &#160;5.5 billion docs, 1.2TB in total size;&lt;/p&gt;

&lt;p&gt;Chunk number: 250k;&lt;/p&gt;

&lt;p&gt;Refreshing routing info duration: &#160;200ms for mongos, 300ms for mongod.&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Related mongos logs&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;&lt;b&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/attachment/418131/418131_image-2022-11-26-16-23-18-848.png&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Thu&#160;Oct&#160;&#160;6&#160;11: 28: 42.556&#160;I&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-85148&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;orderSchedule.OrderDispatchLogDetail&#160;from&#160;version&#160;102961|686||62d157722a3a66acadc3b7a4&#160;to&#160;version&#160;102961|701||62d157722a3a66acadc3b7a4&#160;took&#160;190&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;Thu&#160;Oct&#160;&#160;6&#160;11: 28: 44.914&#160;I&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-85148&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;orderSchedule.OrderDispatchLogDetail&#160;from&#160;version&#160;102961|701||62d157722a3a66acadc3b7a4&#160;to&#160;version&#160;102961|704||62d157722a3a66acadc3b7a4&#160;took&#160;183&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;Thu&#160;Oct&#160;&#160;6&#160;11: 29: 41.923&#160;I&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-85149&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;orderSchedule.OrderDispatchLogDetail&#160;from&#160;version&#160;102961|704||62d157722a3a66acadc3b7a4&#160;to&#160;version&#160;102961|707||62d157722a3a66acadc3b7a4&#160;took&#160;194&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;Thu&#160;Oct&#160;&#160;6&#160;11: 32: 02.121&#160;I&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-85151&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;orderSchedule.OrderDispatchLogDetail&#160;from&#160;version&#160;102961|707||62d157722a3a66acadc3b7a4&#160;to&#160;version&#160;102961|723||62d157722a3a66acadc3b7a4&#160;took&#160;198&#160;ms&#160;&#160;&lt;/li&gt;
&lt;/ul&gt;


&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Related mongod logs&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;&lt;b&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/attachment/418130/418130_image-2022-11-26-16-23-53-761.png&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Thu&#160;Oct&#160;&#160;6&#160;11: 24: 11.358&#160;I&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-191078&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;orderSchedule.OrderDispatchLogDetail&#160;from&#160;version&#160;102961|603||62d157722a3a66acadc3b7a4&#160;to&#160;version&#160;102961|628||62d157722a3a66acadc3b7a4&#160;took&#160;262&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;Thu&#160;Oct&#160;&#160;6&#160;11: 24: 21.306&#160;I&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-191078&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;orderSchedule.OrderDispatchLogDetail&#160;from&#160;version&#160;102961|628||62d157722a3a66acadc3b7a4&#160;to&#160;version&#160;102961|631||62d157722a3a66acadc3b7a4&#160;took&#160;285&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;Thu&#160;Oct&#160;&#160;6&#160;11: 24: 45.905&#160;I&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-191078&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;orderSchedule.OrderDispatchLogDetail&#160;from&#160;version&#160;102961|631||62d157722a3a66acadc3b7a4&#160;to&#160;version&#160;102961|634||62d157722a3a66acadc3b7a4&#160;took&#160;265&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;Thu&#160;Oct&#160;&#160;6&#160;11: 25: 20.979&#160;I&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-191078&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;orderSchedule.OrderDispatchLogDetail&#160;from&#160;version&#160;102961|634||62d157722a3a66acadc3b7a4&#160;to&#160;version&#160;102961|644||62d157722a3a66acadc3b7a4&#160;took&#160;252&#160;ms&#160;&#160;&lt;/li&gt;
&lt;/ul&gt;


&lt;h2&gt;&lt;a name=&quot;Case2%3Av4.2ProductClusterwith1.5mchunks&quot;&gt;&lt;/a&gt;Case 2: v4.2 Product Cluster with 1.5m chunks&lt;/h2&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Cluster Info&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;&lt;b&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/attachment/418128/418128_image-2022-11-26-16-24-40-471.png&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;

&lt;p&gt;Data size: 15.5 billion docs&#65292;22.5TB in total size;&lt;/p&gt;

&lt;p&gt;Chunk number: 1.5 million;&lt;/p&gt;

&lt;p&gt;Refreshing routing info duration: 800ms for mongos, 1.2s for mongod.&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Related mongos logs&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;&lt;b&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/attachment/418127/418127_image-2022-11-26-16-24-57-224.png&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;2022-10-05T04: 18: 41.359+0800&#160;I&#160;&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-136639&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;wukong.actions&#160;from&#160;version&#160;102910|1||626ba80f5fa7cb632d7bf264&#160;to&#160;version&#160;102910|4||626ba80f5fa7cb632d7bf264&#160;took&#160;788&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;2022-10-05T04: 18: 50.800+0800&#160;I&#160;&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-136639&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;wukong.actions&#160;from&#160;version&#160;102910|4||626ba80f5fa7cb632d7bf264&#160;to&#160;version&#160;102910|7||626ba80f5fa7cb632d7bf264&#160;took&#160;780&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;2022-10-05T04: 19: 18.546+0800&#160;I&#160;&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-136639&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;wukong.actions&#160;from&#160;version&#160;102910|7||626ba80f5fa7cb632d7bf264&#160;to&#160;version&#160;102911|1||626ba80f5fa7cb632d7bf264&#160;took&#160;778&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;2022-10-05T04: 20: 01.105+0800&#160;I&#160;&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-136640&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;wukong.actions&#160;from&#160;version&#160;102911|1||626ba80f5fa7cb632d7bf264&#160;to&#160;version&#160;102912|1||626ba80f5fa7cb632d7bf264&#160;took&#160;781&#160;ms&#160;&lt;/li&gt;
&lt;/ul&gt;


&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Related mongod logs&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;&lt;b&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/attachment/418126/418126_image-2022-11-26-16-25-41-574.png&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;2022-10-06T10: 54: 49.219+0800&#160;I&#160;&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-141236&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;wukong.actions&#160;from&#160;version&#160;103200|584||626ba80f5fa7cb632d7bf264&#160;to&#160;version&#160;103200|593||626ba80f5fa7cb632d7bf264&#160;took&#160;1001&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;2022-10-06T10: 57: 42.071+0800&#160;I&#160;&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-141237&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;wukong.actions&#160;from&#160;version&#160;103200|593||626ba80f5fa7cb632d7bf264&#160;to&#160;version&#160;103200|608||626ba80f5fa7cb632d7bf264&#160;took&#160;1200&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;2022-10-06T11: 00: 36.781+0800&#160;I&#160;&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-141240&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;wukong.actions&#160;from&#160;version&#160;103200|608||626ba80f5fa7cb632d7bf264&#160;to&#160;version&#160;103200|623||626ba80f5fa7cb632d7bf264&#160;took&#160;1146&#160;ms&#160;&#160;&lt;/li&gt;
	&lt;li&gt;2022-10-06T11: 03: 34.142+0800&#160;I&#160;&#160;SH_REFR&#160;&#160;&lt;span class=&quot;error&quot;&gt;&amp;#91;ConfigServerCatalogCacheLoader-141241&amp;#93;&lt;/span&gt;&#160;Refresh&#160;for&#160;collection&#160;wukong.actions&#160;from&#160;version&#160;103200|623||626ba80f5fa7cb632d7bf264&#160;to&#160;version&#160;103200|632||626ba80f5fa7cb632d7bf264&#160;took&#160;1129&#160;ms&#160;&#160;&lt;/li&gt;
&lt;/ul&gt;


&lt;h2&gt;&lt;a name=&quot;Case3%3Av3.6ProductClusterwith4.3mchunks&quot;&gt;&lt;/a&gt;Case 3: v3.6 Product Cluster with 4.3m chunks&lt;/h2&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Cluster Info&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Data size: 120 billion docs, 80TB data size in total;&lt;/p&gt;

&lt;p&gt;Chunk number: 4.3 million;&lt;/p&gt;

&lt;p&gt;Refreshing routing info duration: 4s for mongos, 4.6s for mongod.&lt;/p&gt;
&lt;h2&gt;&lt;a name=&quot;Case4%3Av5.0TestClusterwith2mchunks&quot;&gt;&lt;/a&gt;Case 4: v5.0 Test Cluster with 2m chunks&lt;/h2&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Cluster Info&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Set up a v5.0 sharded cluster with 2 million chunks &#8211; Use &#8220;id&#8221; field as shard key, ranging from 0 to 100,000,000, generate 2m chunks by pre-splitting chunks.&lt;/p&gt;

&lt;p&gt;Note: here, the v5.0 test cluster&apos;s shard key is an integer id (ranging from 0 to 100,000,000), so the refresh takes less time.&#160; In production, the shard key is more complex, so the refresh takes longer than with this simple id shard key.&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Related mongos logs&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;//Refreshing routing info&lt;/p&gt;

&lt;p&gt;{&quot;t&quot;: {&quot;$date&quot;: &quot;2022-10-06T17:46:25.479+08:00&quot;},&quot;s&quot;: &quot;I&quot;,&#160;&#160;&quot;c&quot;: &quot;SH_REFR&quot;,&#160;&#160;&quot;id&quot;: 4619901,&#160;&quot;ctx&quot;: &quot;CatalogCache-2&quot;,&quot;msg&quot;: &quot;Refreshed&#160;cached&#160;collection&quot;,&quot;attr&quot;: {&quot;namespace&quot;: &quot;test.test2&quot;,&quot;lookupSinceVersion&quot;: {&quot;0&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 49182,&quot;i&quot;: 1}},&quot;1&quot;: {&quot;$oid&quot;: &quot;626a663821072b82d9059209&quot;},&quot;2&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 1651140151,&quot;i&quot;: 6}}},&quot;newVersion&quot;: {&quot;chunkVersion&quot;: {&quot;0&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 49182,&quot;i&quot;: 1}},&quot;1&quot;: {&quot;$oid&quot;: &quot;626a663821072b82d9059209&quot;},&quot;2&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 1651140151,&quot;i&quot;: 6}}},&quot;forcedRefreshSequenceNum&quot;: 21,&quot;epochDisambiguatingSequenceNum&quot;: 18},&quot;timeInStore&quot;: {&quot;chunkVersion&quot;: {&quot;0&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 49182,&quot;i&quot;: 1}},&quot;1&quot;: {&quot;$oid&quot;: &quot;626a663821072b82d9059209&quot;},&quot;2&quot;: {&quot;$timestamp&quot;: {&quot;t&quot;: 1651140151,&quot;i&quot;: 6}}},&quot;forcedRefreshSequenceNum&quot;: 20,&quot;epochDisambiguatingSequenceNum&quot;: 17}&lt;b&gt;,&quot;durationMillis&quot;: 896}}&lt;/b&gt;&#160;&lt;/p&gt;
&lt;h2&gt;&lt;a name=&quot;Case5%3Av5.0TestClusterwith5mchunks&quot;&gt;&lt;/a&gt;Case 5: v5.0 Test Cluster with 5m chunks&lt;/h2&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Cluster Info&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Set up a v5.0 sharded cluster with 5 million chunks &#8211; Use &#8220;id&#8221; field as shard key, generate 5m chunks by pre-splitting chunks.&lt;/p&gt;

&lt;p&gt;Note: here, the v5.0 test cluster&apos;s shard key is an integer id (ranging from 0 to 100,000,000), so the refresh takes less time.&#160; In production, the shard key is more complex, so the refresh takes longer than with this simple id shard key.&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Related mongod logs&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;ol&gt;
	&lt;li&gt;{&quot;t&quot;: {&quot;$date&quot;:&quot;2022-10-17T16:15:56.209+08:00&quot;}
&lt;p&gt;,&quot;s&quot;:&quot;I&quot;,&#160;&#160;&quot;c&quot;:&quot;SH_REFR&quot;,&#160;&#160;&quot;id&quot;:4619901,&#160;&quot;ctx&quot;:&quot;CatalogCache-3&quot;,&quot;msg&quot;:&quot;Refreshed&#160;cached&#160;collection&quot;,&quot;attr&quot;:{&quot;namespace&quot;:&quot;test.test2&quot;,&quot;lookupSinceVersion&quot;:{&quot;0&quot;:{&quot;$timestamp&quot;:{&quot;t&quot;:49188,&quot;i&quot;:1}},&quot;1&quot;:{&quot;$oid&quot;:&quot;626a663821072b82d9059209&quot;},&quot;2&quot;:&lt;br/&gt;
Unknown macro: {&quot;$timestamp&quot;}&lt;br/&gt;
,&quot;newVersion&quot;:{&quot;chunkVersion&quot;:{&quot;0&quot;:{&quot;$timestamp&quot;:{&quot;t&quot;:49189,&quot;i&quot;:1}},&quot;1&quot;:{&quot;$oid&quot;:&quot;626a663821072b82d9059209&quot;},&quot;2&quot;:&lt;br/&gt;
Unknown macro: {&quot;$timestamp&quot;}&lt;br/&gt;
,&quot;forcedRefreshSequenceNum&quot;:15,&quot;epochDisambiguatingSequenceNum&quot;:17},&quot;timeInStore&quot;:{&quot;chunkVersion&quot;:{&quot;0&quot;:{&quot;$timestamp&quot;:{&quot;t&quot;:49189,&quot;i&quot;:1}},&quot;1&quot;:{&quot;$oid&quot;:&quot;626a663821072b82d9059209&quot;},&quot;2&quot;:&lt;br/&gt;
Unknown macro: {&quot;$timestamp&quot;}&lt;br/&gt;
,&quot;forcedRefreshSequenceNum&quot;:15,&quot;epochDisambiguatingSequenceNum&quot;:16}&lt;b&gt;,&quot;durationMillis&quot;:2442}}&lt;/b&gt;&#160;&#160;&lt;/p&gt;&lt;/li&gt;
&lt;/ol&gt;


&lt;h1&gt;&lt;a name=&quot;2.%C2%A0%C2%A0IssueImpact&quot;&gt;&lt;/a&gt;2.&#160; &#160;Issue Impact&lt;/h1&gt;

&lt;p&gt;The more chunks there are and the more complex the shard key is, the longer the refresh takes. If refreshing routing info takes too long, the main effects are as follows:&#160;&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Cluster becomes un-responsive&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;All requests are blocked while mongos/mongod is retrieving incremental routing info; hence, the bigger the cluster is, the more unresponsive it can become.&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Uneven data distribution among shards&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;A sharded cluster with 1.4 million chunks had to keep the balancer disabled except for a few hours around midnight (when client request QPS is very low and CPU, IO and memory load are very low), because of the frequent slow queries caused by refreshing routing info. However, the balancing progress made during those midnight windows never closed the gap, and the shard imbalance ended up as shown below and is still worsening:&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/attachment/418125/418125_image-2022-11-26-16-28-47-370.png&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;&#160;After analysis, we found that the cluster jitter is caused by route refreshing; the system load is low during this period.&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;Increased cost of business development and limited distributed functionality&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;In order to avoid serious service jitter caused by routing problems, many users strictly limit the data amount of a collection(data of a single collection should not exceed 4T).&lt;/p&gt;

&lt;p&gt;When the amount of data in a collection exceeds 4T, the user needs to separate the collection. In this way, we lose the core distributed advantage that we have.&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;b&gt;High CPU consumption&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;The ChunkVector is iterated multiple times, which uses a lot of CPU resources, especially when there are many collections and many chunks.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10420">
                    <name>Backports</name>
                                            <outwardlinks description="backported by">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Depends</name>
                                            <outwardlinks description="depends on">
                                        <issuelink>
            <issuekey id="2331691">SERVER-76828</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is depended on by">
                                        <issuelink>
            <issuekey id="2076495">SERVER-67529</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="1539159">SERVER-52776</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="2339542">SERVER-77090</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="2379268">SERVER-78495</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10320">
                    <name>Documented</name>
                                                                <inwardlinks description="is documented by">
                                        <issuelink>
            <issuekey id="2388879">DOCS-16257</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10520">
                    <name>Problem/Incident</name>
                                            <outwardlinks description="causes">
                                        <issuelink>
            <issuekey id="2466990">SERVER-81966</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="2431246">SERVER-80596</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="418135" name="MongoDB Routing  Info Refresh Optimization-1.pdf" size="2486909" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:49:07 +0000"/>
                            <attachment id="418134" name="MongoDB Routing  Info Refresh Optimization.pdf" size="2486909" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:47:36 +0000"/>
                            <attachment id="418124" name="image-2022-11-26-16-21-42-695.png" size="59659" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:21:42 +0000"/>
                            <attachment id="418132" name="image-2022-11-26-16-23-01-172.png" size="59659" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:23:00 +0000"/>
                            <attachment id="418131" name="image-2022-11-26-16-23-18-848.png" size="187927" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:23:18 +0000"/>
                            <attachment id="418130" name="image-2022-11-26-16-23-53-761.png" size="176934" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:23:53 +0000"/>
                            <attachment id="418129" name="image-2022-11-26-16-24-26-994.png" size="113754" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:24:26 +0000"/>
                            <attachment id="418128" name="image-2022-11-26-16-24-40-471.png" size="113754" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:24:40 +0000"/>
                            <attachment id="418127" name="image-2022-11-26-16-24-57-224.png" size="135715" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:24:56 +0000"/>
                            <attachment id="418126" name="image-2022-11-26-16-25-41-574.png" size="152181" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:25:41 +0000"/>
                            <attachment id="418125" name="image-2022-11-26-16-28-47-370.png" size="50271" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:28:46 +0000"/>
                            <attachment id="418133" name="image-2022-11-26-16-34-25-167.png" size="349815" author="1147952115@qq.com" created="Sat, 26 Nov 2022 08:34:25 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    <customfield id="customfield_12751" key="com.atlassian.jira.plugin.system.customfieldtypes:multiselect">
                        <customfieldname>Assigned Teams</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="25133"><![CDATA[Sharding EMEA]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12450" key="com.atlassian.jira.plugin.system.customfieldtypes:multicheckboxes">
                        <customfieldname>Backport Requested</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="25578"><![CDATA[v7.0]]></customfieldvalue>
    <customfieldvalue key="23470"><![CDATA[v6.0]]></customfieldvalue>
    <customfieldvalue key="21777"><![CDATA[v5.0]]></customfieldvalue>
    <customfieldvalue key="18953"><![CDATA[v4.4]]></customfieldvalue>
    <customfieldvalue key="16775"><![CDATA[v4.2]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10011" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Backwards Compatibility</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10038"><![CDATA[Fully Compatible]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_13552" key="com.go2group.jira.plugin.crm:crm_generic_field">
                        <customfieldname>Case</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[[5006R00001ve2vDQAQ]]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Sat, 26 Nov 2022 20:06:45 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        24 weeks, 2 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[<s><a href='https://jira.mongodb.org/browse/SERVER-76828'>SERVER-76828</a></s>]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_17052" key="com.atlassian.jira.plugin.system.customfieldtypes:textarea">
                        <customfieldname>Downstream Changes Summary</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Expose a new server parameter named {{routingTableCacheChunkBucketSize}}. It is available for both mongod and mongos and allows specifying &#8220;Size of the routing table cache buckets used to implement chunk grouping optimization&#8221;.</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_17050" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Downstream Team Attention</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="16942"><![CDATA[Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>tommaso.tocci@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            24 weeks, 2 days ago
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_16465" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Linked BF Score</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>chris.kelly@mongodb.com</customfieldvalue>
            <customfieldvalue>xgen-internal-githook</customfieldvalue>
            <customfieldvalue>matt.panton@mongodb.com</customfieldvalue>
            <customfieldvalue>tommaso.tocci@mongodb.com</customfieldvalue>
            <customfieldvalue>1147952115@qq.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i1k547:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|i12qkw:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                            <customfield id="customfield_22250" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Special Downgrade Instructions Required</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="23343"><![CDATA[Not Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="6810">Sharding EMEA 2023-01-09</customfieldvalue>
    <customfieldvalue id="6811">Sharding EMEA 2023-01-23</customfieldvalue>
    <customfieldvalue id="6812">Sharding EMEA 2023-02-06</customfieldvalue>
    <customfieldvalue id="6813">Sharding EMEA 2023-02-20</customfieldvalue>
    <customfieldvalue id="6814">Sharding EMEA 2023-03-06</customfieldvalue>
    <customfieldvalue id="6815">Sharding EMEA 2023-03-20</customfieldvalue>
    <customfieldvalue id="6817">Sharding EMEA 2023-04-03</customfieldvalue>
    <customfieldvalue id="6818">Sharding EMEA 2023-04-17</customfieldvalue>
    <customfieldvalue id="7185">Sharding EMEA 2023-05-01</customfieldvalue>
    <customfieldvalue id="7186">Sharding EMEA 2023-05-15</customfieldvalue>
    <customfieldvalue id="7187">Sharding EMEA 2023-05-29</customfieldvalue>
    <customfieldvalue id="7188">Sharding EMEA 2023-06-12</customfieldvalue>
    <customfieldvalue id="7189">Sharding EMEA 2023-06-26</customfieldvalue>
    <customfieldvalue id="7190">Sharding EMEA 2023-07-10</customfieldvalue>
    <customfieldvalue id="7191">Sharding EMEA 2023-07-24</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    <customfield id="customfield_17051" key="com.atlassian.jira.plugin.system.customfieldtypes:multicheckboxes">
                        <customfieldname>Teams Impacted</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="16944"><![CDATA[Docs]]></customfieldvalue>
    <customfieldvalue key="16946"><![CDATA[Triage and Release]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                    <customfieldvalue><![CDATA[chris.kelly@mongodb.com]]></customfieldvalue>
    

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i1jr9j:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>