<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 04:39:11 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-35219] Regain MongoDB balancer performance with sessions</title>
                <link>https://jira.mongodb.org/browse/SERVER-35219</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;h6&gt;&lt;a name=&quot;%C2%A0OriginalSummary&quot;&gt;&lt;/a&gt;&#160;Original Summary&lt;/h6&gt;
&lt;p&gt;MongoDB 3.6 Balancer much slower than 3.4&lt;/p&gt;

&lt;h6&gt;&lt;a name=&quot;OriginalDescription&quot;&gt;&lt;/a&gt;Original Description&lt;/h6&gt;
&lt;p&gt;I have a test for balancer between 3.6 and 3.4, and I found balancing on 3.6 is much slower than 3.4.&lt;/p&gt;

&lt;p&gt;I insert 100 million docs to 3.6 and 3.4, the single doc size is 2 kbytes.&#160;&lt;/p&gt;

&lt;p&gt;Collection initial as below:&lt;/p&gt;
&lt;blockquote&gt;&lt;p&gt;db.runCommand({shardCollection: &quot;ycsb.test&quot;, key: {_id: &quot;hashed&quot;}, numInitialChunks: 6500})&#160;&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Test Results:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;3.4.15 with MMAPv1 engine:&#160;
	&lt;ol&gt;
		&lt;li&gt;from 1 shard to 2 shards: use 41866 seconds, after balance, 3250 chunks on shard.&lt;/li&gt;
		&lt;li&gt;from 2 shards to 4 shards: use 30630 seconds, after balance 1625 chunks on 1 shard.&lt;/li&gt;
	&lt;/ol&gt;
	&lt;/li&gt;
	&lt;li&gt;3.6.5 with MMAPv1 engine:
	&lt;ol&gt;
		&lt;li&gt;from 1 shard to 2 shards: use 90200 seconds, after balance, 3250 chunks on shard.&lt;/li&gt;
		&lt;li&gt;from 2 shards to 4 shards: use 44679 seconds, after balance 1625 chunks on 1 shard.&lt;/li&gt;
	&lt;/ol&gt;
	&lt;/li&gt;
	&lt;li&gt;3.4.15 with wiredTiger engine:
	&lt;ol&gt;
		&lt;li&gt;from 1 shard to 2 shards: use 35635 seconds, after balance, 3250 chunks on shard.&lt;/li&gt;
		&lt;li&gt;from 2 shards to 4 shards: use 10740 seconds, after balance 1625 chunks on 1 shard.&lt;/li&gt;
	&lt;/ol&gt;
	&lt;/li&gt;
	&lt;li&gt;3.6.5 with wiredTiger engine:
	&lt;ol&gt;
		&lt;li&gt;from 1 shard to 2 shards: use 49762 seconds, after balance, 3250 chunks on shard.&lt;/li&gt;
		&lt;li&gt;from 2 shards to 4 shards: use 18961 seconds, after balance 1625 chunks on 1 shard.&lt;/li&gt;
	&lt;/ol&gt;
	&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;MongoDB configuration for MMAPv1 engine:&lt;/p&gt;
&lt;blockquote&gt;&lt;p&gt;security:&lt;br/&gt;
 &#160; authorization: disabled&lt;br/&gt;
 sharding:&lt;br/&gt;
 &#160; clusterRole: shardsvr&lt;br/&gt;
 replication:&lt;br/&gt;
 &#160; replSetName: rs1&lt;br/&gt;
 systemLog:&lt;br/&gt;
 &#160; logAppend: true&lt;br/&gt;
 &#160; destination: file&lt;br/&gt;
 &#160; path: /home/adun/3.4/log/mongod.log&lt;/p&gt;

&lt;p&gt;processManagement:&lt;br/&gt;
 &#160; fork: true&lt;br/&gt;
 &#160; pidFilePath: /home/adun/3.4/log/mongod.pid&lt;/p&gt;

&lt;p&gt;net:&lt;br/&gt;
 &#160; port: 27017&lt;br/&gt;
 &#160; bindIp: 127.0.0.1,192.168.10.31&lt;br/&gt;
 &#160; maxIncomingConnections: 65536&lt;/p&gt;

&lt;p&gt;storage:&lt;br/&gt;
 &#160; dbPath: /home/adun/3.4/data&lt;br/&gt;
 &#160; directoryPerDB: true&lt;br/&gt;
 &#160; engine: mmapv1&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;MongoDB configuration for wiredTiger engine:&lt;/p&gt;
&lt;blockquote&gt;&lt;p&gt;security:&lt;br/&gt;
 &#160; authorization: disabled&lt;br/&gt;
 sharding:&lt;br/&gt;
 &#160; clusterRole: shardsvr&lt;br/&gt;
 replication:&lt;br/&gt;
 &#160; replSetName: rs1&lt;br/&gt;
 systemLog:&lt;br/&gt;
 &#160; logAppend: true&lt;br/&gt;
 &#160; destination: file&lt;br/&gt;
 &#160; path: /home/adun/3.4/log/mongod.log&lt;/p&gt;

&lt;p&gt;processManagement:&lt;br/&gt;
 &#160; fork: true&lt;br/&gt;
 &#160; pidFilePath: /home/adun/3.4/log/mongod.pid&lt;/p&gt;

&lt;p&gt;net:&lt;br/&gt;
 &#160; port: 27017&lt;br/&gt;
 &#160; bindIp: 127.0.0.1,192.168.10.31&lt;br/&gt;
 &#160; maxIncomingConnections: 65536&lt;/p&gt;

&lt;p&gt;storage:&lt;br/&gt;
 &#160; dbPath: /home/adun/3.4/data&lt;br/&gt;
 &#160; directoryPerDB: true&lt;br/&gt;
 &#160; engine: wiredTiger&lt;/p&gt;

&lt;p&gt;&#160; wiredTiger:&lt;br/&gt;
 &#160; &#160; engineConfig:&lt;br/&gt;
 &#160; &#160; &#160; cacheSizeGB: 32&lt;br/&gt;
 &#160; &#160; &#160; directoryForIndexes: true&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Other Questions:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;If we set&#160;numInitialChunks to a small value, such as 100, mongodb will create/split chunks by itself, but even if we insert the same data and the same number of records, 3.6 will create about 10% more chunks than 3.4 (both MMAPv1 and wiredTiger).&lt;/li&gt;
&lt;/ol&gt;
</description>
                <environment></environment>
        <key id="549616">SERVER-35219</key>
            <summary>Regain MongoDB balancer performance with sessions</summary>
                <type id="4" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14710&amp;avatarType=issuetype">Improvement</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="13201">Fixed</resolution>
                                        <assignee username="randolph@mongodb.com">Randolph Tan</assignee>
                                    <reporter username="stutiredboy@gmail.com">Adun</reporter>
                        <labels>
                            <label>bkp</label>
                    </labels>
                <created>Fri, 25 May 2018 05:24:38 +0000</created>
                <updated>Sun, 29 Oct 2023 22:31:25 +0000</updated>
                            <resolved>Wed, 20 Mar 2019 19:19:21 +0000</resolved>
                                    <version>3.6.5</version>
                                    <fixVersion>3.6.12</fixVersion>
                    <fixVersion>4.0.8</fixVersion>
                    <fixVersion>4.1.10</fixVersion>
                                    <component>Sharding</component>
                                        <votes>2</votes>
                                    <watches>27</watches>
                                                                                                                <comments>
                            <comment id="2188826" author="xgen-internal-githook" created="Fri, 22 Mar 2019 16:55:53 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Randolph Tan&apos;, &apos;username&apos;: &apos;renctan&apos;, &apos;email&apos;: &apos;randolph@10gen.com&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-35219&quot; title=&quot;Regain MongoDB balancer performance with sessions&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-35219&quot;&gt;&lt;del&gt;SERVER-35219&lt;/del&gt;&lt;/a&gt; Change the sleep on the destination side into a cond var wait on the donor side of session migration.&lt;/p&gt;

&lt;p&gt;(cherry picked from commit 6d774652650dff718a8fa89c2bc845c3b11aa051)&lt;br/&gt;
Branch: v4.0&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/2b576d56ab6ac150ce7b1a5b0f592ffdcca105e9&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/2b576d56ab6ac150ce7b1a5b0f592ffdcca105e9&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="2187280" author="xgen-internal-githook" created="Thu, 21 Mar 2019 14:29:19 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Randolph Tan&apos;, &apos;username&apos;: &apos;renctan&apos;, &apos;email&apos;: &apos;randolph@10gen.com&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-35219&quot; title=&quot;Regain MongoDB balancer performance with sessions&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-35219&quot;&gt;&lt;del&gt;SERVER-35219&lt;/del&gt;&lt;/a&gt; Change the sleep on the destination side into a cond var wait on the donor side of session migration.&lt;/p&gt;

&lt;p&gt;(cherry picked from commit 6d774652650dff718a8fa89c2bc845c3b11aa051)&lt;br/&gt;
Branch: v3.6&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/979d456ccf4e6756e433799a626ca373a493fc8a&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/979d456ccf4e6756e433799a626ca373a493fc8a&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="2186514" author="xgen-internal-githook" created="Wed, 20 Mar 2019 18:38:59 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;email&apos;: &apos;randolph@10gen.com&apos;, &apos;name&apos;: &apos;Randolph Tan&apos;, &apos;username&apos;: &apos;renctan&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-35219&quot; title=&quot;Regain MongoDB balancer performance with sessions&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-35219&quot;&gt;&lt;del&gt;SERVER-35219&lt;/del&gt;&lt;/a&gt; Change the sleep on the destination side into a cond var wait on the donor side of session migration.&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/6d774652650dff718a8fa89c2bc845c3b11aa051&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/6d774652650dff718a8fa89c2bc845c3b11aa051&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="2113858" author="kaloian.manassiev" created="Mon, 14 Jan 2019 13:36:43 +0000"  >&lt;p&gt;This plan SGTM, but I think the trick will be in the implementation details.&lt;/p&gt;

&lt;p&gt;Minor detail is that &lt;tt&gt;enterCriticalSection&lt;/tt&gt;&#160;can also be counted as a producer since it needs to cause top/pop to return the EOF token.&lt;/p&gt;

&lt;p&gt;One thing to be mindful of is that this needs to be backported to 3.6 and I believe the futures code is not there. Not sure how easy it is to backport so it might be simpler to use &lt;tt&gt;Notification&amp;lt;void/OpTime&amp;gt;&lt;/tt&gt; instead (CC &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=redbeard0531&quot; class=&quot;user-hover&quot; rel=&quot;redbeard0531&quot;&gt;redbeard0531&lt;/a&gt;).&lt;/p&gt;

&lt;p&gt;You will also add &lt;a href=&quot;https://github.com/mongodb/mongo/blob/master/src/mongo/db/s/session_catalog_migration_source_test.cpp&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;unit-tests&lt;/a&gt; for the &lt;tt&gt;enterCriticalSection&lt;/tt&gt; scenario, right?&lt;/p&gt;</comment>
                            <comment id="2111332" author="misha.tyulenev" created="Thu, 10 Jan 2019 17:40:18 +0000"  >&lt;p&gt;Capturing offline discussion with &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=kaloian.manassiev&quot; class=&quot;user-hover&quot; rel=&quot;kaloian.manassiev&quot;&gt;kaloian.manassiev&lt;/a&gt;&lt;br/&gt;
Identified producer and consumer running on the separate threads:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;The producer is notifyNewWriteOpTime that comes from the opLogObserver - i.e. this is any new write to an oplog that comes from multiple threads&lt;/li&gt;
	&lt;li&gt;The consumer is the _getNextSessionMods command that runs on the runCommand thread and consumes the buffer of opTimes filled by the producer by fetching the oplogs matching the opTimes.&lt;/li&gt;
	&lt;li&gt;The MigrationSourceManager which also runs on its own thread initiates enterCriticalSection which blocks all new writes&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;The idea is to encapsulate all the produce consumer processing in an object that has the following API&lt;/p&gt;
&lt;p/&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;top()&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;pop()&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;push()&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;enterCriticalSection()&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p/&gt;
&lt;p&gt;It looks a lot like producerConsumer queue already implemented in the util, the major difference is that it will return a &lt;br/&gt;
setFuture, unsetFuture or set with none future to indicate no more data will be written - as outlined in the previous comments.&lt;/p&gt;

&lt;p&gt;the top() performs a little more as it needs to merge the data from transaction table and data written to the oplog.&lt;/p&gt;

&lt;p&gt;The unset future needs to bubble up all the way so the caller will wait on it outside the collection lock that is set for write error retry loop.&lt;/p&gt;</comment>
                            <comment id="2109912" author="kaloian.manassiev" created="Wed, 9 Jan 2019 15:29:03 +0000"  >&lt;blockquote&gt;&lt;p&gt;1. please clarify how to indicate to waitForMajority on the returned data. opLogResult has the boolean indicating if it needs to be waited or not&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;The contract of the &lt;a href=&quot;https://github.com/mongodb/mongo/blob/5d0f13334445fca6e2c5bfc496b5d5b1cb7e0f8a/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp#L287&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;&lt;tt&gt;_getNextSessionMods&lt;/tt&gt;&lt;/a&gt; command (which is what fetches entries from &lt;a href=&quot;https://github.com/mongodb/mongo/blob/5d0f13334445fca6e2c5bfc496b5d5b1cb7e0f8a/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp#L279&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;&lt;tt&gt;nextSessionMigrationBatch&lt;/tt&gt;&lt;/a&gt;) is that it must not return non-majority committed session entries.&lt;/p&gt;

&lt;p&gt;The easiest would be to wait on the maximum optime from the entries in the populated &lt;tt&gt;arrBuilder&lt;/tt&gt;, but since it is not optimal to re-scan it every time, I&apos;d say there could just be a second output parameter to &lt;tt&gt;nextSessionMigrationBatch&lt;/tt&gt;, which will contain what is the maximum opTime in that array.&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;2. not sure what structure you mean here, if it&apos;s PromiseAndFuture it cannot be exposed and not supposed to be held &lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;&lt;a href=&quot;https://github.com/mongodb/mongo/blob/5d0f13334445fca6e2c5bfc496b5d5b1cb7e0f8a/src/mongo/db/transaction_coordinator.h#L201&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;This is&lt;/a&gt; the concept that I was imagining. The &lt;tt&gt;SessionCatalogSource&lt;/tt&gt; is essentially a fairly simple multi-producer/single-consumer queue and its only complication comes from the fact that waiting cannot be done under the collection lock.&lt;/p&gt;

&lt;p&gt;So conceptually, its contract becomes &quot;if I can give you data, I will give it, otherwise I will give you a future to wait on outside of the lock so you can call me again&quot;. The only time where data may not be available is if there aren&apos;t any retryable writes that ran.&lt;/p&gt;

&lt;p&gt;Does that make sense?&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;3. I like this idea but to have a benefit of futurizing the following changes seems to me inline with future use:&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;I am not sure I understand that, let&apos;s talk in person.&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;5. depends on how the nextSessionMigrationBatch is implemented - it will be great to avoid converting Future&amp;lt;boost::optional&amp;lt;OplogEntry&amp;gt;&amp;gt; into Future&amp;lt;bool&amp;gt;&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;The &lt;tt&gt;SessionCatalogMigrationSource&lt;/tt&gt; and the &lt;tt&gt;MigrationChunkClonerSourceLegacy&lt;/tt&gt; have different interfaces. The former gives you one session entry (OplogEntry) at a time, while the latter fills a whole buffer. There is no way to avoid converting them, but don&apos;t see an issue with that either.&lt;/p&gt;</comment>
                            <comment id="2109264" author="misha.tyulenev" created="Tue, 8 Jan 2019 22:32:45 +0000"  >&lt;p&gt;Thank you for the proposal &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=kaloian.manassiev&quot; class=&quot;user-hover&quot; rel=&quot;kaloian.manassiev&quot;&gt;kaloian.manassiev&lt;/a&gt;&lt;br/&gt;
Here are my thoughts, could you please clarify?&lt;br/&gt;
1. please clarify how to indicate to waitForMajority on the returned data. opLogResult has the boolean indicating if it needs to be waited or not&lt;br/&gt;
2. not sure what structure you mean here, if it&apos;s &lt;a href=&quot;https://github.com/mongodb/mongo/blob/r4.1.6/src/mongo/util/future.h#L614&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;PromiseAndFuture&lt;/a&gt; it cannot be exposed and not supposed to be held &lt;br/&gt;
3. I like this idea but to have a benefit of futurizing the following changes seems to me inline with future use:&lt;/p&gt;

&lt;p&gt;a) &lt;a href=&quot;https://github.com/mongodb/mongo/blob/r4.1.6/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp#L690&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;MigrationChunkClonerSourceLegacy::nextSessionMigrationBatch &lt;/a&gt;got to be changed from the current model of synchronous iteration to&lt;/p&gt;
&lt;p/&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;while (true) {&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;   auto future = _sessionCatalogSource-&amp;gt;fetchNextFutureOplog();&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;   if (future.isReady()) {&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;      // add  data to buffer or return&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;   }&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;   else {&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;     return future;&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;   }&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;}&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p/&gt;

&lt;p&gt;b) fetchNextFutureOplog() or in your suggestions a code that sets _lastReturnedFuture has to differentiate between writeOpLog buffer, or sessionOpLog data.&lt;br/&gt;
Can it be both possible futures? &lt;br/&gt;
4. 6 Agree and it eliminates the need for &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-38874&quot; title=&quot;Add ability to observe entering critical section in ShardingMigrationCriticalSection&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-38874&quot;&gt;&lt;del&gt;SERVER-38874&lt;/del&gt;&lt;/a&gt;&lt;br/&gt;
5. depends on how the nextSessionMigrationBatch is implemented - it will be great to avoid converting  Future&amp;lt;boost::optional&amp;lt;OplogEntry&amp;gt;&amp;gt; into Future&amp;lt;bool&amp;gt;&lt;br/&gt;
Thanks!&lt;/p&gt;
</comment>
                            <comment id="2107770" author="kaloian.manassiev" created="Mon, 7 Jan 2019 20:33:40 +0000"  >&lt;p&gt;In terms of implementation, I propose the following:&lt;/p&gt;

&lt;ol&gt;
	&lt;li&gt;Get rid of &lt;tt&gt;SessionCatalogMigrationSource::OplogResult&lt;/tt&gt; and since &lt;tt&gt;OplogEntry&lt;/tt&gt; is self-describing and contains its optime, instead use that directly&lt;/li&gt;
	&lt;li&gt;Add a &lt;tt&gt;boost::optional&amp;lt;SharedFutureAndPromise&amp;lt;OplogResult&amp;gt;&amp;gt; _lastReturnedFuture&lt;/tt&gt; to &lt;tt&gt;SessionCatalogMigrationSource&lt;/tt&gt;.&lt;/li&gt;
	&lt;li&gt;Change &lt;tt&gt;SessionCatalogMigrationSource::getLastFetchedOplog&lt;/tt&gt; to return &lt;tt&gt;Future&amp;lt;boost::optional&amp;lt;OplogEntry&amp;gt;&amp;gt;&lt;/tt&gt;, where the return values mean the following:
	&lt;ul&gt;
		&lt;li&gt;Set future with OplogEntry means &quot;append that entry to the out buffer&quot;&lt;/li&gt;
		&lt;li&gt;Set future with boost::none means &quot;no more session oplog entries are necessary to be migrated&quot; (this can only happen if &lt;tt&gt;onCommitStarted&lt;/tt&gt; was called)&lt;/li&gt;
		&lt;li&gt;Unset future means the caller needs to block waiting to see what to do next&lt;/li&gt;
		&lt;li&gt;Exception thrown means abandon migration&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;Add function called &lt;tt&gt;SessionCatalogMigrationSource::onCommitStarted&lt;/tt&gt;, which will be called from &lt;tt&gt;MigrationChunkClonerSourceLegacy::commitClone&lt;/tt&gt; and will indicate that &lt;tt&gt;SessionCatalogMigrationSource::notifyNewWriteOpTime&lt;/tt&gt; will no longer be called and that &lt;tt&gt;SessionCatalogMigrationSource::getLastFetchedOplog&lt;/tt&gt; must return &lt;tt&gt;boost::none&lt;/tt&gt; instead of unset futures if it doesn&apos;t find anything in the buffer and that it should set the &lt;tt&gt;_lastReturnedFuture.promise to boost::none&lt;/tt&gt;.&lt;/li&gt;
	&lt;li&gt;Change &lt;tt&gt;MigrationChunkClonerSourceLegacy::nextSessionMigrationBatch&lt;/tt&gt; to return &lt;tt&gt;Future&amp;lt;bool&amp;gt;&lt;/tt&gt; where the return values mean the following:
	&lt;ul&gt;
		&lt;li&gt;Set future with &quot;true&quot; means something was put in the buffer and there is more data, so &lt;tt&gt;nextSessionMigrationBatch&lt;/tt&gt; can be called again and will put more stuff in the buffer&lt;/li&gt;
		&lt;li&gt;Set future with &quot;false&quot; means nothing was available to put in the buffer and the &quot;no more session oplog entries are necessary to be migrated&quot;&lt;/li&gt;
		&lt;li&gt;Unset future means the caller needs to drop locks and block waiting to see what to do next&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;Change &lt;tt&gt;MigrationChunkClonerSourceLegacy::commitClone&lt;/tt&gt; to call &lt;tt&gt;SessionCatalogMigrationSource::onCommitStarted&lt;/tt&gt;.&lt;/li&gt;
&lt;/ol&gt;
</comment>
                            <comment id="2084526" author="kaloian.manassiev" created="Fri, 7 Dec 2018 22:00:11 +0000"  >&lt;p&gt;I agree - they can go away in 4.2 and the loop on the recipient side can be simplified since it doesn&apos;t need to expect or send them anymore, nor does it need to sleep.&lt;/p&gt;

&lt;p&gt;Please don&apos;t forget to file a ticket.&lt;/p&gt;</comment>
                            <comment id="2084397" author="misha.tyulenev" created="Fri, 7 Dec 2018 20:29:33 +0000"  >&lt;p&gt;Yes, thanks for the detailed description. &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=kaloian.manassiev&quot; class=&quot;user-hover&quot; rel=&quot;kaloian.manassiev&quot;&gt;kaloian.manassiev&lt;/a&gt;  Will only add that those parameters must be marked as transitory and should be removed in the version following 4.2&lt;/p&gt;</comment>
                            <comment id="2083896" author="kaloian.manassiev" created="Fri, 7 Dec 2018 15:25:46 +0000"  >&lt;p&gt;Steps 1-3 sound good to me. I made a couple of small clarifications on 2 that the command will become blocking and that its protocol will become - call it until it returns no results (EOF) or until it fails with anything other than &lt;tt&gt;TimeLimitExceeded&lt;/tt&gt; (if the caller specified a MaxTimeMS, which I think is prudent to do).&lt;/p&gt;

&lt;p&gt;For Step 4 - the (a) and (b) variants are actually both needed, they are not two different solutions as far as I can see. Let me try to clarify what you described to make sure we are on the same page:&lt;br/&gt;
The &lt;tt&gt;_getNextSessionMods&lt;/tt&gt; command will change like this:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Introduce a new optional boolean parameter called say &lt;tt&gt;waitForData&lt;/tt&gt;, which when specified will request the &quot;long-poll&quot; behaviour described in Step 2 and if set to &lt;tt&gt;false&lt;/tt&gt; or not specified, will behave as it does currently and return empty results.&lt;/li&gt;
	&lt;li&gt;Make &lt;tt&gt;_getNextSessionMods&lt;/tt&gt; command return a boolean &lt;tt&gt;waited&lt;/tt&gt; field (or &lt;tt&gt;done&lt;/tt&gt; like you proposed, which is the inverted value), which will be set if the command blocked waiting for results.&lt;/li&gt;
&lt;/ul&gt;


&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;The &lt;b&gt;new version&lt;/b&gt; recipient will call &lt;tt&gt;_getNextSessionMods, waitForData: true&lt;/tt&gt; and will sleep &lt;a href=&quot;https://github.com/mongodb/mongo/blob/d17de2b958eb660593e2f5a180eff43ebe803b85/src/mongo/db/s/session_catalog_migration_destination.cpp#L430&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;here&lt;/a&gt; only if the return from that command was missing the &lt;tt&gt;waited&lt;/tt&gt; field in the response.&lt;/li&gt;
	&lt;li&gt;The &lt;b&gt;new version&lt;/b&gt; donor, when executing the &lt;tt&gt;_getNextSessionMods&lt;/tt&gt; command, will only block if &lt;tt&gt;waitForData: true&lt;/tt&gt; - otherwise it will behave exactly as it does today.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;That way the following combinations of partial upgrade will continue to work seamlessly:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Old donor, New recipient - Old donor will not include the &lt;tt&gt;waited&lt;/tt&gt; field so the New recipient will back-off like it does today&lt;/li&gt;
	&lt;li&gt;New donor, Old recipient - Old recipient will not specify the &lt;tt&gt;waitForData&lt;/tt&gt; field, so the New donor will return immediately&lt;/li&gt;
	&lt;li&gt;New donor, New recipient - The recipient will specify &lt;tt&gt;waitForData: true&lt;/tt&gt; and the donor will sleep, returning &lt;tt&gt;waited:true&lt;/tt&gt;, which will cause the recipient to &lt;b&gt;not&lt;/b&gt; sleep&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Does this match your understanding?&lt;/p&gt;</comment>
                            <comment id="2083328" author="misha.tyulenev" created="Thu, 6 Dec 2018 22:39:43 +0000"  >&lt;p&gt;Here is the breakdown of the proposed approach to the finer steps per a discussion with &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=kaloian.manassiev&quot; class=&quot;user-hover&quot; rel=&quot;kaloian.manassiev&quot;&gt;kaloian.manassiev&lt;/a&gt;&lt;/p&gt;

&lt;p&gt; 1. Sleep no more on the recipient:&lt;br/&gt;
 Remove the &lt;a href=&quot;https://github.com/mongodb/mongo/blob/d17de2b958eb660593e2f5a180eff43ebe803b85/src/mongo/db/s/session_catalog_migration_destination.cpp#L430&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;backoff sleep&lt;/a&gt; code. Instead, the recipient should &lt;a href=&quot;https://github.com/mongodb/mongo/blob/d17de2b958eb660593e2f5a180eff43ebe803b85/src/mongo/db/s/session_catalog_migration_destination.cpp#L182&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;wait on the donor&lt;/a&gt; to execute the _getNextSessionMods command, which will be made blocking instead.&lt;/p&gt;

&lt;p&gt;The &lt;tt&gt;_getNextSessionMods&lt;/tt&gt; command on the donor execution follows the following path:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;It is wrapped in the &lt;a href=&quot;https://github.com/mongodb/mongo/blob/r4.1.6/src/mongo/db/concurrency/write_conflict_exception.h#L81&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;writeConflictRetry&lt;/a&gt; loop&lt;/li&gt;
	&lt;li&gt;It &lt;a href=&quot;https://github.com/mongodb/mongo/blob/r4.1.6/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp#L699&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;iterates&lt;/a&gt; while &lt;tt&gt;SessionCatalogMigrationSource::hasMoreOplog()&lt;/tt&gt; and breaks out of the loop if the size of the result exceeds the limit.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;2. Make &lt;tt&gt;_getNextSessionMods&lt;/tt&gt; command blocking:&lt;br/&gt;
 In order to wait on the donor it would be sufficient to return a future that can be waited on in the &lt;tt&gt;_getNextSessionMods&lt;/tt&gt;&apos;s &lt;a href=&quot;https://github.com/mongodb/mongo/blob/r4.1.6/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp#L272&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;run&lt;/a&gt; method.&lt;/p&gt;
&lt;p/&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;while( auto future = autoCloner.getCloner()-&amp;gt;nextSessionMigrationBatch()) {&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;    future-&amp;gt;wait();&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;}&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p/&gt;

&lt;p&gt;3. &lt;a href=&quot;https://github.com/mongodb/mongo/blob/r4.1.6/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp#L699&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;This loop &lt;/a&gt; should be modified to return the future.&lt;/p&gt;

&lt;p&gt;4. Lastly, the multi version shards should support this protocol. There are two ways to solve it:&lt;br/&gt;
&#160;a. by signalling that no more wait is needed by returning 0&lt;br/&gt;
&#160;b. use an extra parameter _done in the command as in &lt;a href=&quot;https://github.com/mongodb/mongo/commit/2dc87c711cb0bf85fdc8dd1d65b5a83e144509fa&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;SERVER-32886&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="2076535" author="kaloian.manassiev" created="Fri, 30 Nov 2018 10:17:51 +0000"  >&lt;p&gt;While looking at the retryable writes migration code for an unrelated reason, I noticed &lt;a href=&quot;https://github.com/mongodb/mongo/blob/d17de2b958eb660593e2f5a180eff43ebe803b85/src/mongo/db/s/session_catalog_migration_destination.cpp#L430&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;this backoff sleep&lt;/a&gt; and I am pretty sure this is what is contributing to the sometimes 400ms stall during the migration critical section.&lt;/p&gt;

&lt;p&gt;As implemented currently I think it is likely to be at least one 200 msec stall at transaction commit, because we need to &lt;a href=&quot;https://github.com/mongodb/mongo/blob/d17de2b958eb660593e2f5a180eff43ebe803b85/src/mongo/db/s/session_catalog_migration_destination.cpp#L401&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;drain the oplog&lt;/a&gt; at least once after the recipient enters the commit phase. However, in pathological situations the commit could be &lt;a href=&quot;https://github.com/mongodb/mongo/blob/d17de2b958eb660593e2f5a180eff43ebe803b85/src/mongo/db/s/migration_destination_manager.cpp#L487&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;entered&lt;/a&gt; just after the donor has &lt;a href=&quot;https://github.com/mongodb/mongo/blob/d17de2b958eb660593e2f5a180eff43ebe803b85/src/mongo/db/s/session_catalog_migration_destination.cpp#L392&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;returned&lt;/a&gt; an empty batch, but just before commit is entered. In this case there will be one 200 msec wait before entering the commit phase and one 200 msec wait after entering it.&lt;/p&gt;

&lt;p&gt;And I think actually in the current implementation, not having any retryable writes running would make it worse because that&apos;s when the back-off would get activated since the batches will always be empty &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.mongodb.org/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=renctan&quot; class=&quot;user-hover&quot; rel=&quot;renctan&quot;&gt;renctan&lt;/a&gt;, can you confirm my hypothesis above and whether the proposed solution makes sense?&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Proposed fix&lt;/b&gt;&lt;br/&gt;
Remove the back-off sleep from the recipient side and make the &lt;a href=&quot;https://github.com/mongodb/mongo/blob/d17de2b958eb660593e2f5a180eff43ebe803b85/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp#L267&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;&lt;tt&gt;_getNextSessionMods&lt;/tt&gt;&lt;/a&gt; command to use long-poll instead and make it the donor&apos;s responsibility to decide when end-of-stream is reached (which would be that there are no more oplog entries to return after &lt;a href=&quot;https://github.com/mongodb/mongo/blob/d17de2b958eb660593e2f5a180eff43ebe803b85/src/mongo/db/s/migration_source_manager.cpp#L325&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;entering the critical section&lt;/a&gt;).&lt;/p&gt;

&lt;p&gt;Basically the donor will block the &lt;tt&gt;_getNextSessionMods&lt;/tt&gt; call until there are oplog entries available to return. An empty return from that command should indicate &quot;no more entries to migrate&quot;.&lt;/p&gt;</comment>
                            <comment id="1958658" author="sarah.zhou" created="Fri, 27 Jul 2018 16:02:06 +0000"  >&lt;p&gt;Hi &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=stutiredboy%40gmail.com&quot; class=&quot;user-hover&quot; rel=&quot;stutiredboy@gmail.com&quot;&gt;stutiredboy@gmail.com&lt;/a&gt;,&#160;&lt;/p&gt;

&lt;p&gt;Thanks for the report! The introduction of &lt;a href=&quot;https://docs.mongodb.com/manual/reference/server-sessions/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://docs.mongodb.com/manual/reference/server-sessions/&lt;/a&gt; and &lt;a href=&quot;https://docs.mongodb.com/manual/core/retryable-writes/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://docs.mongodb.com/manual/core/retryable-writes/&lt;/a&gt; in 3.6 led to additional overhead in the migration process which as a result slowed down the balancer. We ran tests comparing moveChunk in 3.4 and 3.6 (balancer issues moveChunk commands in the balancing process), and I&apos;ve attached two graphs from our findings which illustrate in what steps specifically the increase for moveChunk is. We will continue to further investigate this decrease in performance and keep you updated.&lt;br/&gt;
&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;a id=&quot;192832_thumb&quot; href=&quot;https://jira.mongodb.org/secure/attachment/192832/192832_chart.png&quot; title=&quot;chart.png&quot; file-preview-type=&quot;image&quot; file-preview-id=&quot;192832&quot; file-preview-title=&quot;chart.png&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/thumbnail/192832/_thumb_192832.png&quot; style=&quot;border: 0px solid black&quot; role=&quot;presentation&quot;/&gt;&lt;/a&gt;&lt;/span&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;a id=&quot;192831_thumb&quot; href=&quot;https://jira.mongodb.org/secure/attachment/192831/192831_chart-2.png&quot; title=&quot;chart-2.png&quot; file-preview-type=&quot;image&quot; file-preview-id=&quot;192831&quot; file-preview-title=&quot;chart-2.png&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/thumbnail/192831/_thumb_192831.png&quot; style=&quot;border: 0px solid black&quot; role=&quot;presentation&quot;/&gt;&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10420">
                    <name>Backports</name>
                                            <outwardlinks description="backported by">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Depends</name>
                                            <outwardlinks description="depends on">
                                        <issuelink>
            <issuekey id="667052">SERVER-38874</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10520">
                    <name>Problem/Incident</name>
                                            <outwardlinks description="causes">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                        <issuelink>
            <issuekey id="718610">SERVER-40187</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="192831" name="chart-2.png" size="8949" author="sarah.zhou@mongodb.com" created="Fri, 27 Jul 2018 15:48:36 +0000"/>
                            <attachment id="192832" name="chart.png" size="11078" author="sarah.zhou@mongodb.com" created="Fri, 27 Jul 2018 15:48:36 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_12450" key="com.atlassian.jira.plugin.system.customfieldtypes:multicheckboxes">
                        <customfieldname>Backport Requested</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="15640"><![CDATA[v4.0]]></customfieldvalue>
    <customfieldvalue key="15141"><![CDATA[v3.6]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10011" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Backwards Compatibility</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10038"><![CDATA[Fully Compatible]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_13552" key="com.go2group.jira.plugin.crm:crm_generic_field">
                        <customfieldname>Case</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[[500A000000cG8tEIAS, 5002K00000dDEZ0QAO]]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Fri, 25 May 2018 18:48:58 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        4 years, 46 weeks, 5 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[<s><a href='https://jira.mongodb.org/browse/SERVER-38874'>SERVER-38874</a></s>]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                            <customfield id="customfield_10857" key="com.pyxis.greenhopper.jira:gh-epic-link">
                        <customfieldname>Epic Link</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>PM-1261</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>luke.bonanomi@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            4 years, 46 weeks, 5 days ago
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_16465" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Linked BF Score</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>stutiredboy@gmail.com</customfieldvalue>
            <customfieldvalue>xgen-internal-githook</customfieldvalue>
            <customfieldvalue>kaloian.manassiev@mongodb.com</customfieldvalue>
            <customfieldvalue>misha.tyulenev@mongodb.com</customfieldvalue>
            <customfieldvalue>randolph@mongodb.com</customfieldvalue>
            <customfieldvalue>sarah.zhou@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|htz14v:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hu43sn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="2386">Sharding 2018-07-16</customfieldvalue>
    <customfieldvalue id="2639">Sharding 2018-12-17</customfieldvalue>
    <customfieldvalue id="2640">Sharding 2018-12-31</customfieldvalue>
    <customfieldvalue id="2725">Sharding 2019-01-14</customfieldvalue>
    <customfieldvalue id="2726">Sharding 2019-01-28</customfieldvalue>
    <customfieldvalue id="2786">Sharding 2019-02-11</customfieldvalue>
    <customfieldvalue id="2787">Sharding 2019-02-25</customfieldvalue>
    <customfieldvalue id="2824">Sharding 2019-03-11</customfieldvalue>
    <customfieldvalue id="2825">Sharding 2019-03-25</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|htyne7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>