<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 05:44:24 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-58398] Tenant migration hung indefinitely</title>
                <link>https://jira.mongodb.org/browse/SERVER-58398</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;During serverless load testing 5 tenant migrations were issued as a result of an auto-scaling round. 4 of the 5 completed successfully (although they took ~7 hours to complete for a few MiB of data with minimal activity for those specific tenants). One migration (tenant ID&#160;60e4cf90ec86b15c50ab87b4 and migration id&#160;62894dd9-aa8f-46b6-aeb8-aa30c6dc7359) seemed to hang indefinitely (~13 hours) and ended up in&#160;FAILED_MIGRATION_CLEANUP_IN_PROGRESS.&lt;/p&gt;

&lt;p&gt;I will try to reproduce and gather artifacts that will paint a clearer picture. What artifacts exactly would be needed/desired?&lt;/p&gt;

&lt;p&gt;In the meantime, here are the mongod logs for the donor and recipient. Note, there was a rolling restart during the course of the migrations, so I&apos;ve attached both donor primary logs covering the period:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;donor-new-primary-stuck-on-aborting-index-builds-atlas-ysf0ds-shard-00-01.6oxx1.mmscloudteam.com_2021-07-07T02_30_00_2021-07-07T15_00_00_mongodb.log.gz
	&lt;ul&gt;
		&lt;li&gt;after the migration started, this node was selected as primary&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;donor-proxy-original-primary-stuck-on-aborting-index-builds-atlas-ysf0ds-shard-00-02.6oxx1.mmscloudteam.com_2021-07-07T02_30_00_2021-07-07T15_00_00_mongodb.log.gz
	&lt;ul&gt;
		&lt;li&gt;this is the proxy instance noted in the tenant migration document and the original primary&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;recipient-primary-stuck-on-aborting-index-builds-atlas-cqwy0o-shard-00-02.6oxx1.mmscloudteam.com_2021-07-07T02_30_00_2021-07-07T15_00_00_mongodb.log.gz
	&lt;ul&gt;
		&lt;li&gt;this was the primary for the duration of the test&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;recipient-proxy-stuck-on-aborting-index-builds-atlas-cqwy0o-shard-00-00.6oxx1.mmscloudteam.com_2021-07-07T02_30_00_2021-07-07T15_00_00_mongodb.log.gz
	&lt;ul&gt;
		&lt;li&gt;this is the proxy instance noted in the tenant migration document&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Rough timeline courtesy of&#160;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=tomer.yakir&quot; class=&quot;user-hover&quot; rel=&quot;tomer.yakir&quot;&gt;tomer.yakir&lt;/a&gt;:&lt;/p&gt;

&lt;p&gt;Donor:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Server restarted at 02:55&lt;/li&gt;
	&lt;li&gt;2:58 - some migration related data following stepUp&lt;/li&gt;
	&lt;li&gt;2:59 - oplog fetcher for migration&lt;/li&gt;
	&lt;li&gt;4:08 - server was slow&lt;/li&gt;
	&lt;li&gt;9:57 - some migrations finished&lt;/li&gt;
	&lt;li&gt;9:58 - got forgetMigration&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Recipient:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;2:48 - migrations started&lt;/li&gt;
	&lt;li&gt;2:59 - short read error&lt;/li&gt;
	&lt;li&gt;9:48 - migrations get committed&lt;/li&gt;
&lt;/ul&gt;
</description>
                <environment></environment>
        <key id="1813270">SERVER-58398</key>
            <summary>Tenant migration hung indefinitely</summary>
                <type id="1" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14703&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.mongodb.org/images/icons/priorities/blocker.svg">Blocker - P1</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="13201">Fixed</resolution>
                                        <assignee username="jason.chan@mongodb.com">Jason Chan</assignee>
                                    <reporter username="greg.banks@mongodb.com">Gregory Banks</reporter>
                        <labels>
                    </labels>
                <created>Fri, 9 Jul 2021 15:37:56 +0000</created>
                <updated>Sun, 29 Oct 2023 21:51:07 +0000</updated>
                            <resolved>Tue, 13 Jul 2021 23:25:11 +0000</resolved>
                                    <version>5.0.0-rc7</version>
                                    <fixVersion>5.0.1</fixVersion>
                    <fixVersion>5.1.0-rc0</fixVersion>
                                    <component>Replication</component>
                                        <votes>0</votes>
                                    <watches>26</watches>
                                                                                                                <comments>
                            <comment id="3931932" author="xgen-internal-githook" created="Tue, 13 Jul 2021 23:24:20 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Jason Chan&apos;, &apos;email&apos;: &apos;jason.chan@mongodb.com&apos;, &apos;username&apos;: &apos;jasonjhchan&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-58398&quot; title=&quot;Tenant migration hung indefinitely&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-58398&quot;&gt;&lt;del&gt;SERVER-58398&lt;/del&gt;&lt;/a&gt; TenantMigrationDonor will not retry recipientSyncData on non-retriable interruption errors&lt;/p&gt;

&lt;p&gt;(cherry picked from commit bbd0b90085c06de2882e48d68812ac822a4412f9)&lt;br/&gt;
Branch: v5.0&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/92662765968eff784a82adea2f57ee5d1125712d&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/92662765968eff784a82adea2f57ee5d1125712d&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="3931628" author="xgen-internal-githook" created="Tue, 13 Jul 2021 20:11:31 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Jason Chan&apos;, &apos;email&apos;: &apos;jason.chan@mongodb.com&apos;, &apos;username&apos;: &apos;jasonjhchan&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-58398&quot; title=&quot;Tenant migration hung indefinitely&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-58398&quot;&gt;&lt;del&gt;SERVER-58398&lt;/del&gt;&lt;/a&gt; TenantMigrationDonor will not retry recipientSyncData on non-retriable interruption errors&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/bbd0b90085c06de2882e48d68812ac822a4412f9&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/bbd0b90085c06de2882e48d68812ac822a4412f9&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="3928853" author="esha.maharishi@10gen.com" created="Mon, 12 Jul 2021 18:12:26 +0000"  >&lt;p&gt;Thanks &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=lingzhi.deng&quot; class=&quot;user-hover&quot; rel=&quot;lingzhi.deng&quot;&gt;lingzhi.deng&lt;/a&gt; - good to know, that would be a useful tool for Cloud if a similar hang happens in the future.&lt;/p&gt;</comment>
                            <comment id="3928114" author="lingzhi.deng" created="Mon, 12 Jul 2021 14:21:07 +0000"  >&lt;p&gt;Yes. I think manually aborting the migration should work around this as long as the &lt;tt&gt;donorAbortMigration&lt;/tt&gt; command would stop the donor from retrying sending the &lt;tt&gt;recipientSyncData&lt;/tt&gt; command, which I think it &lt;a href=&quot;https://github.com/mongodb/mongo/blob/r5.0.0-rc8/src/mongo/db/repl/tenant_migration_donor_service.cpp#L395-L400&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;should&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="3928057" author="esha.maharishi@10gen.com" created="Mon, 12 Jul 2021 14:08:00 +0000"  >&lt;p&gt;I&apos;m curious if manually aborting the migration would have worked despite this hang.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10420">
                    <name>Backports</name>
                                            <outwardlinks description="backported by">
                                                        </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="324337" name="donor-new-primary-stuck-on-aborting-index-builds-atlas-ysf0ds-shard-00-01.6oxx1.mmscloudteam.com_2021-07-07T02_30_00_2021-07-07T15_00_00_mongodb.log-1.gz" size="21994838" author="greg.banks@mongodb.com" created="Fri, 9 Jul 2021 15:49:24 +0000"/>
                            <attachment id="324331" name="donor-proxy-original-primary-stuck-on-aborting-index-builds-atlas-ysf0ds-shard-00-02.6oxx1.mmscloudteam.com_2021-07-07T02_30_00_2021-07-07T15_00_00_mongodb.log.gz" size="1918927" author="greg.banks@mongodb.com" created="Fri, 9 Jul 2021 15:41:55 +0000"/>
                            <attachment id="324334" name="recipient-primary-stuck-on-aborting-index-builds-atlas-cqwy0o-shard-00-02.6oxx1.mmscloudteam.com_2021-07-07T02_30_00_2021-07-07T15_00_00_mongodb.log-1.gz" size="7829114" author="greg.banks@mongodb.com" created="Fri, 9 Jul 2021 15:47:18 +0000"/>
                            <attachment id="324332" name="recipient-proxy-stuck-on-aborting-index-builds-atlas-cqwy0o-shard-00-00.6oxx1.mmscloudteam.com_2021-07-07T02_30_00_2021-07-07T15_00_00_mongodb.log.gz" size="7669817" author="greg.banks@mongodb.com" created="Fri, 9 Jul 2021 15:44:04 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_12450" key="com.atlassian.jira.plugin.system.customfieldtypes:multicheckboxes">
                        <customfieldname>Backport Requested</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="21777"><![CDATA[v5.0]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10011" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Backwards Compatibility</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10038"><![CDATA[Fully Compatible]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Fri, 9 Jul 2021 18:29:53 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2 years, 30 weeks, 1 day ago</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_17050" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Downstream Team Attention</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="16941"><![CDATA[Not Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>luke.bonanomi@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2 years, 30 weeks, 1 day ago</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10032" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Operating System</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10026"><![CDATA[ALL]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>esha.maharishi@mongodb.com</customfieldvalue>
            <customfieldvalue>xgen-internal-githook</customfieldvalue>
            <customfieldvalue>greg.banks@mongodb.com</customfieldvalue>
            <customfieldvalue>jason.chan@mongodb.com</customfieldvalue>
            <customfieldvalue>lingzhi.deng@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzrbfr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hzbmlb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="5038">Repl 2021-07-26</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10750" key="com.atlassian.jira.plugin.system.customfieldtypes:textarea">
                        <customfieldname>Steps To Reproduce</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>&lt;p&gt;Hard to say here exactly. The test included two MTMs with ~50 tenants each. There was one tenant that was generating load significant enough to trigger auto-scaling, with the other 50 tenants on the donor generating minimal load. 5 migrations were issued for tenants (the least active tenants on the MTM) all with the same donor and recipient. 4 completed, 1 ended up in a &quot;hung&quot; state from the perspective of MMS.&lt;/p&gt;</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzqxov:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>