<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 05:06:05 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-44477] Map reduce with mode &quot;merge&quot; to an existing sharded collection may drop and recreate the target if no docs exist on the primary shard</title>
                <link>https://jira.mongodb.org/browse/SERVER-44477</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description></description>
                <environment></environment>
        <key id="992862">SERVER-44477</key>
            <summary>Map reduce with mode &quot;merge&quot; to an existing sharded collection may drop and recreate the target if no docs exist on the primary shard</summary>
                <type id="1" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14703&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="13201">Fixed</resolution>
                                        <assignee username="nicholas.zolnierz@mongodb.com">Nicholas Zolnierz</assignee>
                                    <reporter username="nicholas.zolnierz@mongodb.com">Nicholas Zolnierz</reporter>
                        <labels>
                            <label>qopt-team</label>
                    </labels>
                <created>Thu, 7 Nov 2019 19:10:45 +0000</created>
                <updated>Sun, 29 Oct 2023 22:15:14 +0000</updated>
                            <resolved>Wed, 8 Jan 2020 18:04:26 +0000</resolved>
                                    <version>3.6.15</version>
                    <version>4.0.13</version>
                    <version>4.2.1</version>
                                    <fixVersion>3.6.17</fixVersion>
                    <fixVersion>4.2.3</fixVersion>
                    <fixVersion>4.0.15</fixVersion>
                                                        <votes>0</votes>
                                    <watches>3</watches>
                                                                                                                <comments>
                            <comment id="2724869" author="xgen-internal-githook" created="Tue, 14 Jan 2020 17:57:15 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Nicholas Zolnierz&apos;, &apos;email&apos;: &apos;nicholas.zolnierz@mongodb.com&apos;, &apos;username&apos;: &apos;nzolnierzmdb&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-44477&quot; title=&quot;Map reduce with mode &amp;quot;merge&amp;quot; to an existing sharded collection may drop and recreate the target if no docs exist on the primary shard&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-44477&quot;&gt;&lt;del&gt;SERVER-44477&lt;/del&gt;&lt;/a&gt; Use correct collection count in cluster MR when determining whether to drop and reshard target&lt;br/&gt;
Branch: v3.6&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/4f329c0b056c75d67567577773039da8f3114cf1&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/4f329c0b056c75d67567577773039da8f3114cf1&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="2722594" author="xgen-internal-githook" created="Mon, 13 Jan 2020 18:38:45 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Nicholas Zolnierz&apos;, &apos;email&apos;: &apos;nicholas.zolnierz@mongodb.com&apos;, &apos;username&apos;: &apos;nzolnierzmdb&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-44477&quot; title=&quot;Map reduce with mode &amp;quot;merge&amp;quot; to an existing sharded collection may drop and recreate the target if no docs exist on the primary shard&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-44477&quot;&gt;&lt;del&gt;SERVER-44477&lt;/del&gt;&lt;/a&gt; Use correct collection count in cluster MR when determining whether to drop and reshard target&lt;br/&gt;
Branch: v4.0&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/0460c5964375a50df62811e36a40fc5abf8c7a5a&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/0460c5964375a50df62811e36a40fc5abf8c7a5a&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="2712382" author="xgen-internal-githook" created="Wed, 8 Jan 2020 17:16:56 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Nicholas Zolnierz&apos;, &apos;email&apos;: &apos;nicholas.zolnierz@mongodb.com&apos;, &apos;username&apos;: &apos;nzolnierzmdb&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-44477&quot; title=&quot;Map reduce with mode &amp;quot;merge&amp;quot; to an existing sharded collection may drop and recreate the target if no docs exist on the primary shard&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-44477&quot;&gt;&lt;del&gt;SERVER-44477&lt;/del&gt;&lt;/a&gt; Use correct collection count in cluster MR when determining whether to drop and reshard target&lt;br/&gt;
Branch: v4.2&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/da7de3e73ea35a7c56606ef53cd2069658d02f08&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/da7de3e73ea35a7c56606ef53cd2069658d02f08&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="2526058" author="esha.maharishi@10gen.com" created="Fri, 8 Nov 2019 20:37:05 +0000"  >&lt;p&gt;Context on how this bug came to be:&lt;/p&gt;

&lt;p&gt;If the final output collection is empty on a shard, then at the end of the second phase, that shard will apply &lt;a href=&quot;https://github.com/mongodb/mongo/blob/f02841245d47c0118f18aa1f1ee88c72ddf012ab/src/mongo/db/commands/mr.cpp#L740&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;this optimization&lt;/a&gt; to drop the output collection and rename the temp collection into place. This would be a problem if the output collection is supposed to be sharded, because then the output collection on each shard would end up having a different UUID (each temp collection&apos;s UUID would be preserved across the rename).&lt;/p&gt;

&lt;p&gt;To work around this, I made the router check up front if the output collection is sharded and empty and, if so, &lt;em&gt;send&lt;/em&gt; the UUID to use for the temp collection. This way, the output collection would end up having the same UUID on each shard.&#160; (It was known and accepted that this workaround would not work if writes to the final output collection were happening concurrently with the mapReduce.)&lt;/p&gt;

&lt;p&gt;When I implemented this, I didn&apos;t consider that the output collection may be empty on &lt;em&gt;some&lt;/em&gt; shards but not others, and I didn&apos;t catch it because I accidentally made the router run the count&#160;&lt;a href=&quot;https://github.com/mongodb/mongo/blob/f02841245d47c0118f18aa1f1ee88c72ddf012ab/src/mongo/s/commands/cluster_map_reduce.cpp#L573&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;only against the primary shard&lt;/a&gt; instead of against all shards.&lt;/p&gt;

&lt;p&gt;As a result, there are two bugs:&lt;/p&gt;

&lt;p&gt;1. (Tracked by this ticket) If the output collection is &lt;b&gt;empty on the primary shard&lt;/b&gt; but has data on other shards, the data on the other shards will be lost because the router will drop and recreate the output collection in order to generate a new config.collections entry with a new UUID to send for the temp collections. (This uses a special option to shardCollection to have the config server generate the UUID to use in config.collections and ignore the one generated by the primary shard.)&lt;/p&gt;

&lt;p&gt;2. (Tracked by &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-44527&quot; title=&quot;Map reduce to an existing sharded collection can incorrectly create new collections on target shards&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-44527&quot;&gt;&lt;del&gt;SERVER-44527&lt;/del&gt;&lt;/a&gt;) If the output collection &lt;b&gt;has data on the primary shard&lt;/b&gt; but is empty on some other shard, the router will not send a UUID, so the empty shard will apply the optimization and end up with an output collection whose UUID does not match the UUID on the config server or primary shard.&lt;/p&gt;</comment>
                            <comment id="2525724" author="nicholas.zolnierz" created="Fri, 8 Nov 2019 18:55:06 +0000"  >&lt;p&gt;CC &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=esha.maharishi&quot; class=&quot;user-hover&quot; rel=&quot;esha.maharishi&quot;&gt;esha.maharishi&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="2522967" author="nicholas.zolnierz" created="Thu, 7 Nov 2019 19:11:08 +0000"  >&lt;p&gt;Filing for posterity, this will be fixed by the new implementation.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10420">
                    <name>Backports</name>
                                            <outwardlinks description="backported by">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                        <issuelink>
            <issuekey id="880128">SERVER-42511</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_12450" key="com.atlassian.jira.plugin.system.customfieldtypes:multicheckboxes">
                        <customfieldname>Backport Requested</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="15640"><![CDATA[v4.0]]></customfieldvalue>
    <customfieldvalue key="15141"><![CDATA[v3.6]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10011" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Backwards Compatibility</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10038"><![CDATA[Fully Compatible]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Fri, 8 Nov 2019 20:37:05 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        4 years, 4 weeks, 1 day ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_17050" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Downstream Team Attention</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="16941"><![CDATA[Not Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>luke.bonanomi@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            4 years, 4 weeks, 1 day ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10032" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Operating System</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10026"><![CDATA[ALL]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>esha.maharishi@mongodb.com</customfieldvalue>
            <customfieldvalue>xgen-internal-githook</customfieldvalue>
            <customfieldvalue>nicholas.zolnierz@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hw135r:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hr6ri7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="3286">Query 2019-12-02</customfieldvalue>
    <customfieldvalue id="3287">Query 2019-12-16</customfieldvalue>
    <customfieldvalue id="3288">Query 2019-12-30</customfieldvalue>
    <customfieldvalue id="3289">Query 2020-01-13</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hw0pf3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>