<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 05:16:02 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-48067] Reduce memory consumption for unique index builds with large numbers of non-unique keys</title>
                <link>https://jira.mongodb.org/browse/SERVER-48067</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;We use &lt;a href=&quot;https://github.com/mongodb/mongo/blob/950dd1ff0ce5d62bbd434e8b874cce2ecbbda3ee/src/mongo/db/catalog/multi_index_block.cpp#L505&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;a vector&lt;/a&gt; to track all duplicate keys inserted on an index. Specifically, this is in the phase when we dump keys from the external sorter into the WT bulk inserter.&lt;/p&gt;

&lt;p&gt;Due to the nature of hybrid index builds, we must track these duplicates until we temporarily stop writes and can see all writes to the table.&lt;/p&gt;

&lt;p&gt;If a collection has a large number of duplicate key violations, this vector can build up without bound. We can improve this behavior by batching writes to reduce the memory impact.&lt;/p&gt;

&lt;p&gt;We should consider using this ticket to also address the conversion of &lt;a href=&quot;https://github.com/mongodb/mongo/blob/43e2423bae07e13cf624b9d5fb74e62bd1959b19/src/mongo/db/index/index_access_method.cpp#L677-L678&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;KeyString back to  BSONObj&lt;/a&gt; to record duplicates and also the memory amplification of &lt;a href=&quot;https://github.com/mongodb/mongo/blob/43e2423bae07e13cf624b9d5fb74e62bd1959b19/src/mongo/db/index/duplicate_key_tracker.cpp#L68-L76&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;creating new vectors to copy key data&lt;/a&gt;.&lt;/p&gt;</description>
                <environment></environment>
        <key id="1345173">SERVER-48067</key>
            <summary>Reduce memory consumption for unique index builds with large numbers of non-unique keys</summary>
                <type id="4" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14710&amp;avatarType=issuetype">Improvement</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="13201">Fixed</resolution>
                                        <assignee username="gregory.noma@mongodb.com">Gregory Noma</assignee>
                                    <reporter username="louis.williams@mongodb.com">Louis Williams</reporter>
                        <labels>
                    </labels>
                <created>Fri, 8 May 2020 21:41:37 +0000</created>
                <updated>Sun, 29 Oct 2023 22:08:27 +0000</updated>
                            <resolved>Mon, 20 Jul 2020 19:37:10 +0000</resolved>
                                    <version>4.2.0</version>
                                    <fixVersion>4.2.9</fixVersion>
                    <fixVersion>4.7.0</fixVersion>
                    <fixVersion>4.4.2</fixVersion>
                                                        <votes>0</votes>
                                    <watches>22</watches>
                                                                                                                <comments>
                            <comment id="3380788" author="xgen-internal-githook" created="Tue, 8 Sep 2020 20:18:00 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Gregory Noma&apos;, &apos;email&apos;: &apos;gregory.noma@gmail.com&apos;, &apos;username&apos;: &apos;gregorynoma&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-48067&quot; title=&quot;Reduce memory consumption for unique index builds with large numbers of non-unique keys&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-48067&quot;&gt;&lt;del&gt;SERVER-48067&lt;/del&gt;&lt;/a&gt; Reduce memory consumption for unique index builds with large numbers of non-unique keys&lt;/p&gt;

&lt;p&gt;(cherry picked from commit 3c2951938d65667d675c48511d9d1046655809a5)&lt;br/&gt;
Branch: v4.4&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/f2a4792e6bea5d403dcf3474698312c7178ece86&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/f2a4792e6bea5d403dcf3474698312c7178ece86&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="3295535" author="xgen-internal-githook" created="Tue, 21 Jul 2020 21:18:18 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Gregory Noma&apos;, &apos;email&apos;: &apos;gregory.noma@gmail.com&apos;, &apos;username&apos;: &apos;gregorynoma&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-48067&quot; title=&quot;Reduce memory consumption for unique index builds with large numbers of non-unique keys&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-48067&quot;&gt;&lt;del&gt;SERVER-48067&lt;/del&gt;&lt;/a&gt; Reduce memory consumption for unique index builds with large numbers of non-unique keys&lt;/p&gt;

&lt;p&gt;(cherry picked from commit 3c2951938d65667d675c48511d9d1046655809a5)&lt;br/&gt;
Branch: v4.2&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/98a7db0a562b818426c19203e8d16cc93980b279&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/98a7db0a562b818426c19203e8d16cc93980b279&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="3293212" author="xgen-internal-githook" created="Mon, 20 Jul 2020 19:35:40 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Gregory Noma&apos;, &apos;email&apos;: &apos;gregory.noma@gmail.com&apos;, &apos;username&apos;: &apos;gregorynoma&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-48067&quot; title=&quot;Reduce memory consumption for unique index builds with large numbers of non-unique keys&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-48067&quot;&gt;&lt;del&gt;SERVER-48067&lt;/del&gt;&lt;/a&gt; Reduce memory consumption for unique index builds with large numbers of non-unique keys&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/3c2951938d65667d675c48511d9d1046655809a5&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/3c2951938d65667d675c48511d9d1046655809a5&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="3277524" author="asya" created="Thu, 9 Jul 2020 14:47:14 +0000"  >&lt;p&gt;If there are duplicates in a unique index, wouldn&apos;t it be more efficient to fail the moment we detect the first duplicate?&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10420">
                    <name>Backports</name>
                                            <outwardlinks description="backported by">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10520">
                    <name>Problem/Incident</name>
                                            <outwardlinks description="causes">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_12450" key="com.atlassian.jira.plugin.system.customfieldtypes:multicheckboxes">
                        <customfieldname>Backport Requested</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="18953"><![CDATA[v4.4]]></customfieldvalue>
    <customfieldvalue key="16775"><![CDATA[v4.2]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10011" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Backwards Compatibility</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10038"><![CDATA[Fully Compatible]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_13552" key="com.go2group.jira.plugin.crm:crm_generic_field">
                        <customfieldname>Case</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[[5002K00000nBzCZQA0, 5002K00000nq1YoQAI, 5006R00001qmSfZQAU]]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Thu, 9 Jul 2020 14:40:57 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        3 years, 22 weeks, 1 day ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_17050" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Downstream Team Attention</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="16941"><![CDATA[Not Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>luke.bonanomi@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            3 years, 22 weeks, 1 day ago
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_16465" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Linked BF Score</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>asya.kamsky@mongodb.com</customfieldvalue>
            <customfieldvalue>xgen-internal-githook</customfieldvalue>
            <customfieldvalue>gregory.noma@mongodb.com</customfieldvalue>
            <customfieldvalue>louis.williams@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hxkf8f:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hr540n:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="3940">Execution Team 2020-07-27</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hxk1hr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>