<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 06:09:57 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-68125] Index build on multi-key fields can consume more memory than limit</title>
                <link>https://jira.mongodb.org/browse/SERVER-68125</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;This bug describes a problem when indexing documents that generate multiple keys with many duplicate values. We don&apos;t count these duplicate keys towards the memory we are using, which can result in using significantly more memory than intended.&lt;/p&gt;</description>
                <environment></environment>
        <key id="2093900">SERVER-68125</key>
            <summary>Index build on multi-key fields can consume more memory than limit</summary>
                <type id="1" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14703&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="13201">Fixed</resolution>
                                        <assignee username="louis.williams@mongodb.com">Louis Williams</assignee>
                                    <reporter username="louis.williams@mongodb.com">Louis Williams</reporter>
                        <labels>
                    </labels>
                <created>Tue, 19 Jul 2022 07:53:56 +0000</created>
                <updated>Wed, 24 Jan 2024 20:21:53 +0000</updated>
                            <resolved>Thu, 27 Oct 2022 08:13:38 +0000</resolved>
                                    <version>5.0.9</version>
                    <version>6.0.0-rc5</version>
                                    <fixVersion>6.0.4</fixVersion>
                    <fixVersion>6.2.0-rc0</fixVersion>
                                                        <votes>0</votes>
                                    <watches>9</watches>
                                                                                                                <comments>
                            <comment id="5008109" author="xgen-internal-githook" created="Fri, 25 Nov 2022 16:11:50 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Louis Williams&apos;, &apos;email&apos;: &apos;louis.williams@mongodb.com&apos;, &apos;username&apos;: &apos;louiswilliams&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-68125&quot; title=&quot;Index build on multi-key fields can consume more memory than limit&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-68125&quot;&gt;&lt;del&gt;SERVER-68125&lt;/del&gt;&lt;/a&gt; Correctly track all memory used during index builds&lt;/p&gt;

&lt;p&gt;This allows the Sorter to manage a memory pool that can be used to allocate memory for index builds. Previously, we relied on each key to report its individual memory usage, but there are some cases where we fail to represent the actual memory used by all keys because the memory was backed by a shared buffer. This new memory pool holds references to all of the buffers it allocates and does not free them until the caller requests so, in this case when the sorter spills to disk.&lt;/p&gt;

&lt;p&gt;This strategy keeps the performance the same for index builds without many duplicate keys (due to repeated array values). In the degenerate case where we&apos;re building an index with very large duplicate keys, we will end up with more spilling than we had before, at the cost of correctly tracking our memory usage.&lt;br/&gt;
Branch: v6.0&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/7090cf539845798a84ee4ac6488faf66783826c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/7090cf539845798a84ee4ac6488faf66783826c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="4929159" author="xgen-internal-githook" created="Wed, 26 Oct 2022 16:34:58 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Louis Williams&apos;, &apos;email&apos;: &apos;louis.williams@mongodb.com&apos;, &apos;username&apos;: &apos;louiswilliams&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-68125&quot; title=&quot;Index build on multi-key fields can consume more memory than limit&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-68125&quot;&gt;&lt;del&gt;SERVER-68125&lt;/del&gt;&lt;/a&gt; Correctly track all memory used during index builds&lt;/p&gt;

&lt;p&gt;This allows the Sorter to manage a memory pool that can be used to allocate memory for index builds. Previously, we relied on each key to report its individual memory usage, but there are some cases where we fail to represent the actual memory used by all keys because the memory was backed by a shared buffer. This new memory pool holds references to all of the buffers it allocates and does not free them until the caller requests so, in this case when the sorter spills to disk.&lt;/p&gt;

&lt;p&gt;This strategy keeps the performance the same for index builds without many duplicate keys (due to repeated array values). In the degenerate case where we&apos;re building an index with very large duplicate keys, we will end up with more spilling than we had before, at the cost of correctly tracking our memory usage.&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/ad1192b4f6fb77d0074227c375e8e83441654f7a&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/ad1192b4f6fb77d0074227c375e8e83441654f7a&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="4713876" author="JIRAUSER1265133" created="Fri, 29 Jul 2022 09:56:06 +0000"  >&lt;p&gt;From the above reproducer, we have traced it down to &lt;a href=&quot;https://github.com/10gen/mongo/blob/bcd3d82cbb04a103f776e66705c4955326000a01/src/mongo/db/index/btree_key_generator.cpp#L257-L273&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;btree_key_generator.cpp&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;Since this is a multikey index:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;For each document, the generator allocates 1000 KeyStrings (~10 bytes each)&lt;/li&gt;
	&lt;li&gt;All KeyStrings are equal (the same array value), so in the end the KeyStringSet reduces down to 1 unique element, which is added to the sorter.&lt;/li&gt;
	&lt;li&gt;The maximum SharedBufferFragmentBuilder size is 2MB by default, so each KeyString in the sorter is most probably pinning a 10000-byte allocation (allocated in the same fragment, but actually unused) while only accounting for ~40 bytes (10 bytes for the KeyString itself and 32 for KeyString::Value).&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;The reproducer inserts 250000 documents, each with a field with 1000 elements.&lt;br/&gt;
250000 Documents * 1000 KeyStrings * 10 bytes = 2384.19 MiB&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10420">
                    <name>Backports</name>
                                            <outwardlinks description="backported by">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10520">
                    <name>Problem/Incident</name>
                                            <outwardlinks description="causes">
                                        <issuelink>
            <issuekey id="2497214">SERVER-83145</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="2117818">SERVER-68982</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="2470696">SERVER-82037</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="395529" name="Screenshot 2022-08-12 at 12.05.55.png" size="120885" author="yujin.kang@mongodb.com" created="Fri, 12 Aug 2022 10:06:22 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>3.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_12450" key="com.atlassian.jira.plugin.system.customfieldtypes:multicheckboxes">
                        <customfieldname>Backport Requested</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="23470"><![CDATA[v6.0]]></customfieldvalue>
    <customfieldvalue key="21777"><![CDATA[v5.0]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10011" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Backwards Compatibility</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10038"><![CDATA[Fully Compatible]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Fri, 29 Jul 2022 09:56:06 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        1 year, 10 weeks, 5 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_17050" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Downstream Team Attention</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="16941"><![CDATA[Not Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>louis.williams@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            1 year, 10 weeks, 5 days ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10032" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Operating System</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10026"><![CDATA[ALL]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>xgen-internal-githook</customfieldvalue>
            <customfieldvalue>louis.williams@mongodb.com</customfieldvalue>
            <customfieldvalue>yujin.kang@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i12xjj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|i0d3dy:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                            <customfield id="customfield_22250" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Special Downgrade Instructions Required</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="23343"><![CDATA[Not Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="6173">Execution Team 2022-08-08</customfieldvalue>
    <customfieldvalue id="6328">Execution Team 2022-08-22</customfieldvalue>
    <customfieldvalue id="6329">Execution Team 2022-09-05</customfieldvalue>
    <customfieldvalue id="6330">Execution Team 2022-09-19</customfieldvalue>
    <customfieldvalue id="6331">Execution Team 2022-10-03</customfieldvalue>
    <customfieldvalue id="6332">Execution Team 2022-10-17</customfieldvalue>
    <customfieldvalue id="6333">Execution Team 2022-10-31</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i12jov:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>