<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 04:12:26 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-26534] Text search uses excessive memory</title>
                <link>https://jira.mongodb.org/browse/SERVER-26534</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;As in &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-18926&quot; title=&quot;Full text search extremely slow and uses a lot of memory under WiredTiger&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-18926&quot;&gt;&lt;del&gt;SERVER-18926&lt;/del&gt;&lt;/a&gt; and &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-18961&quot; title=&quot;Avoid iterating the entire working set on every yield&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-18961&quot;&gt;&lt;del&gt;SERVER-18961&lt;/del&gt;&lt;/a&gt;, create a collection containing 50 M documents totaling about 4 GB in size with a full-text index, then do a simple search for a single word on the full-text index that returns all the documents.&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.mongodb.org/secure/attachment/140927/140927_text.png&quot; width=&quot;100%&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;Total memory allocated excluding WT cache is roughly the size of the collection. The top four allocating stacks, accounting for most of the excess:&lt;/p&gt;
&lt;p/&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;heapProfile stack44: { 0: &quot;tc_malloc&quot;, 1: &quot;mongo::mongoMalloc&quot;, 2: &quot;mongo::BSONObj::copy&quot;, 3: &quot;mongo::BSONObj::getOwned&quot;, 4: &quot;mongo::WorkingSetMember::makeObjOwnedIfNeeded&quot;, 5: &quot;mongo::TextOrStage::addTerm&quot;, 6: &quot;mongo::TextOrStage::readFromChildren&quot;, 7: &quot;mongo::TextOrStage::work&quot;, 8: &quot;mongo::TextMatchStage::work&quot;, 9: &quot;mongo::TextStage::work&quot;, 10: &quot;mongo::PlanExecutor::getNextImpl&quot;, 11: &quot;mongo::PlanExecutor::getNext&quot;, 12: &quot;mongo::FindCmd::run&quot;, 13: &quot;mongo::Command::run&quot;, 14: &quot;mongo::Command::execCommand&quot;, 15: &quot;mongo::runCommands&quot;, 16: &quot;mongo::assembleResponse&quot;, 17: &quot;mongo::MyMessageHandler::process&quot;, 18: &quot;mongo::PortMessageServer::handleIncomingMsg&quot;, 19: &quot;0x7f89ec7466aa&quot;, 20: &quot;clone&quot; }&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;heapProfile stack41: { 0: &quot;tc_new&quot;, 1: &quot;mongo::WorkingSet::allocate&quot;, 2: &quot;mongo::IndexScan::work&quot;, 3: &quot;mongo::TextOrStage::readFromChildren&quot;, 4: &quot;mongo::TextOrStage::work&quot;, 5: &quot;mongo::TextMatchStage::work&quot;, 6: &quot;mongo::TextStage::work&quot;, 7: &quot;mongo::PlanExecutor::getNextImpl&quot;, 8: &quot;mongo::PlanExecutor::getNext&quot;, 9: &quot;mongo::FindCmd::run&quot;, 10: &quot;mongo::Command::run&quot;, 11: &quot;mongo::Command::execCommand&quot;, 12: &quot;mongo::runCommands&quot;, 13: &quot;mongo::assembleResponse&quot;, 14: &quot;mongo::MyMessageHandler::process&quot;, 15: &quot;mongo::PortMessageServer::handleIncomingMsg&quot;, 16: &quot;0x7f89ec7466aa&quot;, 17: &quot;clone&quot; }&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;heapProfile stack46: { 0: &quot;tc_new&quot;, 1: &quot;void std::vector&amp;lt;mongo::IndexKeyDatum, std::allocator&amp;lt;mongo::IndexKeyDatum&amp;gt; &amp;gt;::_M_emplace_back_aux&amp;lt;mongo::IndexKeyDatum&amp;gt;&quot;, 2: &quot;mongo::IndexScan::work&quot;, 3: &quot;mongo::TextOrStage::readFromChildren&quot;, 4: &quot;mongo::TextOrStage::work&quot;, 5: &quot;mongo::TextMatchStage::work&quot;, 6: &quot;mongo::TextStage::work&quot;, 7: &quot;mongo::PlanExecutor::getNextImpl&quot;, 8: &quot;mongo::PlanExecutor::getNext&quot;, 9: &quot;mongo::FindCmd::run&quot;, 10: &quot;mongo::Command::run&quot;, 11: &quot;mongo::Command::execCommand&quot;, 12: &quot;mongo::runCommands&quot;, 13: &quot;mongo::assembleResponse&quot;, 14: &quot;mongo::MyMessageHandler::process&quot;, 15: &quot;mongo::PortMessageServer::handleIncomingMsg&quot;, 16: &quot;0x7f89ec7466aa&quot;, 17: &quot;clone&quot; }&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;heapProfile stack45: { 0: &quot;tc_new&quot;, 1: &quot;mongo::TextOrStage::addTerm&quot;, 2: &quot;mongo::TextOrStage::readFromChildren&quot;, 3: &quot;mongo::TextOrStage::work&quot;, 4: &quot;mongo::TextMatchStage::work&quot;, 5: &quot;mongo::TextStage::work&quot;, 6: &quot;mongo::PlanExecutor::getNextImpl&quot;, 7: &quot;mongo::PlanExecutor::getNext&quot;, 8: &quot;mongo::FindCmd::run&quot;, 9: &quot;mongo::Command::run&quot;, 10: &quot;mongo::Command::execCommand&quot;, 11: &quot;mongo::runCommands&quot;, 12: &quot;mongo::assembleResponse&quot;, 13: &quot;mongo::MyMessageHandler::process&quot;, 14: &quot;mongo::PortMessageServer::handleIncomingMsg&quot;, 15: &quot;0x7f89ec7466aa&quot;, 16: &quot;clone&quot; }&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p/&gt;

&lt;p&gt;By experiment it appears that the amount of memory used is proportional (possibly roughly equal in size) to the number of documents returned.&lt;/p&gt;</description>
                <environment></environment>
        <key id="322216">SERVER-26534</key>
            <summary>Text search uses excessive memory</summary>
                <type id="1" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14703&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="10038" iconUrl="https://jira.mongodb.org/images/icons/subtask.gif" description="">Backlog</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="backlog-query-integration">Backlog - Query Integration</assignee>
                                    <reporter username="bruce.lucas@mongodb.com">Bruce Lucas</reporter>
                        <labels>
                            <label>qi-text-search</label>
                            <label>query-44-grooming</label>
                            <label>storch</label>
                    </labels>
                <created>Sat, 8 Oct 2016 13:22:19 +0000</created>
                <updated>Wed, 27 Dec 2023 16:48:31 +0000</updated>
                                            <version>3.2.1</version>
                    <version>3.2.10</version>
                    <version>3.4.0-rc0</version>
                                                    <component>Text Search</component>
                                        <votes>11</votes>
                                    <watches>42</watches>
                                                                                                                <comments>
                            <comment id="5586818" author="JIRAUSER1253472" created="Mon, 24 Jul 2023 15:30:49 +0000"  >&lt;p&gt;Is this possibly related?&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-79244&quot; class=&quot;external-link&quot; rel=&quot;nofollow&quot;&gt;https://jira.mongodb.org/browse/SERVER-79244&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="1432440" author="david.storch" created="Fri, 11 Nov 2016 22:06:39 +0000"  >&lt;p&gt;All four stacks which Bruce pasted above are allocations made in order to set up the &lt;tt&gt;ScoreMap&lt;/tt&gt; data structure maintained by the &lt;tt&gt;TextOrStage&lt;/tt&gt;:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://github.com/mongodb/mongo/blob/r3.4.0-rc3/src/mongo/db/exec/text_or.h#L151-L152&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/blob/r3.4.0-rc3/src/mongo/db/exec/text_or.h#L151-L152&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;This data structure maps from each matching document&apos;s &lt;tt&gt;RecordId&lt;/tt&gt; to a pair containing a copy of the corresponding document and its text score. We have to keep a copy of the document since during query yields the storage engine is allowed to free the memory housing the storage subsystem&apos;s copy. So it is indeed the case that text queries currently require memory proportional to the size of the result set.&lt;/p&gt;

&lt;p&gt;This behavior is baked into the current implementation of text search execution. It would require a significant overhaul to fix this in all cases. The good news is that we only need to maintain the &lt;tt&gt;ScoreMap&lt;/tt&gt; structure in order to support computation of text search relevance scores. We hold onto information about documents seen so far so that we can adjust the relevance score when we find a new index key for a document we&apos;ve already seen. This means that if the query does not request the text score, there is no need to maintain the &lt;tt&gt;ScoreMap&lt;/tt&gt;. This is part of the feature request tracked in related ticket &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-26833&quot; title=&quot;Permit non-blocking $text queries when user doesn&amp;#39;t request score projection&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-26833&quot;&gt;&lt;del&gt;SERVER-26833&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Depends</name>
                                            <outwardlinks description="depends on">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="323406">SERVER-26616</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="329920">SERVER-26923</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                        <issuelink>
            <issuekey id="291127">SERVER-24375</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="570734">SERVER-36087</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="591755">SERVER-36794</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="2399100">SERVER-79244</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="210040">SERVER-18926</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="210402">SERVER-18961</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="327756">SERVER-26833</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="148633" name="Text_search.png" size="41325" author="linda.qin@mongodb.com" created="Wed, 1 Feb 2017 00:23:47 +0000"/>
                            <attachment id="140927" name="text.png" size="171077" author="bruce.lucas@mongodb.com" created="Sat, 8 Oct 2016 13:22:19 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                <customfield id="customfield_12751" key="com.atlassian.jira.plugin.system.customfieldtypes:multiselect">
                        <customfieldname>Assigned Teams</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="25467"><![CDATA[Query Integration]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_13552" key="com.go2group.jira.plugin.crm:crm_generic_field">
                        <customfieldname>Case</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[[500A000000V6em4IAB, 500A000000YgTCyIAN, 500A000000ZdOaNIAV, 500A000000b9k4uIAA, 5002K00000cz46QQAQ, 5002K00000d8tuRQAQ, 5002K00000kFbXUQA0, 5002K00000ko4dWQAQ, 5002K00000msEzGQAU, 5002K00000nEECsQAO, 5002K00000npt7AQAQ, 5002K00000xmnpsQAA, 5006R00001nKLaCQAW]]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Thu, 13 Oct 2016 14:53:40 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        28 weeks, 2 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[<s><a href='https://jira.mongodb.org/browse/TSEXP-1758'>TSEXP-1758</a></s>]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>ted.tuckman@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            28 weeks, 2 days ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10032" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Operating System</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10026"><![CDATA[ALL]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>backlog-query-integration</customfieldvalue>
            <customfieldvalue>bruce.lucas@mongodb.com</customfieldvalue>
            <customfieldvalue>david.storch@mongodb.com</customfieldvalue>
            <customfieldvalue>josef.sabl@gmail.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrjtvb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hr2euf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hs5io7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>