<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 06:05:38 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-66510] Improve performance of large $in queries in SBE</title>
                <link>https://jira.mongodb.org/browse/SERVER-66510</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;The SBE implementation requires an entire $in array to be converted into an internal ArraySet type. This is a costly operation that needs to be performed for each input query, even with enabled auto-parameterization. The new plan cache cannot help here.&lt;/p&gt;

&lt;p&gt;Relevant microbenchmarks:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Queries.UnindexedLargeInNonMatching -24.57%&lt;/li&gt;
	&lt;li&gt;Queries.UnindexedVeryLargeInSortedMatching -50.26%&lt;/li&gt;
	&lt;li&gt;Queries.UnindexedLargeInMatching -24.36%&lt;/li&gt;
&lt;/ul&gt;
</description>
                <environment></environment>
        <key id="2048053">SERVER-66510</key>
            <summary>Improve performance of large $in queries in SBE</summary>
                <type id="3" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14718&amp;avatarType=issuetype">Task</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="1" iconUrl="https://jira.mongodb.org/images/icons/statuses/open.png" description="">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="backlog-query-execution">Backlog - Query Execution</assignee>
                                    <reporter username="ethan.zhang@mongodb.com">Ethan Zhang</reporter>
                        <labels>
                    </labels>
                <created>Mon, 16 May 2022 23:52:20 +0000</created>
                <updated>Thu, 30 Mar 2023 23:06:02 +0000</updated>
                                                                                                <votes>0</votes>
                                    <watches>16</watches>
                                                                                                                <comments>
                            <comment id="5304281" author="david.storch" created="Mon, 27 Mar 2023 20:58:49 +0000"  >&lt;p&gt;The current list of benchmarks for which we still see regressions considered &quot;critical&quot; is:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Aggregation.UnindexedLargeInMatching&lt;/li&gt;
	&lt;li&gt;Aggregation.UnindexedLargeInNonMatching&lt;/li&gt;
	&lt;li&gt;Aggregation.UnindexedVeryLargeInSortedMatching&lt;/li&gt;
	&lt;li&gt;Queries.IdentityView.UnindexedLargeInMatching&lt;/li&gt;
	&lt;li&gt;Queries.IdentityView.UnindexedLargeInNonMatching&lt;/li&gt;
	&lt;li&gt;Queries.IdentityView.UnindexedVeryLargeInSortedMatching&lt;/li&gt;
	&lt;li&gt;Queries.UnindexedLargeInMatching&lt;/li&gt;
	&lt;li&gt;Queries.UnindexedLargeInNonMatching&lt;/li&gt;
	&lt;li&gt;Queries.UnindexedVeryLargeInSortedMatching&lt;/li&gt;
	&lt;li&gt;Queries.UnindexedVeryLargeInUnsortedMatching&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="4761769" author="JIRAUSER1265798" created="Thu, 18 Aug 2022 20:04:38 +0000"  >&lt;p&gt;We decided to keep the SBE $in implementation as-is. The root cause of the regression is that the SBE stage builder constructs an additional hash table for the $in array. This allows us to use a faster hash lookup at runtime, and the overhead is only significant for tiny collections. (Details in &lt;a href=&quot;https://docs.google.com/document/d/1RZV7n3ObL8Zt3FHX63jKQXdNFSuIY0ybs6QseZrymFU/edit?pli=1#&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;Large $in Benchmark and Profiling&lt;/a&gt;)&lt;/p&gt;</comment>
                            <comment id="4664645" author="kyle.suarez" created="Thu, 7 Jul 2022 20:25:05 +0000"  >&lt;p&gt;For an indexed $in, it turns out that the system &lt;em&gt;needs&lt;/em&gt; the sort to happen. We&apos;re a little blocked here. Need to schedule a meeting to brainstorm?&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=david.storch%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;david.storch@mongodb.com&quot;&gt;david.storch@mongodb.com&lt;/a&gt; says that we could generate the index bounds unsorted and then sort those bounds, but it feels like that defeats the entire purpose of avoiding the sort.&lt;/p&gt;</comment>
                            <comment id="4635339" author="david.storch" created="Thu, 23 Jun 2022 14:47:46 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=zixuan.zhuang%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;zixuan.zhuang@mongodb.com&quot;&gt;zixuan.zhuang@mongodb.com&lt;/a&gt; has discovered that part of the problem is that even when the $in executes in SBE, the &lt;tt&gt;MatchExpression&lt;/tt&gt; code still prepares a deduplicated and sorted vector of the $in elements. This is wasted effort since it is only needed for &lt;tt&gt;MatchExpression&lt;/tt&gt; execution, but the actual execution ends up happening in SBE.&lt;/p&gt;</comment>
                            <comment id="4622942" author="pawel.terlecki" created="Fri, 17 Jun 2022 08:49:05 +0000"  >&lt;p&gt;Thanks for the details, &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=anton.korshunov%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;anton.korshunov@mongodb.com&quot;&gt;anton.korshunov@mongodb.com&lt;/a&gt;. Yes makes sense to use the same trick.&lt;/p&gt;</comment>
                            <comment id="4622869" author="anton.korshunov" created="Fri, 17 Jun 2022 08:16:14 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=pawel.terlecki%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;pawel.terlecki@mongodb.com&quot;&gt;pawel.terlecki@mongodb.com&lt;/a&gt; It&apos;s both. First of all, we need to copy (and convert to an SBE value) each element in a large $in array into an internal ArraySet type, and we need to do it even with the plan cache and auto-parameterization when we bind in input parameters (that&apos;s why the plan cache project didn&apos;t make a huge difference in recovering the performance of large $in queries). The classic engine doesn&apos;t make any copies - it creates a new vector of BSONElements which point to the same memory in the original find command BSON, and then simply sorts and dedups this vector, so we really spend time only on sorting. This issue manifests in Queries.UnindexedLargeInNonMatching and Queries.UnindexedLargeInMatching queries.&lt;/p&gt;

&lt;p&gt;Secondly, we cannot exploit the fact that the input array is already pre-sorted, so the classic engine simply bails out if the original input array is already sorted, while in SBE we yet again create a full copy of the $in equalities. This manifests in Queries.UnindexedVeryLargeInSortedMatching.&lt;/p&gt;

&lt;p&gt;The last time we discussed it with &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=martin.neupauer%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;martin.neupauer@mongodb.com&quot;&gt;martin.neupauer@mongodb.com&lt;/a&gt; we figured that one way to recover the perf would be to use the same approach as the classic engine and use a binary search to look up matches.&lt;/p&gt;</comment>
                            <comment id="4620836" author="pawel.terlecki" created="Thu, 16 Jun 2022 14:43:40 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=anton.korshunov%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;anton.korshunov@mongodb.com&quot;&gt;anton.korshunov@mongodb.com&lt;/a&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=martin.neupauer%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;martin.neupauer@mongodb.com&quot;&gt;martin.neupauer@mongodb.com&lt;/a&gt; i may be remembering wrong. is it just about the expensive copying into an internal ArraySet type or also not using the binary search when array is sorted?&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    <customfield id="customfield_12751" key="com.atlassian.jira.plugin.system.customfieldtypes:multiselect">
                        <customfieldname>Assigned Teams</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="25125"><![CDATA[Query Execution]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Sat, 4 Jun 2022 05:22:29 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        45 weeks, 2 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                            <customfield id="customfield_10857" key="com.pyxis.greenhopper.jira:gh-epic-link">
                        <customfieldname>Epic Link</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>PM-3243</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>memento@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            45 weeks, 2 days ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>anton.korshunov@mongodb.com</customfieldvalue>
            <customfieldvalue>backlog-query-execution</customfieldvalue>
            <customfieldvalue>david.storch@mongodb.com</customfieldvalue>
            <customfieldvalue>ethan.zhang@mongodb.com</customfieldvalue>
            <customfieldvalue>kyle.suarez@mongodb.com</customfieldvalue>
            <customfieldvalue>pawel.terlecki@mongodb.com</customfieldvalue>
            <customfieldvalue>zixuan.zhuang@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i0v3pj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|i0h3fm:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="5896">QE 2022-06-13</customfieldvalue>
    <customfieldvalue id="5898">QE 2022-06-27</customfieldvalue>
    <customfieldvalue id="5900">QE 2022-07-11</customfieldvalue>
    <customfieldvalue id="5902">QE 2022-07-25</customfieldvalue>
    <customfieldvalue id="5904">QE 2022-08-08</customfieldvalue>
    <customfieldvalue id="5906">QE 2022-08-22</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i0upuv:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>