<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 06:11:49 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-68819] Investigate why performance on BestBuy queries is substantially lower than with the original POC</title>
                <link>https://jira.mongodb.org/browse/SERVER-68819</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;&lt;a href=&quot;https://docs.google.com/spreadsheets/d/1bV9T-Rp3mlpAlaEcxDzcDB38_y95b2tR8scOxtDeF2A/edit#gid=0&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;Original results&lt;/a&gt; from Mathias vs &lt;a href=&quot;https://docs.google.com/spreadsheets/d/1MEP1-mhUC9bHjvk9kIA6xiAHjQW42MdExOH59IJsJBM/edit#gid=2097567505&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;Current results&lt;/a&gt;&lt;/p&gt;</description>
                <environment></environment>
        <key id="2113423">SERVER-68819</key>
            <summary>Investigate why performance on BestBuy queries is substantially lower than with the original POC</summary>
                <type id="3" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14718&amp;avatarType=issuetype">Task</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="9">Done</resolution>
                                        <assignee username="ian.boros@mongodb.com">Ian Boros</assignee>
                                    <reporter username="pawel.terlecki@mongodb.com">Pawel Terlecki</reporter>
                        <labels>
                    </labels>
                <created>Sun, 14 Aug 2022 21:00:53 +0000</created>
                <updated>Mon, 14 Nov 2022 15:42:24 +0000</updated>
                            <resolved>Mon, 14 Nov 2022 15:42:24 +0000</resolved>
                                                                                        <votes>0</votes>
                                    <watches>3</watches>
                                                                                                                <comments>
                            <comment id="4978793" author="ian.boros" created="Mon, 14 Nov 2022 15:42:24 +0000"  >&lt;p&gt;Closing as &quot;done&quot; since we have a good understanding about the difference between the poc and SBE.&lt;/p&gt;</comment>
                            <comment id="4761636" author="ian.boros" created="Thu, 18 Aug 2022 19:31:21 +0000"  >&lt;p&gt;I&apos;ve also looked at the explains and run the queries through a profiler, and found the following:&lt;/p&gt;

&lt;p/&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;db.products.aggregate([{$match:{&apos;type&apos;: {$exists:true}}}, {$group:{_id:&apos;$type&apos;, count: {$sum:1}}}])&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;POC Hot: 818ms&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;SBE Hot: 1855ms&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p/&gt;
&lt;p&gt;This query does not use the filter pushdown yet. It materializes a document, then runs the frankenMatcher on that document (which actually requires serializing it to BSON), and then does the $group. The most obvious way to improve the performance of this query is to push the $exists filter into the column. There is also a fair amount of time spent allocating and freeing values, which would be reduced if we avoid the skip materialization. There is a lot of low hanging fruit for improving the SBE implementation.&lt;/p&gt;

&lt;p/&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;db.products.aggregate([{$match:{&apos;type&apos;: &apos;asdf&apos;}}, {$group:{_id:&apos;$type&apos;, count: {$sum:1}}}])&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;POC Hot: 328ms&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;SBE Hot: 485ms&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p/&gt;
&lt;p&gt;In this query the filter is pushed down to the scan layer. There&apos;s not as much low hanging fruit in the SBE implementation here. From what I see in the profiles, a lot of the time is spent in the VM evaluating the predicate (which we likely can optimize), and a lot of time is spent in wiredtiger. I&apos;m also seeing some time spent copying and freeing data just in to evaluate the predicate, which we should be able to avoid. The difference between the POC and SBE is also much narrower here.&lt;/p&gt;</comment>
                            <comment id="4754746" author="ian.boros" created="Tue, 16 Aug 2022 16:19:27 +0000"  >&lt;p&gt;The results labeled &quot;current&quot; actually don&apos;t use the zig zag scan at all, since that was only merged on Friday. So I&apos;ll have to re-run them. &lt;/p&gt;

&lt;p&gt;Also, Mathias&apos;s POC did a similar optimization to what we did with SBE $group pushdown since he added a special DocumentSourceColumnStore which reads directly from storage, skipping the find() layer. A good amount of the perf improvement for the warm case likely came from that. For this reason, I want to compare the latencies of the POC and master directly, instead of comparing the performance difference of collscan vs columns between the POC and master.&lt;/p&gt;</comment>
                            <comment id="4752295" author="pawel.terlecki" created="Tue, 16 Aug 2022 02:08:29 +0000"  >&lt;p&gt;I see that for $group on a single column and cold data perf is similar, 25x. So maybe other results are worse because zigzag filtering needs some work.&lt;/p&gt;

&lt;p&gt;The hot data perf is worth understanding too. Potentially less interesting for the EAP.&lt;/p&gt;</comment>
                            <comment id="4749330" author="pawel.terlecki" created="Mon, 15 Aug 2022 07:52:56 +0000"  >&lt;p&gt;I am thinking just the I/O for cold data first. Say we just retrieve a single scalar field / project : &lt;/p&gt;
{a:1}
&lt;p&gt; . I would expect this to have similar perf in both implementations.&lt;/p&gt;</comment>
                            <comment id="4749117" author="ian.boros" created="Mon, 15 Aug 2022 01:29:08 +0000"  >&lt;p&gt;A few notes for myself:&lt;br/&gt;
-Five out of the six queries should be able to use the filter pushdown. Per-column filters with SBE wasn&apos;t merged into master until Friday afternoon, so we should re-run all of the numbers.&lt;br/&gt;
-The SBE filter pushdown code will itself need to be optimized. (The patch for this was large and complicated so we decided to merge it without certain improvements, and leave those for later work).&lt;br/&gt;
-We should compare a mongod build of the POC directly against master, instead of comparing the POC % difference against the master % difference. In master we have SBE $group pushdown for collection scans which will definitely skew the results.&lt;/p&gt;</comment>
                            <comment id="4749054" author="pawel.terlecki" created="Sun, 14 Aug 2022 21:03:46 +0000"  >&lt;p&gt;cc: &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=ian.boros%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;ian.boros@mongodb.com&quot;&gt;ian.boros@mongodb.com&lt;/a&gt; &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=charlie.swanson%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;charlie.swanson@mongodb.com&quot;&gt;charlie.swanson@mongodb.com&lt;/a&gt; &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=colby.ing%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;colby.ing@mongodb.com&quot;&gt;colby.ing@mongodb.com&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 15 Aug 2022 01:29:08 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        1 year, 12 weeks, 2 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                            <customfield id="customfield_10857" key="com.pyxis.greenhopper.jira:gh-epic-link">
                        <customfieldname>Epic Link</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>PM-2646</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>ian.boros@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            1 year, 12 weeks, 2 days ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>ian.boros@mongodb.com</customfieldvalue>
            <customfieldvalue>pawel.terlecki@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i1695r:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|i0p128:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="5910">QE 2022-09-19</customfieldvalue>
    <customfieldvalue id="5912">QE 2022-10-03</customfieldvalue>
    <customfieldvalue id="5914">QE 2022-10-17</customfieldvalue>
    <customfieldvalue id="5916">QE 2022-10-31</customfieldvalue>
    <customfieldvalue id="5918">QE 2022-11-14</customfieldvalue>
    <customfieldvalue id="5920">QE 2022-11-28</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i15vb3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>