<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 03:06:51 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-4745] Figuring out which shard to send a query to takes a long time when doing large $in queries on the shard key</title>
                <link>https://jira.mongodb.org/browse/SERVER-4745</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;The problem seems to be in ChunkManager::getShardsForQuery in chunk.cpp.  Seems to do n^2 comparisons to figure out where to send the queries.&lt;/p&gt;</description>
                <environment></environment>
        <key id="28879">SERVER-4745</key>
            <summary>Figuring out which shard to send a query to takes a long time when doing large $in queries on the shard key</summary>
                <type id="1" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14703&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="9">Done</resolution>
                                        <assignee username="spencer@mongodb.com">Spencer Brody</assignee>
                                    <reporter username="spencer@mongodb.com">Spencer Brody</reporter>
                        <labels>
                    </labels>
                <created>Mon, 23 Jan 2012 17:48:16 +0000</created>
                <updated>Mon, 11 Jul 2016 18:35:48 +0000</updated>
                            <resolved>Fri, 10 Feb 2012 02:06:08 +0000</resolved>
                                    <version>1.8.4</version>
                    <version>2.0.2</version>
                                    <fixVersion>2.0.3</fixVersion>
                    <fixVersion>2.1.1</fixVersion>
                                    <component>Sharding</component>
                                        <votes>0</votes>
                                    <watches>5</watches>
                                                                                                                <comments>
                            <comment id="87148" author="auto" created="Fri, 10 Feb 2012 02:01:06 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{u&apos;login&apos;: u&apos;stbrody&apos;, u&apos;email&apos;: u&apos;spencer@10gen.com&apos;, u&apos;name&apos;: u&apos;Spencer T Brody&apos;}
&lt;p&gt;Message: Add unit test for &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-4745&quot; title=&quot;Figuring out which shard to send a query to takes a long time when doing large $in queries on the shard key&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-4745&quot;&gt;&lt;del&gt;SERVER-4745&lt;/del&gt;&lt;/a&gt;.&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/550c8441917b58b5f09367ee28765c5bf6c0ec0a&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/550c8441917b58b5f09367ee28765c5bf6c0ec0a&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="87131" author="auto" created="Fri, 10 Feb 2012 00:26:54 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{u&apos;login&apos;: u&apos;stbrody&apos;, u&apos;name&apos;: u&apos;Spencer T Brody&apos;, u&apos;email&apos;: u&apos;spencer@10gen.com&apos;}
&lt;p&gt;Message: Calculating which shard(s) to send $in queries to was taking a long time. This fix changes mongos to stop limiting the shards to send to after the first $in clause - possibly sending the query to more shards than necessary, but saving time. &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-4745&quot; title=&quot;Figuring out which shard to send a query to takes a long time when doing large $in queries on the shard key&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-4745&quot;&gt;&lt;del&gt;SERVER-4745&lt;/del&gt;&lt;/a&gt;.&lt;br/&gt;
Branch: v2.0&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/dd046106ba1c8de1c1ae8f5830981e18ce3dd597&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/dd046106ba1c8de1c1ae8f5830981e18ce3dd597&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="86797" author="auto" created="Thu, 9 Feb 2012 05:45:08 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{u&apos;login&apos;: u&apos;stbrody&apos;, u&apos;name&apos;: u&apos;Spencer T Brody&apos;, u&apos;email&apos;: u&apos;spencer@10gen.com&apos;}
&lt;p&gt;Message: Calculating which shard(s) to send $in queries to was taking a long time. This fix changes mongos to stop limiting the shards to send to after the first $in clause - possibly sending the query to more shards than necessary, but saving time. &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-4745&quot; title=&quot;Figuring out which shard to send a query to takes a long time when doing large $in queries on the shard key&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-4745&quot;&gt;&lt;del&gt;SERVER-4745&lt;/del&gt;&lt;/a&gt;.&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/010488f6351acd460dcdcaad1919151e6bb50fa2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/010488f6351acd460dcdcaad1919151e6bb50fa2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="83223" author="remonvv" created="Mon, 30 Jan 2012 09:37:31 +0000"  >&lt;p&gt;Eliot pointed me to this issue; we&apos;re getting &quot;combinatorial limit of $in partitioning of result set exceeded&quot; errors with queries on large sharded collections with a simple (one field) shard key but multiple $in clauses. I do not see a reason for the error I&apos;m getting given my configuration but I&apos;ll keep an eye on this regardless. Full reference here : &lt;a href=&quot;http://groups.google.com/group/mongodb-user/browse_thread/thread/d86f1246c5ed4816/73c522eee341ef98#73c522eee341ef98&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://groups.google.com/group/mongodb-user/browse_thread/thread/d86f1246c5ed4816/73c522eee341ef98#73c522eee341ef98&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="81880" author="blee" created="Tue, 24 Jan 2012 03:52:18 +0000"  >&lt;p&gt;Makes sense, the level of effort should probably be reasonable given the number of shards as well?  In our case, with only four shards, we expect these large $in queries to hit all of them.&lt;/p&gt;</comment>
                            <comment id="81852" author="spencer" created="Tue, 24 Jan 2012 00:23:38 +0000"  >&lt;p&gt;Proposed solution: if there are multiple $in clauses in components of the shard key, only check all shard key combinations up to (and including) the first field with a $in.  So in the above example, you would check all values of &quot;a&quot; in &quot;aList&quot;, but you wouldn&apos;t take the cartesian product of values in &quot;aList&quot; with the values in &quot;bList&quot;.  This may result in sending the query to more shards than is necessary, but that will be worth it to avoid calculating the cartesian product of all values in the two (or more) $in lists.&lt;/p&gt;</comment>
                            <comment id="81848" author="spencer" created="Tue, 24 Jan 2012 00:03:55 +0000"  >&lt;p&gt;Seems to be a problem specifically when the shard key is compound and you&apos;re doing a query with a $in on multiple parts of the shard key.  For example, if the shard key is &lt;/p&gt;
{a:1,b:1}
&lt;p&gt;, and you do a query like&lt;/p&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;db.foo.find({a : {$in : aList}, b : {$in : bList}})&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p&gt;, the mongos takes a very long time to figure out which shard(s) to send that query to.  It has to check every combination of the elements of aList and bList, so if aList is of length n and bList is of length m, then it must generate all n*m possible shard keys, then check each of them to find what shard(s) they hit.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Depends</name>
                                                                <inwardlinks description="is depended on by">
                                        <issuelink>
            <issuekey id="27240">SERVER-4555</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="30530">SERVER-4960</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 24 Jan 2012 03:52:18 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        12 years, 1 week, 6 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>ramon.fernandez@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            12 years, 1 week, 6 days ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10032" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Operating System</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10026"><![CDATA[ALL]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>auto</customfieldvalue>
            <customfieldvalue>blee</customfieldvalue>
            <customfieldvalue>remonvv</customfieldvalue>
            <customfieldvalue>spencer@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrog27:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hrge1z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9186</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10166" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Tests Written</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10154"><![CDATA[Complete]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrixhr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>