<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 02:58:37 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-1982] fully support sharding on geo field</title>
                <link>https://jira.mongodb.org/browse/SERVER-1982</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;Currently &quot;works&quot; by routing geo queries to all shards. Correct results, but inefficient.&lt;/p&gt;</description>
                <environment></environment>
        <key id="13444">SERVER-1982</key>
            <summary>fully support sharding on geo field</summary>
                <type id="2" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14711&amp;avatarType=issuetype">New Feature</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="10038" iconUrl="https://jira.mongodb.org/images/icons/subtask.gif" description="">Backlog</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="backlog-query-integration">Backlog - Query Integration</assignee>
                                    <reporter username="mathias@mongodb.com">Mathias Stearn</reporter>
                        <labels>
                            <label>qi-geo</label>
                    </labels>
                <created>Wed, 20 Oct 2010 21:35:01 +0000</created>
                <updated>Thu, 28 Dec 2023 18:41:55 +0000</updated>
                                                                            <component>Geo</component>
                    <component>Sharding</component>
                                        <votes>53</votes>
                                    <watches>44</watches>
                                                                                                                <comments>
                            <comment id="5057438" author="JIRAUSER1265607" created="Thu, 15 Dec 2022 14:55:07 +0000"  >&lt;p&gt;linked to&#160;INIT-5 and send it back to QE backlog. &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=kateryna.kamenieva%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;kateryna.kamenieva@mongodb.com&quot;&gt;kateryna.kamenieva@mongodb.com&lt;/a&gt; &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=kyle.suarez%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;kyle.suarez@mongodb.com&quot;&gt;kyle.suarez@mongodb.com&lt;/a&gt; please review.&lt;/p&gt;</comment>
                            <comment id="673772" author="mkalish" created="Mon, 28 Jul 2014 17:11:18 +0000"  >&lt;p&gt;Any update on when this might make it in?  Seems like this would be vital for operating with multiple, geographically distant data centers.&lt;br/&gt;
As far as I can tell, the only real alternative is using the nearest when reading from secondaries, but this does not seem like a satisfactory solution.&lt;/p&gt;</comment>
                            <comment id="293569" author="jokeyrhyme" created="Wed, 20 Mar 2013 02:16:42 +0000"  >&lt;p&gt;Here&apos;s a scenario:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;you have a single MongoDB cluster that spans multiple data-centres across the Earth&lt;/li&gt;
	&lt;li&gt;you have application worker instances in each data-centre and geographic DNS routing&lt;/li&gt;
	&lt;li&gt;traffic to/from particular data-centres is expensive (e.g. South Africa) or slow (e.g. Singapore)&lt;/li&gt;
	&lt;li&gt;it is more important to limit network traffic to a particular zone than it is to optimise query time&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;I realise you could accomplish this with separate MongoDB clusters, but then you have to maintain application-level logic to know which configurations to use and when. But this is a slippery slope: after all, I can use application-level logic to get sharding to work with MySQL.&lt;/p&gt;

&lt;p&gt;To me, the philosophy of MongoDB is to let the DB make intelligent choices about data storage and retrieval. This seems like an entirely reasonable request to increase the ability for MongoDB to make intelligent choices.&lt;/p&gt;</comment>
                            <comment id="105098" author="bgsosh@gmail.com" created="Fri, 30 Mar 2012 08:41:56 +0000"  >&lt;p&gt;I was excited to see the geo indexing capabilities of mongo, but was disappointed that this has not been properly implemented with regards to sharding.  IMO this makes geo data a second class citizen in mongo. Are there plans for this to be implemented?&lt;/p&gt;</comment>
                            <comment id="94138" author="zantvoort" created="Thu, 1 Mar 2012 15:03:34 +0000"  >&lt;p&gt;@Eliot I don&apos;t have any performance issues at this moment. I was just checking the (theoretical) scalability properties of geo/sharding in case I do need to scale.&lt;/p&gt;

&lt;p&gt;Thanks for the response anyway!&lt;/p&gt;</comment>
                            <comment id="94108" author="eliot" created="Thu, 1 Mar 2012 13:07:46 +0000"  >&lt;p&gt;Yes - but it&apos;s unclear if that&apos;s good or bad in this case.&lt;br/&gt;
Have you tried it and seen performance issues?&lt;/p&gt;</comment>
                            <comment id="94066" author="zantvoort" created="Thu, 1 Mar 2012 09:47:30 +0000"  >&lt;p&gt;You describe the benefits of sharding. I totally agree with you on those positive effects. &lt;/p&gt;

&lt;p&gt;In general, my statement boils down to: &lt;/p&gt;

&lt;p&gt;&quot;Sharding on key &apos;A&apos; and querying on (solely) key &apos;B&apos; is not efficient. It is more efficient compared to a non-partitioned approach for the reasons you describe above, but fact remains that all shards are involved in a search if the shard key is not part of the query.&quot;&lt;/p&gt;

&lt;p&gt;As long as sharding on geo key is not fully supported, it looks like Mongo behaves like this.&lt;/p&gt;</comment>
                            <comment id="93804" author="redbeard0531" created="Wed, 29 Feb 2012 16:48:24 +0000"  >&lt;p&gt;To be clear, you went from 30% to 20% load. Also, by spreading the data out you are able to keep more data in ram which is the #1 determiner of performance. Since having all data in ram is between 1000 (high-end SSD) to 1,000,000 (spinning disk) times faster so is much more important when it comes to scalability than reducing the processing that each server has to do.&lt;/p&gt;</comment>
                            <comment id="93800" author="zantvoort" created="Wed, 29 Feb 2012 16:43:10 +0000"  >&lt;p&gt;Again, taking O(log n) as an example.&lt;/p&gt;

&lt;p&gt;1000 documents, 1 shard would be:&lt;br/&gt;
Response time: 3&lt;br/&gt;
System load: 3&lt;/p&gt;

&lt;p&gt;1000 documents equally distributed over 10 shards:&lt;br/&gt;
Response time: 2&lt;br/&gt;
System load: 20&lt;/p&gt;

&lt;p&gt;I know there is more to it, but this is the point I&apos;m trying to make.&lt;/p&gt;
</comment>
                            <comment id="93796" author="zantvoort" created="Wed, 29 Feb 2012 16:34:24 +0000"  >&lt;p&gt;First of all, thanks for being responsive. This is appreciated!&lt;/p&gt;

&lt;p&gt;I&apos;m not sure what the time complexity is of geo spatial queries, but for sake of simplicity say it&apos;s O(log n). &lt;/p&gt;

&lt;p&gt;If I apply this to your suggestion, we would get the following:&lt;/p&gt;

&lt;p&gt;Geo key is used for sharding: O(log n)&lt;br/&gt;
Alternative key is used for sharding (key is not part of search query): O(s * log(n/s)), where s is the number of shards and n is the total number of documents.&lt;/p&gt;

&lt;p&gt;This means that the more shards I add, the quicker each individual shard will find results (performance gain is sublinear), however this needs to be multiplied by the number of shards. &lt;/p&gt;

&lt;p&gt;So from a response time perspective (single user system, weakest link not taken into account), you are right about performance improvement using multiple shards. But from a scalability perspective, this approach will add more and more load on the system as a whole if more shards are added.&lt;/p&gt;
</comment>
                            <comment id="93780" author="eliot" created="Wed, 29 Feb 2012 16:00:02 +0000"  >&lt;p&gt;Sort of. &lt;br/&gt;
The problem is that if you search on a boundary, you still might be hitting many shards, so there is no guarantee you only hit 1 shard.&lt;/p&gt;


&lt;p&gt;If one shard is handling it, then it may have to look at 1000 documents.&lt;br/&gt;
If you have 10 shards, then each shard looks at 100, and mongos merges.&lt;/p&gt;

&lt;p&gt;Pros and cons, but I would test sharding by a different key as well.&lt;/p&gt;</comment>
                            <comment id="93752" author="zantvoort" created="Wed, 29 Feb 2012 14:56:38 +0000"  >&lt;p&gt;Hi Eliot,&lt;/p&gt;

&lt;p&gt;I would like to use the geo key for sharding, so I can lookup documents near a given location efficiently (and in a scalable way). &lt;/p&gt;

&lt;p&gt;If I understand correctly, if sharding for geo keys would be fully supported, only a single shard is accessed for getting documents near a given location (maybe two shards in case the given location is close to a range boundary). For that reason, it would be a waste of resources, if other shards are accessed, as they don&apos;t have any documents near this location.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
Leon&lt;/p&gt;

</comment>
                            <comment id="93735" author="eliot" created="Wed, 29 Feb 2012 14:00:23 +0000"  >&lt;p&gt;A call to all shards scales pretty well, as each shard is doing 1/N the work.&lt;br/&gt;
So for large data sets, it can actually be considerably faster.&lt;/p&gt;</comment>
                            <comment id="93694" author="ignlg" created="Wed, 29 Feb 2012 10:10:09 +0000"  >&lt;p&gt;And it&apos;s critical since it is the common case scenario for a MongoDB setup.&lt;/p&gt;

&lt;p&gt;As an example: Foursquare uses MongoDB mainly because Sharding and Geospatial Index. (#ref &lt;a href=&quot;http://www.10gen.com/customers/foursquare&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://www.10gen.com/customers/foursquare&lt;/a&gt;)&lt;/p&gt;

&lt;p&gt;It&apos;s ironic that both features aren&apos;t compatible yet. I mean: efficient and useful, not a call to all shards.&lt;/p&gt;</comment>
                            <comment id="93692" author="zantvoort" created="Wed, 29 Feb 2012 09:57:26 +0000"  >&lt;p&gt;Hi Mathias,&lt;/p&gt;

&lt;p&gt;Since this case was created over 16 months ago, &quot;Planning Bucket A&quot; is not telling me if this will be picked up within the coming 16 months or later... &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.mongodb.org/images/icons/emoticons/wink.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;

&lt;p&gt;Leon&lt;/p&gt;</comment>
                            <comment id="93584" author="redbeard0531" created="Wed, 29 Feb 2012 00:00:36 +0000"  >&lt;p&gt;That is what this case is for.&lt;/p&gt;</comment>
                            <comment id="93575" author="zantvoort" created="Tue, 28 Feb 2012 23:29:23 +0000"  >&lt;p&gt;(Sorry for duplicating this comment, but I think this issue is better suited for it...)&lt;/p&gt;

&lt;p&gt;Are there any plans to support sharding on geo key?&lt;/p&gt;

&lt;p&gt;As long as this isn&apos;t supported, geospatial searching doesn&apos;t scale with Mongo, or am I missing something here?&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10820">
                    <name>Initiative</name>
                                                                <inwardlinks description="included in Initiative">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                        <issuelink>
            <issuekey id="11409">SERVER-926</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>17.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                <customfield id="customfield_12751" key="com.atlassian.jira.plugin.system.customfieldtypes:multiselect">
                        <customfieldname>Assigned Teams</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="25467"><![CDATA[Query Integration]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_13552" key="com.go2group.jira.plugin.crm:crm_generic_field">
                        <customfieldname>Case</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[[500A000000UaRvnIAF]]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 28 Feb 2012 23:29:23 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        1 year, 7 weeks, 6 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>alyssa.clark@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            1 year, 7 weeks, 6 days ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10000" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Old_Backport</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10000"><![CDATA[No]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>backlog-query-integration</customfieldvalue>
            <customfieldvalue>eliot</customfieldvalue>
            <customfieldvalue>ignlg</customfieldvalue>
            <customfieldvalue>zantvoort</customfieldvalue>
            <customfieldvalue>mathias@mongodb.com</customfieldvalue>
            <customfieldvalue>mkalish</customfieldvalue>
            <customfieldvalue>jokeyrhyme</customfieldvalue>
            <customfieldvalue>bgsosh@gmail.com</customfieldvalue>
            <customfieldvalue>xiaochen.wu@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrpcpz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hrfzjj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6209</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hsv2av:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>