<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 03:04:39 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-4000] command to change shard key of a collection</title>
                <link>https://jira.mongodb.org/browse/SERVER-4000</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;Changing shard keys is fundamentally very expensive, but a helper to do this would be useful.  The main thing needed would be to do the operation with good parallelism.&lt;/p&gt;

&lt;p&gt;first cut might require the source collection be read only during the operation.  &lt;/p&gt;

&lt;p&gt;might do something like &lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;measure what the new distribution would be like by looking at a sampled set of records from the originating collection&lt;/li&gt;
	&lt;li&gt;presplit based on statistics above&lt;/li&gt;
	&lt;li&gt;cluster wide copy of data from src to dest collection&lt;/li&gt;
	&lt;li&gt;build the index(es) for dest after the copy to make things as fast as possible&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;i suppose this is just a better version of cloneCollection which we&apos;ll want anyway.&lt;/p&gt;

</description>
                <environment></environment>
        <key id="23132">SERVER-4000</key>
            <summary>command to change shard key of a collection</summary>
                <type id="2" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14711&amp;avatarType=issuetype">New Feature</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="9">Done</resolution>
                                        <assignee username="backlog-server-sharding-nyc">[DO NOT USE] Backlog - Sharding NYC</assignee>
                                    <reporter username="dwight@mongodb.com">Dwight Merriman</reporter>
                        <labels>
                            <label>sharding-lifecycle</label>
                    </labels>
                <created>Sun, 2 Oct 2011 16:40:31 +0000</created>
                <updated>Tue, 6 Dec 2022 05:40:07 +0000</updated>
                            <resolved>Fri, 30 Jul 2021 15:55:19 +0000</resolved>
                                                    <fixVersion>5.0.0</fixVersion>
                                    <component>Sharding</component>
                                        <votes>31</votes>
                                    <watches>44</watches>
                                                                                                                <comments>
                            <comment id="3973089" author="garaudy.etienne" created="Fri, 30 Jul 2021 16:01:28 +0000"  >&lt;p&gt;We launched Resharding in MongoDB 5.0. So closing this ticket as complete.&#160;&lt;/p&gt;</comment>
                            <comment id="1161755" author="jonhyman" created="Tue, 2 Feb 2016 22:27:17 +0000"  >&lt;p&gt;It would also be helpful to be able to extend a shard key for increased cardinality. We are running into an issue where we shard on&lt;/p&gt;

{a:1}

&lt;p&gt;and have an index on &lt;/p&gt;
{a:1, b:1}

&lt;p&gt;and after two years we&apos;re starting to see some jumbo chunks. It would be nice to be able to extend the shard key to &lt;/p&gt;

{a:1, b:1}

&lt;p&gt;and have the balancer now be able to split chunks on the added cardinality.&lt;/p&gt;

&lt;p&gt;EDIT: I see &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-4246&quot; class=&quot;external-link&quot; rel=&quot;nofollow&quot;&gt;https://jira.mongodb.org/browse/SERVER-4246&lt;/a&gt; exists for this. I&apos;ll vote, thanks.&lt;/p&gt;</comment>
                            <comment id="1120096" author="mghosh4@illinois.edu" created="Tue, 22 Dec 2015 02:52:56 +0000"  >&lt;p&gt;Hello,&lt;/p&gt;

&lt;p&gt;I am a 4th year PhD student at UIUC working with Prof. Indranil Gupta. We have worked on this problem (wrote some code and published a paper). You can find the details of the solution at this link &lt;a href=&quot;http://dprg.cs.uiuc.edu/docs/ICAC2015/Conference.pdf&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://dprg.cs.uiuc.edu/docs/ICAC2015/Conference.pdf&lt;/a&gt;. We are currently in the process of porting the code to the new Mongo version as the original code was written for v2.2. Let me know if the solution is of interest to you and we can chat about it.&lt;/p&gt;

&lt;p&gt;Thanks and Regards,&lt;br/&gt;
Mainak Ghosh.&lt;/p&gt;</comment>
                            <comment id="769854" author="tubededentifrice" created="Fri, 21 Nov 2014 10:52:53 +0000"  >&lt;p&gt;@Anne, done =&amp;gt; &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-16264&quot; class=&quot;external-link&quot; rel=&quot;nofollow&quot;&gt;https://jira.mongodb.org/browse/SERVER-16264&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="769430" author="annetheagile" created="Thu, 20 Nov 2014 21:31:27 +0000"  >&lt;p&gt;@Vincent, it might be  a good idea since this ticket may possibly eventually get implemented in the full version.&lt;/p&gt;</comment>
                            <comment id="769354" author="tubededentifrice" created="Thu, 20 Nov 2014 20:21:41 +0000"  >&lt;p&gt;@Anne Nope I didn&apos;t, I thought this would be enough&lt;/p&gt;</comment>
                            <comment id="769334" author="annetheagile" created="Thu, 20 Nov 2014 20:02:55 +0000"  >&lt;p&gt;@Vincent, that idea of &apos;unshard-if-one-shard&apos; sounds good. Did you make a ticket for it?&lt;/p&gt;</comment>
                            <comment id="703357" author="lizhenyu2000" created="Mon, 25 Aug 2014 23:28:03 +0000"  >&lt;p&gt;I wish there were a tool to change the shard key and re-distribute a large amount of data (in the TB range). I know you guys can figure this out &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.mongodb.org/images/icons/emoticons/wink.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;</comment>
                            <comment id="670890" author="tubededentifrice" created="Fri, 25 Jul 2014 01:00:31 +0000"  >&lt;p&gt;At least there should be a way to unshard a collection when all chunks are on the same shard. This way, to change the shard key you&apos;d simply migrate all the chunks to a single shard, unshard, reshard and rebalance. Not ideal, but could fit some use cases.&lt;/p&gt;</comment>
                            <comment id="112977" author="eliot" created="Tue, 24 Apr 2012 04:57:28 +0000"  >&lt;p&gt;Sadly not that simple.&lt;/p&gt;

&lt;p&gt;Let&apos;s say you have 100 shards, and you want to change the shard key from (a) to (b).&lt;br/&gt;
If those keys aren&apos;t related, then 99% of the data has to be moved.&lt;/p&gt;

&lt;p&gt;So, the question is how do you move data while maintaining state and keeping data live.&lt;/p&gt;

&lt;p&gt;More later if you&apos;re curious...&lt;/p&gt;</comment>
                            <comment id="112703" author="mhobbs" created="Mon, 23 Apr 2012 17:24:22 +0000"  >&lt;p&gt;Regarding my earlier comment from Mar 13:&lt;/p&gt;

&lt;p&gt;I&apos;m sure there are subtleties to the implementation of chunks that I&apos;m not fully appreciating, but if there was a special move operation, though, that could move individual records from one shard to another, it would facilitate the migration of records from one key to another. Perhaps such a move operation could be the first step in solving this problem?&lt;/p&gt;

&lt;p&gt;Again, I am ignorant about the implementation of chunks, so forgive me if such an idea is naive.&lt;/p&gt;</comment>
                            <comment id="98558" author="eliot" created="Wed, 14 Mar 2012 02:04:10 +0000"  >&lt;p&gt;You still have to handle data changes.&lt;br/&gt;
If I change the shard key from (a) to (b) the documents in each chunk will be totally different.&lt;br/&gt;
The metadata changes aren&apos;t so bad - it&apos;s the actual in-flight data that&apos;s hard.&lt;/p&gt;</comment>
                            <comment id="98364" author="mhobbs" created="Tue, 13 Mar 2012 17:25:45 +0000"  >&lt;p&gt;One option that doesn&apos;t require write locks, triggers, or 2X storage capacity:&lt;/p&gt;

&lt;p&gt;The sharding config can have 2 shard configurations per collection - a previous configuration and a current configuration. When a collection is re-keyed, the previous configuration is frozen so that no more splits or migrations are performed based on the previous key. The collection is then re-split and moved about based on the new key. As the data is re-distributed, the previous configuration is &lt;b&gt;not&lt;/b&gt; updated - it remains frozen. &lt;/p&gt;

&lt;p&gt;When a collection has multiple shard configurations, the mongos processes would distribute operations out to several shards based on both the previous and current configurations. A record will exist in its old shard if it has not yet moved - or it could potentially exist in a new shard if it has been moved. (There is an issue here when non-multi updates, upserts, and inserts do not contain both the previous and the new shard key)&lt;/p&gt;

&lt;p&gt;When all chunks are migrated based on the new key, the previous configuration is removed.&lt;/p&gt;</comment>
                            <comment id="97993" author="eliot" created="Mon, 12 Mar 2012 19:53:01 +0000"  >&lt;p&gt;For making a key more granular we should probably add a new case as that&apos;s a lot easier to do since it requires no data movement.&lt;/p&gt;</comment>
                            <comment id="97972" author="mhobbs" created="Mon, 12 Mar 2012 19:21:22 +0000"  >&lt;p&gt;A first pass at providing this functionality might be to allow a shard key to be refined. That is, additional fields could be added to an existing key, which would allow more granularity along the existing keys.&lt;/p&gt;

&lt;p&gt;We have sometimes discovered that our collections grow in unexpected ways and that a shard key no longer fits in a 64MB chunk. We can currently increase the max chunk size, but ideally, we&apos;d like to redefine the shard key to make it more granular.&lt;/p&gt;</comment>
                            <comment id="97913" author="bugslayer" created="Mon, 12 Mar 2012 17:44:28 +0000"  >&lt;p&gt;Not needed very often, but on rare occasion it can become really important. I think the most likely use of this may be to fix a collection that was sharded on a linear key (like a MongoId) and needs to be changed to shard against something else (like a hash of that key).&lt;/p&gt;

&lt;p&gt;IMO supporting writes is probably vital. Sharding isn&apos;t likely to be used in environments where it would be OK for writes to fail for a prolonged period of time. If triggers are done first you could easily ensure that new data is copied over. The process might be:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Create a new sharded destination collection&lt;/li&gt;
	&lt;li&gt;Guess at distribution and presplit&lt;/li&gt;
	&lt;li&gt;Add insert/update/delete triggers on the source collection to clone any changes to the destination collection&lt;/li&gt;
	&lt;li&gt;Copy the data&lt;/li&gt;
	&lt;li&gt;Add the indexes to the destination collection&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Also, wondering about some stuff at the end:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Sanity checks? (verify that record counts match, possibly other checks?)&lt;/li&gt;
	&lt;li&gt;Rename the collections dest&amp;gt;&amp;gt;src? (lock needed to make this atomic?)&lt;/li&gt;
	&lt;li&gt;Remove the old source collection or not?&lt;/li&gt;
&lt;/ul&gt;
</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Depends</name>
                                                                <inwardlinks description="is depended on by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="77543">SERVER-9845</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="420692">SERVER-30856</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                        <issuelink>
            <issuekey id="24757">SERVER-4246</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="151199">SERVER-14813</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="170624">SERVER-16264</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>16.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                <customfield id="customfield_12751" key="com.atlassian.jira.plugin.system.customfieldtypes:multiselect">
                        <customfieldname>Assigned Teams</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="25134"><![CDATA[Sharding NYC]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 12 Mar 2012 17:44:28 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        2 years, 27 weeks, 5 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>alexander.golin@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            2 years, 27 weeks, 5 days ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10000" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Old_Backport</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10000"><![CDATA[No]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>backlog-server-sharding-nyc</customfieldvalue>
            <customfieldvalue>AnneTheAgile</customfieldvalue>
            <customfieldvalue>dwight@mongodb.com</customfieldvalue>
            <customfieldvalue>eliot</customfieldvalue>
            <customfieldvalue>garaudy.etienne@mongodb.com</customfieldvalue>
            <customfieldvalue>bugslayer</customfieldvalue>
            <customfieldvalue>jonhyman</customfieldvalue>
            <customfieldvalue>mghosh4@illinois.edu</customfieldvalue>
            <customfieldvalue>mhobbs</customfieldvalue>
            <customfieldvalue>tubededentifrice</customfieldvalue>
            <customfieldvalue>lizhenyu2000</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrooyn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hrfynj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6025</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hsbyfj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>