<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 03:01:41 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-2958] Freelist algorithm causes storage fragmentation</title>
                <link>https://jira.mongodb.org/browse/SERVER-2958</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;The algorithm used for the freelist groups related sizes into &quot;buckets&quot; that are searched for a free entry.  The algorithm stops at 30 entries and then goes to the next bucket.  If all buckets are searched then a new extent is allocated.  In a high insert / delete environment where the inserts occur throughout the day and peak and then deletes peak at a separate time (for example a session cache for a web site) this algorithm results in very large freelists where the smallest items filter to the top of each bucket.  The freelist becomes filled with items that are never reused and blocking items that can be reused.  One option is to allocate only on the bucket size (256, 512, 1024, etc.) which would guarantee that all items in the freelist are reusable.  The following pull request &lt;a href=&quot;https://github.com/mongodb/mongo/pull/37&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/pull/37&lt;/a&gt; illustrates how this could be fixed.&lt;/p&gt;</description>
                <environment></environment>
        <key id="16175">SERVER-2958</key>
            <summary>Freelist algorithm causes storage fragmentation</summary>
                <type id="1" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14703&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="2">Won&apos;t Fix</resolution>
                                        <assignee username="backlog-server-execution">Backlog - Storage Execution Team</assignee>
                                    <reporter username="cwolfinger">Chase Wolfinger</reporter>
                        <labels>
                            <label>compaction</label>
                            <label>extents</label>
                            <label>freelist</label>
                    </labels>
                <created>Mon, 18 Apr 2011 14:59:53 +0000</created>
                <updated>Tue, 6 Dec 2022 05:44:20 +0000</updated>
                            <resolved>Fri, 14 Sep 2018 20:22:33 +0000</resolved>
                                    <version>1.8.1</version>
                                                    <component>MMAPv1</component>
                    <component>Storage</component>
                                        <votes>19</votes>
                                    <watches>28</watches>
                                                                                                                <comments>
                            <comment id="240237" author="jblackburn" created="Tue, 15 Jan 2013 10:35:22 +0000"  >&lt;p&gt;I wonder if this is the root cause of:&lt;br/&gt;
&lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-8078&quot; title=&quot;Mongo databases fileSize grows without bound, even though total dataSize doesn&amp;#39;t change&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-8078&quot;&gt;&lt;del&gt;SERVER-8078&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Note, for the example script given, usePowerOf2Sizes seems to make the space leak worse rather than better.&lt;/p&gt;</comment>
                            <comment id="166603" author="rem" created="Fri, 21 Sep 2012 05:41:09 +0000"  >&lt;p&gt;@Eliot - Ok I will try that. We have a different scenario related to this jira &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-2838&quot; title=&quot;Dropping a collection does not free up disk space&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-2838&quot;&gt;&lt;del&gt;SERVER-2838&lt;/del&gt;&lt;/a&gt; where we drop collections (a &quot;fast delete&quot; of aged data) and create new ones for new data. Will the &quot;usePowerOf2Sizes&quot; also make the reusing of the disk space more efficient for these collections?&lt;/p&gt;</comment>
                            <comment id="164459" author="eliot" created="Fri, 14 Sep 2012 14:02:09 +0000"  >&lt;p&gt;@rene - you should try usePowerOf2Sizes, should do a lot better &lt;a href=&quot;http://docs.mongodb.org/manual/reference/command/collMod/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://docs.mongodb.org/manual/reference/command/collMod/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="164405" author="rem" created="Fri, 14 Sep 2012 10:46:06 +0000"  >&lt;p&gt;Any news on when we can expect this to be fixed? This causes problems for us because we have a collection with many inserts and deletes occurring all the time. In production, the database size on disk grows by 1GB a week even though the size of the data stays approx. the same.&lt;/p&gt;</comment>
                            <comment id="123972" author="dwight_10gen" created="Tue, 29 May 2012 18:25:33 +0000"  >&lt;p&gt;fwiw in 2.2 the compact command has some options for specifying padding that may or may not be helpful for you&lt;/p&gt;</comment>
                            <comment id="123966" author="milkie" created="Tue, 29 May 2012 18:14:06 +0000"  >&lt;p&gt;In the current head of master branch, you can force a collection &apos;mycoll&apos; to allocate only on the freelist bucket size with the collMod command:&lt;/p&gt;
&lt;p/&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;mycoll.runCommand( &quot;collMod&quot; , { &quot;usePowerOf2Sizes&quot; : true } )&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p/&gt;
&lt;p&gt;This interface is still in flux and will probably change for the 2.2 production release.&lt;/p&gt;

&lt;p&gt;In the meantime, you can indeed pad your documents yourself such that all document sizes are an exact freelist bucket size.&lt;/p&gt;</comment>
                            <comment id="123949" author="cangove" created="Tue, 29 May 2012 17:46:41 +0000"  >&lt;p&gt;I know this is old but 2 questions on this issue.&lt;/p&gt;

&lt;p&gt;1) Is there something planned for 2.2 that will fix or help this issue? &lt;/p&gt;

&lt;p&gt;2) It seems one (hacky) way of solving this would be for me to add padding to my documents. So for example if my document is usually 100k-1MB I could pad to make every document exactly 1MB and then the existing algorithm should work since each document is the same size.  Uses more disk space at the beginning but reduces fragmentation and lost filespace.&lt;/p&gt;
</comment>
                            <comment id="92884" author="eliot" created="Mon, 27 Feb 2012 16:58:32 +0000"  >&lt;p&gt;This patch is not something we are likely to include as is.&lt;br/&gt;
We are working on some other options for 2.2&lt;/p&gt;</comment>
                            <comment id="92784" author="purple52" created="Mon, 27 Feb 2012 14:43:09 +0000"  >&lt;p&gt;Is this patch likely to get applied to 2.0.x, or will it even make 2.2? Currently, we are facing the unpleasant prospect of regular maintenance to keep our cluster running due to this issue.&lt;/p&gt;</comment>
                            <comment id="90183" author="eliot" created="Mon, 20 Feb 2012 18:52:32 +0000"  >&lt;p&gt;@ross - dropping and creating new collections is not subject to the freelist algorithm.&lt;br/&gt;
If that&apos;s happening then there is something else going on.&lt;/p&gt;</comment>
                            <comment id="90166" author="scotthernandez" created="Mon, 20 Feb 2012 17:46:16 +0000"  >&lt;p&gt;Capped collections don&apos;t add/remove extents. They are fixed size.&lt;/p&gt;</comment>
                            <comment id="90163" author="rdickeyvii" created="Mon, 20 Feb 2012 17:42:07 +0000"  >&lt;p&gt;I wonder: would the problem described be an issue with TTL-based capped collections?  In a steady-state, you&apos;d expect a TTL-capped collection would have about as many inserts as it does deletes (maybe somewhat more inserts, if your workload grows over time).  I would expect this to cause massive bloat over time.&lt;/p&gt;

&lt;p&gt;Even our algorithm of creating a new collection for each day, then dropping old ones (essentially hacking ttl-collections by rotating them) causes disk usage graphs nearly identical to those in the google groups link above (&lt;a href=&quot;http://groups.google.com/group/mongodb-user/browse_thread/thread/69da5f4a13f1db7c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://groups.google.com/group/mongodb-user/browse_thread/thread/69da5f4a13f1db7c&lt;/a&gt;)&lt;/p&gt;</comment>
                            <comment id="90158" author="destari" created="Mon, 20 Feb 2012 17:31:38 +0000"  >&lt;p&gt;@chase nice work!&lt;/p&gt;

&lt;p&gt;Gotta get this moving.. massive time and effort waste due to not having this in.&lt;/p&gt;</comment>
                            <comment id="82670" author="dwight_10gen" created="Thu, 26 Jan 2012 21:44:01 +0000"  >&lt;p&gt;@chase thanks will take a look&lt;/p&gt;</comment>
                            <comment id="82141" author="cwolfinger" created="Tue, 24 Jan 2012 23:04:11 +0000"  >&lt;p&gt;See the following link: &lt;a href=&quot;https://github.com/cwolfinger/mongo/commit/4641dbcad5bd94b21cf11d1d37531552642fdc94&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/cwolfinger/mongo/commit/4641dbcad5bd94b21cf11d1d37531552642fdc94&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="82027" author="dwight_10gen" created="Tue, 24 Jan 2012 17:34:39 +0000"  >&lt;p&gt;what is the commit #/tag?&lt;/p&gt;</comment>
                            <comment id="81980" author="cwolfinger" created="Tue, 24 Jan 2012 16:11:01 +0000"  >&lt;p&gt;I have fwd ported my original fix to the 2.0 branch.  It is &lt;a href=&quot;https://github.com/cwolfinger/mongo&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/cwolfinger/mongo&lt;/a&gt; on github.  You need to switch to the 2.0 branch. My team has tested several million insert deletes without any leaking memory and no need to compact. &lt;/p&gt;</comment>
                            <comment id="81976" author="dwight_10gen" created="Tue, 24 Jan 2012 15:57:41 +0000"  >&lt;p&gt;until much better, (1) try 2.0 there are some minor improvements and (2) you may find the compact command helpful.&lt;/p&gt;</comment>
                            <comment id="81958" author="grmblfrz" created="Tue, 24 Jan 2012 15:00:52 +0000"  >&lt;p&gt;I have the same problem with storing PHP sessions in MongoDB. We have to periodically repair the db to free the unused space (we are using mms, i can provide the data on request).&lt;/p&gt;</comment>
                            <comment id="81548" author="jbehl@logicmonitor.com" created="Mon, 23 Jan 2012 01:27:34 +0000"  >&lt;p&gt;I&apos;ve encountered this as well while using GridFS.  See:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://groups.google.com/group/mongodb-user/browse_thread/thread/69da5f4a13f1db7c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://groups.google.com/group/mongodb-user/browse_thread/thread/69da5f4a13f1db7c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;It&apos;s going to complicate my intended use of MongoDB unfortunately...&lt;/p&gt;</comment>
                            <comment id="29491" author="cwolfinger" created="Fri, 22 Apr 2011 03:50:56 +0000"  >&lt;p&gt;Hi Dwight - this is a sample set of code that generates the fragmentation in java --&amp;gt;&lt;/p&gt;


&lt;p&gt;        DBCollection test = _db.getCollection(&quot;test&quot;);&lt;br/&gt;
        test.ensureIndex(new BasicDBObject().append(&quot;key&quot;, 1),&quot;pk&quot;, true);&lt;br/&gt;
        int x = 0;&lt;br/&gt;
        while (true)&lt;br/&gt;
        {&lt;br/&gt;
        	x++;&lt;br/&gt;
        	for (int i = 0; i &amp;lt; 100000; i++)&lt;/p&gt;
        	{
                        BasicDBObject medium = new BasicDBObject();
        		medium.put(&quot;key&quot;, i);
        		medium.put(&quot;abc&quot;, new byte[800]);
        		medium.put(&quot;payload&quot;, new byte[(int) (Math.random()*3000)]);
        		medium.removeField(&quot;_id&quot;);
        		test.insert(medium);
        	}
&lt;p&gt;        	for (int i = 0; i &amp;lt; 100000; i++)&lt;/p&gt;
        	{
        		test.remove(new BasicDBObject().append(&quot;key&quot;, i));
        	}
&lt;p&gt;        	CommandResult result = test.getStats();&lt;br/&gt;
        	System.out.println(&quot;Cycle complete &quot;+x+&quot;, extents=&quot;+result.get(&quot;numExtents&quot;)+&quot;, indexSize=&quot;+result.get(&quot;totalIndexSize&quot;)+&quot;,       storageSize=&quot;+result.get(&quot;storageSize&quot;));&lt;br/&gt;
        }&lt;/p&gt;</comment>
                            <comment id="29288" author="dwight_10gen" created="Tue, 19 Apr 2011 15:41:25 +0000"  >&lt;p&gt;like some of these ideas.  first thing we need is a test script (.js) to see the level of fragmentation before/after with various changes.  would just do a bunch of operations and then look at db.coll.stats()&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Depends</name>
                                                                <inwardlinks description="is depended on by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="60926">SERVER-8078</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                        <issuelink>
            <issuekey id="51441">SERVER-7159</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="49591">DOCS-499</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="42737">DOCS-276</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="31302">SERVER-5046</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="138978">SERVER-14081</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>22.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                <customfield id="customfield_12751" key="com.atlassian.jira.plugin.system.customfieldtypes:multiselect">
                        <customfieldname>Assigned Teams</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="25136"><![CDATA[Storage Execution]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_13552" key="com.go2group.jira.plugin.crm:crm_generic_field">
                        <customfieldname>Case</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[[500A000000UaZ9YIAV]]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 19 Apr 2011 06:27:12 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        11 years, 5 weeks, 1 day ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>alexander.golin@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            11 years, 5 weeks, 1 day ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10032" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Operating System</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10026"><![CDATA[ALL]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>backlog-server-execution</customfieldvalue>
            <customfieldvalue>cwolfinger</customfieldvalue>
            <customfieldvalue>cangove</customfieldvalue>
            <customfieldvalue>purple52</customfieldvalue>
            <customfieldvalue>dwight@mongodb.com</customfieldvalue>
            <customfieldvalue>eliot</customfieldvalue>
            <customfieldvalue>destari</customfieldvalue>
            <customfieldvalue>milkie@mongodb.com</customfieldvalue>
            <customfieldvalue>jblackburn</customfieldvalue>
            <customfieldvalue>jbehl@logicmonitor.com</customfieldvalue>
            <customfieldvalue>rem</customfieldvalue>
            <customfieldvalue>rdickeyvii</customfieldvalue>
            <customfieldvalue>scotthernandez</customfieldvalue>
            <customfieldvalue>grmblfrz</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrp17z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hr8gun:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4859</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hsndw7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>