<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 02:59:38 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-2327] MongoDB stuck after db.getServerStatus().globalLock.currentQueue.writers exceeds 128 (windows)</title>
                <link>https://jira.mongodb.org/browse/SERVER-2327</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;Reproduce using java program : &lt;a href=&quot;http://pastie.org/1428863&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://pastie.org/1428863&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Change CONNECTION_COUNT to 128 and the problem disappears, 129 or higher and the problem occurs 100% of the time on fast machines. Slower machines tend to not reach the 128 current lock point.&lt;/p&gt;

&lt;p&gt;db.serverStatus().globalLock.currentQueue.writers will show 128 and not recover, regardless of load or wait time. Verify by doing db.test.getIndexes(), it never returns after this.&lt;/p&gt;

&lt;p&gt;mongostats output after test to confirm load is in fact gone : &lt;a href=&quot;http://pastie.org/1428842&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://pastie.org/1428842&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Other hosts can successfully connect to the same mongo instance.&lt;/p&gt;</description>
                <environment>Reproduced on Windows 7 64-bit on 5 separate machines.</environment>
        <key id="14148">SERVER-2327</key>
            <summary>MongoDB stuck after db.getServerStatus().globalLock.currentQueue.writers exceeds 128 (windows)</summary>
                <type id="1" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14703&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.mongodb.org/images/icons/priorities/critical.svg">Critical - P2</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="9">Done</resolution>
                                        <assignee username="dwight@mongodb.com">Dwight Merriman</assignee>
                                    <reporter username="remonvv">Remon van Vliet</reporter>
                        <labels>
                    </labels>
                <created>Tue, 4 Jan 2011 18:54:50 +0000</created>
                <updated>Tue, 12 Jul 2016 00:18:51 +0000</updated>
                            <resolved>Tue, 30 Aug 2011 01:29:54 +0000</resolved>
                                                    <fixVersion>2.0.0-rc0</fixVersion>
                                    <component>Concurrency</component>
                                        <votes>3</votes>
                                    <watches>7</watches>
                                                                                                                <comments>
                            <comment id="51409" author="remonvv" created="Tue, 30 Aug 2011 08:09:09 +0000"  >&lt;p&gt;Hi, I&apos;ll confirm resolution when I get around to it. Thanks!&lt;/p&gt;</comment>
                            <comment id="51374" author="eliot" created="Tue, 30 Aug 2011 01:29:54 +0000"  >&lt;p&gt;Can someone who had this issue confirm it&apos;s fixed in 2.0.0-rc0?&lt;/p&gt;</comment>
                            <comment id="50157" author="dwight_10gen" created="Tue, 23 Aug 2011 15:28:04 +0000"  >&lt;p&gt;i think this is fixed.  slimreaderwriter would be better but requires windows server 2008 R2.  i do want to use it for a simple_rwlock as it&apos;s better &amp;#8211; for mmmutex specifically &amp;#8211; so i&apos;m leaving this open until then.  that will be 2.2.  (doing a simple rwlock with no &apos;try&apos; supports slightly older OS versions)&lt;/p&gt;</comment>
                            <comment id="46516" author="eliot" created="Fri, 5 Aug 2011 00:01:14 +0000"  >&lt;p&gt;Code is done - just need a newer machine to build on.&lt;/p&gt;</comment>
                            <comment id="40231" author="dwight_10gen" created="Fri, 1 Jul 2011 19:17:01 +0000"  >&lt;p&gt;now works up to ~ 1000 connections but a more elaborate fix, or the slimreaderwriter locks (windows7 and 2008r2 only) required for complete fix.&lt;/p&gt;</comment>
                            <comment id="26214" author="remonvv" created="Fri, 18 Mar 2011 13:21:46 +0000"  >&lt;p&gt;Just attempted a repro on 1.8.0. The hard freezes seem to be gone but there are quite a few other issues surfacing under load now. Also, the ar|aw in mongostats does still display slowly increasing numbers even when idling after a load test. I&apos;ll report the issues I&apos;m running into now separately.&lt;/p&gt;</comment>
                            <comment id="26157" author="remonvv" created="Thu, 17 Mar 2011 21:21:55 +0000"  >&lt;p&gt;I&apos;ll try and reproduce on 1.8 when I get round to it. Thanks.&lt;/p&gt;</comment>
                            <comment id="26114" author="auto" created="Thu, 17 Mar 2011 17:18:23 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{u&apos;login&apos;: u&apos;dwight&apos;, u&apos;name&apos;: u&apos;Dwight&apos;, u&apos;email&apos;: u&apos;dwight@10gen.com&apos;}
&lt;p&gt;Message: comments &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-2327&quot; title=&quot;MongoDB stuck after db.getServerStatus().globalLock.currentQueue.writers exceeds 128 (windows)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-2327&quot;&gt;&lt;del&gt;SERVER-2327&lt;/del&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/12a4af1c493d77ea54419dada3c46571d4cb7abb&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/12a4af1c493d77ea54419dada3c46571d4cb7abb&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="26113" author="dwight_10gen" created="Thu, 17 Mar 2011 17:16:04 +0000"  >&lt;p&gt;some work was done on this and is in 1.8.  can you try 1.8 and LMK if it still happens.  Specifically a patch to boost mutex: &lt;/p&gt;

&lt;p&gt;// in rwlock.h&lt;br/&gt;
#if defined(_WIN32)&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;include &quot;shared_mutex_win.hpp&quot;&lt;br/&gt;
namespace mongo {&lt;br/&gt;
    typedef boost::modified_shared_mutex shared_mutex;&lt;br/&gt;
}&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;But in the future will use slim reader writer locks on win64 which will be a better solution.&lt;/p&gt;</comment>
                            <comment id="22672" author="eliot" created="Tue, 25 Jan 2011 20:04:31 +0000"  >&lt;p&gt;The boost mutex is directly causing this behavior, so its really quite simple to follow.&lt;/p&gt;

&lt;p&gt;For the potential easy fix see:&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/blob/master/util/concurrency/rwlock.h&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/blob/master/util/concurrency/rwlock.h&lt;/a&gt;&lt;br/&gt;
look for MONGO_USE_SRW_ON_WINDOWS&lt;/p&gt;</comment>
                            <comment id="22671" author="dmarien" created="Tue, 25 Jan 2011 20:00:29 +0000"  >&lt;p&gt;Can you provide more information about why the boost mutex implementation is broken on Windows?&lt;/p&gt;

&lt;p&gt;I&apos;m also curious why your fix is only available for Win 7?  Can you point out the part of the mongodb code you&apos;re seeing as the cause of the deadlock/mutex issue?&lt;/p&gt;</comment>
                            <comment id="22667" author="remonvv" created="Tue, 25 Jan 2011 19:04:27 +0000"  >&lt;p&gt;Reproduction video.&lt;/p&gt;</comment>
                            <comment id="22661" author="remonvv" created="Tue, 25 Jan 2011 17:54:30 +0000"  >&lt;p&gt;It&apos;s more than a stats problem. After running the repro any operation on the database never returns. So doing a db.test.find() will not return at all from the shell nor will any other query. &lt;/p&gt;

&lt;p&gt;Basically, if the server reaches this point it has to be restarted before it can be used again. It accepts connections just fine though, but that&apos;s to be expected if it&apos;s a lock mutex issue of some sort. I think both Scott and Doug successfully reproduced the issue and got the server stuck. The reproduction code gets the server in this state on all the machines I&apos;ve tried (64-bit, 32-bit is a bit hit and miss).&lt;/p&gt;

&lt;p&gt;It doesn&apos;t cause a problem for me personally since it&apos;s only our dev load test boxes that get stuck, we deploy to linux environments only. I do think in its current state the issue should be mentioned in the downloads section at least, and this JIRA entry elevated to BROKEN. I understand that you don&apos;t want a rushed fix though so if 1.9.X is the first possible version with a fix then so be it.&lt;/p&gt;

&lt;p&gt;Let me know if I can help you guys with a repro. I could show you the issue with a desktop cast even if you&apos;d like and are having trouble repro-ing on local machines.&lt;/p&gt;

&lt;p&gt;EDIT: The plot thickens. I cannot reproduce the issue (stats or stuck) at the moment using the same machine, data files and the same code. The only &quot;change&quot; is that a windows update ran yesterday. Can anyone still reproduce this?&lt;/p&gt;

&lt;p&gt;EDIT2: Never mind, just takes a lot longer for some reason.&lt;/p&gt;</comment>
                            <comment id="22656" author="eliot" created="Tue, 25 Jan 2011 17:31:21 +0000"  >&lt;p&gt;Does it cause any actual problems for you?&lt;br/&gt;
In our testing its just a stats problem.&lt;/p&gt;

&lt;p&gt;The not so simple fix is re-writing all the mutex stuff with basic windows primitives.&lt;/p&gt;

&lt;p&gt;Given there are people running in production well now, we feel thats too risky a change to put in 1.8.0 given its in the final stretch.&lt;/p&gt;</comment>
                            <comment id="22653" author="remonvv" created="Tue, 25 Jan 2011 17:13:33 +0000"  >&lt;p&gt;Hm, sorry to hear there&apos;s no simple fix available.  I previously checked for known issues with boost mutexes and/or their usage in windows and I couldn&apos;t find any so it must be a rare issue. &lt;/p&gt;

&lt;p&gt;Are there other &quot;not so simple&quot; fixes available? In its current state I would not consider the windows builds production ready so the appropriate warnings should be added to the download page.&lt;/p&gt;</comment>
                            <comment id="22649" author="eliot" created="Tue, 25 Jan 2011 17:06:26 +0000"  >&lt;p&gt;Yes, both 32 and 64bit.  &lt;br/&gt;
The fix we&apos;ve got only works (compiles) on win7, so isn&apos;t really a viable fix.&lt;/p&gt;</comment>
                            <comment id="22648" author="andrewk" created="Tue, 25 Jan 2011 17:04:03 +0000"  >&lt;p&gt;does the &quot;There is no simple fix for this&quot; comment apply to both 64bit AND 32bit machines? previously you indicated something might have been done to fix this under only 64-bit. does that comment no longer apply?&lt;/p&gt;</comment>
                            <comment id="22645" author="eliot" created="Tue, 25 Jan 2011 16:48:02 +0000"  >&lt;p&gt;There is no simple fix for this.&lt;br/&gt;
Its a bug either in boost or a windows library boost is using, so swapping out is fairly complex.&lt;/p&gt;</comment>
                            <comment id="22442" author="remonvv" created="Mon, 17 Jan 2011 11:08:07 +0000"  >&lt;p&gt;What about 32-bit windows? We&apos;re able to reproduce it on that environment as well as mentioned above.&lt;/p&gt;

&lt;p&gt;I&apos;ll confirm fix on 64-bit windows when I have time once 1.7.5 becomes available.&lt;/p&gt;</comment>
                            <comment id="22386" author="eliot" created="Sun, 16 Jan 2011 04:56:35 +0000"  >&lt;p&gt;boost on windows is broken.&lt;br/&gt;
we&apos;ve replaced the lock on 64-bit windows&lt;/p&gt;</comment>
                            <comment id="22080" author="remonvv" created="Wed, 5 Jan 2011 11:46:46 +0000"  >&lt;p&gt;Seems to be isolated to Windows 64 platforms. Not repro&apos;d on linux systems or Win 32. Win 32 seems to show connection starvation though. Will look into it and post separate issue if needed.&lt;/p&gt;

&lt;p&gt;EDIT: Scratch that, confirmed on 32 bits windows as well but takes a faster machine to get it there.&lt;/p&gt;</comment>
                            <comment id="22078" author="remonvv" created="Wed, 5 Jan 2011 09:47:16 +0000"  >&lt;p&gt;Official binaries for me, specifically 1.6.3, 1.6.5 and 1.7.4. &lt;/p&gt;

&lt;p&gt;System 1 :&lt;br/&gt;
OS : Windows 7 Pro 64-bit. &lt;br/&gt;
CPU : AMD Phenom II X4 965 Black Edition&lt;br/&gt;
MEM : 8Gb&lt;br/&gt;
DISK : 64Gb Intel X-25E SSD&lt;br/&gt;
Mongo : 1.6.5 and 1.7.4&lt;/p&gt;


&lt;p&gt;System 2 :&lt;br/&gt;
OS : Windows 7 Home Premium 64-bit. &lt;br/&gt;
CPU : Intel Core i7 920 2.6GHz&lt;br/&gt;
MEM : 12Gb&lt;br/&gt;
DISK : 2x 64Gb Intel X-25E SSD RAID0&lt;br/&gt;
Mongo : 1.6.3&lt;/p&gt;


&lt;p&gt;System 1 takes about 7 seconds to reach the lock, system 2 almost instantly.&lt;/p&gt;</comment>
                            <comment id="22068" author="eliot" created="Wed, 5 Jan 2011 04:32:51 +0000"  >&lt;p&gt;Seems there might be a bug in boost mutex.&lt;br/&gt;
Going to try some things&lt;/p&gt;</comment>
                            <comment id="22051" author="dmarien" created="Tue, 4 Jan 2011 21:56:40 +0000"  >&lt;p&gt;Also for me it&apos;s not an immediate repro sometimes because my machine is doing other things but letting it run for a bit and then starting a shell to issue some queries seems to trigger it faster.&lt;/p&gt;</comment>
                            <comment id="22050" author="dmarien" created="Tue, 4 Jan 2011 21:54:00 +0000"  >&lt;p&gt;I&apos;m using the official Windows 64-bit binaries for 1.6.3 and 1.7.4.&lt;/p&gt;

&lt;p&gt;OS: Windows Vista Business 64-bit SP2&lt;br/&gt;
Processor: Intel Core i7 920 2.6GHz&lt;br/&gt;
Memory: 12GB&lt;/p&gt;

&lt;p&gt;My quick code port in python+pymongo: &lt;a href=&quot;http://pastie.org/1429529&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://pastie.org/1429529&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="22048" author="eliot" created="Tue, 4 Jan 2011 21:06:55 +0000"  >&lt;p&gt;Also, was this with official binaries?&lt;/p&gt;</comment>
                            <comment id="22047" author="eliot" created="Tue, 4 Jan 2011 21:03:04 +0000"  >&lt;p&gt;Can you paste exact OS/Mongo versions.&lt;br/&gt;
We can&apos;t reproduce.&lt;/p&gt;</comment>
                            <comment id="22033" author="dmarien" created="Tue, 4 Jan 2011 19:05:39 +0000"  >&lt;p&gt;I&apos;m also able to reproduce this on Windows Vista 64-bit quad-core using 1.6.3 and 1.7.4 using that test re-written for python+pymongo.&lt;/p&gt;

&lt;p&gt;Seems to be a timing issue because if I introduce some load on the machine then I&apos;m unable to reproduce the lockup.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="10736" name="mongostuck.avi" size="8602778" author="remonvv" created="Tue, 25 Jan 2011 19:04:27 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>28.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 4 Jan 2011 19:05:39 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        12 years, 25 weeks, 1 day ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>ramon.fernandez@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            12 years, 25 weeks, 1 day ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10000" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Old_Backport</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10000"><![CDATA[No]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10032" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Operating System</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[Windows]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>andrewk</customfieldvalue>
            <customfieldvalue>auto</customfieldvalue>
            <customfieldvalue>dmarien</customfieldvalue>
            <customfieldvalue>dwight@mongodb.com</customfieldvalue>
            <customfieldvalue>eliot</customfieldvalue>
            <customfieldvalue>remonvv</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrp8xr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hrii1j:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>21575</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrnvav:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>