<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 06:37:08 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-77972] Investigate that PreImagesTruncateManager on Secondaries takes PBWM lock</title>
                <link>https://jira.mongodb.org/browse/SERVER-77972</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;During truncate marker initialisation on a secondary, the PBWM lock is acquired in MODE_IS by default.&lt;/p&gt;

&lt;p&gt;A deadlock can happen as follows:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;There is a large prepared transaction, the primary waits for the commit to be replicated&lt;/li&gt;
	&lt;li&gt;The OplogApplier on the secondary tries to apply a new batch with the commit, but needs to acquire the PBWM lock in MODE_X&lt;/li&gt;
	&lt;li&gt;The ChangeStreamExpiredPreImagesRemover initialisation acquired the PBWM lock in MODE_IS, but gets stuck &lt;a href=&quot;https://github.com/mongodb/mongo/blob/1b4a551a6b8c85611e26857217ce1a1e1363e716/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp#L374&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;waiting&lt;/a&gt; for the prepared transaction to commit or abort, but it can&apos;t because it needs the PBWM lock.&lt;/li&gt;
&lt;/ul&gt;
</description>
                <environment></environment>
        <key id="2364562">SERVER-77972</key>
            <summary>Investigate that PreImagesTruncateManager on Secondaries takes PBWM lock</summary>
                <type id="1" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14703&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="13201">Fixed</resolution>
                                        <assignee username="haley.connelly@mongodb.com">Haley Connelly</assignee>
                                    <reporter username="haley.connelly@mongodb.com">Haley Connelly</reporter>
                        <labels>
                    </labels>
                <created>Fri, 9 Jun 2023 22:10:43 +0000</created>
                <updated>Sun, 29 Oct 2023 21:20:14 +0000</updated>
                            <resolved>Mon, 24 Jul 2023 09:02:26 +0000</resolved>
                                                    <fixVersion>7.1.0-rc0</fixVersion>
                                                        <votes>0</votes>
                                    <watches>7</watches>
                                                                                                                <comments>
                            <comment id="5583322" author="xgen-internal-githook" created="Fri, 21 Jul 2023 17:00:06 +0000"  >&lt;p&gt;Author: &lt;/p&gt;
{&apos;name&apos;: &apos;Haley Connelly&apos;, &apos;email&apos;: &apos;haley.connelly@mongodb.com&apos;, &apos;username&apos;: &apos;haleyConnelly&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-77972&quot; title=&quot;Investigate that PreImagesTruncateManager on Secondaries takes PBWM lock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-77972&quot;&gt;&lt;del&gt;SERVER-77972&lt;/del&gt;&lt;/a&gt; Prevent pre-image truncation from taking PBWM&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/4ff092d683be418230ef28fa3f3c81833b82c570&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/4ff092d683be418230ef28fa3f3c81833b82c570&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="5539905" author="haley.connelly" created="Mon, 3 Jul 2023 08:49:37 +0000"  >&lt;p&gt;This is also likely an issue with change collections. Initially I was planning to do this in this ticket, but decided it deserves its own ticket and filed&#160;&lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-78599&quot; title=&quot;Prevent change collections from truncating inconsistent data&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-78599&quot;&gt;&lt;del&gt;SERVER-78599&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="5509028" author="haley.connelly" created="Mon, 19 Jun 2023 11:02:52 +0000"  >&lt;p&gt;To add some context on why all_durable isn&apos;t sufficient for secondaries (please, correct me if this seems wrong).&lt;/p&gt;


&lt;ul&gt;
	&lt;li&gt;WiredTiger &lt;a href=&quot;https://github.com/10gen/mongo/blob/b0b2b25821f55ee213114e3283fcf46fb04ba618/src/third_party/wiredtiger/src/txn/txn.c#L1780-L1785&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;publishes&lt;/a&gt; all durable at the end of every WT commit&lt;/li&gt;
&lt;/ul&gt;


&lt;ul&gt;
	&lt;li&gt;
	&lt;ul&gt;
		&lt;li&gt;In Secondary oplog application, entries are applied in parallel.&lt;/li&gt;
		&lt;li&gt;If WiredTiger doesn&apos;t know there is a TS(50) opTime for an oplog batch which has already applied/ committed TS(51), the all durable could be TS(51) despite TS(50) not being committed&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="5501386" author="haley.connelly" created="Thu, 15 Jun 2023 12:58:55 +0000"  >&lt;p&gt;Proposed fix (collaboration with &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=suganthi.mani%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;suganthi.mani@mongodb.com&quot;&gt;suganthi.mani@mongodb.com&lt;/a&gt;):&lt;br/&gt;
(1) Skip Acquiring PBWM lock for marker initialisation&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;
	&lt;ul&gt;
		&lt;li&gt;Setting the ReadSource to lastApplied isn&apos;t necessary provided we do part (2).&#160;&#160;&lt;/li&gt;
		&lt;li&gt;Truncate marker initialisation is an estimation, and if initialisation opens a cursor mid oplog batch, we miss reading some entries, which is inconsequential.&#160;&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;(2) Only truncate range if marker.lastRecord &amp;lt;= all_durable &amp;amp;&amp;amp; marker.lastRecord &amp;lt;= lastApplied&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;
	&lt;ul&gt;
		&lt;li&gt;
		&lt;ul&gt;
			&lt;li&gt;all_durable: For the primary, ensures that all writes with opTimes less than marker.lastRecord are committed and there are no holes.&lt;/li&gt;
			&lt;li&gt;lastApplied: For the secondaries, without the PBWM lock, we can read in-between an oplog batch. Since lastApplied is only updated after each oplog batch, this guarantees the truncate range is in a &lt;a href=&quot;https://github.com/mongodb/mongo/blob/78fd67b9a4b4e13bb03d5c8ed5240f75ca493d58/src/mongo/db/catalog/README.md#secondary-reads&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;consistent state&lt;/a&gt;.&#160;&lt;/li&gt;
		&lt;/ul&gt;
		&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="5495905" author="haley.connelly" created="Tue, 13 Jun 2023 16:41:12 +0000"  >&lt;p&gt;The goal should be to ensure that a key within a range cannot be written after the range is truncated to prevent inconsistent views of&#160; the data.&lt;/p&gt;</comment>
                            <comment id="5495896" author="haley.connelly" created="Tue, 13 Jun 2023 16:38:05 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=suganthi.mani%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;suganthi.mani@mongodb.com&quot;&gt;suganthi.mani@mongodb.com&lt;/a&gt; Thanks so much for laying out this scenario! We discussed this in a bit more detail offline and I think we&apos;ve landed on a solution.&lt;/p&gt;

&lt;p&gt;The scenario where TS(200) would be truncated before TS(100) is committed would be very unlikely, but more likely the smaller the &apos;expireAfterSeconds&apos;.&#160;&lt;/p&gt;

&lt;p&gt;During truncate initialisation, we could use &apos;lastApplied&apos; as a read source, and wait for it to become durable before performing our scan. This could ensure that out of order inserts during secondary oplog application do not impact initialisation.&#160;&#160;&lt;/p&gt;

&lt;p&gt;On top of this, I&apos;ve filed&#160;&lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-78042&quot; title=&quot;Truncates on change collections and pre-images collections may truncate inconsistent ranges&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-78042&quot;&gt;&lt;del&gt;SERVER-78042&lt;/del&gt;&lt;/a&gt; to address the potential issue of trying to truncate a record whose timestamp is greater than the last durable timestamp.&#160;&lt;/p&gt;</comment>
                            <comment id="5492596" author="suganthi.mani" created="Mon, 12 Jun 2023 17:20:11 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=haley.connelly%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;haley.connelly@mongodb.com&quot;&gt;haley.connelly@mongodb.com&lt;/a&gt; After speaking to you last Friday, I realized that not taking PBWM lock during the truncate marker initialization, can cause Changestream(CS) cursors to skip some entries (assuming that truncate range is used for both Serverless change collection and pre-image collection). Consider this scenario.&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;secondary starts to apply a batch containing operations at TS(50) ,TS(100) and TS(200)&lt;/li&gt;
	&lt;li&gt;oplog applier applies TS(200) but not yet applied TS(50) &amp;amp; TS(100).&lt;/li&gt;
	&lt;li&gt;&#160;Truncate marker initialization not taking PBWM allows to read these out-of-order writes which can cause WT truncate all records till TS(200).&lt;/li&gt;
	&lt;li&gt;Now, oplog applier&#160; applies TS(50) and TS(100).&lt;/li&gt;
	&lt;li&gt;Before the client tries to re-establish CS cursor on this secondary, the last read entry by the cursor is TS(50). Since TS(50) entry is&#160; present, CS cursor can resume on this secondary but now miss reporting TS(200) op to the client.&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;To be noted, we can have the same problem on primary even with PBWM lock held and if the read snapshot is lastApplied (not a no-hole point). Consider this scenario.&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;Node1 is primary, CS cursor last read entry is TS(50) (which is majority committed)&lt;/li&gt;
	&lt;li&gt;Node1 commits TS(200), then truncates the change entries till TS(200).&lt;/li&gt;
	&lt;li&gt;Node1 then commits TS(100).&lt;/li&gt;
	&lt;li&gt;TS(200) managed to get majority committed.&lt;/li&gt;
	&lt;li&gt;Say node1 crashes,&#160; CS cursor resumes on node2 and reads till TS(100).&lt;/li&gt;
	&lt;li&gt;Now, node2&#160; crashes and CS cursor able to resume on node1 as it has TS(100), resulting in miss reporting TS(200) to client.&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=max.hirschhorn%40mongodb.com&quot; class=&quot;user-hover&quot; rel=&quot;max.hirschhorn@mongodb.com&quot;&gt;max.hirschhorn@mongodb.com&lt;/a&gt;&#160; and I think, the underlying problem in both cases is that we can have new writes with recordID older than &lt;a href=&quot;https://github.com/10gen/mongo/blob/c7a7b72562e867e83f7a5ff023e7b47b0ee70e56/src/mongo/db/storage/collection_truncate_markers.h#L82&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;Marker.lastRecord&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Depends</name>
                                                                <inwardlinks description="is depended on by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="2366984">SERVER-78042</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                        <issuelink>
            <issuekey id="2366984">SERVER-78042</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="2382381">SERVER-78599</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="2398946">SERVER-79234</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>3.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10011" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Backwards Compatibility</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10038"><![CDATA[Fully Compatible]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 12 Jun 2023 17:20:11 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        28 weeks, 5 days ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_17050" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Downstream Team Attention</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="16941"><![CDATA[Not Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10857" key="com.pyxis.greenhopper.jira:gh-epic-link">
                        <customfieldname>Epic Link</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>PM-3092</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>luke.bonanomi@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            28 weeks, 5 days ago
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_16465" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Linked BF Score</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>114.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10032" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Operating System</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10026"><![CDATA[ALL]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>xgen-internal-githook</customfieldvalue>
            <customfieldvalue>haley.connelly@mongodb.com</customfieldvalue>
            <customfieldvalue>suganthi.mani@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i2d8q7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|i1r8mz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                            <customfield id="customfield_22250" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Special Downgrade Instructions Required</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="23343"><![CDATA[Not Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="7535">Execution EMEA Team 2023-07-10</customfieldvalue>
    <customfieldvalue id="7536">Execution EMEA Team 2023-07-24</customfieldvalue>
    <customfieldvalue id="7537">Execution EMEA Team 2023-08-07</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i2cuvj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>