<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 04:48:19 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-38224] Calls to `killSessionsAction` will fail to kill all sessions if one is already killed</title>
                <link>https://jira.mongodb.org/browse/SERVER-38224</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;The sessions &lt;a href=&quot;https://github.com/mongodb/mongo/blob/f8de654b888ef3b9a9f210499b0b8f5d727dfffd/src/mongo/db/session.h#L89&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;kill API&lt;/a&gt; was designed so that subsequent attempts to kill a session will throw &lt;tt&gt;ConflictingOperationInProgress&lt;/tt&gt; exception, until &lt;tt&gt;checkoutSessionForKill&lt;/tt&gt; is called.&lt;/p&gt;

&lt;p&gt;This causes problems for attempts to kill sessions using the &lt;a href=&quot;https://github.com/mongodb/mongo/blob/f8de654b888ef3b9a9f210499b0b8f5d727dfffd/src/mongo/db/kill_sessions_local.cpp#L58&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;&lt;tt&gt;killSessionsAction&lt;/tt&gt;&lt;/a&gt; call, because it will fail at the first session, which is double killed.&lt;/p&gt;</description>
                <environment></environment>
        <key id="637031">SERVER-38224</key>
            <summary>Calls to `killSessionsAction` will fail to kill all sessions if one is already killed</summary>
                <type id="1" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14703&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="13201">Fixed</resolution>
                                        <assignee username="kaloian.manassiev@mongodb.com">Kaloian Manassiev</assignee>
                                    <reporter username="kaloian.manassiev@mongodb.com">Kaloian Manassiev</reporter>
                        <labels>
                    </labels>
                <created>Wed, 21 Nov 2018 14:19:17 +0000</created>
                <updated>Sun, 29 Oct 2023 22:26:25 +0000</updated>
                            <resolved>Wed, 12 Dec 2018 21:16:56 +0000</resolved>
                                    <version>4.1.5</version>
                                    <fixVersion>4.1.7</fixVersion>
                                    <component>Internal Code</component>
                                        <votes>0</votes>
                                    <watches>2</watches>
                                                                                                                <comments>
                            <comment id="2088982" author="xgen-internal-githook" created="Wed, 12 Dec 2018 21:15:42 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Kaloian Manassiev&apos;, &apos;email&apos;: &apos;kaloian.manassiev@mongodb.com&apos;, &apos;username&apos;: &apos;kaloianm&apos;}
&lt;p&gt;Message: &lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-38224&quot; title=&quot;Calls to `killSessionsAction` will fail to kill all sessions if one is already killed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-38224&quot;&gt;&lt;del&gt;SERVER-38224&lt;/del&gt;&lt;/a&gt; Allow sessions to have more than one outstanding kill request&lt;/p&gt;

&lt;p&gt;Also reverts commit e13e069902099d601c6cf64def9fc374082a629e (&lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-38058&quot; title=&quot;retryable_writes_direct_write_to_config_transactions.js doesn&amp;#39;t expect a direct transactions collection write to fail&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-38058&quot;&gt;&lt;del&gt;SERVER-38058&lt;/del&gt;&lt;/a&gt; Make retryable_writes_direct_write_to_config_transactions.js expect ConflictingOperationInProgress) since it is no longer necessary.&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo/commit/fb1ed31af90557a01fdabc8e09434dca401f1c02&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo/commit/fb1ed31af90557a01fdabc8e09434dca401f1c02&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="2076148" author="siyuan.zhou@10gen.com" created="Thu, 29 Nov 2018 22:34:28 +0000"  >&lt;p&gt;Talked with Kal offline, now we use a thread pool to invalidate sessions in&#160;&lt;a href=&quot;https://jira.mongodb.org/browse/SERVER-37244&quot; title=&quot;Implement checkOutSessionForKill API and switch all non-migration callers to use it&quot; class=&quot;issue-link&quot; data-issue-key=&quot;SERVER-37244&quot;&gt;&lt;del&gt;SERVER-37244&lt;/del&gt;&lt;/a&gt;. Option 2 sounds good to me.&lt;/p&gt;</comment>
                            <comment id="2075642" author="kaloian.manassiev" created="Thu, 29 Nov 2018 17:28:37 +0000"  >&lt;p&gt;No, the contract will still be &quot;If you call kill on a session, you must call checkOutForKill after that with the provided token&quot;. Calling &quot;kill&quot; will increment a counter of how many kills are there outstanding and will expect that many checkOutForKill to be invoked after that.&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;should we have different ways to kill sessions?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;This would be the equivalent to having a &quot;drain&quot; mode on the SessionCatalog where no new sessions can be created. But I think at shutdown we stop the connections anyways, so I don&apos;t think it is necessary, is it?&lt;/p&gt;</comment>
                            <comment id="2075558" author="siyuan.zhou@10gen.com" created="Thu, 29 Nov 2018 16:41:12 +0000"  >&lt;blockquote&gt;
&lt;p&gt;Do we need a way to join the session kills at shutdown time and ensure that they are all completed? This is particularly the problem in the attached BF. My claim is that we do need such mechanism and option 2 provides it.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Just to clarify, option 2 will serialize the two kills, but the first kill may not call checkOutForKill() first, so shutdown may run first and destroy the sessions. This is not the BF case, but would happen, I think. Please correct me if I&apos;m wrong. In contrast, enqueuing a LockerManager lock will guarantee the ordering.&lt;/p&gt;

&lt;p&gt;Good point on using lock manager locks for session catalog concurrency. I was just wondering if we are reinventing the wheel as we need more and more concurrency control.&lt;/p&gt;

&lt;p&gt;Another thought: should we have different ways to kill sessions? Should the shutdown kill be separated into two parts - the common kill code path (abortArbitraryTransaction) and the shutdown cleanup, then the cleanup waits after joining all checked out sessions and has a simpler synchronization.&lt;/p&gt;</comment>
                            <comment id="2075131" author="kaloian.manassiev" created="Thu, 29 Nov 2018 08:50:32 +0000"  >&lt;blockquote&gt;&lt;p&gt;For option 1, why does the second killer have to wait for the first one?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;The reason for this is exactly because the two kill paths can be different - like you mention, there are different kinds of kills currently - for shutdown, for aborting expired transactions, etc. If we attach different decorations on the session, there could be different kill code paths as well. Because of this, currently there is no way to ensure that the two execute, because if the first is running, any subsequent one (the shutdown in this case) will just get a &lt;tt&gt;SessionAlreadyKilled&lt;/tt&gt; exception.&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;The original design doc also stated option 1 using exception SessionAlreadyKilled.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;This is exactly what is happening currently.&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;If the second killer has to wait, I&apos;m wondering if we can model the session checkout with LockManager locks.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;I think this is orthogonal to this problem - it just changes the type of synchronization to lock manager locks instead of the condition variable on the session catalog and I would like to consider it separately. Just my initial issue with this approach though is that the locks we need to use for the session will have to be of the RESOURCE_MUTEX type and they do not belong anywhere on the lock manager hierarchy, so they are not any more safe than mutex + condition_variable. Also now the SessionCatalog is also used on mongos, which doesn&apos;t have the lock manager.&lt;/p&gt;

&lt;p&gt;I am going to find a time for us to talk about this &quot;in person&quot; this or next week when I am back in the NYC office so we can resolve it more quickly.&lt;/p&gt;

&lt;p&gt;Basically I think the issues we need to agree on are:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;What does it mean for a session to have multiple different code paths and is it safe for one kind of &quot;kill&quot; to find the session being killed by a different kind of kill and to just ignore that, or do we need to serialize them somehow one after the other (which is what option 2 proposes)?&lt;/li&gt;
	&lt;li&gt;Do we need a way to join the session kills at shutdown time and ensure that they are all completed? This is particularly the problem in the attached BF. My claim is that we do need such mechanism and option 2 provides it.&lt;/li&gt;
	&lt;li&gt;Using lock manager locks for session catalog concurrency
	&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
		&lt;li&gt;My arguments against it are - (1) we still need the session catalog mutex in order to run scan sessions and prevent sessions checked-in/checked-out state from changing while iterating them; (2) using RESOURCE_MUTEX is not any more safe than mutex + cond_var; (3) mongos doesn&apos;t have a lock manager&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="2075059" author="siyuan.zhou@10gen.com" created="Thu, 29 Nov 2018 07:06:57 +0000"  >&lt;p&gt;If the second killer has to wait, I&apos;m wondering if we can model the session checkout with LockManager locks. For each session, there will be one resource to represent whether the session can be killed or not (SessionKill lock), another resource to represent the Session itself.&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;checkout needs a SessionKill&#160;lock in IX mode and a Session lock in X mode, so session checkouts conflict with each other.&lt;/li&gt;
	&lt;li&gt;markKilled will enqueue a SessionKill&#160;lock in X mode, and checkOutForKill waits on that, so a session kill prevents normal session checkout and waits for any other session kill.&lt;/li&gt;
	&lt;li&gt;markUnkillable will try to upgrade the&#160;SessionKill&#160;lock in X mode. If it fails, then it&apos;s already killed.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Using a condition variable or something similar to notify killers also makes sense. Another more aggressive design is to register a killFunction on the session after interrupting the operation context. Then the running thread will execute the killFunction on checkIn(). Both the original killer and the second one use the same future to learn when the kill finishes.&lt;/p&gt;</comment>
                            <comment id="2075032" author="siyuan.zhou@10gen.com" created="Thu, 29 Nov 2018 06:21:37 +0000"  >&lt;p&gt;I&apos;d prefer option 1 since double kill in option 2 overrides the kill reason and implies that different kill code paths are idempotent which may not be true, while the ultimate goal of session refactoring is to reduce the unnecessary concurrency. For option 1, why does the second killer have to wait for the first one? It&apos;s fine for direct write and expiration kill. This will be an issue for shutdown since we don&apos;t want any running&#160;asynchronous task after the shutdown. However, I don&apos;t think double kill will fix this problem, since the&#160;asynchronous kill task may run very late after the shutdown kill is called. There has to be another way to synchronize them.&lt;/p&gt;

&lt;p&gt;The original design doc also stated option 1 using exception &lt;tt&gt;SessionAlreadyKilled&lt;/tt&gt;.&lt;/p&gt;</comment>
                            <comment id="2073816" author="kaloian.manassiev" created="Wed, 28 Nov 2018 09:37:22 +0000"  >&lt;p&gt;When a caller invokes &lt;tt&gt;Session::kill&lt;/tt&gt;, the session is put in a &quot;killed&quot; state, which means it is the responsibility of this caller to perform whatever cleanup tasks they intend to do and leave it in a &quot;pristine&quot; state so to speak (if possible).&lt;/p&gt;

&lt;p&gt;Double kill would mean that if two threads do it concurrently, they both will be required to execute the cleanup tasks (serially, because of the &lt;tt&gt;checkoutSessionForKill&lt;/tt&gt; serialization). This means the second &quot;killer&quot; might not actually have anything to do. In the mean time, until the second killer runs, other user threads will not be able to use the session, because it is going to be in the &quot;marked for kill&quot; state. Regardless of this though, the double-kill operation is no different than two kills happening one after another, so even if there were an active transaction, it is still fine to kill it.&lt;/p&gt;

&lt;p&gt;In the case of the linked BF though, what happens is that a session is marked for kill because of drop of &lt;tt&gt;config.system.sessions&lt;/tt&gt; and an asynchronous task is dispatched to finish killing it, but before that task completes, the server is shutdown, which goes through all sessions to kill them, but happens to find that session already killed and throws &lt;tt&gt;ConflictingOperationInProgress&lt;/tt&gt; exception in a place, where it is not expected.&lt;/p&gt;</comment>
                            <comment id="2073597" author="siyuan.zhou@10gen.com" created="Tue, 27 Nov 2018 23:18:59 +0000"  >&lt;p&gt;With option 2, what does it mean for double kill? Is it possible that the second kill actually kills a newly started transaction after the first one? BTW, who are the two concurrent kills in this case?&lt;/p&gt;</comment>
                            <comment id="2068941" author="kaloian.manassiev" created="Wed, 21 Nov 2018 14:25:57 +0000"  >&lt;p&gt;There are two ways I can think of to fix this:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;Change the &lt;a href=&quot;https://github.com/mongodb/mongo/blob/f8de654b888ef3b9a9f210499b0b8f5d727dfffd/src/mongo/db/kill_sessions_local.cpp#L68&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;&lt;tt&gt;killSessionsAction&lt;/tt&gt; loop&lt;/a&gt; to catch the &lt;tt&gt;ConflictingOperationInProgress&lt;/tt&gt; exception.&lt;/li&gt;
	&lt;li&gt;Allow &lt;a href=&quot;https://github.com/mongodb/mongo/blob/f8de654b888ef3b9a9f210499b0b8f5d727dfffd/src/mongo/db/session.cpp#L46&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;&lt;tt&gt;Session::kill&lt;/tt&gt;&lt;/a&gt; to be called more than once, keeping a counter of these calls and each call to kill must be followed by a corresponding call to &lt;tt&gt;checkoutSessionForKill&lt;/tt&gt;.&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;I prefer option (2) because it makes the sessions API simpler to use and also because with option (1) (and with how kill works today) there is no way for a second &quot;killer&quot; to wait until the session has actually been killed, whereas with option (2) nobody will be able to use the session until all &quot;killers&quot; have completed.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=siyuan.zhou&quot; class=&quot;user-hover&quot; rel=&quot;siyuan.zhou&quot;&gt;siyuan.zhou&lt;/a&gt;, does option (2) sound good to you?&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Depends</name>
                                                                <inwardlinks description="is depended on by">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>10.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18555" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname># of Sprints</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10011" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Backwards Compatibility</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10038"><![CDATA[Fully Compatible]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 27 Nov 2018 23:18:59 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        5 years, 9 weeks ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>luke.bonanomi@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            5 years, 9 weeks ago
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_16465" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Linked BF Score</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>67.0</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10032" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Operating System</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10026"><![CDATA[ALL]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>xgen-internal-githook</customfieldvalue>
            <customfieldvalue>kaloian.manassiev@mongodb.com</customfieldvalue>
            <customfieldvalue>siyuan.zhou@mongodb.com</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hudf9b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hu3kbb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_23361" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Requested By</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10557" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="2611">Sharding 2018-12-03</customfieldvalue>
    <customfieldvalue id="2639">Sharding 2018-12-17</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10053" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>Time In Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hud1in:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>