<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 09:05:57 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[KAFKA-253] Allow Kafka Sink Connector to Execute Unordered Bulk Operations</title>
                <link>https://jira.mongodb.org/browse/KAFKA-253</link>
                <project id="16285" key="KAFKA">Kafka Connector</project>
                    <description>&lt;p&gt;Hi Team,&lt;/p&gt;

&lt;p&gt;As of now, the Sink Connector only executes operations using &quot;ordered&quot; bulk writes that guarantee the message ordering within each source topic partition.&lt;/p&gt;

&lt;p&gt;There might be circumstances where ordering is not required and executing &quot;unordered&quot; bulk operations might have benefits:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Increase performance due to the ability to parallelize &quot;unordered&quot; bulk write operations.&lt;/li&gt;
	&lt;li&gt;Avoiding the whole batch of messages to fail, especially when the operation that fails is the first one of the &quot;ordered&quot; list.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;The default should still be &quot;ordered&quot; bulk writes but I propose to add the ability to modify this behavior by adding a property to switch to &quot;unordered&quot; bulk operations. The implications of this change should be made &lt;b&gt;very&lt;/b&gt; clear in our documentation page and it might even be wise to throw a warning about message processing order in the logs.&lt;/p&gt;

&lt;p&gt;Thanks&lt;br/&gt;
Diego&lt;/p&gt;</description>
                <environment></environment>
        <key id="1886321">KAFKA-253</key>
            <summary>Allow Kafka Sink Connector to Execute Unordered Bulk Operations</summary>
                <type id="4" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14710&amp;avatarType=issuetype">Improvement</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="13201">Fixed</resolution>
                                        <assignee username="valentin.kovalenko@mongodb.com">Valentin Kovalenko</assignee>
                                    <reporter username="diego.rodriguez@mongodb.com">Diego Rodriguez</reporter>
                        <labels>
                            <label>internal-user</label>
                    </labels>
                <created>Wed, 29 Sep 2021 15:50:54 +0000</created>
                <updated>Sat, 28 Oct 2023 10:46:13 +0000</updated>
                            <resolved>Wed, 12 Jan 2022 16:43:56 +0000</resolved>
                                                    <fixVersion>1.7.0</fixVersion>
                                    <component>Sink</component>
                                        <votes>0</votes>
                                    <watches>6</watches>
                                                                                                                <comments>
                            <comment id="4300557" author="xgen-internal-githook" created="Wed, 19 Jan 2022 16:36:47 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Valentin Kovalenko&apos;, &apos;email&apos;: &apos;valentin.male.kovalenko@gmail.com&apos;, &apos;username&apos;: &apos;stIncMale&apos;}
&lt;p&gt;Message: Mention `bulk.write.ordered` in `CHANGELOG.md` (#98)&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/browse/KAFKA-253&quot; title=&quot;Allow Kafka Sink Connector to Execute Unordered Bulk Operations&quot; class=&quot;issue-link&quot; data-issue-key=&quot;KAFKA-253&quot;&gt;&lt;del&gt;KAFKA-253&lt;/del&gt;&lt;/a&gt;&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo-kafka/commit/acada7ae2c9ec66f38819f01c84545ca10a0f5fa&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo-kafka/commit/acada7ae2c9ec66f38819f01c84545ca10a0f5fa&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="4288632" author="xgen-internal-githook" created="Wed, 12 Jan 2022 16:42:36 +0000"  >&lt;p&gt;Author:&lt;/p&gt;
{&apos;name&apos;: &apos;Valentin Kovalenko&apos;, &apos;email&apos;: &apos;valentin.male.kovalenko@gmail.com&apos;, &apos;username&apos;: &apos;stIncMale&apos;}
&lt;p&gt;Message: Add support for the `bulk.write.ordered` sink connector property (#96)&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/browse/KAFKA-253&quot; title=&quot;Allow Kafka Sink Connector to Execute Unordered Bulk Operations&quot; class=&quot;issue-link&quot; data-issue-key=&quot;KAFKA-253&quot;&gt;&lt;del&gt;KAFKA-253&lt;/del&gt;&lt;/a&gt;&lt;br/&gt;
Branch: master&lt;br/&gt;
&lt;a href=&quot;https://github.com/mongodb/mongo-kafka/commit/09ec070f9bb153aadaf62c210e0d06be23c96445&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/mongodb/mongo-kafka/commit/09ec070f9bb153aadaf62c210e0d06be23c96445&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="4125405" author="JIRAUSER1256209" created="Thu, 14 Oct 2021 23:06:56 +0000"  >&lt;p&gt;&lt;b&gt;Problem Statement:&lt;/b&gt;&lt;/p&gt;

&lt;p&gt;The Mongo Kafka Connector sink process works with a bulk insert in an ordered fashion.&#160; As a result, if an error is encountered during the insert process (for example, violation of an unique constraint), not only does the individual record encountering the error fail, but all subsequent records in the batch also fail.&#160; Furthermore, the failure occurs without an expected error message or routing of the failed inserts to the dead letter topic.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;For example, if records 1-10 are being inserted into Mongo through the connector process &lt;em&gt;and&lt;/em&gt; record #6 already exists in the target collection, not only will record #6 fail to insert, but records 7-10 will as well. None of these 5 records (6-10) will be written to the dead letter topic.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Stories:&lt;/b&gt;&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;Should a Mongo Kafka Connector sink insert/upsert write of an individual record fail for any reason, records that fail to be written should be placed in dead letter topic.&#160; This should happen regardless of ordered or unordered&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;&lt;em&gt;Acceptance Criteria&lt;/em&gt;:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Any record that cannot be inserted or upserted into Mongo from Kafka using the Mongo sink connector should be placed into the dead letter topic for the Mongo sink connector after some configurable period of retries&lt;/li&gt;
&lt;/ul&gt;


&lt;ol&gt;
	&lt;li&gt;For any insert, all records that do not error during the write process should succeed.&#160; In the above scenario, this will result in records 1-5 and 7-10 successfully being inserted into the Mongo collection.&#160; Record #6 will fail, and should be put into dead letter as per story #1.&#160; The writes (1-5, 7-10) should happen using the ordered condition to maintain the time series nature of the source Kafka topic&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;&lt;em&gt;Acceptance Criteria&lt;/em&gt;:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;All records from the source Kafka topic are inserted into Mongo in sequential order&lt;/li&gt;
	&lt;li&gt;Any individual records that cannot be inserted (for any reason) are placed into dead letter after some period of retries (configurable)&lt;/li&gt;
	&lt;li&gt;The sink connector should have a parameter allowing the process to run in either an ordered or an unordered fashion&lt;/li&gt;
&lt;/ul&gt;


&lt;ol&gt;
	&lt;li&gt;For any insert, all records that do not error during the write process should succeed.&#160; In the above scenario, this will result in records 1-5 and 7-10 successfully being inserted into the Mongo collection.&#160; Record #6 will fail, and should be put into dead letter.&#160; These writes should happen using the unordered condition&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;&lt;em&gt;Acceptance Criteria&lt;/em&gt;:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;All records from the source Kafka topic are inserted into Mongo in any order&lt;/li&gt;
	&lt;li&gt;Any individual records that cannot be inserted (for any reason) are placed into dead letter after some period of retries (configurable)&lt;/li&gt;
	&lt;li&gt;The sink connector should have a parameter allowing the process to run in either an ordered or an unordered fashion&lt;/li&gt;
&lt;/ul&gt;


&lt;ol&gt;
	&lt;li&gt;For any insert, all records that do not error during the write process should succeed.&#160; An example of a failure condition would be violation of the unique constraint associated with the primary key for the collection. In the above scenario, this will result in records 1-5 and 7-10 successfully being inserted into the Mongo collection.&#160; Record #6 will fail, and should be put into dead letter.&#160; These writes should happen using the unordered condition.&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;&lt;em&gt;Acceptance Criteria&lt;/em&gt;:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;All records from the source Kafka topic are inserted into Mongo in any order&lt;/li&gt;
	&lt;li&gt;Any individual records that cannot be inserted (for any reason) are placed into dead letter after some period of retries (configurable)&lt;/li&gt;
	&lt;li&gt;The sink connector should have a parameter allowing the process to run in either an ordered or an unordered fashion&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Of these four stories, Story #1 is an absolute requirement &#8211; data cannot be allowed to fall on the floor without some error notification or other means by which we can track what happened to the data.&#160; Story #2 is the ideal end result.&#160; Story #3 is the next best option, with Story #4 being the minimal solution (the difference between story #3 and #4 is story #3 is generic against any error, with story #4 focusing on the specific error we have encountered.&#160; While I would recommend the more broad solution for other customers, the more focused one would be the minimum criteria).&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10320">
                    <name>Documented</name>
                                                                <inwardlinks description="is documented by">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                                                        <customfield id="customfield_13552" key="com.go2group.jira.plugin.crm:crm_generic_field">
                        <customfieldname>Case</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[[5002K00000yU9gWQAS]]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10257" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Documentation Changes</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10250"><![CDATA[Needed]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_14266" key="com.atlassian.jira.plugin.system.customfieldtypes:textarea">
                        <customfieldname>Documentation Changes Summary</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>&lt;p&gt;If implemented, this new behavior will need to be properly documented.&lt;/p&gt;</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10857" key="com.pyxis.greenhopper.jira:gh-epic-link">
                        <customfieldname>Epic Link</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>KAFKA-223</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_21553" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Quarter</customfieldname>
                        <customfieldvalues>
                                        <label>FY22Q4</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hr3ngf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>