<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 03:59:27 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[SERVER-22123] Add an option to the $sample stage to specify weights to use in the sampling.</title>
                <link>https://jira.mongodb.org/browse/SERVER-22123</link>
                <project id="10000" key="SERVER">Core Server</project>
                    <description>&lt;p&gt;Specifying this option would prevent the use of any optimized random cursor implementation from the storage engine, and would instead always use a top-k random sort, with the random value used for sorting being multiplied by the specified weight.&lt;/p&gt;

&lt;p&gt;For example:&lt;/p&gt;
&lt;p/&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;db.example.aggregate([{&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;    $sample: {&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;        size: 100,&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;        weight: &lt;/span&gt;&lt;span style=&quot;color: blue; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;&quot;$myWeightField&quot;&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;    }&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;}]);&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p/&gt;</description>
                <environment></environment>
        <key id="246506">SERVER-22123</key>
            <summary>Add an option to the $sample stage to specify weights to use in the sampling.</summary>
                <type id="2" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14711&amp;avatarType=issuetype">New Feature</type>
                                            <priority id="3" iconUrl="https://jira.mongodb.org/images/icons/priorities/major.svg">Major - P3</priority>
                        <status id="10038" iconUrl="https://jira.mongodb.org/images/icons/subtask.gif" description="">Backlog</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="backlog-query-optimization">Backlog - Query Optimization</assignee>
                                    <reporter username="m3t4lukas">Lukas Wagner</reporter>
                        <labels>
                            <label>grab-bag</label>
                            <label>stage</label>
                    </labels>
                <created>Mon, 11 Jan 2016 14:50:00 +0000</created>
                <updated>Tue, 6 Dec 2022 04:36:17 +0000</updated>
                                            <version>3.2.0</version>
                                                    <component>Aggregation Framework</component>
                                        <votes>1</votes>
                                    <watches>11</watches>
                                                                                                                <comments>
                            <comment id="1154888" author="m3t4lukas" created="Wed, 27 Jan 2016 06:58:37 +0000"  >&lt;p&gt;Hi Charlie,&lt;/p&gt;

&lt;p&gt;thanks for proposing the issue internally.&lt;br/&gt;
Yeah the duplicates part is kind of a nice to have but it works without. It&apos;s just to avoid confusion with the users. But at least for me it&apos;s not critical to functionality. There might be people that may disagree with me on that part.&lt;br/&gt;
Edit: This is kind of a PS but there is weighted randomness built into boost. &lt;a href=&quot;http://www.boost.org/doc/libs/1_59_0/doc/html/boost_random/tutorial.html#boost_random.tutorial.generating_integers_with_different_probabilities&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://www.boost.org/doc/libs/1_59_0/doc/html/boost_random/tutorial.html#boost_random.tutorial.generating_integers_with_different_probabilities&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="1153435" author="charlie.swanson" created="Tue, 26 Jan 2016 00:11:11 +0000"  >&lt;p&gt;Hi Lukas,&lt;/p&gt;

&lt;p&gt;I&apos;ve proposed this internally, and if/when we all agree on the syntax and semantics, we&apos;ll work on a fix. I&apos;ve updated this ticket to reflect the revised plan. I&apos;ve also removed the backport request, since this is a new feature, and we generally do not backport new features to released versions.&lt;/p&gt;

&lt;p&gt;As for the duplicates, the &lt;tt&gt;$sample&lt;/tt&gt; stage is logically a sample without replacement, but we cannot guarantee there are not duplicates because of our isolation semantics (see &lt;a href=&quot;https://docs.mongodb.org/manual/core/read-isolation-consistency-recency/#cursor-snapshot&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;here&lt;/a&gt; for more details). This is not a trivial issue to fix, and I don&apos;t think we would want to add de-duplicating logic to only the &lt;tt&gt;$sample&lt;/tt&gt; stage, since this is a general problem that should be solved everywhere.&lt;/p&gt;</comment>
                            <comment id="1145865" author="m3t4lukas" created="Sat, 16 Jan 2016 13:58:31 +0000"  >&lt;p&gt;Hi Charlie,&lt;/p&gt;

&lt;p&gt;yes that would be great.&lt;br/&gt;
An option for allowing duplicates or not would be great, too. As far as I&apos;ve seen you have no choice but to accept that there can be duplicates right now.&lt;/p&gt;</comment>
                            <comment id="1145087" author="charlie.swanson" created="Fri, 15 Jan 2016 15:24:11 +0000"  >&lt;p&gt;Hi Lukas,&lt;/p&gt;

&lt;p&gt;I think your use case might be addressed by something like the following?&lt;/p&gt;
&lt;p/&gt;
&lt;div id=&quot;syntaxplugin&quot; class=&quot;syntaxplugin&quot; style=&quot;border: 1px dashed #bbb; border-radius: 5px !important; overflow: auto; max-height: 30em;&quot;&gt;
&lt;table cellspacing=&quot;0&quot; cellpadding=&quot;0&quot; border=&quot;0&quot; width=&quot;100%&quot; style=&quot;font-size: 1em; line-height: 1.4em !important; font-weight: normal; font-style: normal; color: black;&quot;&gt;
		&lt;tbody &gt;
				&lt;tr id=&quot;syntaxplugin_code_and_gutter&quot;&gt;
						&lt;td  style=&quot; line-height: 1.4em !important; padding: 0em; vertical-align: top;&quot;&gt;
					&lt;pre style=&quot;font-size: 1em; margin: 0 10px;  margin-top: 10px;   margin-bottom: 10px;  width: auto; padding: 0;&quot;&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;db.foo.aggregate([{$sample: {size: 10, weight: &lt;/span&gt;&lt;span style=&quot;color: blue; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;&quot;$myWeightField&quot;&lt;/span&gt;&lt;span style=&quot;color: black; font-family: &apos;Consolas&apos;, &apos;Bitstream Vera Sans Mono&apos;, &apos;Courier New&apos;, Courier, monospace !important;&quot;&gt;}}])&lt;/span&gt;&lt;/pre&gt;
			&lt;/td&gt;
		&lt;/tr&gt;
			&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p/&gt;

&lt;p&gt;This wouldn&apos;t be so hard to do. Let me know if that would work for you, and I&apos;ll confirm that this makes sense from our end.&lt;/p&gt;</comment>
                            <comment id="1134445" author="m3t4lukas" created="Wed, 13 Jan 2016 23:45:04 +0000"  >&lt;p&gt;Hi Charlie,&lt;/p&gt;

&lt;p&gt;yep, that would be an option. However, there is no option to weigh the randomness upon some kind of rating system. Maybe it would be a better approach to add that option to the sample stage.&lt;/p&gt;</comment>
                            <comment id="1134399" author="charlie.swanson" created="Wed, 13 Jan 2016 22:52:15 +0000"  >&lt;p&gt;Would the &lt;a href=&quot;https://docs.mongodb.org/manual/reference/operator/aggregation/sample/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;&lt;tt&gt;$sample&lt;/tt&gt;&lt;/a&gt; stage do what you wanted?&lt;/p&gt;</comment>
                            <comment id="1134388" author="m3t4lukas" created="Wed, 13 Jan 2016 22:42:01 +0000"  >&lt;p&gt;Hi Charlie,&lt;/p&gt;

&lt;p&gt;you&apos;d need it for any kind of randomized access to a collection&apos;s data. Right now there is no possibility whatsoever. Let&apos;s use a common real-world example.&lt;br/&gt;
Imagine a database that has to serve ads. Now of course there are some criteria upon which ads to show to a user are chosen. There might be some kind of rating behind it. Now imagine you had several hundreds of ads you could serve to a user. What you&apos;d need to do now is query them all, which uses up network resources, and randomize on the CDN stage. That is rather inefficient. If you would not randomize at all the user would see the same ads all the time which is not what you want since when the user did not click on the ad the first several times it was served to him it is highly unlikely he will in the near future. The most efficient way to do that would be to randomize and filtering upon randomization as early in the aggregation pipeline as possible. That would save on ram and it would in particular in that case save on networking resources (several hundred ads transferred from db to cdn server vs only the one needed &lt;span class=&quot;error&quot;&gt;&amp;#91;per request!!!&amp;#93;&lt;/span&gt;).&lt;br/&gt;
Another option would be to add a field to the collection with a random number. That approach would have two major disadvantages: for one, it would be the same random numbers for each user and each request for a period of time, which causes clumping, and on the other hand it would cause a whole lot of writes every time there is &quot;feeding time&quot;.&lt;/p&gt;</comment>
                            <comment id="1134344" author="charlie.swanson" created="Wed, 13 Jan 2016 22:11:16 +0000"  >&lt;p&gt;Hi Lukas,&lt;/p&gt;

&lt;p&gt;Before we go forward with implementing this (sorry if you&apos;ve already started), can you describe why you need this expression? What are you using it for?&lt;/p&gt;

&lt;p&gt;We have some concerns that this may add some subtle complexity to aggregation&apos;s optimizer. This would be the first expression that would return different results depending on which order you called it in, or if you called it multiple times, which will make reasoning about which optimizations are safe to apply harder to analyze.&lt;/p&gt;</comment>
                            <comment id="1133323" author="m3t4lukas" created="Wed, 13 Jan 2016 08:54:50 +0000"  >&lt;p&gt;Hi Charlie,&lt;/p&gt;

&lt;p&gt;thanks for the heads up on contributing guidelines and the agreement. I was aware of the guideline but I had yet to sign the agreement. It&apos;s all done now.&lt;/p&gt;

&lt;p&gt;Regards,&lt;br/&gt;
Lukas Wagner&lt;/p&gt;</comment>
                            <comment id="1132685" author="charlie.swanson" created="Tue, 12 Jan 2016 19:08:20 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=m3t4lukas&quot; class=&quot;user-hover&quot; rel=&quot;m3t4lukas&quot;&gt;m3t4lukas&lt;/a&gt;, I&apos;m excited to hear that you are working on a patch!&lt;/p&gt;

&lt;p&gt;If you&apos;re planning to submit a pull request to have this merged into the server project, here is a useful &lt;a href=&quot;https://docs.mongodb.org/manual/contributors/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;guide to getting started&lt;/a&gt;. In particular, you&apos;ll have to sign the &lt;a href=&quot;https://www.mongodb.com/legal/contributor-agreement&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;Contributor&apos;s Agreement&lt;/a&gt;. Apologies if you already knew this, or already signed that. &lt;/p&gt;

&lt;p&gt;I&apos;ll assign this ticket to myself in the meantime, since I&apos;ll likely review your patch, and we can&apos;t assign tickets to people outside of MongoDB.&lt;/p&gt;

&lt;p&gt;Let me know if there&apos;s anything I can do to help!&lt;/p&gt;</comment>
                            <comment id="1132345" author="m3t4lukas" created="Tue, 12 Jan 2016 15:12:44 +0000"  >&lt;p&gt;Hi Charlie,&lt;/p&gt;

&lt;p&gt;what you assumed is correct. If you like you can assign it to me, as I am already working on it.&lt;br/&gt;
Thanks for filling me in on priorities. I just used that priority since I can&apos;t continue to work on my current project without that feature.&lt;/p&gt;</comment>
                            <comment id="1131072" author="charlie.swanson" created="Mon, 11 Jan 2016 15:30:00 +0000"  >&lt;p&gt;Hi &lt;a href=&quot;https://jira.mongodb.org/secure/ViewProfile.jspa?name=m3t4lukas&quot; class=&quot;user-hover&quot; rel=&quot;m3t4lukas&quot;&gt;m3t4lukas&lt;/a&gt;,&lt;/p&gt;

&lt;p&gt;I&apos;ve filled in the description with what I believe you are asking for, let me know if this is not correct.&lt;/p&gt;

&lt;p&gt;I&apos;ve downgraded the priority of this ticket to the default priority. We don&apos;t use the priority field when prioritizing new features, so I&apos;ve changed it to the default to avoid possible confusion in other search results.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10012">
                    <name>Related</name>
                                            <outwardlinks description="related to">
                                        <issuelink>
            <issuekey id="409699">SERVER-30405</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                <customfield id="customfield_10050" key="com.atlassian.jira.toolkit:comments">
                        <customfieldname># Replies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>12.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                <customfield id="customfield_12751" key="com.atlassian.jira.plugin.system.customfieldtypes:multiselect">
                        <customfieldname>Assigned Teams</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="25126"><![CDATA[Query Optimization]]></customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                            <customfield id="customfield_10011" key="com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons">
                        <customfieldname>Backwards Compatibility</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10038"><![CDATA[Fully Compatible]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10055" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>Date of 1st Reply</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 11 Jan 2016 15:30:00 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10052" key="com.atlassian.jira.toolkit:dayslastcommented">
                        <customfieldname>Days since reply</customfieldname>
                        <customfieldvalues>
                                        8 years, 3 weeks ago
    
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_18254" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Dependencies</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[]]></customfieldvalue>


                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10057" key="com.atlassian.jira.toolkit:lastusercommented">
                        <customfieldname>Last comment by Customer</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>true</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10056" key="com.atlassian.jira.toolkit:lastupdaterorcommenter">
                        <customfieldname>Last commenter</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>alexander.golin@mongodb.com</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_11151" key="com.atlassian.jira.toolkit:LastCommentDate">
                        <customfieldname>Last public comment date</customfieldname>
                        <customfieldvalues>
                            8 years, 3 weeks ago
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    <customfield id="customfield_10051" key="com.atlassian.jira.toolkit:participants">
                        <customfieldname>Participants</customfieldname>
                        <customfieldvalues>
                                        <customfieldvalue>backlog-query-optimization</customfieldvalue>
            <customfieldvalue>charlie.swanson@mongodb.com</customfieldvalue>
            <customfieldvalue>m3t4lukas</customfieldvalue>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_14254" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Product Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrkkyn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hr2e9b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_22870" key="com.onresolve.jira.groovy.groovyrunner:scripted-field">
                        <customfieldname>Triagers</customfieldname>
                        <customfieldvalues>
                                

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_14350" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>serverRank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hrjd07:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>