<!-- 
RSS generated by JIRA (9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66) at Thu Feb 08 08:51:58 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>MongoDB Jira</title>
    <link>https://jira.mongodb.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.7.1</version>
        <build-number>970001</build-number>
        <build-date>13-04-2023</build-date>
    </build-info>


<item>
            <title>[JAVA-307] The low speed of range query with two constraints</title>
                <link>https://jira.mongodb.org/browse/JAVA-307</link>
                <project id="10006" key="JAVA">Java Driver</project>
                    <description>&lt;p&gt;I use a single Java client to run a range query with two constraints, and I have created an index on the query key. The read speed only reaches about 6 MB/s (reading back 6 MB takes nearly 1.1 seconds). Why is it so slow, given that my network card is &quot;1000Mbit/s&quot; and all of the queried data is already in memory (the data size &amp;lt; memory size)?&lt;/p&gt;

&lt;p&gt;stats info:&lt;/p&gt;

&lt;p&gt;        &quot;objects&quot; : 2936299,&lt;br/&gt;
        &quot;avgObjSize&quot; : 357.40783210429186,&lt;br/&gt;
        &quot;dataSize&quot; : 1049456260,&lt;br/&gt;
        &quot;storageSize&quot; : 1181462784,&lt;br/&gt;
        &quot;indexSize&quot; : 370705344,&lt;br/&gt;
        &quot;fileSize&quot; : 4226809856,&lt;/p&gt;


&lt;p&gt;select report_path from table where visit_time &amp;gt; start_visit_time and visit_time &amp;lt; end_visit_time&lt;br/&gt;
--------------------------------------------------------------------&lt;br/&gt;
BasicDBObject range = new BasicDBObject();&lt;br/&gt;
range.put(&quot;$gt&quot;, start_visit_time);&lt;br/&gt;
range.put(&quot;$lt&quot;, end_visit_time);&lt;/p&gt;

&lt;p&gt;BasicDBObject query = new BasicDBObject();&lt;br/&gt;
 query.put(&quot;visit_time&quot;, range);&lt;/p&gt;

&lt;p&gt;BasicDBObject keys = new BasicDBObject();&lt;br/&gt;
keys.put(&quot;report_path&quot;, 1);&lt;br/&gt;
keys.put(&quot;_id&quot;, 0);&lt;/p&gt;

&lt;p&gt;DBCursor cur = ct.find(query, keys);&lt;br/&gt;
while (cur.hasNext()) {&lt;br/&gt;
       String report_path = cur.next().toString();&lt;br/&gt;
}&lt;/p&gt;



</description>
                <environment>network card: 1000Mbit/s, memory  8G, Mongodb 1.8, Mongodb driver 2.5</environment>
        <key id="15278">JAVA-307</key>
            <summary>The low speed of range query with two constraints</summary>
                <type id="3" iconUrl="https://jira.mongodb.org/secure/viewavatar?size=xsmall&amp;avatarId=14718&amp;avatarType=issuetype">Task</type>
                                            <priority id="2" iconUrl="https://jira.mongodb.org/images/icons/priorities/critical.svg">Critical - P2</priority>
                        <status id="6" iconUrl="https://jira.mongodb.org/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="9">Done</resolution>
                                        <assignee username="-1">Unassigned</assignee>
                                    <reporter username="shawny">shawn yang</reporter>
                        <labels>
                    </labels>
                <created>Fri, 25 Mar 2011 02:27:15 +0000</created>
                <updated>Wed, 11 Sep 2019 19:12:25 +0000</updated>
                            <resolved>Tue, 4 Sep 2012 00:33:23 +0000</resolved>
                                    <version>2.5</version>
                                                    <component>API</component>
                                        <votes>1</votes>
                                    <watches>4</watches>
                                                                                                                <comments>
                            <comment id="159972" author="jeff.yemin" created="Tue, 4 Sep 2012 00:33:23 +0000"  >&lt;p&gt;Apologies for letting this sit so long without a response.  Please re-open if you want to pursue it further.&lt;/p&gt;</comment>
                            <comment id="26801" author="shawny" created="Fri, 25 Mar 2011 09:21:21 +0000"  >&lt;p&gt;Updated the test case to add the time consumed by each part (returning the DBCursor vs. reading back the data):&lt;/p&gt;

&lt;p&gt;                        BasicDBObject range = new BasicDBObject();&lt;br/&gt;
                        range.put(&quot;$gt&quot;, start_visit_time);&lt;br/&gt;
                        range.put(&quot;$lt&quot;, end_visit_time);&lt;/p&gt;

&lt;p&gt;                        BasicDBObject query = new BasicDBObject();&lt;br/&gt;
                        query.put(&quot;visit_time&quot;, range);&lt;/p&gt;

&lt;p&gt;                        BasicDBObject keys = new BasicDBObject();&lt;br/&gt;
                        keys.put(&quot;report_path&quot;, 1);&lt;br/&gt;
                        keys.put(&quot;_id&quot;, 0);&lt;/p&gt;


&lt;p&gt;                        long st1 = System.currentTimeMillis();&lt;/p&gt;

&lt;p&gt;                        DBCursor cur = ct.find(query, keys).batchSize(1000);&lt;br/&gt;
                        cur.count();  &lt;br/&gt;
                        long ed1 = System.currentTimeMillis();&lt;br/&gt;
                        System.out.println(ed1 - st1);                  // consume: 319 ms; why does the count operation take so long?&lt;/p&gt;

&lt;p&gt;                        long st = System.currentTimeMillis();&lt;/p&gt;

&lt;p&gt;                        while (cur.hasNext()) {&lt;br/&gt;
                                cur.next();&lt;br/&gt;
                        }&lt;/p&gt;


&lt;p&gt;                        long ed = System.currentTimeMillis();&lt;br/&gt;
                        System.out.println(ed - st);                  // consume: 958 ms&lt;/p&gt;</comment>
                            <comment id="26800" author="shawny" created="Fri, 25 Mar 2011 09:14:32 +0000"  >&lt;p&gt;1) Records read back: 217611; the total data size is 6734899 bytes&lt;br/&gt;
2) After removing toString, it takes 964ms (a little less than 1.1 seconds)&lt;br/&gt;
3) I was already using batch size 2048 (I also tested with batch size 1000; the result is nearly the same)&lt;br/&gt;
4) I wanted to know the size of the data read back, so I added the following code in the while statement:&lt;br/&gt;
while (cur.hasNext()) {&lt;br/&gt;
       String report_path = cur.next().toString();&lt;br/&gt;
       total += report_path.length();                            // I have since removed this statement&lt;br/&gt;
}&lt;/p&gt;
&lt;p&gt; &lt;/p&gt;

&lt;p&gt;---------------------------------------------------&lt;/p&gt;

&lt;p&gt;When I remove toString, it takes 964ms, which is slightly faster.&lt;/p&gt;


</comment>
                            <comment id="26799" author="antoine" created="Fri, 25 Mar 2011 09:01:12 +0000"  >&lt;p&gt;how many records are you reading back?&lt;br/&gt;
I&apos;m guessing 6MB / 357 = 16800&lt;br/&gt;
This means the driver will do many getMore operations on the cursor, probably 168 of them.&lt;br/&gt;
The original operation that takes 294 ms only gives you a cursor back, but does not show the whole retrieval time.&lt;br/&gt;
A few things to try and see if faster:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;remove the toString() in java&lt;/li&gt;
	&lt;li&gt;increase the batch size in query (say set it to 1000)&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;What do you mean by:&lt;br/&gt;
&quot;If i don&apos;t calculate the total size of read back data, the cpu percent only is 9.7%, so i think if it is the java driver&apos;s problem?&quot;&lt;/p&gt;</comment>
                            <comment id="26798" author="shawny" created="Fri, 25 Mar 2011 07:24:49 +0000"  >&lt;p&gt;I open mongodb profiling  and get the following info: &lt;/p&gt;

&lt;p&gt;&amp;gt; db.system.profile.find()&lt;br/&gt;
{ &quot;ts&quot; : ISODate(&quot;2011-03-25T07:07:28.147Z&quot;), &quot;info&quot; : &quot;query ycsb.$cmd ntoreturn:1 command: { count: \&quot;taobao\&quot;, query: { visit_time: { $gt: \&quot;2011-03-07 16:53:34\&quot;, $lt: \&quot;2011-03-09 16:53:34\&quot; } }, fields: { visit_time: 1, _id: 0 } } reslen:64 294ms&quot;, &quot;millis&quot; : 294 }&lt;/p&gt;

&lt;p&gt;It seems the server processing time is only 294ms. Using mongostat to check %lock, its value is 0. The client and server are on the same machine, communicating over the local loopback interface.&lt;/p&gt;

&lt;p&gt;From test case:&lt;br/&gt;
DBCursor cur = ct.find(query, keys);                // this place spend nearly 322ms&lt;br/&gt;
                                                                                // the while loop to get back data spend more time&lt;br/&gt;
while (cur.hasNext()) {&lt;br/&gt;
       String report_path = cur.next().toString();&lt;br/&gt;
}&lt;/p&gt;


&lt;p&gt;----------------------&lt;br/&gt;
Note: millis Time, in milliseconds, to perform the operation. This time does not include time to acquire the lock or network time, just the time for the server to process.&lt;/p&gt;</comment>
                            <comment id="26797" author="shawny" created="Fri, 25 Mar 2011 06:20:27 +0000"  >&lt;p&gt;PID  USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND&lt;br/&gt;
29622 shawn  15   0 2163m 116m 115m R  9.7  1.5   0:04.28 mongod&lt;br/&gt;
30418 shawn  17   0 1349m  22m 8080 S  6.7  0.3   0:00.20 java&lt;br/&gt;
-----------------------------------------&lt;/p&gt;

&lt;p&gt;If i don&apos;t calculate the total size of read back data, the cpu percent only is 9.7%, so i think if it is the java driver&apos;s problem?&lt;/p&gt;</comment>
                            <comment id="26796" author="shawny" created="Fri, 25 Mar 2011 06:15:26 +0000"  >&lt;p&gt;1) Sure, I used iostat to check it; the disk is idle.&lt;/p&gt;

&lt;p&gt;2) CPU usage only reaches ~22%.&lt;/p&gt;

&lt;p&gt;So the bottleneck isn&apos;t disk I/O, CPU, or memory.&lt;/p&gt;</comment>
                            <comment id="26785" author="eliot" created="Fri, 25 Mar 2011 04:19:18 +0000"  >&lt;p&gt;Can you verify that the disk is idle during this test?&lt;br/&gt;
Is java maxing out a core?&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_15850" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_12550" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2|hrgj0n:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10558" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>10024</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            </customfields>
    </item>
</channel>
</rss>