Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-23219

DBCommandCursor doesn't route getMore operations to original server

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Major - P3
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 3.3.12
    • Component/s: Shell
    • Labels:
      None
    • Backwards Compatibility:
      Fully Compatible
    • Operating System:
      ALL
    • Steps To Reproduce:
      Hide

      The following patch modifies the stepdown_query.js test to use DBClientReplicaSet in order to demonstrate the issue for getMore and killCursor operations. The modified test can be run with resmoke.py as follows:

      python buildscripts/resmoke.py --executor no_passthrough jstests/noPassthrough/stepdown_query.js
      

      diff --git a/jstests/noPassthrough/stepdown_query.js b/jstests/noPassthrough/stepdown_query.js
      index 05d22f3..5d1dd36 100644
      --- a/jstests/noPassthrough/stepdown_query.js
      +++ b/jstests/noPassthrough/stepdown_query.js
      @@ -5,11 +5,12 @@
           var dbName = "test";
           var collName = jsTest.name();
       
      -    function runTest(host, rst) {
      -        // We create a new connection to 'host' here instead of passing in the original connection.
      -        // This to work around the fact that connections created by ReplSetTest already have slaveOk
      -        // set on them, but we need a connection with slaveOk not set for this test.
      -        var conn = new Mongo(host);
      +    function runTest(connStr, rst) {
      +        // We create a new connection using 'connStr' as our connection string instead of passing in
      +        // the original connection. This to work around the fact that each connection created by
      +        // ReplSetTest is backed by a DBClientConnection, but we need to use a DBClientReplicaSet
      +        // connection.
      +        var conn = new Mongo(connStr);
               var coll = conn.getDB(dbName).getCollection(collName);
               assert(!coll.exists());
               assert.writeOK(coll.insert([{}, {}, {}, {}, {}]));
      @@ -19,10 +20,12 @@
               cursor.next();
               assert.eq(0, cursor.objsLeftInBatch());
               var primary = rst.getPrimary();
      +        var secondary = rst.getSecondary();
               assert.throws(function() {
                   primary.getDB("admin").runCommand({replSetStepDown: 60, force: true});
               });
               rst.waitForState(primary, ReplSetTest.State.SECONDARY, 60 * 1000);
      +        rst.waitForState(secondary, ReplSetTest.State.PRIMARY, 60 * 1000);
               // When the primary steps down, it closes all client connections. Since 'conn' may be a
               // direct connection to the primary and the shell doesn't automatically retry operations on
               // network errors, we run a dummy operation here to force the shell to reconnect.
      @@ -39,16 +42,17 @@
               });
           }
       
      -    // Test querying a replica set primary directly.
      -    var rst = new ReplSetTest({nodes: 1});
      +    // Test querying a replica set.
      +    var rst = new ReplSetTest({nodes: 2});
           rst.startSet();
           rst.initiate();
      -    runTest(rst.getPrimary().host, rst);
      +    runTest('mongodb://' + rst.getPrimary().host + ',' + rst.getSecondary().host + '/?replicaSet=' +
      +            rst.name, rst);
           rst.stopSet();
       
           // Test querying a replica set primary through mongos.
      -    var st = new ShardingTest({shards: 1, rs: true});
      -    rst = st.rs0;
      -    runTest(st.s0.host, rst);
      -    st.stop();
      +    // var st = new ShardingTest({shards: 1, rs: true});
      +    // rst = st.rs0;
      +    // runTest(st.s0.host, rst);
      +    // st.stop();
       })()
      

      Output

      [js_test:stepdown_query] 2016-03-17T18:06:56.965-0400 2016-03-17T18:06:56.965-0400 E QUERY    [thread1] Error: getMore command failed: {
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400   "ok" : 0,
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400   "errmsg" : "Cursor not found, cursor id: 32766606856",
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400   "code" : 43
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 } :
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 _getErrorWithCode@src/mongo/shell/utils.js:25:13
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 DBCommandCursor.prototype._runGetMoreCommand@src/mongo/shell/query.js:758:1
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 DBCommandCursor.prototype._hasNextUsingCommands@src/mongo/shell/query.js:786:9
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 DBCommandCursor.prototype.hasNext@src/mongo/shell/query.js:794:1
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 DBQuery.prototype.hasNext@src/mongo/shell/query.js:287:13
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 runTest@jstests/noPassthrough/stepdown_query.js:39:16
      [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 @jstests/noPassthrough/stepdown_query.js:49:1
      [js_test:stepdown_query] 2016-03-17T18:06:56.967-0400 @jstests/noPassthrough/stepdown_query.js:3:2
      [js_test:stepdown_query] 2016-03-17T18:06:56.967-0400
      [js_test:stepdown_query] 2016-03-17T18:06:56.967-0400 failed to load: jstests/noPassthrough/stepdown_query.js
      

      Show
      The following patch modifies the stepdown_query.js test to use DBClientReplicaSet to demonstrate the issue for getMore and killCursor operations. It can be invoked with resmoke.py by doing python buildscripts/resmoke.py --executor no_passthrough jstests/noPassthrough/stepdown_query.js diff --git a/jstests/noPassthrough/stepdown_query.js b/jstests/noPassthrough/stepdown_query.js index 05d22f3..5d1dd36 100644 --- a/jstests/noPassthrough/stepdown_query.js +++ b/jstests/noPassthrough/stepdown_query.js @@ -5,11 +5,12 @@ var dbName = "test"; var collName = jsTest.name(); - function runTest(host, rst) { - // We create a new connection to 'host' here instead of passing in the original connection. - // This to work around the fact that connections created by ReplSetTest already have slaveOk - // set on them, but we need a connection with slaveOk not set for this test. - var conn = new Mongo(host); + function runTest(connStr, rst) { + // We create a new connection using 'connStr' as our connection string instead of passing in + // the original connection. This to work around the fact that each connection created by + // ReplSetTest is backed by a DBClientConnection, but we need to use a DBClientReplicaSet + // connection. + var conn = new Mongo(connStr); var coll = conn.getDB(dbName).getCollection(collName); assert(!coll.exists()); assert.writeOK(coll.insert([{}, {}, {}, {}, {}])); @@ -19,10 +20,12 @@ cursor.next(); assert.eq(0, cursor.objsLeftInBatch()); var primary = rst.getPrimary(); + var secondary = rst.getSecondary(); assert.throws(function() { primary.getDB("admin").runCommand({replSetStepDown: 60, force: true}); }); rst.waitForState(primary, ReplSetTest.State.SECONDARY, 60 * 1000); + rst.waitForState(secondary, ReplSetTest.State.PRIMARY, 60 * 1000); // When the primary steps down, it closes all client connections. 
Since 'conn' may be a // direct connection to the primary and the shell doesn't automatically retry operations on // network errors, we run a dummy operation here to force the shell to reconnect. @@ -39,16 +42,17 @@ }); } - // Test querying a replica set primary directly. - var rst = new ReplSetTest({nodes: 1}); + // Test querying a replica set. + var rst = new ReplSetTest({nodes: 2}); rst.startSet(); rst.initiate(); - runTest(rst.getPrimary().host, rst); + runTest('mongodb://' + rst.getPrimary().host + ',' + rst.getSecondary().host + '/?replicaSet=' + + rst.name, rst); rst.stopSet(); // Test querying a replica set primary through mongos. - var st = new ShardingTest({shards: 1, rs: true}); - rst = st.rs0; - runTest(st.s0.host, rst); - st.stop(); + // var st = new ShardingTest({shards: 1, rs: true}); + // rst = st.rs0; + // runTest(st.s0.host, rst); + // st.stop(); })() Output [js_test:stepdown_query] 2016-03-17T18:06:56.965-0400 2016-03-17T18:06:56.965-0400 E QUERY [thread1] Error: getMore command failed: { [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 "ok" : 0, [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 "errmsg" : "Cursor not found, cursor id: 32766606856", [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 "code" : 43 [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 } : [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 _getErrorWithCode@src/mongo/shell/utils.js:25:13 [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 DBCommandCursor.prototype._runGetMoreCommand@src/mongo/shell/query.js:758:1 [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 DBCommandCursor.prototype._hasNextUsingCommands@src/mongo/shell/query.js:786:9 [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 DBCommandCursor.prototype.hasNext@src/mongo/shell/query.js:794:1 [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 DBQuery.prototype.hasNext@src/mongo/shell/query.js:287:13 [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 
runTest@jstests/noPassthrough/stepdown_query.js:39:16 [js_test:stepdown_query] 2016-03-17T18:06:56.966-0400 @jstests/noPassthrough/stepdown_query.js:49:1 [js_test:stepdown_query] 2016-03-17T18:06:56.967-0400 @jstests/noPassthrough/stepdown_query.js:3:2 [js_test:stepdown_query] 2016-03-17T18:06:56.967-0400 [js_test:stepdown_query] 2016-03-17T18:06:56.967-0400 failed to load: jstests/noPassthrough/stepdown_query.js
    • Sprint:
      Platforms 2016-08-26, Platforms 2016-09-19

      Description

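      DBCommandCursor routes getMore and killCursor operations to the current primary of the replica set rather than to the server on which the cursor was originally established. Since a cursor that exists on the primary remains on that node following a stepdown, DBCommandCursor will route a subsequent getMore or killCursor operation to the wrong node (the newly elected primary, which has no such cursor), and the operation fails with a "Cursor not found" error. A similar situation can arise if slaveOk is set on the replica-set connection.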

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              jonathan.reams Jonathan Reams
              Reporter:
              max.hirschhorn Max Hirschhorn
              Participants:
              Votes:
              1 Vote for this issue
              Watchers:
              16 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved: