[SERVER-24114] getMore on oplog does not adhere to maxTimeMS Created: 09/May/16  Updated: 06/Dec/22  Resolved: 22/Jan/19

Status: Closed
Project: Core Server
Component/s: Querying, Replication
Affects Version/s: None
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Judah Schvimer Assignee: Backlog - Replication Team
Resolution: Won't Fix Votes: 2
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Attachments: Text File getMoreMaxTimeMS.txt    
Issue Links:
Depends
Related
Assigned Teams:
Replication
Operating System: ALL
Participants:
Linked BF Score: 0

 Description   

In a patch build, a getMore that the fetcher executed on the primary was given a maxTimeMS of 5 seconds. After 10 seconds the command failed with a user assertion that led to an ExceededTimeLimit error. maxTimeMS failures on a getMore should lead to an empty batch and no error.

[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:15.600+0000 d20010| 2016-05-05T22:17:15.501+0000 I COMMAND  [conn10] command local.oplog.rs command: find { find: "oplog.rs", filter: { ts: { $gte: Timestamp 1462486622000|1 } }, tailable: true, oplogReplay: true, awaitData: true, maxTimeMS: 60000, term: 1 } planSummary: COLLSCAN cursorid:27336043991 keysExamined:0 docsExamined:4 numYields:0 nreturned:4 reslen:708 locks:{ Global: { acquireCount: { r: 2 } }, MMAPV1Journal: { acquireCount: { r: 1 } }, Database: { acquireCount: { r: 1 } }, oplog: { acquireCount: { R: 1 } } } protocol:op_command 0ms
...
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.219+0000 d20010| 2016-05-05T22:17:26.219+0000 D -        [conn10] User Assertion: 50:operation exceeded time limit
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.221+0000 d20010| 2016-05-05T22:17:26.219+0000 I COMMAND  [conn10] command local.oplog.rs command: getMore { getMore: 27336043991, collection: "oplog.rs", maxTimeMS: 5000, term: 1, lastKnownCommittedOpTime: { ts: Timestamp 1462486635000|2, t: 1 } } planSummary: COLLSCAN cursorid:27336043991 keysExamined:0 docsExamined:0 numYields:0 nreturned:0 reslen:292 locks:{ Global: { acquireCount: { r: 4 } }, MMAPV1Journal: { acquireCount: { r: 2 } }, Database: { acquireCount: { r: 2 } }, oplog: { acquireCount: { R: 2 } } } protocol:op_command 10002ms
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.222+0000 d20011| 2016-05-05T22:17:26.219+0000 D ASIO     [NetworkInterfaceASIO-BGSync-0] Request 47 timed out, adjusted timeout after getting connection from pool was 10000ms, op was id: 3, states: [ UNINITIALIZED, IN_PROGRESS ], start_time: 2016-05-05T22:17:16.217+0000, request: RemoteCommand 47 -- target:ip-10-168-148-139:20010 db:local expDate:2016-05-05T22:17:26.217+0000 cmd:{ getMore: 27336043991, collection: "oplog.rs", maxTimeMS: 5000, term: 1, lastKnownCommittedOpTime: { ts: Timestamp 1462486635000|2, t: 1 } }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.223+0000 d20011| 2016-05-05T22:17:26.220+0000 D ASIO     [NetworkInterfaceASIO-BGSync-0] Operation timing out; original request was: RemoteCommand 47 -- target:ip-10-168-148-139:20010 db:local expDate:2016-05-05T22:17:26.217+0000 cmd:{ getMore: 27336043991, collection: "oplog.rs", maxTimeMS: 5000, term: 1, lastKnownCommittedOpTime: { ts: Timestamp 1462486635000|2, t: 1 } }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.224+0000 d20011| 2016-05-05T22:17:26.220+0000 D ASIO     [NetworkInterfaceASIO-BGSync-0] Failed to execute command: RemoteCommand 47 -- target:ip-10-168-148-139:20010 db:local expDate:2016-05-05T22:17:26.217+0000 cmd:{ getMore: 27336043991, collection: "oplog.rs", maxTimeMS: 5000, term: 1, lastKnownCommittedOpTime: { ts: Timestamp 1462486635000|2, t: 1 } } reason: ExceededTimeLimit: Operation timed out, request was RemoteCommand 47 -- target:ip-10-168-148-139:20010 db:local expDate:2016-05-05T22:17:26.217+0000 cmd:{ getMore: 27336043991, collection: "oplog.rs", maxTimeMS: 5000, term: 1, lastKnownCommittedOpTime: { ts: Timestamp 1462486635000|2, t: 1 } }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.226+0000 d20011| 2016-05-05T22:17:26.220+0000 D ASIO     [NetworkInterfaceASIO-BGSync-0] Request 47 finished with response: ExceededTimeLimit: Operation timed out, request was RemoteCommand 47 -- target:ip-10-168-148-139:20010 db:local expDate:2016-05-05T22:17:26.217+0000 cmd:{ getMore: 27336043991, collection: "oplog.rs", maxTimeMS: 5000, term: 1, lastKnownCommittedOpTime: { ts: Timestamp 1462486635000|2, t: 1 } }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.227+0000 d20011| 2016-05-05T22:17:26.220+0000 D EXECUTOR [NetworkInterfaceASIO-BGSync-0] Received remote response: ExceededTimeLimit: Operation timed out, request was RemoteCommand 47 -- target:ip-10-168-148-139:20010 db:local expDate:2016-05-05T22:17:26.217+0000 cmd:{ getMore: 27336043991, collection: "oplog.rs", maxTimeMS: 5000, term: 1, lastKnownCommittedOpTime: { ts: Timestamp 1462486635000|2, t: 1 } }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.228+0000 d20011| 2016-05-05T22:17:26.220+0000 D REPL     [NetworkInterfaceASIO-SyncSourceFeedback-0] Reporter sending slave oplog progress to upstream updater ip-10-168-148-139:20010: { replSetUpdatePosition: 1, optimes: [ { durableOpTime: { ts: Timestamp 1462486622000|1, t: -1 }, appliedOpTime: { ts: Timestamp 1462486622000|1, t: -1 }, memberId: 0, cfgver: 1 }, { durableOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, appliedOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, memberId: 1, cfgver: 1 }, { durableOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, appliedOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, memberId: 2, cfgver: 1 } ] }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.229+0000 d20011| 2016-05-05T22:17:26.220+0000 D EXECUTOR [NetworkInterfaceASIO-SyncSourceFeedback-0] Scheduling remote command request: RemoteCommand 63 -- target:ip-10-168-148-139:20010 db:admin cmd:{ replSetUpdatePosition: 1, optimes: [ { durableOpTime: { ts: Timestamp 1462486622000|1, t: -1 }, appliedOpTime: { ts: Timestamp 1462486622000|1, t: -1 }, memberId: 0, cfgver: 1 }, { durableOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, appliedOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, memberId: 1, cfgver: 1 }, { durableOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, appliedOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, memberId: 2, cfgver: 1 } ] }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.230+0000 d20011| 2016-05-05T22:17:26.220+0000 D ASIO     [NetworkInterfaceASIO-SyncSourceFeedback-0] startCommand: RemoteCommand 63 -- target:ip-10-168-148-139:20010 db:admin cmd:{ replSetUpdatePosition: 1, optimes: [ { durableOpTime: { ts: Timestamp 1462486622000|1, t: -1 }, appliedOpTime: { ts: Timestamp 1462486622000|1, t: -1 }, memberId: 0, cfgver: 1 }, { durableOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, appliedOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, memberId: 1, cfgver: 1 }, { durableOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, appliedOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, memberId: 2, cfgver: 1 } ] }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.231+0000 d20011| 2016-05-05T22:17:26.220+0000 D ASIO     [NetworkInterfaceASIO-SyncSourceFeedback-0] Initiating asynchronous command: RemoteCommand 63 -- target:ip-10-168-148-139:20010 db:admin cmd:{ replSetUpdatePosition: 1, optimes: [ { durableOpTime: { ts: Timestamp 1462486622000|1, t: -1 }, appliedOpTime: { ts: Timestamp 1462486622000|1, t: -1 }, memberId: 0, cfgver: 1 }, { durableOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, appliedOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, memberId: 1, cfgver: 1 }, { durableOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, appliedOpTime: { ts: Timestamp 1462486635000|2, t: 1 }, memberId: 2, cfgver: 1 } ] }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.232+0000 d20011| 2016-05-05T22:17:26.220+0000 D ASIO     [NetworkInterfaceASIO-SyncSourceFeedback-0] Starting asynchronous command 63 on host ip-10-168-148-139:20010
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.233+0000 d20011| 2016-05-05T22:17:26.220+0000 D EXECUTOR [rsBackgroundSync-0] Executing a task on behalf of pool rsBackgroundSync
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.233+0000 d20011| 2016-05-05T22:17:26.220+0000 D REPL     [rsBackgroundSync-0] Error returned from oplog query: ExceededTimeLimit: Operation timed out, request was RemoteCommand 47 -- target:ip-10-168-148-139:20010 db:local expDate:2016-05-05T22:17:26.217+0000 cmd:{ getMore: 27336043991, collection: "oplog.rs", maxTimeMS: 5000, term: 1, lastKnownCommittedOpTime: { ts: Timestamp 1462486635000|2, t: 1 } }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.234+0000 d20011| 2016-05-05T22:17:26.220+0000 D EXECUTOR [rsBackgroundSync-0] waiting for work; I am one of 1 thread(s); the minimum number of threads is 1
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.234+0000 d20011| 2016-05-05T22:17:26.221+0000 D ASIO     [NetworkInterfaceASIO-SyncSourceFeedback-0] Request 63 finished with response: { ok: 1.0 }
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.234+0000 d20011| 2016-05-05T22:17:26.221+0000 D EXECUTOR [NetworkInterfaceASIO-SyncSourceFeedback-0] Received remote response: RemoteResponse --  cmd:{ ok: 1.0 }
...
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.238+0000 d20010| 2016-05-05T22:17:26.220+0000 D NETWORK  [conn10] Socket recv() conn closed? 10.168.148.139:60818
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.238+0000 d20010| 2016-05-05T22:17:26.220+0000 D NETWORK  [conn10] SocketException: remote: 10.168.148.139:60818 error: 9001 socket exception [CLOSED] server [10.168.148.139:60818]
[js_test:maxSyncSourceLagSecs] 2016-05-05T22:17:26.238+0000 d20010| 2016-05-05T22:17:26.220+0000 I NETWORK  [conn10] end connection 10.168.148.139:60818 (5 connections now open)



 Comments   
Comment by Judah Schvimer [ 08/Nov/18 ]

This ticket was about getMores with a 5 second maxTimeMS that timed out well after 5 seconds. It could definitely be resolved by SERVER-33942.

Comment by Charlie Swanson [ 08/Nov/18 ]

I'm not totally sure I understand this ticket but it seems at least related to SERVER-34277 and I wonder if it was resolved by SERVER-33942.

Comment by Spencer Brody (Inactive) [ 19/Dec/16 ]

Hi Vladimir,
The "Planning Bucket A" fixVersion this ticket has means that it is not currently being considered for our 3.6 release which will come out towards the end of 2017, but it is at the top of our list to consider after 3.6 is finished.

Cheers,
-Spencer

Comment by Vladimir [ 18/Dec/16 ]

Do you have any timeline to fix this?

Comment by Max Hirschhorn [ 13/Jun/16 ]

Passing this back to the replication team as they've spent time working on the CappedInsertNotifier and may have better ideas as to what could be going wrong if this isn't a case of a spurious wake-up on the condition variable.

Comment by Max Hirschhorn [ 12/Jun/16 ]

I've had a modicum of success trying to reproduce this issue. It is definitely possible to reproduce this issue outside of the maxSyncSourceLagSecs.js test, but all occurrences seem rare and seem to occur only on RHEL 5.5 (among RHEL 6.2, OS X, Solaris, and my Debian-based Linux box).

I suspect that there is an issue with the CappedInsertNotifier class in that it doesn't handle spurious wake-ups on the _notifier condition variable; if a spurious wake-up were to occur, then we'd wait for an additional timeout microseconds to occur and not the remaining duration of our original timeout. However, there isn't enough information given the relative infrequency of this issue to say whether or not a spurious wake-up is the root cause of the reported issue.

// Blocks until a capped insert bumps _version past prevVersion, the notifier
// is marked dead, or `timeout` elapses.
//
// The deadline is computed once, up front, so that a spurious wake-up of the
// condition variable does not restart the full timeout period: each loop
// iteration waits only until the original deadline, not for another whole
// `timeout`. (The previous implementation passed `timeout` to wait_for on
// every iteration, so a spurious wake-up could roughly double the wait.)
void CappedInsertNotifier::_wait(stdx::unique_lock<stdx::mutex>& lk,
                                 uint64_t prevVersion,
                                 Microseconds timeout) const {
    if (timeout == Microseconds::max()) {
        // No deadline requested: wait indefinitely for a notification or death.
        while (!_dead && prevVersion == _version) {
            _notifier.wait(lk);
        }
        return;
    }

    const auto deadline = stdx::chrono::steady_clock::now() + timeout.toSystemDuration();
    while (!_dead && prevVersion == _version) {
        if (stdx::cv_status::timeout == _notifier.wait_until(lk, deadline)) {
            return;
        }
    }
}

The getMore request for the oplog is constructed by the OplogFetcher class. It configures the network layer to use a 10 second timeout (i.e. the election timeout period) for the getMore request, and specifies a maxTimeMS of 5 seconds (i.e. half the election timeout period). My impression from reading through the logs is that the mongod is actually responding to the getMore request after 10 seconds and not that the other mongod's network layer timed out the request after 10 seconds.

This observed behavior seems at odds with my theory that the root cause could be the result of a spurious wake-up on the condition variable, given that all failures have timeouts of almost exactly 10 seconds, and not some period of time between 0 and 5 seconds + an additional 5 seconds.

Script used to try to reproduce this issue

// Repro attempt for SERVER-24114: issue oplog getMore commands with a 5 second
// maxTimeMS against a single-node replica set and throw as soon as any single
// getMore round-trip takes noticeably longer. Intended to run in the mongo
// shell (uses ReplSetTest and shell BSON wrappers).
var rst = new ReplSetTest({nodes: 1, oplogSize: 50, nodeOptions: {verbose: 1}});
var nodes = rst.startSet();
rst.initiate();
 
// Insert some large documents so that the "find" command can't return everything in a single batch.
var padding = makeStringOfLength(9 * 1024 * 1024);
assert.writeOK(nodes[0].getDB("test").fake.insert({x: padding}));
assert.writeOK(nodes[0].getDB("test").fake.insert({x: padding}));
assert.writeOK(nodes[0].getDB("test").fake.insert({x: padding}));
assert.writeOK(nodes[0].getDB("test").fake.insert({x: padding}));
 
// Open a tailable, awaitData cursor on the oplog, mirroring the find command
// the OplogFetcher sends to its sync source (compare the log lines above).
var findCmdResult = nodes[0].getDB("local").runCommand({
    find: "oplog.rs",
    filter: {ts: {$gte: new Timestamp(0, 0)}},
    tailable: true,
    oplogReplay: true,
    awaitData: true,
    maxTimeMS: new NumberLong(60000),
    term: new NumberLong(1),
});
assert.commandWorked(findCmdResult);
 
var lastKnownCommittedOpTime = {ts: new Timestamp(0, 0), t: new NumberLong(-1)};
// Loop forever issuing getMores with maxTimeMS: 5000; the throw below is the
// only exit, firing when a single getMore exceeds the 6-second budget.
while (true) {
    var getMoreStart = (new Date()).getTime();
    var getMoreResponse = nodes[0].getDB("local").runCommandWithMetadata("getMore", {
        getMore: findCmdResult.cursor.id,
        collection: "oplog.rs",
        maxTimeMS: new NumberLong(5000),
        term: new NumberLong(1),
        lastKnownCommittedOpTime: lastKnownCommittedOpTime,
    }, {$replData: true});
 
    // We update the last known committed optime to avoid causing our getMore commands to
    // immediately return without waiting for the specified maxTimeMS.
    assert.commandWorked(getMoreResponse.commandReply);
    lastKnownCommittedOpTime = getMoreResponse.metadata.$replData.lastOpCommitted;
 
    // Allow 1 second of slack over the 5 second maxTimeMS before failing.
    if ((new Date()).getTime() - getMoreStart >= 6 * 1000) {
        throw new Error("Waited longer than 5 seconds for getMore response!");
    }
}
 
// NOTE(review): unreachable given the infinite loop above — the script only
// ends via the throw; kept as written in the original attachment.
rst.stopSet();
 
// Builds a string of `length` repetitions of the character 'x'.
function makeStringOfLength(length) {
    const chars = Array.from({length: length}, function() { return 'x'; });
    return chars.join('');
}

Generated at Thu Feb 08 04:05:25 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.