[SERVER-38687] Mongod segfaults during teardown after javascript test failure Created: 18/Dec/18  Updated: 08/Mar/19  Resolved: 08/Mar/19

Status: Closed
Project: Core Server
Component/s: JavaScript
Affects Version/s: None
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Vlad Rachev (Inactive) Assignee: Gabriel Russell (Inactive)
Resolution: Duplicate Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Issue Links:
Depends
depends on SERVER-39183 Change $where to respect socket disco... Closed
Related
is related to SERVER-37716 Do not duplicate 'planSummary' in act... Closed
Operating System: ALL
Steps To Reproduce:

(function() {
    "use strict";
 
    TestData.disableImplicitSessions = true;
 
    const collName = "coll";
    const coll = db.getCollection(collName);
    coll.drop();
    for (let i = 0; i < 200; i++) {
        assert.commandWorked(coll.insert({x: 1}));
    }
 
    const cmdRes = db.runCommand({find: collName, filter: 
        { $where:  function() { sleep(100); return true; }}, batchSize: 0});
    assert.commandWorked(cmdRes);
    const cmdStr = 'db.runCommand({getMore: ' + cmdRes.cursor.id.toString() 
        + ', collection: "' + collName + '"})';
    const awaitShell = startParallelShell(cmdStr);
 
    assert.soon(function() {
        const currOp = db.currentOp( { "op": "getmore"});
 
        assert("inprog" in currOp);
        if (currOp.inprog.length === 0) {
            return false;
        }
 
        const getmoreOp = currOp.inprog[0];
        assert.eq(("not-in-obj" in getmoreOp.cursor), true,
            "this should error");
 
        db.killOp(getmoreOp.opid);
 
        return true;
    });
    awaitShell();
}());

Sprint: Security 2019-01-14, Dev Tools 2019-02-25, Dev Tools 2019-03-11
Participants:

 Description   

After the test (attached in steps to reproduce) fails during the assert.soon, mongod segfaults during shutdown.

[js_test:job0_fixture_teardown] 2018-12-18T11:19:53.087-0500 Starting the teardown of MongoDFixture (Job #0).
[MongoDFixture:job0] Stopping mongod on port 20000 with pid 10085...
[MongoDFixture:job0] 2018-12-18T11:19:53.088-0500 I CONTROL  [signalProcessingThread] got signal 15 (Terminated), will terminate after current cmd ends
[MongoDFixture:job0] 2018-12-18T11:19:53.088-0500 I NETWORK  [signalProcessingThread] shutdown: going to close listening sockets...
[MongoDFixture:job0] 2018-12-18T11:19:53.088-0500 I NETWORK  [signalProcessingThread] removing socket file: /tmp/mongodb-20000.sock
[MongoDFixture:job0] 2018-12-18T11:19:53.088-0500 F -        [signalProcessingThread] Invalid access at address: 0x8
[MongoDFixture:job0] 2018-12-18T11:19:53.105-0500 F -        [signalProcessingThread] Got signal: 11 (Segmentation fault).

Here is the stacktrace:

 /home/vlad/github/mongo/src/mongo/util/stacktrace_posix.cpp:175:30: mongo::printStackTrace(std::ostream&)
 /home/vlad/github/mongo/src/mongo/util/signal_handlers_synchronous.cpp:182:5: printSignalAndBacktrace
 /home/vlad/github/mongo/src/mongo/util/signal_handlers_synchronous.cpp:281:0: mongo::(anonymous namespace)::abruptQuitWithAddrSignal(int, siginfo_t*, void*)
 /home/vlad/github/mongo/src/third_party/mozjs-45/extract/js/src/asmjs/AsmJSSignalHandlers.cpp:1159:9: AsmJSFaultHandler(int, siginfo_t*, void*)
 ??:0:0: ??
 /home/vlad/github/mongo/src/mongo/scripting/mozjs/implscope.cpp:210:27: mongo::mozjs::MozJSImplScope::kill()
 /home/vlad/github/mongo/src/mongo/scripting/mozjs/engine.cpp:124:9: mongo::mozjs::MozJSScriptEngine::interruptAll()
 /home/vlad/github/mongo/src/mongo/db/service_context.cpp:294:13: mongo::ServiceContext::setKillAllOperations()
 /home/vlad/github/mongo/src/mongo/db/db.cpp:922:5: mongo::(anonymous namespace)::shutdownTask()
 /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/functional:2267:14: operator()
 /home/vlad/github/mongo/src/mongo/util/exit.cpp:63:0: mongo::(anonymous namespace)::runTasks(std::stack<std::function<void ()>, std::deque<std::function<void ()>, std::allocator<std::function<void ()> > > >)
 /home/vlad/github/mongo/src/mongo/util/exit.cpp:142:5: mongo::shutdown(mongo::ExitCode)
 /home/vlad/github/mongo/src/mongo/util/exit.h:83:5: mongo::exitCleanly(mongo::ExitCode)
 /home/vlad/github/mongo/src/mongo/util/signal_handlers.cpp:200:17: mongo::(anonymous namespace)::signalProcessingThread(mongo::LogFileStatus)
 ??:0:0: ??
 ??:0:0: start_thread
 /build/glibc-Cl5G7W/glibc-2.23/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:109:0: clone



 Comments   
Comment by Gabriel Russell (Inactive) [ 05/Feb/19 ]

I've confirmed that this still occurs after the mozjs-60 upgrade. I've also made a simpler test.

(function() {
    "use strict";
 
    const coll = db.getCollection("coll");
    coll.drop();
    for (let i = 0; i < 200; i++) {
        assert.commandWorked(coll.insert({x: 1}));
    }
 
    startParallelShell(
        'var cmdRes = db.runCommand({find: "coll", filter: { $where:  function() { sleep(100); return true; }}, batchSize: 0});' +
        'db.runCommand({getMore: cmdRes.cursor.id, collection: "coll"})'
    );
 
    sleep(1000);
 
    db.getSiblingDB("admin").shutdownServer({ force: true, timeoutSecs: 0});
}());

Comment by Mira Carey [ 17/Jan/19 ]

I think this may actually be downstream of bad opCtx assignment to $where queries.

If you look at $where expression_where.cpp#L75-L97:

bool WhereMatchExpression::matches(const MatchableDocument* doc, MatchDetails* details) const {
    uassert(28692, "$where compile error", _func);
    BSONObj obj = doc->toBSON();
 
    if (!getScope().isEmpty()) {
        _scope->init(&getScope());
    }
 
    _scope->advanceGeneration();
    _scope->setObject("obj", const_cast<BSONObj&>(obj));
    _scope->setBoolean("fullObject", true);  // this is a hack b/c fullObject used to be relevant
 
    int err = _scope->invoke(_func, 0, &obj, 1000 * 60, false);
    if (err == -3) {  // INVOKE_ERROR
        stringstream ss;
        ss << "error on invocation of $where function:\n" << _scope->getError();
        uassert(16812, ss.str(), false);
    } else if (err != 0) {  // ! INVOKE_SUCCESS
        uassert(16813, "unknown error in invocation of $where function", false);
    }
 
    return _scope->getBoolean("__returnValue") != 0;
}

Note that we don't register or deregister the opCtx per match (we register on node creation). And for $where's which use getMores, this can mean that we're checking for an opCtx that isn't the one we're being run under. I think it usually isn't an issue because we rarely kill any $where's which aren't on the initial batch and they do get deregistered when they're returned to the pool

Comment by Mark Benvenuto [ 31/Dec/18 ]

How to repro:

  1. Save repro in test1.js
  2. python resmoke.py --suite=core test1.js

On SIGTERM, we are crashing while trying to kill in-flight JS proxies.

(gdb) list
205	        stdx::lock_guard<stdx::mutex> lk(_mutex);
206
207	        // If we are on the right thread, in the middle of an operation, and we have a registered
208	        // opCtx, then we should check the opCtx for interrupts.
209	        if (_mr._thread.get() == PR_GetCurrentThread() && _inOp > 0 && _opCtx) {
210	            _killStatus = _opCtx->checkForInterruptNoAssert();
211	        }
212
213	        // If we didn't have a kill status, someone is killing us by hand here.
214	        if (_killStatus.isOK()) {
(gdb) p this._mr
$3 = {_thread = std::unique_ptr<nspr::Thread> containing 0x0, _runtime = std::unique_ptr<JSRuntime> containing 0x7f3bafaf1020,
  _context = std::unique_ptr<JSContext> containing 0x7f3bbcb05620}
(gdb) bt
#0  raise (sig=11) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x0000557e88d92fde in mongo::(anonymous namespace)::endProcessWithSignal (signalNum=11) at src/mongo/util/signal_handlers_synchronous.cpp:91
#2  0x0000557e88d939a5 in mongo::(anonymous namespace)::abruptQuitWithAddrSignal (signalNum=11, siginfo=0x7f3bc10edf30, ucontext_erased=0x7f3bc10ede00)
    at src/mongo/util/signal_handlers_synchronous.cpp:283
#3  0x0000557e8768e51d in AsmJSFaultHandler (signum=11, info=0x7f3bc10edf30, context=0x7f3bc10ede00) at src/third_party/mozjs-45/extract/js/src/asmjs/AsmJSSignalHandlers.cpp:1159
#4  <signal handler called>
#5  0x0000557e8754e287 in mongo::mozjs::MozJSImplScope::kill (this=0x7f3bafaed020) at src/mongo/scripting/mozjs/implscope.cpp:210
#6  0x0000557e875433b6 in mongo::mozjs::MozJSScriptEngine::interruptAll (this=0x7f3bc111e520) at src/mongo/scripting/mozjs/engine.cpp:124
#7  0x0000557e88c89b4a in mongo::ServiceContext::setKillAllOperations (this=0x7f3bc1a4dc60) at src/mongo/db/service_context.cpp:294
#8  0x0000557e868d0789 in mongo::(anonymous namespace)::shutdownTask () at src/mongo/db/db.cpp:925
#9  0x0000557e868e87b7 in std::_Function_handler<void (), void (*)()>::_M_invoke(std::_Any_data const&) (__functor=...)
    at /opt/mongodbtoolchain/v2/include/c++/5.4.0/functional:1871
#10 0x0000557e86926730 in std::function<void ()>::operator()() const (this=0x7f3bc1b03920) at /opt/mongodbtoolchain/v2/include/c++/5.4.0/functional:2267
#11 0x0000557e88d8e20d in mongo::(anonymous namespace)::runTasks(std::stack<std::function<void()>, std::deque<std::function<void()>, std::allocator<std::function<void()> > > >) (
    tasks=std::stack wrapping: std::deque with 1 elements = {...}) at src/mongo/util/exit.cpp:63
#12 0x0000557e88d8e6a7 in mongo::shutdown (code=mongo::EXIT_CLEAN) at src/mongo/util/exit.cpp:142
#13 0x0000557e868d8709 in mongo::exitCleanly (code=mongo::EXIT_CLEAN) at src/mongo/util/exit.h:83
#14 0x0000557e880a0809 in mongo::(anonymous namespace)::signalProcessingThread (rotate=mongo::LogFileStatus::kNeedToRotateLogFile) at src/mongo/util/signal_handlers.cpp:200
#15 0x0000557e880a1d44 in std::_Bind_simple<void (*(mongo::LogFileStatus))(mongo::LogFileStatus)>::_M_invoke<0ul>(std::_Index_tuple<0ul>) (this=0x7f3bc15b0948)
    at /opt/mongodbtoolchain/v2/include/c++/5.4.0/functional:1531
#16 0x0000557e880a1c50 in std::_Bind_simple<void (*(mongo::LogFileStatus))(mongo::LogFileStatus)>::operator()() (this=0x7f3bc15b0948)
    at /opt/mongodbtoolchain/v2/include/c++/5.4.0/functional:1520
#17 0x0000557e880a1be0 in std::thread::_Impl<std::_Bind_simple<void (*(mongo::LogFileStatus))(mongo::LogFileStatus)> >::_M_run() (this=0x7f3bc15b0930)
    at /opt/mongodbtoolchain/v2/include/c++/5.4.0/thread:115
#18 0x0000557e88f60cf0 in std::execute_native_thread_routine (__p=<optimized out>) at ../../../../../src/gcc-5.4.0/libstdc++-v3/src/c++11/thread.cc:84
#19 0x00007f3bc31fe58e in start_thread (arg=<optimized out>) at pthread_create.c:486
#20 0x00007f3bc312d6a3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

Comment by Vlad Rachev (Inactive) [ 18/Dec/18 ]

This test is being developed as part of linked ticket.

Generated at Thu Feb 08 04:49:41 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.