Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-65596

Server crashes when maxproc limit is reached

    • Type: Icon: Bug Bug
    • Resolution: Won't Fix
    • Priority: Icon: Minor - P4 Minor - P4
    • None
    • Affects Version/s: 5.2.0
    • Component/s: None
    • Labels:
      None
    • Service Arch
    • ALL
    • Hide

      The following code snippet for the Node.js Driver could reliably reproduce the issue for me, but I think the important part is the max/min PoolSize being set to a number equal to or greater than the system's ulimit -u value.

      // Repro: setting the driver's pool size at or above the system's
      // `ulimit -u` (maxproc) cap makes the server spawn more service
      // threads than the OS permits, crashing it with the attached trace.
      const mdb = require('mongodb');
      const { MongoClient } = mdb;
      const client = new MongoClient(process.env.MONGODB_URI, {
        maxPoolSize: 4176, // >= ulimit -u on the reproducing host
        minPoolSize: 4176  // force the full pool to open immediately
      });

      // Performs one insert + find round-trip over the oversized pool.
      async function main() {
        await client.connect();
        const collection = client.db('test_db').collection('test_collection');
        await collection.insertOne({ a: 2.3 });
        return await collection.find({ a: 2.3 }).toArray();
      }

      main()
        .then(console.log)
        .catch(console.error)
        .finally(() => client.close()); // release the pool so Node can exit
      
      Show
      The following code snippet for the Node.js Driver could reliably reproduce the issue for me, but I think the important part is the max/min PoolSize being set to a number equal to or greater than the system's ulimit -u value. const mdb = require( 'mongodb' ); const { MongoClient } = mdb; const client = new MongoClient(process.env.MONGODB_URI, { maxPoolSize: 4176, minPoolSize: 4176 }); async function main() { await client.connect(); const collection = client.db( 'test_db' ).collection( 'test_collection' ); await collection.insertOne({ a: 2.3 }); return await collection.find({ a: 2.3 }).toArray(); } main().then(console.log).catch(console.error);

      This issue is potentially tolerable / expected. I've encountered a scenario where the server crashes when it's under a load that is greater than the system's resources will permit. My maxproc value is capped at 4176, and asking a driver to create a pool size larger than that number will cause the server to exit with the attached backtrace. I was testing with a 3-node replica set and an arbiter.

      Is this something that should be caught and handled? Like preventing new connections or something similar.

      OS: Macos 12.3.1 Darwin 21.4.0 Darwin Kernel Version 21.4.0: Fri Mar 18 00:45:05 PDT 2022; root:xnu-8020.101.4~15/RELEASE_X86_64 x86_64

      Server:

      {
          "version": "5.2.0",
          "gitVersion": "c930ce7cdda51b0aec05dddbab88b7468902d24e",
          "modules": [
              "enterprise"
          ],
          "allocator": "system",
          "environment": {
              "distarch": "x86_64",
              "target_arch": "x86_64"
          }
      }
      
      _ZN5mongo18stack_trace_detail12_GLOBAL__N_119printStackTraceImplERKNS1_7OptionsEPNS_14StackTraceSinkE
      _ZN5mongo15printStackTraceEv
      _ZN5mongo12_GLOBAL__N_111myTerminateEv
      _ZSt11__terminatePFvvE
      _ZSt9terminatev
      _ZN5mongo10ThreadPool4Impl8scheduleENS_15unique_functionIFvNS_6StatusEEEE
      _ZN5mongo10ThreadPool8scheduleENS_15unique_functionIFvNS_6StatusEEEE
      _ZN5mongo4repl16OplogApplierImpl16_applyOplogBatchEPNS_16OperationContextENSt3__16vectorINS0_10OplogEntryENS4_9allocatorIS6_EEEE
      _ZN5mongo4repl16OplogApplierImpl4_runEPNS0_11OplogBufferE
      _ZZN5mongo15unique_functionIFvRKNS_8executor12TaskExecutor12CallbackArgsEEE8makeImplIZNS_4repl12OplogApplier7startupEvE3$_0EEDaOT_EN12SpecificImpl4callES5_
      _ZN5mongo8executor22ThreadPoolTaskExecutor11runCallbackENSt3__110shared_ptrINS1_13CallbackStateEEE
      _ZZN5mongo15unique_functionIFvNS_6StatusEEE8makeImplIZNS_8executor22ThreadPoolTaskExecutor23scheduleIntoPool_inlockEPNSt3__14listINS7_10shared_ptrINS6_13CallbackStateEEENS7_9allocatorISB_EEEERKNS7_15__list_iteratorISB_PvEESK_NS7_11unique_lockINS_12latch_detail5LatchEEEE3$_3EEDaOT_EN12SpecificImpl4callEOS1_
      _ZN5mongo10ThreadPool4Impl10_doOneTaskEPNSt3__111unique_lockINS_12latch_detail5LatchEEE
      _ZN5mongo10ThreadPool4Impl13_consumeTasksEv
      _ZN5mongo10ThreadPool4Impl17_workerThreadBodyERKNSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEE
      _ZNSt3__114__thread_proxyINS_5tupleIJNS_10unique_ptrINS_15__thread_structENS_14default_deleteIS3_EEEEZN5mongo4stdx6threadC1IZNS7_10ThreadPool4Impl25_startWorkerThread_inlockEvE4$_21JELi0EEET_DpOT0_EUlvE_EEEEEPvSK_
      _pthread_start
      thread_start
      

            Assignee:
            backlog-server-servicearch [DO NOT USE] Backlog - Service Architecture
            Reporter:
            neal.beeken@mongodb.com Neal Beeken
            Votes:
            0 Vote for this issue
            Watchers:
            6 Start watching this issue

              Created:
              Updated:
              Resolved: