Core Server / SERVER-54144

Relax timing constraint in profile_operation_metrics.js

    • Type: Bug
    • Resolution: Fixed
    • Priority: Major - P3
    • Fix Version/s: 4.9.0
    • Affects Version/s: None
    • Component/s: None
    • Labels: None
    • Backwards Compatibility: Fully Compatible
    • Operating System: ALL
    • Sprint: Execution Team 2021-02-22

      The assertMetricsExist function in profile_operation_metrics.js relies on the elapsed time measured by OperationCPUTimer and expects a non-zero value when measuring the execution time of database operations.
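      For illustration, the failing pattern is a strict inequality on the sampled elapsed time. The sketch below is an approximation, not the exact test code; the field name (cpuNanos) and the message text are assumptions:

      // Sketch of the kind of check assertMetricsExist performs, assuming the
      // profiler document exposes the OperationCPUTimer reading as "cpuNanos".
      const profileDoc = db.system.profile.findOne({ns: "profile_operation_metrics.coll"});
      // Strict inequality: fails whenever the OS clock returns the same value
      // for the start and stop samples, making the elapsed time exactly 0.
      assert.gt(profileDoc.cpuNanos, 0, "expected a non-zero elapsed time");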

      However, the OS clock source is only guaranteed to return monotonically non-decreasing values, and may return the same value for a period of time. The following repro shows such a case: not only is the reported elapsed time zero, but the timestamps captured by both the mongod logger and resmoke indicate that the clock is not advancing:

      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 [jsTest] ----
      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 
      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 d20021| 2021-01-15T18:47:19.862+00:00 I  CONTROL  7777701 [conn1] "Started the timer","attr":{"timer":140017349984296}
      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 d20021| 2021-01-15T18:47:19.862+00:00 I  CONTROL  7777705 [conn1] "Got elapsed","attr":{"timer":140017349984296,"elapsedNanos":0}
      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 d20021| 2021-01-15T18:47:19.862+00:00 I  COMMAND  51803   [conn11] "Slow query","attr":{"type":"command","ns":"local.oplog.rs","appName":"OplogFetcher","command":{"getMore":2008406674905367600,"collection":"oplog.rs","batchSize":13981010,"maxTimeMS":5000,"term":1,"lastKnownCommittedOpTime":{"ts":{"$timestamp":{"t":1610736438,"i":3}},"t":1},"$db":"local","$replData":1,"$oplogQueryData":1,"$readPreference":{"mode":"secondaryPreferred"},"$clusterTime":{"clusterTime":{"$timestamp":{"t":1610736438,"i":3}},"signature":{"hash":{"$binary":{"base64":"0pIxOg35PPB2fjfkjtAnD4HOq5Q=","subType":"0"}},"keyId":6918060319390564000}}},"originatingCommand":{"find":"oplog.rs","filter":{"ts":{"$gte":{"$timestamp":{"t":1610736438,"i":2}}}},"ntoreturn":13981010,"tailable":true,"awaitData":true,"term":1,"maxTimeMS":60000,"readConcern":{"level":"local","afterClusterTime":{"$timestamp":{"t":0,"i":1}}},"$db":"local","$replData":1,"$oplogQueryData":1,"$readPreference":{"mode":"secondaryPreferred"},"$clusterTime":{"clusterTime":{"$timestamp":{"t":1610736438,"i":3}},"signature":{"hash":{"$binary":{"base64":"0pIxOg35PPB2fjfkjtAnD4HOq5Q=","subType":"0"}},"keyId":6918060319390564000}}},"planSummary":"COLLSCAN","cursorid":2008406674905367600,"keysExamined":0,"docsExamined":1,"numYields":2,"nreturned":1,"reslen":811,"locks":{"Global":{"acquireCount":{"r":3}},"Mutex":{"acquireCount":{"r":1}}},"readConcern":{"level":"local","afterClusterTime":{"$timestamp":{"t":0,"i":1}},"provenance":"clientSupplied"},"storage":{},"remote":"10.122.0.229:32975","protocol":"op_msg","durationMillis":0}
      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 d20021| 2021-01-15T18:47:19.862+00:00 I  WRITE    51803   [conn1] "Slow query","attr":{"type":"remove","ns":"profile_operation_metrics.coll","appName":"MongoDB Shell","command":{"q":{"_id":1},"limit":0},"planSummary":"IDHACK","keysExamined":1,"docsExamined":1,"ndeleted":1,"keysDeleted":1,"numYields":0,"locks":{"ParallelBatchWriterMode":{"acquireCount":{"r":1}},"ReplicationStateTransition":{"acquireCount":{"w":1}},"Global":{"acquireCount":{"w":1}},"Database":{"acquireCount":{"w":1}},"Collection":{"acquireCount":{"w":1}},"Mutex":{"acquireCount":{"r":1}}},"flowControl":{"acquireCount":1,"timeAcquiringMicros":2},"storage":{},"operationMetrics":{"docBytesRead":29,"docUnitsRead":1,"idxEntryBytesRead":3,"idxEntryUnitsRead":1,"cursorSeeks":3,"docBytesWritten":29,"docUnitsWritten":1,"idxEntryBytesWritten":3,"idxEntryUnitsWritten":1},"remote":"127.0.0.1:56621","durationMillis":0}
      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 d20021| 2021-01-15T18:47:19.862+00:00 I  CONTROL  7777704 [conn1] "Got elapsed","attr":{"timer":140017349984296,"elapsedNanos":0}
      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 d20021| 2021-01-15T18:47:19.862+00:00 I  CONTROL  7777702 [conn1] "Stopped the timer","attr":{"timer":140017349984296}
      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 d20021| 2021-01-15T18:47:19.862+00:00 I  CONTROL  7777705 [conn1] "Got elapsed","attr":{"timer":140017349984296,"elapsedNanos":0}
      [js_test:profile_operation_metrics] 2021-01-15T18:47:19.863+0000 d20021| 2021-01-15T18:47:19.862+00:00 I  COMMAND  51803   [conn1] "Slow query","attr":{"type":"remove","ns":"","appName":"MongoDB Shell","numYields":0,"locks":{"ParallelBatchWriterMode":{"acquireCount":{"r":2}},"ReplicationStateTransition":{"acquireCount":{"w":3}},"Global":{"acquireCount":{"r":1,"w":2}},"Database":{"acquireCount":{"w":2}},"Collection":{"acquireCount":{"w":2}},"Metadata":{"acquireCount":{"W":1}},"Mutex":{"acquireCount":{"r":2}}},"flowControl":{"acquireCount":1,"timeAcquiringMicros":2},"storage":{},"operationMetrics":{"docBytesRead":29,"docUnitsRead":1,"idxEntryBytesRead":3,"idxEntryUnitsRead":1,"cursorSeeks":3,"docBytesWritten":1445,"docUnitsWritten":13,"idxEntryBytesWritten":3,"idxEntryUnitsWritten":1},"remote":"127.0.0.1:56621","durationMillis":0}
      

      We should relax this constraint so that a non-advancing clock does not cause test failures.
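      A minimal sketch of the relaxation, assuming the assertion shape above: replace the strict comparison with a non-strict one, so that a zero reading from a non-advancing clock still passes while negative values are still rejected:

      // Relaxed check: a non-advancing OS clock can legitimately yield an
      // elapsed time of exactly 0, so only require a non-negative value.
      assert.gte(profileDoc.cpuNanos, 0, "expected a non-negative elapsed time");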

      If the value retrieved from OperationCPUTimer is required to always increase, we need another ticket to investigate implementation changes that would provide a stronger guarantee than the one offered by the OS.
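      For reference, one way to layer such a guarantee on top of the OS clock is to clamp each reading to be strictly greater than the previous one. The sketch below illustrates the idea in shell JavaScript only; the actual change would live in the C++ OperationCPUTimer implementation, and makeMonotonicClock is a hypothetical name:

      // Illustrative wrapper: forces strictly increasing readings by bumping
      // a stalled clock by one tick. Not actual server code.
      function makeMonotonicClock(readNanos) {
          let last = -1;
          return function() {
              let now = readNanos();
              if (now <= last) {
                  now = last + 1;  // clock did not advance; synthesize progress
              }
              last = now;
              return now;
          };
      }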

            Assignee:
            Louis Williams (louis.williams@mongodb.com)
            Reporter:
            Amirsaman Memaripour (amirsaman.memaripour@mongodb.com)
            Votes:
            0
            Watchers:
            2
