Loading...

XML

Word

Printable

JSON

Type: Bug
Resolution: Duplicate
Priority: Major - P3
Fix Version/s: None
Affects Version/s: None
Component/s: Diagnostics, Querying
Labels:
None

Operating System:
ALL
Steps To Reproduce:
Hide

The following script demonstrates the behavioral differences between top, the per-collection latency histograms, and the global latency histogram. Note that both the top command and the per-collection latency histograms are affected by this issue.

/**
 * Repro script (mongo shell): compares how three diagnostics account for the
 * same tailable-cursor read on a capped collection:
 *   1. the global latency histogram from serverStatus().opLatencies.reads,
 *   2. the per-collection latency histogram from coll.latencyStats(), and
 *   3. the "getmore" entry of the top command for the collection.
 *
 * For each diagnostic the script snapshots the stats, runs the read, snapshots
 * again, and prints the delta in operation count and in latency.
 */
(function() {
    "use strict";

    const coll = db.c;
    coll.drop();
    assert.commandWorked(db.createCollection("c", {capped: true, size: 1024}));
    assert.writeOK(coll.insert({_id: 1}));
    assert.writeOK(coll.insert({_id: 2}));
    assert.writeOK(coll.insert({_id: 3}));

    // Read-side counters of the server-wide latency histogram.
    function getGlobalLatencyStats() {
        return db.serverStatus().opLatencies.reads;
    }

    // Read-side counters of the latency histogram for a single collection.
    function getCollectionLatencyStats(coll) {
        return coll.latencyStats().next().latencyStats.reads;
    }

    // "getmore" counters reported by the top command for a single collection.
    function getTop(coll) {
        return db.adminCommand({top: 1}).totals[coll.getFullName()].getmore;
    }

    // Runs the operation under measurement: a tailable scan over all three documents.
    function runTailableRead() {
        assert.eq(3, coll.find().tailable(true).itcount());
    }

    // Global latency histogram from serverStatus.
    const oldGlobalLatency = getGlobalLatencyStats();
    runTailableRead();
    const newGlobalLatency = getGlobalLatencyStats();
    const globalLatencyOpsDelta = newGlobalLatency.ops - oldGlobalLatency.ops;
    print("global latency hist ops delta: " + globalLatencyOpsDelta);
    const globalLatencyDelta = newGlobalLatency.latency - oldGlobalLatency.latency;
    print("global latency hist latency delta (micros): " + globalLatencyDelta);

    // Per-collection latency histogram.
    const oldCollLatency = getCollectionLatencyStats(coll);
    runTailableRead();
    const newCollLatency = getCollectionLatencyStats(coll);
    const collLatencyOpsDelta = newCollLatency.ops - oldCollLatency.ops;
    print("collection latency hist ops delta: " + collLatencyOpsDelta);
    const collLatencyDelta = newCollLatency.latency - oldCollLatency.latency;
    // Report the latency delta here (the ops delta was printed above).
    print("collection latency hist latency delta (micros): " + collLatencyDelta);

    // Top.
    const oldTop = getTop(coll);
    runTailableRead();
    const newTop = getTop(coll);
    const topOpsDelta = newTop.count - oldTop.count;
    print("top ops delta: " + topOpsDelta);
    const topLatencyDelta = newTop.time - oldTop.time;
    // Report the time delta here (the ops delta was printed above).
    print("top latency delta (micros): " + topLatencyDelta);
}());
Show
// The following script demonstrates the behavioral differences between top, the
// per-collection latency histograms, and the global latency histogram. Note that
// both the top command and the per-collection latency histograms are affected by
// this issue.
//
// For each diagnostic the script snapshots the stats, runs a tailable read over a
// three-document capped collection, snapshots again, and prints the delta in
// operation count and in latency.
(function() {
    "use strict";

    const coll = db.c;
    coll.drop();
    assert.commandWorked(db.createCollection("c", {capped: true, size: 1024}));
    assert.writeOK(coll.insert({_id: 1}));
    assert.writeOK(coll.insert({_id: 2}));
    assert.writeOK(coll.insert({_id: 3}));

    // Read-side counters of the server-wide latency histogram.
    function getGlobalLatencyStats() {
        return db.serverStatus().opLatencies.reads;
    }

    // Read-side counters of the latency histogram for a single collection.
    function getCollectionLatencyStats(coll) {
        return coll.latencyStats().next().latencyStats.reads;
    }

    // "getmore" counters reported by the top command for a single collection.
    function getTop(coll) {
        return db.adminCommand({top: 1}).totals[coll.getFullName()].getmore;
    }

    // Runs the operation under measurement: a tailable scan over all three documents.
    function runTailableRead() {
        assert.eq(3, coll.find().tailable(true).itcount());
    }

    // Global latency histogram from serverStatus.
    const oldGlobalLatency = getGlobalLatencyStats();
    runTailableRead();
    const newGlobalLatency = getGlobalLatencyStats();
    const globalLatencyOpsDelta = newGlobalLatency.ops - oldGlobalLatency.ops;
    print("global latency hist ops delta: " + globalLatencyOpsDelta);
    const globalLatencyDelta = newGlobalLatency.latency - oldGlobalLatency.latency;
    print("global latency hist latency delta (micros): " + globalLatencyDelta);

    // Per-collection latency histogram.
    const oldCollLatency = getCollectionLatencyStats(coll);
    runTailableRead();
    const newCollLatency = getCollectionLatencyStats(coll);
    const collLatencyOpsDelta = newCollLatency.ops - oldCollLatency.ops;
    print("collection latency hist ops delta: " + collLatencyOpsDelta);
    const collLatencyDelta = newCollLatency.latency - oldCollLatency.latency;
    // Report the latency delta here (the ops delta was printed above).
    print("collection latency hist latency delta (micros): " + collLatencyDelta);

    // Top.
    const oldTop = getTop(coll);
    runTailableRead();
    const newTop = getTop(coll);
    const topOpsDelta = newTop.count - oldTop.count;
    print("top ops delta: " + topOpsDelta);
    const topLatencyDelta = newTop.time - oldTop.time;
    // Report the time delta here (the ops delta was printed above).
    print("top latency delta (micros): " + topLatencyDelta);
}());
Sprint:
Query 2017-06-19
Confidence Status:
None
Work Order:
3

Aha! Reference:
None
Tracking Level:
None
Risk Status:
None
Exec Notes:
None
Goal Name(s):
None
Goal Link:
None

The getMore path uses the AutoGetCollectionForReadCommand RAII helper, which on destruction updates Top and the appropriate per-collection latency histogram. It updates the stats by

1. bumping the number of recorded reads, and
2. recording the wall clock time between construction and destruction of the RAII object.

For getMore on awaitData cursors, however, we release the AutoGetCollectionForReadCommand while blocking. This is to ensure that we do not hold collection locks while sleeping on a condition variable for potentially long periods of time. After waking up, the AutoGetCollectionForReadCommand is reacquired. The result is that we double-count each getMore operation in the Top diagnostic output. Furthermore, the time spent blocking for awaitData is not recorded. This is inconsistent with the global operation latency histogram reported by serverStatus(), which incorporates time spent blocking for awaitData.

duplicates

SERVER-29304 Exclude time spent blocking for awaitData from latency metrics

Closed

Assignee: David Storch
Reporter: David Storch
Participants: David Storch
Votes: 0 Vote for this issue
Watchers: 10 Start watching this issue

Created: May 12 2017 10:36:22 PM UTC
Updated: Jun 14 2017 10:58:32 PM UTC
Resolved: Jun 14 2017 10:58:13 PM UTC

Details

Description

Attachments

Issue Links

Activity

People

Dates