[SERVER-16700] Deadlock with WiredTiger LSM Created: 31/Dec/14  Updated: 03/Mar/15  Resolved: 03/Mar/15

Status: Closed
Project: Core Server
Component/s: Aggregation Framework, Concurrency
Affects Version/s: 2.8.0-rc4
Fix Version/s: 3.0.0-rc6

Type: Bug Priority: Major - P3
Reporter: Daniel Alabi Assignee: Kaloian Manassiev
Resolution: Done Votes: 0
Labels: 28qa
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Backwards Compatibility: Fully Compatible
Operating System: ALL
Steps To Reproduce:

This repro script is functionally equivalent to the FSM-based workload agg_sort_external.js in the concurrency test suite (which, at the time of writing, has not yet been merged into the master branch).

load('jstests/libs/parallelTester.js');
 
/**
 * Returns a string made of `size` copies of the character 'x'.
 * Results are memoized by length, so asking for the same size again
 * hands back the previously built string.
 */
var getStringOfLength = (function() {
    var memo = {};

    function buildFiller(length) {
        // Joining (length + 1) empty slots produces exactly `length` separators.
        return new Array(length + 1).join('x');
    }

    return function(size) {
        var cached = memo[size];
        if (cached) {
            return cached;
        }
        memo[size] = buildFiller(size);
        return memo[size];
    };
})();
 
/**
 * Pads `doc` in place with a `padding` string field so that
 * Object.bsonsize(doc) reaches `size`, then returns the same object.
 *
 * @param {Object} doc - document to pad; its `padding` field is overwritten.
 * @param {number} size - target size, in the units reported by Object.bsonsize.
 * @returns {Object} the same `doc`, now carrying the padding.
 * @throws {Error} if `doc` (with an empty `padding` field) is already larger
 *     than `size`, since no amount of padding can shrink it.
 */
var makeDoc = function(doc, size) {
    // Measure with an empty padding field in place so that the field's own
    // overhead is already included in the base size.
    doc.padding = '';
    var baseSize = Object.bsonsize(doc);
    var paddingLength = size - baseSize;
    if (paddingLength < 0) {
        // Without this guard a deficit of 1 silently yields an oversized
        // document and larger deficits fail with an opaque RangeError.
        throw new Error('makeDoc: document is already ' + baseSize +
                        ' bytes, larger than the requested size of ' + size);
    }
    doc.padding = getStringOfLength(paddingLength);
    return doc;
};
 
/**
 * Worker body, run once per thread. Loops forever: each pass aggregates
 * db.source (documents with flag: true, sorted by rand, disk use allowed)
 * into this thread's own output collection, then checks the result.
 *
 * Kept self-contained (no references to other script-level helpers) because
 * it is handed to ScopedThread.
 *
 * @param {number} tid - thread id; used as the suffix of the $out collection name.
 */
var main = function(tid) {
    var toCollName = 'out_agg_sort_external' + tid;

    for (;;) {
        var cursor = db.source.aggregate(
            [
                { $match: { flag: true } },
                { $sort: { rand: 1 } },
                { $out: toCollName }
            ],
            { allowDiskUse: true }
        );

        // The pipeline ends in $out, so the returned cursor should yield nothing.
        assert.eq(0, cursor.itcount());

        // Half of the source documents carry flag: true.
        var expectedCount = db.source.find().itcount() / 2;
        assert.eq(expectedCount, db[toCollName].find().itcount());
    }
};
 
// Size units, in bytes.
var KB = 1024;
var MB = KB * KB;
// Number of documents to insert, and the target size passed to makeDoc for each.
var numDocs = 24 * KB;
var docSize = 12 * KB;
 
// Only the documents with flag: true (half of them) reach the $sort stage, so
// make sure that half alone exceeds the 100MB in-memory aggregation limit.
assert.lte(100 * MB, numDocs * docSize / 2);
 
// Populate db.source with numDocs padded documents in a single unordered bulk
// write. Odd-numbered documents get flag: true; every document gets a random
// sort key.
var bulk = db.source.initializeUnorderedBulkOp();
for (var i = 0; i < numDocs; i++) {
    bulk.insert(makeDoc({ flag: i % 2 !== 0, rand: Random.rand() }, docSize));
}

// Fail fast if the write reported an error or did not insert every document.
var res = bulk.execute();
assert.writeOK(res);
assert.eq(numDocs, res.nInserted);
 
// Run the workload on numThreads threads in total: numThreads - 1 background
// ScopedThreads (tids 0 .. numThreads - 2) plus the current thread, which
// takes the last tid.
var numThreads = 10;
i = 0;
while (i < numThreads - 1) {
    var t = new ScopedThread(main, i);
    t.start();
    ++i;
}

// main() loops forever, so this call does not return.
main(i);

Participants:

 Description   

mongod seems to deadlock after a while when run using the WiredTiger LSM engine.
mongod was started with:
./mongod --port 29999 --dbpath /data/db_wt/ --storageEngine wiredTiger --wiredTigerCollectionConfigString type=lsm
and then I ran:
./mongo repro.js
See repro script above.



 Comments   
Comment by Daniel Alabi [ 01/Jan/15 ]

Below is the backtrace logged after shutting down the server. Backtraces obtained by attaching gdb to the hung mongod process would likely be more informative.

2014-12-31T14:45:42.381-0500 I -        [clientcursormon] Invariant failure getLockMode(resourceIdGlobal) != MODE_NONE src/mongo/db/concurrency/lock_state.cpp 573
2014-12-31T14:45:42.389-0500 I CONTROL  [clientcursormon] 
 0xef1019 0xe9d7e1 0xe83bc2 0x9aeb44 0x9b283b 0x9a2eb1 0x921490 0x921620 0x90b5c4 0x923e05 0xe86770 0xf3d644 0x7f9ba4bc3182 0x7f9ba3cc3efd
----- BEGIN BACKTRACE -----
{"backtrace":[{"b":"400000","o":"AF1019"},{"b":"400000","o":"A9D7E1"},{"b":"400000","o":"A83BC2"},{"b":"400000","o":"5AEB44"},{"b":"400000","o":"5B283B"},{"b":"400000","o":"5A2EB1"},{"b":"400000","o":"521490"},{"b":"400000","o":"521620"},{"b":"400000","o":"50B5C4"},{"b":"400000","o":"523E05"},{"b":"400000","o":"A86770"},{"b":"400000","o":"B3D644"},{"b":"7F9BA4BBB000","o":"8182"},{"b":"7F9BA3BC9000","o":"FAEFD"}],"processInfo":{ "mongodbVersion" : "2.8.0-rc4", "gitVersion" : "3ad571742911f04b307f0071979425511c4f2570", "uname" : { "sysname" : "Linux", "release" : "3.13.0-37-generic", "version" : "#64-Ubuntu SMP Mon Sep 22 21:28:38 UTC 2014", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "109C1DBF84F11B7B5494F36CF9C939A1080DB3DB" }, { "b" : "7FFF713D1000", "elfType" : 3, "buildId" : "0074678E5FFFF79F46C476077E67057161772F37" }, { "b" : "7F9BA4BBB000", "path" : "/lib/x86_64-linux-gnu/libpthread.so.0", "elfType" : 3, "buildId" : "FE662C4D7B14EE804E0C1902FB55218A106BC5CB" }, { "b" : "7F9BA49B3000", "path" : "/lib/x86_64-linux-gnu/librt.so.1", "elfType" : 3, "buildId" : "92FCF41EFE012D6186E31A59AD05BDBB487769AB" }, { "b" : "7F9BA47AF000", "path" : "/lib/x86_64-linux-gnu/libdl.so.2", "elfType" : 3, "buildId" : "C1AE4CB7195D337A77A3C689051DABAA3980CA0C" }, { "b" : "7F9BA44AB000", "path" : "/usr/lib/x86_64-linux-gnu/libstdc++.so.6", "elfType" : 3, "buildId" : "19EFDDAB11B3BF5C71570078C59F91CF6592CE9E" }, { "b" : "7F9BA41A5000", "path" : "/lib/x86_64-linux-gnu/libm.so.6", "elfType" : 3, "buildId" : "574C6350381DA194C00FF555E0C1784618C05569" }, { "b" : "7F9BA3F8F000", "path" : "/lib/x86_64-linux-gnu/libgcc_s.so.1", "elfType" : 3, "buildId" : "8D0AA71411580EE6C08809695C3984769F25725B" }, { "b" : "7F9BA3BC9000", "path" : "/lib/x86_64-linux-gnu/libc.so.6", "elfType" : 3, "buildId" : "7603ABF78951CC138A4105F4516B075D859DFC9A" }, { "b" : "7F9BA4DD9000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : 
"9F00581AB3C73E3AEA35995A0C50D24D59A01D47" } ] }}
 mongod(_ZN5mongo15printStackTraceERSo+0x29) [0xef1019]
 mongod(_ZN5mongo10logContextEPKc+0xE1) [0xe9d7e1]
 mongod(_ZN5mongo15invariantFailedEPKcS1_j+0xB2) [0xe83bc2]
 mongod(_ZN5mongo10LockerImplILb0EE9lockBeginENS_10ResourceIdENS_8LockModeE+0x294) [0x9aeb44]
 mongod(_ZN5mongo10LockerImplILb0EE4lockENS_10ResourceIdENS_8LockModeEjb+0x1B) [0x9b283b]
 mongod(_ZN5mongo4Lock6DBLockC1EPNS_6LockerERKNS_10StringDataENS_8LockModeE+0x111) [0x9a2eb1]
 mongod(_ZN5mongo9AutoGetDbC1EPNS_16OperationContextERKNS_10StringDataENS_8LockModeE+0x30) [0x921490]
 mongod(_ZN5mongo24AutoGetCollectionForReadC2EPNS_16OperationContextERKSs+0x60) [0x921620]
 mongod(_ZN5mongo19GlobalCursorIdCache14timeoutCursorsEPNS_16OperationContextEi+0x114) [0x90b5c4]
 mongod(_ZN5mongo19ClientCursorMonitor3runEv+0xA5) [0x923e05]
 mongod(_ZN5mongo13BackgroundJob7jobBodyEv+0x120) [0xe86770]
 mongod(+0xB3D644) [0xf3d644]
 libpthread.so.0(+0x8182) [0x7f9ba4bc3182]
 libc.so.6(clone+0x6D) [0x7f9ba3cc3efd]
-----  END BACKTRACE  -----
2014-12-31T14:45:42.390-0500 I -        [clientcursormon] 

Generated at Thu Feb 08 03:41:58 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.