Details
- Type: Bug
- Resolution: Done
- Priority: Major - P3
- Fix Version: 2.5.2
- Operating System: ALL
Description
Initially this was ticketed as MCI-416, when the MCI team assumed it might be a problem on their side. It now looks like a flaky test that has flown under the radar on Buildbot for a long time.

The failure shows up in these build variants and suites:
windows_64 slow_weekly_tests: replsets_prefetch_stress.js
enterprise-windows-64 slow_weekly_tests: replsets_prefetch_stress.js

More examples are collected in https://jira.mongodb.org/browse/MCI-416
Example from August 30:
http://mci.10gen.com/ui/task/mongodb_mongo_master_windows_64_84ff1694c4bf92f3deef5c013ac9a5b912e6f96f_13_08_29_22_09_07_slow_weekly_tests_windows_64
http://buildlogs.mongodb.org/mci_0.9_windows_64/builds/676/test/slow_weekly_tests_0/replsets_prefetch_stress.js
Message: "assert.soon failed: ... msg:awaiting replication"
ReplSetTest awaitReplication: secondary #1, AMAZONA-H6QPPOM:31001, is NOT synced
assert.soon failed: function () {
    try {
        print("ReplSetTest awaitReplication: checking secondaries against timestamp " +
              tojson(self.latest));
        var secondaryCount = 0;
        for (var i = 0; i < self.liveNodes.slaves.length; i++) {
            var slave = self.liveNodes.slaves[i];
            // Continue if we're connected to an arbiter
            if (res = slave.getDB("admin").runCommand({replSetGetStatus: 1})) {
                if (res.myState == 7) {
                    continue;
                }
            }
            ++secondaryCount;
            var name = slave.toString().substr(14); // strip "connection to "
            print("ReplSetTest awaitReplication: checking secondary #" +
                  secondaryCount + ": " + name);
            slave.getDB("admin").getMongo().setSlaveOk();
            var log = slave.getDB("local")['oplog.rs'];
            if (log.find({}).sort({'$natural': -1}).limit(1).hasNext()) {
                var entry = log.find({}).sort({'$natural': -1}).limit(1).next();
                var ts = entry['ts'];
                if (self.latest.t < ts.t ||
                    (self.latest.t == ts.t && self.latest.i < ts.i)) {
                    self.latest = self.liveNodes.master.getDB("local")['oplog.rs'].
                        find({}).
                        sort({'$natural': -1}).
                        limit(1).
                        next()['ts'];
                    print("ReplSetTest awaitReplication: timestamp for " + name +
                          " is newer, resetting latest to " + tojson(self.latest));
                    return false;
                }
                if (!friendlyEqual(self.latest, ts)) {
                    print("ReplSetTest awaitReplication: timestamp for secondary #" +
                          secondaryCount + ", " + name + ", is " + tojson(ts) +
                          " but latest is " + tojson(self.latest));
                    print("ReplSetTest awaitReplication: last oplog entry (of " +
                          log.count() + ") for secondary #" + secondaryCount +
                          ", " + name + ", is " + tojsononeline(entry));
                    print("ReplSetTest awaitReplication: secondary #" +
                          secondaryCount + ", " + name + ", is NOT synced");
                    return false;
                }
                print("ReplSetTest awaitReplication: secondary #" +
                      secondaryCount + ", " + name + ", is synced");
            }
            else {
                print("ReplSetTest awaitReplication: waiting for secondary #" +
                      secondaryCount + ", " + name + ", to have an oplog built");
                return false;
            }
            var slaveConfigVersion = slave.getDB("local")['system.replset'].findOne().version;
            if (configVersion != slaveConfigVersion) {
                print("ReplSetTest awaitReplication: secondary #" + secondaryCount +
                      ", " + name + ", has config version #" + slaveConfigVersion +
                      ", but expected config version #" + configVersion);
                return false;
            }
        }
        print("ReplSetTest awaitReplication: finished: all " + secondaryCount +
              " secondaries synced at timestamp " + tojson(self.latest));
        return true;
    }
    catch (e) {
        print("ReplSetTest awaitReplication: caught exception: " + e);
        // we might have a new master now
        self.getLastOpTimeWritten();
        print("ReplSetTest awaitReplication: resetting: timestamp for primary " +
              self.liveNodes.master + " is " + tojson(self.latest));
        return false;
    }
}, msg:awaiting replication
Error: Printing Stack Trace
    at printStackTrace (src/mongo/shell/utils.js:37:15)
    at doassert (src/mongo/shell/assert.js:6:5)
    at Function.assert.soon (src/mongo/shell/assert.js:174:60)
    at ReplSetTest.awaitReplication (src/mongo/shell/replsettest.js:534:12)
    at D:\data\mci\git@github.commongodb\mongo.git\master\jstests\slowWeekly\replsets_prefetch_stress.js:29:10
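For context, assert.soon repeatedly evaluates the predicate above until it returns true or the timeout expires, so this failure means the secondary's newest oplog timestamp never caught up to the primary's within the window. Below is a minimal standalone sketch of the core comparison in mongo shell JavaScript; the host:port values and the timeout are hypothetical, not what the harness actually used.

var primary = new Mongo("AMAZONA-H6QPPOM:31000");   // hypothetical host:port values
var secondary = new Mongo("AMAZONA-H6QPPOM:31001");
secondary.setSlaveOk();                             // allow reads from a secondary

function latestTs(conn) {
    // Newest oplog entry, by reverse natural (insertion) order.
    return conn.getDB("local")["oplog.rs"]
               .find({}).sort({"$natural": -1}).limit(1).next()["ts"];
}

assert.soon(function () {
    var p = latestTs(primary), s = latestTs(secondary);
    // BSON Timestamps order by seconds (t) first, then by the increment (i)
    // that distinguishes entries within the same second.
    return s.t > p.t || (s.t == p.t && s.i >= p.i);
}, "awaiting replication", 5 * 60 * 1000);          // assumed 5-minute timeout

A transiently slow secondary makes each poll return false; if it never catches up before the timeout, assert.soon throws with the msg shown above, which would be consistent with a flaky test on a slow Windows builder rather than a replication bug.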