-
Type:
Bug
-
Resolution: Done
-
Priority:
Major - P3
-
Affects Version/s: 2.5.2
-
Component/s: Testing Infrastructure
-
ALL
-
None
-
3
-
None
-
None
-
None
-
None
-
None
-
None
Initially this was ticketed as MCI-416, when MCI folks assumed it might be their problem. But now it's looking like a flaky test that has flown under the radar on Buildbot for a long time.
windows_64 slow_weekly_tests : replsets_prefetch_stress.js
enterprise-windows-64 slow_weekly_tests : replsets_prefetch_stress.js
More examples in https://jira.mongodb.org/browse/MCI-416
Example from August 30:
http://mci.10gen.com/ui/task/mongodb_mongo_master_windows_64_84ff1694c4bf92f3deef5c013ac9a5b912e6f96f_13_08_29_22_09_07_slow_weekly_tests_windows_64
http://buildlogs.mongodb.org/mci_0.9_windows_64/builds/676/test/slow_weekly_tests_0/replsets_prefetch_stress.js
Message: "assert.soon failed: ... msg:awaiting replication"
ReplSetTest awaitReplication: secondary #1, AMAZONA-H6QPPOM:31001, is NOT synced assert.soon failed: function () { try { print("ReplSetTest awaitReplication: checking secondaries against timestamp " + tojson(self.latest)); var secondaryCount = 0; for (var i=0; i < self.liveNodes.slaves.length; i++) { var slave = self.liveNodes.slaves[i]; // Continue if we're connected to an arbiter if (res = slave.getDB("admin").runCommand({replSetGetStatus: 1})) { if (res.myState == 7) { continue; } } ++secondaryCount; var name = slave.toString().substr(14); // strip "connection to " print("ReplSetTest awaitReplication: checking secondary #" + secondaryCount + ": " + name); slave.getDB("admin").getMongo().setSlaveOk(); var log = slave.getDB("local")['oplog.rs']; if (log.find({}).sort({'$natural': -1}).limit(1).hasNext()) { var entry = log.find({}).sort({'$natural': -1}).limit(1).next(); var ts = entry['ts']; if (self.latest.t < ts.t || (self.latest.t == ts.t && self.latest.i < ts.i)) { self.latest = self.liveNodes.master.getDB("local")['oplog.rs']. find({}). sort({'$natural': -1}). limit(1). next()['ts']; print("ReplSetTest awaitReplication: timestamp for " + name + " is newer, resetting latest to " + tojson(self.latest)); return false; } if (!friendlyEqual(self.latest, ts)) { print("ReplSetTest awaitReplication: timestamp for secondary #" + secondaryCount + ", " + name + ", is " + tojson(ts) + " but latest is " + tojson(self.latest)); print("ReplSetTest awaitReplication: last oplog entry (of " + log.count() + ") for secondary #" + secondaryCount + ", " + name + ", is " + tojsononeline(entry)); print("ReplSetTest awaitReplication: secondary #" + secondaryCount + ", " + name + ", is NOT synced"); return false; } print("ReplSetTest awaitReplication: secondary #" + secondaryCount + ", " + name + ", is synced"); } else { print("ReplSetTest awaitReplication: waiting for secondary #" + secondaryCount + ", " + name + ", to have an oplog built"); return false; } var slaveConfigVersion = slave.getDB("local")['system.replset'].findOne().version; if (configVersion != slaveConfigVersion) { print("ReplSetTest awaitReplication: secondary #" + secondaryCount + ", " + name + ", has config version #" + slaveConfigVersion + ", but expected config version #" + configVersion); return false; } } print("ReplSetTest awaitReplication: finished: all " + secondaryCount + " secondaries synced at timestamp " + tojson(self.latest)); return true; } catch (e) { print("ReplSetTest awaitReplication: caught exception: " + e); // we might have a new master now self.getLastOpTimeWritten(); print("ReplSetTest awaitReplication: resetting: timestamp for primary " + self.liveNodes.master + " is " + tojson(self.latest)); return false; } }, msg:awaiting replication Error: Printing Stack Trace at printStackTrace (src/mongo/shell/utils.js:37:15) at doassert (src/mongo/shell/assert.js:6:5) at Function.assert.soon (src/mongo/shell/assert.js:174:60) at ReplSetTest.awaitReplication (src/mongo/shell/replsettest.js:534:12) at D:\data\mci\git@github.commongodb\mongo.git\master\jstests\slowWeekly\replsets_prefetch_stress.js:29:10