|
Looking at http://buildlogs.mongodb.org/Solaris-SmartOS%2064-bit/builds/774/test/recent%20failures/sync_passive2.js:
ReplSetTest awaitReplication: checking secondaries against timestamp { "t" : 1356355104000, "i" : 1 }
|
ReplSetTest awaitReplication: checking secondary #1: bs-smartos-x86-64-1.10gen.cc:31002
|
ReplSetTest awaitReplication: secondary #1, bs-smartos-x86-64-1.10gen.cc:31002, is synced
|
ReplSetTest awaitReplication: checking secondary #2: bs-smartos-x86-64-1.10gen.cc:31003
|
ReplSetTest awaitReplication: timestamp for secondary #2, bs-smartos-x86-64-1.10gen.cc:31003, is { "t" : 1356355054000, "i" : 4525 } but latest is { "t" : 1356355104000, "i" : 1 }
|
ReplSetTest awaitReplication: last oplog entry (of 20001) for secondary #2, bs-smartos-x86-64-1.10gen.cc:31003, is { "ts" : { "t" : 1356355054000, "i" : 4525 }, "h" : NumberLong("2924272236560738974"), "v" : 2, "op" : "i", "ns" : "test.foo", "o" : { "_id" : ObjectId("50d855ee70be68d67b74d5ab"), "x" : 9999, "foo" : "bar", "msg" : "all the talk on the market", "date" : [ ISODate("2012-12-24T13:17:34.745Z"), ISODate("2012-12-24T13:17:34.745Z"), ISODate("2012-12-24T13:17:34.745Z") ] } }
|
ReplSetTest awaitReplication: secondary #2, bs-smartos-x86-64-1.10gen.cc:31003, is NOT synced
|
m31003| Mon Dec 24 13:19:04.539 [rsSyncNotifier] replset setting oplog notifier to bs-smartos-x86-64-1.10gen.cc:31000
|
m31002| Mon Dec 24 13:19:04.539 [conn11] end connection 165.225.128.186:47553 (6 connections now open)
|
m31000| Mon Dec 24 13:19:04.539 [initandlisten] connection accepted from 165.225.128.186:48071 #27 (11 connections now open)
|
ReplSetTest awaitReplication: checking secondaries against timestamp { "t" : 1356355104000, "i" : 1 }
|
ReplSetTest awaitReplication: checking secondary #1: bs-smartos-x86-64-1.10gen.cc:31002
|
ReplSetTest awaitReplication: secondary #1, bs-smartos-x86-64-1.10gen.cc:31002, is synced
|
ReplSetTest awaitReplication: checking secondary #2: bs-smartos-x86-64-1.10gen.cc:31003
|
ReplSetTest awaitReplication: secondary #2, bs-smartos-x86-64-1.10gen.cc:31003, is synced
|
ReplSetTest awaitReplication: checking secondary #3: bs-smartos-x86-64-1.10gen.cc:31004
|
ReplSetTest awaitReplication: secondary #3, bs-smartos-x86-64-1.10gen.cc:31004, is synced
|
ReplSetTest awaitReplication: finished: all 3 secondaries synced at timestamp { "t" : 1356355104000, "i" : 1 }
|
{
|
"set" : "sync_passive2",
|
"date" : ISODate("2012-12-24T13:19:04Z"),
|
"myState" : 1,
|
"members" : [
|
{
|
"_id" : 0,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31000",
|
"health" : 1,
|
"state" : 1,
|
"stateStr" : "PRIMARY",
|
"uptime" : 67,
|
"optime" : {
|
"t" : 1356355104000,
|
"i" : 1
|
},
|
"optimeDate" : ISODate("2012-12-24T13:18:24Z"),
|
"self" : true
|
},
|
{
|
"_id" : 1,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31001",
|
"health" : 1,
|
"state" : 7,
|
"stateStr" : "ARBITER",
|
"uptime" : 40,
|
"lastHeartbeat" : ISODate("2012-12-24T13:19:04Z"),
|
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
|
"pingMs" : 0
|
},
|
{
|
"_id" : 2,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31002",
|
"health" : 1,
|
"state" : 2,
|
"stateStr" : "SECONDARY",
|
"uptime" : 40,
|
"optime" : {
|
"t" : 1356355104000,
|
"i" : 1
|
},
|
"optimeDate" : ISODate("2012-12-24T13:18:24Z"),
|
"lastHeartbeat" : ISODate("2012-12-24T13:19:04Z"),
|
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
|
"pingMs" : 0,
|
"lastHeartbeatMessage" : "syncing to: bs-smartos-x86-64-1.10gen.cc:31000"
|
},
|
{
|
"_id" : 3,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31003",
|
"health" : 1,
|
"state" : 2,
|
"stateStr" : "SECONDARY",
|
"uptime" : 40,
|
"optime" : {
|
"t" : 1356355104000,
|
"i" : 1
|
},
|
"optimeDate" : ISODate("2012-12-24T13:18:24Z"),
|
"lastHeartbeat" : ISODate("2012-12-24T13:19:04Z"),
|
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
|
"pingMs" : 0,
|
"lastHeartbeatMessage" : "syncing to: bs-smartos-x86-64-1.10gen.cc:31000"
|
},
|
{
|
"_id" : 4,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31004",
|
"health" : 1,
|
"state" : 2,
|
"stateStr" : "SECONDARY",
|
"uptime" : 40,
|
"optime" : {
|
"t" : 1356355104000,
|
"i" : 1
|
},
|
"optimeDate" : ISODate("2012-12-24T13:18:24Z"),
|
"lastHeartbeat" : ISODate("2012-12-24T13:19:04Z"),
|
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
|
"pingMs" : 0,
|
"lastHeartbeatMessage" : "syncing to: bs-smartos-x86-64-1.10gen.cc:31000"
|
}
|
],
|
"ok" : 1
|
}
|
force 2 to sync from 3
|
m31002| Mon Dec 24 13:19:04.673 [conn2] attempting to sync from bs-smartos-x86-64-1.10gen.cc:31003, but its latest opTime is 1356355054 and ours is 1356355104 so this may not work
|
{
|
"set" : "sync_passive2",
|
"date" : ISODate("2012-12-24T13:19:04Z"),
|
"myState" : 2,
|
"syncingTo" : "bs-smartos-x86-64-1.10gen.cc:31000",
|
"members" : [
|
{
|
"_id" : 0,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31000",
|
"health" : 1,
|
"state" : 1,
|
"stateStr" : "PRIMARY",
|
"uptime" : 39,
|
"optime" : {
|
"t" : 1356355104000,
|
"i" : 1
|
},
|
"optimeDate" : ISODate("2012-12-24T13:18:24Z"),
|
"lastHeartbeat" : ISODate("2012-12-24T13:19:03Z"),
|
"lastHeartbeatRecv" : ISODate("2012-12-24T13:19:04Z"),
|
"pingMs" : 0
|
},
|
{
|
"_id" : 1,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31001",
|
"health" : 1,
|
"state" : 7,
|
"stateStr" : "ARBITER",
|
"uptime" : 39,
|
"lastHeartbeat" : ISODate("2012-12-24T13:19:03Z"),
|
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
|
"pingMs" : 0
|
},
|
{
|
"_id" : 2,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31002",
|
"health" : 1,
|
"state" : 2,
|
"stateStr" : "SECONDARY",
|
"uptime" : 73,
|
"optime" : {
|
"t" : 1356355104000,
|
"i" : 1
|
},
|
"optimeDate" : ISODate("2012-12-24T13:18:24Z"),
|
"errmsg" : "syncing to: bs-smartos-x86-64-1.10gen.cc:31000",
|
"self" : true
|
},
|
{
|
"_id" : 3,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31003",
|
"health" : 1,
|
"state" : 2,
|
"stateStr" : "SECONDARY",
|
"uptime" : 39,
|
"optime" : {
|
"t" : 1356355054000,
|
"i" : 4525
|
},
|
"optimeDate" : ISODate("2012-12-24T13:17:34Z"),
|
"lastHeartbeat" : ISODate("2012-12-24T13:19:03Z"),
|
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
|
"pingMs" : 0,
|
"lastHeartbeatMessage" : "syncing to: bs-smartos-x86-64-1.10gen.cc:31000"
|
},
|
{
|
"_id" : 4,
|
"name" : "bs-smartos-x86-64-1.10gen.cc:31004",
|
"health" : 1,
|
"state" : 2,
|
"stateStr" : "SECONDARY",
|
"uptime" : 39,
|
"optime" : {
|
"t" : 1356355104000,
|
"i" : 1
|
},
|
"optimeDate" : ISODate("2012-12-24T13:18:24Z"),
|
"lastHeartbeat" : ISODate("2012-12-24T13:19:03Z"),
|
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
|
"pingMs" : 0,
|
"lastHeartbeatMessage" : "syncing to: bs-smartos-x86-64-1.10gen.cc:31000"
|
}
|
],
|
"ok" : 1
|
}
|
m31002| Mon Dec 24 13:19:04.884 [rsBackgroundSync] replSet syncing to: bs-smartos-x86-64-1.10gen.cc:31003 by request
|
m31000| Mon Dec 24 13:19:04.884 [conn22] end connection 165.225.128.186:43498 (10 connections now open)
|
m31003| Mon Dec 24 13:19:04.885 [initandlisten] connection accepted from 165.225.128.186:58331 #19 (6 connections now open)
|
m31002| Mon Dec 24 13:19:04.885 [rsBackgroundSync] replSet syncing to: bs-smartos-x86-64-1.10gen.cc:31000
|
The problem is not with replication, but with the nodes tracking each others' opTimes. The logged information from awaitReplication() and replSetGetStatus shows that the primary and all three secondaries were synced at opTime:
{ "t" : 1356355104000, "i" : 1 }
, but when we call replSetGetStatus on bs-smartos-x86-64-1.10gen.cc:31002, it reports that bs-smartos-x86-64-1.10gen.cc:31003 has opTime:
{ "t" : 1356355054000, "i" : 4525 }
. It is wrong.
This doesn't look like a bug in the test. When node[2] gets the replSetSyncFrom command, it is using stale information about node[3] to decide how to respond to the command.
|