I did this, but we have the same issue. It is happening on one node only.
Errors on this node:
Tue Jan 13 09:35:14.919 [rsHealthPoll] couldn't connect to 10.98.2.20:27017: couldn't connect to server 10.98.2.20:27017
Tue Jan 13 09:35:14.919 BackgroundJob starting: ConnectBG
Tue Jan 13 09:35:15.575 [rsHealthPoll] replset info 10.98.2.22:27017 heartbeat failed, retrying
Tue Jan 13 09:35:15.575 BackgroundJob starting: ConnectBG
Tue Jan 13 09:35:15.758 [rsHealthPoll] couldn't connect to 10.98.2.21:27017: couldn't connect to server 10.98.2.21:27017
Tue Jan 13 09:35:15.927 [rsHealthPoll] couldn't connect to 10.98.2.20:27017: couldn't connect to server 10.98.2.20:27017
Tue Jan 13 09:35:15.927 BackgroundJob starting: ConnectBG
Tue Jan 13 09:35:16.026 [conn7440] run command admin.$cmd { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.2.21:27017" }
Tue Jan 13 09:35:16.026 [conn7440] command admin.$cmd command: { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.2.21:27017" } ntoreturn:1 keyUpdates:0 reslen:178 0ms
Tue Jan 13 09:35:16.098 [conn7437] run command admin.$cmd { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.141.120:27017" }
Tue Jan 13 09:35:16.098 [conn7437] command admin.$cmd command: { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.141.120:27017" } ntoreturn:1 keyUpdates:0 reslen:158 0ms
Tue Jan 13 09:35:16.284 [conn7441] run command admin.$cmd { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.2.22:27017" }
Tue Jan 13 09:35:16.284 [conn7441] command admin.$cmd command: { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.2.22:27017" } ntoreturn:1 keyUpdates:0 reslen:178 0ms
Tue Jan 13 09:35:16.593 [rsHealthPoll] couldn't connect to 10.98.2.22:27017: couldn't connect to server 10.98.2.22:27017
Tue Jan 13 09:35:16.593 BackgroundJob starting: ConnectBG
Tue Jan 13 09:35:16.795 [conn7439] run command admin.$cmd { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.141.170:27017" }
Tue Jan 13 09:35:16.795 [conn7439] command admin.$cmd command: { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.141.170:27017" } ntoreturn:1 keyUpdates:0 reslen:158 0ms
Tue Jan 13 09:35:16.834 [conn7438] run command admin.$cmd { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.2.20:27017" }
Tue Jan 13 09:35:16.834 [conn7438] command admin.$cmd command: { replSetHeartbeat: "ecset01", v: 29, pv: 1, checkEmpty: false, from: "10.98.2.20:27017" } ntoreturn:1 keyUpdates:0 reslen:178 0ms
Tue Jan 13 09:35:16.940 BackgroundJob starting: ConnectBG
Tue Jan 13 09:35:17.601 [rsHealthPoll] couldn't connect to 10.98.2.22:27017: couldn't connect to server 10.98.2.22:27017
Tue Jan 13 09:35:17.601 BackgroundJob starting: ConnectBG
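
In case it helps, this is roughly how I have been testing basic connectivity to the other members from the affected node. It is just a quick shell sketch using the standard Mongo() constructor and the ping admin command; the addresses are our members:

// Rough connectivity check, run in the mongo shell on the affected node.
// Mongo() opens a direct connection; ping is a standard admin command.
["10.98.2.20:27017", "10.98.2.21:27017", "10.98.2.22:27017"].forEach(function (host) {
    try {
        var res = new Mongo(host).getDB("admin").runCommand({ ping: 1 });
        print(host + " -> " + tojson(res));
    } catch (e) {
        print(host + " -> connection failed: " + e);
    }
});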

Here is the output of db.slaves.find().pretty() on the primary:
ecset01:PRIMARY> db.slaves.find().pretty();
{
    "_id" : ObjectId("539639a230fdaa2e0a1a6252"),
    "config" : {
        "_id" : 4,
        "host" : "10.98.141.170:27017",
        "priority" : 0,
        "hidden" : true
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1403479862,
        "i" : 20
    }
}
{
    "_id" : ObjectId("502377d7973506627217ad2c"),
    "config" : {
        "_id" : 1,
        "host" : "10.98.2.21:27017"
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1403640972,
        "i" : 48
    }
}
{
    "_id" : ObjectId("50237f40811578c8407c4013"),
    "config" : {
        "_id" : 2,
        "host" : "10.98.2.22:27017"
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1403434502,
        "i" : 6
    }
}
{
    "_id" : ObjectId("53a7ffeff9dec7f137283a2c"),
    "config" : {
        "_id" : 4,
        "host" : "10.98.141.170:27017",
        "priority" : 0,
        "hidden" : true
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1403687295,
        "i" : 1
    }
}
{
    "_id" : ObjectId("53a8d6f696ad4ac732a931d8"),
    "config" : {
        "_id" : 2,
        "host" : "10.98.2.22:27017"
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1420920928,
        "i" : 1
    }
}
{
    "_id" : ObjectId("53aa302c71e48a32f08ebb5d"),
    "config" : {
        "_id" : 1,
        "host" : "10.98.2.21:27017"
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1403687283,
        "i" : 27
    }
}
{
    "_id" : ObjectId("53ab0302ec35a48fd0312dfc"),
    "config" : {
        "_id" : 4,
        "host" : "10.98.141.170:27017",
        "priority" : 0,
        "hidden" : true
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1421102614,
        "i" : 2
    }
}
{
    "_id" : ObjectId("53d6e1e4405531a76ae81db2"),
    "config" : {
        "_id" : 5,
        "host" : "10.98.141.120:27017",
        "priority" : 0,
        "slaveDelay" : 86400,
        "hidden" : true
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1418881657,
        "i" : 50
    }
}
{
    "_id" : ObjectId("549478f8e2a0e87c18167a7f"),
    "config" : {
        "_id" : 5,
        "host" : "10.98.141.120:27017",
        "priority" : 0,
        "slaveDelay" : 86400,
        "hidden" : true
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1420804683,
        "i" : 18
    }
}
{
    "_id" : ObjectId("54b1c8274385133608f1c691"),
    "config" : {
        "_id" : 6,
        "host" : "10.10.14.158:27017",
        "priority" : 0,
        "slaveDelay" : 86400,
        "hidden" : true
    },
    "ns" : "local.oplog.rs",
    "syncedTo" : {
        "t" : 1421080215,
        "i" : 2
    }
}
ecset01:PRIMARY>
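
To gauge how stale these entries are, I printed each one with its syncedTo time converted to a date. This is a quick sketch; it assumes syncedTo.t is seconds since the epoch, which matches the shell's Timestamp type on our version:

// List each tracked slave with its member _id and last-synced wall-clock time.
// Assumes syncedTo.t is a Unix timestamp in seconds (shell Timestamp type).
db.getSiblingDB("local").slaves.find().forEach(function (doc) {
    print(doc.config.host + " (member " + doc.config._id + ") last synced " +
          new Date(doc.syncedTo.t * 1000));
});

Several hosts show up more than once, and some entries date back to mid-2014.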
But here is the current replica set configuration:
ecset01:PRIMARY> rs.conf()
{
    "_id" : "ecset01",
    "version" : 26,
    "members" : [
        {
            "_id" : 1,
            "host" : "10.98.2.21:27017",
            "priority" : 2
        },
        {
            "_id" : 2,
            "host" : "10.98.2.22:27017",
            "priority" : 3
        },
        {
            "_id" : 3,
            "host" : "10.98.2.20:27017",
            "priority" : 4
        },
        {
            "_id" : 4,
            "host" : "10.98.141.170:27017",
            "priority" : 0,
            "hidden" : true
        },
        {
            "_id" : 5,
            "host" : "10.98.141.120:27017",
            "priority" : 0,
            "slaveDelay" : 86400,
            "hidden" : true
        },
        {
            "_id" : 6,
            "host" : "10.10.14.158:27017",
            "priority" : 0,
            "hidden" : true
        }
    ]
}
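
To line the two up, I used a rough comparison like the following; it only matches on the host string, nothing more:

// Rough cross-check: flag slaves documents whose host is not in the current config.
var configHosts = rs.conf().members.map(function (m) { return m.host; });
db.getSiblingDB("local").slaves.find().forEach(function (doc) {
    var status = (configHosts.indexOf(doc.config.host) === -1) ? "NOT in rs.conf" : "in rs.conf";
    print(doc._id + "  " + doc.config.host + "  " + status);
});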
This is a production database. Please confirm that dropping the "slaves" collection on the Primary will not cause any issues.
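
For clarity, the exact operation I am asking about would be the following, run on the Primary. As I understand it, local.slaves is where older mongod versions record each secondary's sync progress:

// The operation in question: drop the slaves tracking collection on the Primary.
// (My understanding: older mongod versions use local.slaves to record sync progress.)
db.getSiblingDB("local").slaves.drop()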