Details
-
Type: Bug
-
Status: Closed
-
Priority: Major - P3
-
Resolution: Duplicate
-
Affects Version/s: 2.4.3
-
None
-
Environment: Ubuntu 12.10, 3 replica set shards, each replica set contains 3 machines
-
Operating System: Linux
-
Description
After a chunk migration, some objects that have been moved to another shard are still present on the old one.
When we query these objects from a mongo shell, reading from the primaries, everything seems fine, but when we read from the secondaries, these objects appear duplicated.
Shell behavior:
mongos> db.ig_medias.find({ mid: "421587931535525217_30569368" }, { mid: 1, "cnt.lke": 1 })
{ "_id" : ObjectId("51939a0041002e238ff315d2"), "mid" : "421587931535525217_30569368", "cnt" : { "lke" : 12 } }

mongos> db.ig_medias.find({ mid: "421587931535525217_30569368" }, { mid: 1, "cnt.lke": 1 }).readPref('secondary')
{ "_id" : ObjectId("51939a0041002e238ff315d2"), "mid" : "421587931535525217_30569368", "cnt" : { "lke" : 12 } }
{ "_id" : ObjectId("51939a0041002e238ff315d2"), "mid" : "421587931535525217_30569368", "cnt" : { "lke" : 12 } }
As you can see, there are two objects that are exactly the same when reading from secondaries.
Here are the same queries with explain():
mongos> db.ig_medias.find({ mid: "421587931535525217_30569368" }, { mid: 1, "cnt.lke": 1 }).explain() |
{
|
"clusteredType" : "ParallelSort", |
"shards" : { |
"str1/str1-1.nitrogr.am:27017,str1-2.nitrogr.am:27017,str1-3.nitrogr.am:27017" : [ |
{
|
"cursor" : "BtreeCursor mid_1", |
"isMultiKey" : false, |
"n" : 0, |
"nscannedObjects" : 0, |
"nscanned" : 0, |
"nscannedObjectsAllPlans" : 0, |
"nscannedAllPlans" : 0, |
"scanAndOrder" : false, |
"indexOnly" : false, |
"nYields" : 0, |
"nChunkSkips" : 0, |
"millis" : 0, |
"indexBounds" : { |
"mid" : [ |
[
|
"421587931535525217_30569368", |
"421587931535525217_30569368" |
]
|
]
|
},
|
"server" : "str1-1.nitrogr.am:27017" |
}
|
],
|
"str2/str2-1.nitrogr.am:27017,str2-2.nitrogr.am:27017,str2-3.nitrogr.am:27017" : [ |
{
|
"cursor" : "BtreeCursor mid_1", |
"isMultiKey" : false, |
"n" : 0, |
"nscannedObjects" : 1, |
"nscanned" : 1, |
"nscannedObjectsAllPlans" : 1, |
"nscannedAllPlans" : 1, |
"scanAndOrder" : false, |
"indexOnly" : false, |
"nYields" : 0, |
"nChunkSkips" : 1, |
"millis" : 0, |
"indexBounds" : { |
"mid" : [ |
[
|
"421587931535525217_30569368", |
"421587931535525217_30569368" |
]
|
]
|
},
|
"server" : "str2-3.nitrogr.am:27017" |
}
|
],
|
"str3/str3-1.nitrogr.am:27017,str3-2.nitrogr.am:27017,str3-3.nitrogr.am:27017" : [ |
{
|
"cursor" : "BtreeCursor mid_1", |
"isMultiKey" : false, |
"n" : 1, |
"nscannedObjects" : 1, |
"nscanned" : 1, |
"nscannedObjectsAllPlans" : 1, |
"nscannedAllPlans" : 1, |
"scanAndOrder" : false, |
"indexOnly" : false, |
"nYields" : 0, |
"nChunkSkips" : 0, |
"millis" : 0, |
"indexBounds" : { |
"mid" : [ |
[
|
"421587931535525217_30569368", |
"421587931535525217_30569368" |
]
|
]
|
},
|
"server" : "str3-2.nitrogr.am:27017" |
}
|
]
|
},
|
"cursor" : "BtreeCursor mid_1", |
"n" : 1, |
"nChunkSkips" : 1, |
"nYields" : 0, |
"nscanned" : 2, |
"nscannedAllPlans" : 2, |
"nscannedObjects" : 2, |
"nscannedObjectsAllPlans" : 2, |
"millisShardTotal" : 0, |
"millisShardAvg" : 0, |
"numQueries" : 3, |
"numShards" : 3, |
"millis" : 3 |
}
|
|
mongos> db.ig_medias.find({ mid: "421587931535525217_30569368" }, { mid: 1, "cnt.lke": 1 }).readPref('secondary').explain() |
{
|
"clusteredType" : "ParallelSort", |
"shards" : { |
"str1/str1-1.nitrogr.am:27017,str1-2.nitrogr.am:27017,str1-3.nitrogr.am:27017" : [ |
{
|
"cursor" : "BtreeCursor mid_1", |
"isMultiKey" : false, |
"n" : 0, |
"nscannedObjects" : 0, |
"nscanned" : 0, |
"nscannedObjectsAllPlans" : 0, |
"nscannedAllPlans" : 0, |
"scanAndOrder" : false, |
"indexOnly" : false, |
"nYields" : 0, |
"nChunkSkips" : 0, |
"millis" : 0, |
"indexBounds" : { |
"mid" : [ |
[
|
"421587931535525217_30569368", |
"421587931535525217_30569368" |
]
|
]
|
},
|
"server" : "str1-3.nitrogr.am:27017" |
}
|
],
|
"str2/str2-1.nitrogr.am:27017,str2-2.nitrogr.am:27017,str2-3.nitrogr.am:27017" : [ |
{
|
"cursor" : "BtreeCursor mid_1", |
"isMultiKey" : false, |
"n" : 1, |
"nscannedObjects" : 1, |
"nscanned" : 1, |
"nscannedObjectsAllPlans" : 1, |
"nscannedAllPlans" : 1, |
"scanAndOrder" : false, |
"indexOnly" : false, |
"nYields" : 0, |
"nChunkSkips" : 0, |
"millis" : 0, |
"indexBounds" : { |
"mid" : [ |
[
|
"421587931535525217_30569368", |
"421587931535525217_30569368" |
]
|
]
|
},
|
"server" : "str2-1.nitrogr.am:27017" |
}
|
],
|
"str3/str3-1.nitrogr.am:27017,str3-2.nitrogr.am:27017,str3-3.nitrogr.am:27017" : [ |
{
|
"cursor" : "BtreeCursor mid_1", |
"isMultiKey" : false, |
"n" : 1, |
"nscannedObjects" : 1, |
"nscanned" : 1, |
"nscannedObjectsAllPlans" : 1, |
"nscannedAllPlans" : 1, |
"scanAndOrder" : false, |
"indexOnly" : false, |
"nYields" : 0, |
"nChunkSkips" : 0, |
"millis" : 0, |
"indexBounds" : { |
"mid" : [ |
[
|
"421587931535525217_30569368", |
"421587931535525217_30569368" |
]
|
]
|
},
|
"server" : "str3-1b.nitrogr.am:27017" |
}
|
]
|
},
|
"cursor" : "BtreeCursor mid_1", |
"n" : 2, |
"nChunkSkips" : 0, |
"nYields" : 0, |
"nscanned" : 2, |
"nscannedAllPlans" : 2, |
"nscannedObjects" : 2, |
"nscannedObjectsAllPlans" : 2, |
"millisShardTotal" : 0, |
"millisShardAvg" : 0, |
"numQueries" : 3, |
"numShards" : 3, |
"millis" : 12 |
}
|
There is another interesting thing: according to the previous queries, the object was on str2 before the chunk migration and has since been moved to str3. Judging from the first explain() output, str2:PRIMARY no longer returns it ("n" is 0, with "nChunkSkips" at 1). However, here are the results of the same query run directly on each member of the str2 replica set:
// str2-1 SECONDARY
str2:SECONDARY> rs.slaveOk()
str2:SECONDARY> db.ig_medias.find({ mid: "421587931535525217_30569368" }, { mid: 1, "cnt.lke": 1 })
{ "_id" : ObjectId("51939a0041002e238ff315d2"), "mid" : "421587931535525217_30569368", "cnt" : { "lke" : 12 } }

// str2-2 SECONDARY
str2:SECONDARY> rs.slaveOk()
str2:SECONDARY> db.ig_medias.find({ mid: "421587931535525217_30569368" }, { mid: 1, "cnt.lke": 1 })
{ "_id" : ObjectId("51939a0041002e238ff315d2"), "mid" : "421587931535525217_30569368", "cnt" : { "lke" : 12 } }

// str2-3 PRIMARY
str2:PRIMARY> db.ig_medias.find({ mid: "421587931535525217_30569368" }, { mid: 1, "cnt.lke": 1 })
{ "_id" : ObjectId("51939a0041002e238ff315d2"), "mid" : "421587931535525217_30569368", "cnt" : { "lke" : 12 } }
The slt (shard key) value of the previous object is 393022. According to sh.status(true), the chunk containing this value is now on str3:
{ "slt" : 392653 } -->> { "slt" : 393077 } on : str3 { "t" : 530, "i" : 0 }
When we try to retrieve all the objects from this chunk:
mongos> db.ig_medias.find({ slt: {$gte: 392653, $lt: 393077}}, { mid: 1, "cnt.lke": 1 }).readPref('primary').explain() |
{
|
"clusteredType" : "ParallelSort", |
"shards" : { |
"str3/str3-1.nitrogr.am:27017,str3-2.nitrogr.am:27017,str3-3.nitrogr.am:27017" : [ |
{
|
"cursor" : "BtreeCursor slt_1", |
"isMultiKey" : false, |
"n" : 43791, |
"nscannedObjects" : 43791, |
"nscanned" : 43791, |
"nscannedObjectsAllPlans" : 43791, |
"nscannedAllPlans" : 43791, |
"scanAndOrder" : false, |
"indexOnly" : false, |
"nYields" : 59, |
"nChunkSkips" : 0, |
"millis" : 235, |
"indexBounds" : { |
"slt" : [ |
[
|
392653,
|
393077
|
]
|
]
|
},
|
"server" : "str3-2.nitrogr.am:27017" |
}
|
]
|
},
|
"cursor" : "BtreeCursor slt_1", |
"n" : 43791, |
"nChunkSkips" : 0, |
"nYields" : 59, |
"nscanned" : 43791, |
"nscannedAllPlans" : 43791, |
"nscannedObjects" : 43791, |
"nscannedObjectsAllPlans" : 43791, |
"millisShardTotal" : 235, |
"millisShardAvg" : 235, |
"numQueries" : 1, |
"numShards" : 1, |
"indexBounds" : { |
"slt" : [ |
[
|
392653,
|
393077
|
]
|
]
|
},
|
"millis" : 254 |
}
|
mongos> db.ig_medias.find({ slt: {$gte: 392653, $lt: 393077}}, { mid: 1, "cnt.lke": 1 }).readPref('secondary').explain() |
{
|
"clusteredType" : "ParallelSort", |
"shards" : { |
"str3/str3-1.nitrogr.am:27017,str3-2.nitrogr.am:27017,str3-3.nitrogr.am:27017" : [ |
{
|
"cursor" : "BtreeCursor slt_1", |
"isMultiKey" : false, |
"n" : 43791, |
"nscannedObjects" : 43791, |
"nscanned" : 43791, |
"nscannedObjectsAllPlans" : 43791, |
"nscannedAllPlans" : 43791, |
"scanAndOrder" : false, |
"indexOnly" : false, |
"nYields" : 0, |
"nChunkSkips" : 0, |
"millis" : 65, |
"indexBounds" : { |
"slt" : [ |
[
|
392653,
|
393077
|
]
|
]
|
},
|
"server" : "str3-3.nitrogr.am:27017" |
}
|
]
|
},
|
"cursor" : "BtreeCursor slt_1", |
"n" : 43791, |
"nChunkSkips" : 0, |
"nYields" : 0, |
"nscanned" : 43791, |
"nscannedAllPlans" : 43791, |
"nscannedObjects" : 43791, |
"nscannedObjectsAllPlans" : 43791, |
"millisShardTotal" : 65, |
"millisShardAvg" : 65, |
"numQueries" : 1, |
"numShards" : 1, |
"indexBounds" : { |
"slt" : [ |
[
|
392653,
|
393077
|
]
|
]
|
},
|
"millis" : 102 |
}
|
Everything looks fine there, but here are the results of the same query run directly on the str2 machines:
// str2-1 SECONDARY
|
str2:SECONDARY> db.ig_medias.find({ slt: {$gte: 392653, $lt: 393077}}, { mid: 1, "cnt.lke": 1 }) |
{ "_id" : ObjectId("5121d413710ea50b420debbe"), "mid" : "347964548473999677_9543635", "cnt" : { "lke" : 29 } } |
{ "_id" : ObjectId("512393f541002e238ff4a859"), "mid" : "390217151503894124_28517528", "cnt" : { "lke" : 38 } } |
{ "_id" : ObjectId("5131378341002e238f32144a"), "mid" : "383281284459672776_39604492", "cnt" : { "lke" : 24 } } |
{ "_id" : ObjectId("5131fecc41002e238f3b943b"), "mid" : "238527467705375711_36829133", "cnt" : { "lke" : 38 } } |
{ "_id" : ObjectId("512d19bc41002e238f07f548"), "mid" : "395567356503299509_279531956", "cnt" : { "lke" : 38 } } |
{ "_id" : ObjectId("512c87de41002e238f0030d6"), "mid" : "369210183091623223_897180", "cnt" : { "lke" : 23 } } |
{ "_id" : ObjectId("513a581d41002e238f94b99c"), "mid" : "407332724338286182_270796238", "cnt" : { "lke" : 10 } } |
{ "_id" : ObjectId("512c396241002e238ffa969a"), "mid" : "398216305239938103_312516556", "cnt" : { "lke" : 18 } } |
{ "_id" : ObjectId("515591ec41002e238f634c66"), "mid" : "422301314713088397_12787523", "cnt" : { "lke" : 13 } } |
{ "_id" : ObjectId("512c614e41002e238ffd154a"), "mid" : "397802494119832758_313004538", "cnt" : { "lke" : 27 } } |
{ "_id" : ObjectId("512e638541002e238f1974ad"), "mid" : "392182161891056449_306179277", "cnt" : { "lke" : 45 } } |
{ "_id" : ObjectId("512f128741002e238f1dc17b"), "mid" : "401136119972715099_182925853", "cnt" : { "lke" : 22 } } |
{ "_id" : ObjectId("5171d1b641002e238f630665"), "mid" : "190548751164070962_42200470", "cnt" : { "lke" : 6 } } |
{ "_id" : ObjectId("513222e341002e238f421310"), "mid" : "364344438491857520_265458215", "cnt" : { "lke" : 12 } } |
{ "_id" : ObjectId("5132b87241002e238f503297"), "mid" : "253952124281476138_47594148", "cnt" : { "lke" : 5 } } |
{ "_id" : ObjectId("5132952041002e238f4c9046"), "mid" : "293163186055939480_174301408", "cnt" : { "lke" : 19 } } |
{ "_id" : ObjectId("513336aa41002e238f55910a"), "mid" : "375606112677368687_199701635", "cnt" : { "lke" : 105 } } |
{ "_id" : ObjectId("5136f27a41002e238f707851"), "mid" : "362900259412374239_4157811", "cnt" : { "lke" : 17 } } |
{ "_id" : ObjectId("513c5cf241002e238fa0cb92"), "mid" : "351195940879395820_222487873", "cnt" : { "lke" : 46 } } |
{ "_id" : ObjectId("513c97e641002e238fa22b02"), "mid" : "350248142066526553_245941897", "cnt" : { "lke" : 42 } } |
Type "it" for more |
str2:SECONDARY> db.ig_medias.find({ slt: {$gte: 392653, $lt: 393077}}, { mid: 1, "cnt.lke": 1 }).count() |
40347
|
|
// str2-2 SECONDARY
|
str2:SECONDARY> db.ig_medias.find({ slt: {$gte: 392653, $lt: 393077}}, { mid: 1, "cnt.lke": 1 }) |
{ "_id" : ObjectId("5121d413710ea50b420debbe"), "mid" : "347964548473999677_9543635", "cnt" : { "lke" : 29 } } |
{ "_id" : ObjectId("512393f541002e238ff4a859"), "mid" : "390217151503894124_28517528", "cnt" : { "lke" : 38 } } |
{ "_id" : ObjectId("5131fecc41002e238f3b943b"), "mid" : "238527467705375711_36829133", "cnt" : { "lke" : 38 } } |
{ "_id" : ObjectId("512d19bc41002e238f07f548"), "mid" : "395567356503299509_279531956", "cnt" : { "lke" : 38 } } |
{ "_id" : ObjectId("512c87de41002e238f0030d6"), "mid" : "369210183091623223_897180", "cnt" : { "lke" : 23 } } |
{ "_id" : ObjectId("512e638541002e238f1974ad"), "mid" : "392182161891056449_306179277", "cnt" : { "lke" : 45 } } |
{ "_id" : ObjectId("513a581d41002e238f94b99c"), "mid" : "407332724338286182_270796238", "cnt" : { "lke" : 10 } } |
{ "_id" : ObjectId("512c396241002e238ffa969a"), "mid" : "398216305239938103_312516556", "cnt" : { "lke" : 18 } } |
{ "_id" : ObjectId("512c614e41002e238ffd154a"), "mid" : "397802494119832758_313004538", "cnt" : { "lke" : 27 } } |
{ "_id" : ObjectId("512f128741002e238f1dc17b"), "mid" : "401136119972715099_182925853", "cnt" : { "lke" : 22 } } |
{ "_id" : ObjectId("5131378341002e238f32144a"), "mid" : "383281284459672776_39604492", "cnt" : { "lke" : 24 } } |
{ "_id" : ObjectId("5171d1b641002e238f630665"), "mid" : "190548751164070962_42200470", "cnt" : { "lke" : 6 } } |
{ "_id" : ObjectId("513222e341002e238f421310"), "mid" : "364344438491857520_265458215", "cnt" : { "lke" : 12 } } |
{ "_id" : ObjectId("5132b87241002e238f503297"), "mid" : "253952124281476138_47594148", "cnt" : { "lke" : 5 } } |
{ "_id" : ObjectId("5136f27a41002e238f707851"), "mid" : "362900259412374239_4157811", "cnt" : { "lke" : 17 } } |
{ "_id" : ObjectId("5132952041002e238f4c9046"), "mid" : "293163186055939480_174301408", "cnt" : { "lke" : 19 } } |
{ "_id" : ObjectId("513336aa41002e238f55910a"), "mid" : "375606112677368687_199701635", "cnt" : { "lke" : 105 } } |
{ "_id" : ObjectId("513c5cf241002e238fa0cb92"), "mid" : "351195940879395820_222487873", "cnt" : { "lke" : 46 } } |
{ "_id" : ObjectId("513c97e641002e238fa22b02"), "mid" : "350248142066526553_245941897", "cnt" : { "lke" : 42 } } |
{ "_id" : ObjectId("513cea5241002e238fa32c6e"), "mid" : "349858663940795082_181939649", "cnt" : { "lke" : 54 } } |
Type "it" for more |
str2:SECONDARY> db.ig_medias.find({ slt: {$gte: 392653, $lt: 393077}}, { mid: 1, "cnt.lke": 1 }).count() |
40347
|
|
// str2-3 PRIMARY
|
str2:PRIMARY> db.ig_medias.find({ slt: {$gte: 392653, $lt: 393077}}, { mid: 1, "cnt.lke": 1 }) |
{ "_id" : ObjectId("5121d413710ea50b420debbe"), "mid" : "347964548473999677_9543635", "cnt" : { "lke" : 29 } } |
{ "_id" : ObjectId("512393f541002e238ff4a859"), "mid" : "390217151503894124_28517528", "cnt" : { "lke" : 38 } } |
{ "_id" : ObjectId("518a036141002e238fa5f264"), "mid" : "328357796146382089_13860576", "cnt" : { "lke" : 55 } } |
{ "_id" : ObjectId("515f6e8841002e238fab2ac7"), "mid" : "326812285631217041_20784622", "cnt" : { "lke" : 6 } } |
{ "_id" : ObjectId("513a581d41002e238f94b99c"), "mid" : "407332724338286182_270796238", "cnt" : { "lke" : 10 } } |
{ "_id" : ObjectId("5131fecc41002e238f3b943b"), "mid" : "238527467705375711_36829133", "cnt" : { "lke" : 38 } } |
{ "_id" : ObjectId("512d19bc41002e238f07f548"), "mid" : "395567356503299509_279531956", "cnt" : { "lke" : 38 } } |
{ "_id" : ObjectId("512c87de41002e238f0030d6"), "mid" : "369210183091623223_897180", "cnt" : { "lke" : 23 } } |
{ "_id" : ObjectId("512e638541002e238f1974ad"), "mid" : "392182161891056449_306179277", "cnt" : { "lke" : 45 } } |
{ "_id" : ObjectId("512c396241002e238ffa969a"), "mid" : "398216305239938103_312516556", "cnt" : { "lke" : 18 } } |
{ "_id" : ObjectId("512c614e41002e238ffd154a"), "mid" : "397802494119832758_313004538", "cnt" : { "lke" : 27 } } |
{ "_id" : ObjectId("517afcac880084345c36eb36"), "mid" : "442869895293265303_32677776", "cnt" : { "lke" : 3 } } |
{ "_id" : ObjectId("512f128741002e238f1dc17b"), "mid" : "401136119972715099_182925853", "cnt" : { "lke" : 22 } } |
{ "_id" : ObjectId("5131378341002e238f32144a"), "mid" : "383281284459672776_39604492", "cnt" : { "lke" : 24 } } |
{ "_id" : ObjectId("513222e341002e238f421310"), "mid" : "364344438491857520_265458215", "cnt" : { "lke" : 12 } } |
{ "_id" : ObjectId("5132b87241002e238f503297"), "mid" : "253952124281476138_47594148", "cnt" : { "lke" : 5 } } |
{ "_id" : ObjectId("513336aa41002e238f55910a"), "mid" : "375606112677368687_199701635", "cnt" : { "lke" : 105 } } |
{ "_id" : ObjectId("5132952041002e238f4c9046"), "mid" : "293163186055939480_174301408", "cnt" : { "lke" : 19 } } |
{ "_id" : ObjectId("5136f27a41002e238f707851"), "mid" : "362900259412374239_4157811", "cnt" : { "lke" : 17 } } |
{ "_id" : ObjectId("513c5cf241002e238fa0cb92"), "mid" : "351195940879395820_222487873", "cnt" : { "lke" : 46 } } |
Type "it" for more |
str2:PRIMARY> db.ig_medias.find({ slt: {$gte: 392653, $lt: 393077}}, { mid: 1, "cnt.lke": 1 }).count() |
40347
|
After checking all chunks, we found that 56 of the 1196 chunks contain duplicates.
Attachments
Issue Links
- duplicates
-
SERVER-5931 Secondary reads in sharded clusters need stronger consistency
-
- Closed
-