Regardless of the driver used (Java or Perl), we can get a wrong number of updated docs.
We are running MongoDB v2.0.5, 64-bit Linux, 5 shards with 3 nodes each.
The shard key is _id.
Selecting the documents concerned through mongos looks fine:
|
mongos> db.offer.find({_id:{$in:[1059301595,1059301637]}},{clusterId:1})
{ "_id" : NumberLong(1059301595), "clusterId" : NumberLong("10071732130") }
{ "_id" : NumberLong(1059301637), "clusterId" : NumberLong("10071732130") }

mongos> db.offer.count({_id:{$in:[1059301595,1059301637]}},{clusterId:1})
2
|
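For reference, the chunk metadata in the config database shows which shard owns the _id range containing these two documents. A minimal sketch, run through the same mongos (the namespace offerStore.offer is the one used throughout this report):

use config
// list the chunk ranges and owning shards for the collection; the range whose
// [min._id, max._id) interval contains 1059301595 and 1059301637 names the owning shard
db.chunks.find({ ns: "offerStore.offer" }, { min: 1, max: 1, shard: 1 }).sort({ min: 1 })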
Both documents are located on shard 5. There are no orphaned docs on the other shards.
Step 1) Connect to mongos and update these 2 docs -> OK (number of updated docs = 2)
Step 2) Connect to the primary of shard 5 and update these 2 docs -> OK (number of updated docs = 2)
Step 3) Connect to mongos and update these 2 docs -> FAILED (mongos reports that 4 docs have been updated, even though there are only 2 such documents)
It seems that mongos did not only find the 2 docs on shard 5; it also counted 1 doc each on shard 1 and shard 4 (see below), even though no such documents exist there.
My logs for the above steps:
|
Step 1)

perl -MMongoDB -MData::Dump=pp -e 'my $conn = MongoDB::Connection->new(host => "mongodb://sx210:27018"); my $coll = $conn->get_database("offerStore")->get_collection("offer"); my @ids = ("1059301595","1059301637"); @ids = map { 0+$_ } @ids; my $return = $coll->update({ _id => { q[$in] => \@ids }},{ q[$set] => { clusterId => 10071732130 }},{ safe => 1, multiple => 1 }); print pp($return) ."\n";'

{
  connectionId => 930083,
  err => undef,
  lastOp => 5760590994173067268,
  n => 2,
  ok => 1,
  ...
|
|
Step 2)

perl -MMongoDB -MData::Dump=pp -e 'my $conn = MongoDB::Connection->new(host => "mongodb://s209:27018"); my $coll = $conn->get_database("offerStore")->get_collection("offer"); my @ids = ("1059301595","1059301637"); @ids = map { 0+$_ } @ids; my $return = $coll->update({ _id => { q[$in] => \@ids }},{ q[$set] => { clusterId => 10071732130 }},{ safe => 1, multiple => 1 }); print pp($return) ."\n";'

{
  connectionId => 930105,
  err => undef,
  lastOp => 5760591024237838361,
  n => 2,
  ok => 1,
  ...
|
|
Step 3)

perl -MMongoDB -MData::Dump=pp -e 'my $conn = MongoDB::Connection->new(host => "mongodb://sx210:27018"); my $coll = $conn->get_database("offerStore")->get_collection("offer"); my @ids = ("1059301595","1059301637"); @ids = map { 0+$_ } @ids; my $return = $coll->update({ _id => { q[$in] => \@ids }},{ q[$set] => { clusterId => 10071732130 }},{ safe => 1, multiple => 1 }); print pp($return) ."\n";'

do {
  my $a = {
    err => undef,
    n => 4,
    ok => 1,
    shardRawGLE => {
      "offerStoreDE1/s118:27018,s126:27018,s131:27018" => {
        connectionId => 974213,
        err => undef,
        lastOp => 5760594412967034883,
        n => 1,
        ok => 1,
        updatedExisting => bless(do{\(my $o = 1)}, "boolean"),
        wtime => 0,
      },
      "offerStoreDE2/s120:27018,s127:27018,s136:27018" => {
        connectionId => 299967,
        err => undef,
        lastOp => 0,
        n => 0,
        ok => 1,
        wnote => "no write has been done on this connection",
        wtime => 0,
      },
      "offerStoreDE3/s117:27018,s124:27018,s129:27018" => {
        connectionId => 744569,
        err => undef,
        lastOp => 0,
        n => 0,
        ok => 1,
        wnote => "no write has been done on this connection",
        wtime => 0,
      },
      "offerStoreDE4/s115:27018,s125:27018,s132:27018" => {
        connectionId => 586373,
        err => undef,
        lastOp => 5760594679255007242,
        n => 1,
        ok => 1,
        updatedExisting => 'fix',
        wtime => 0,
      },
      "offerStoreDE5/s128:27018,s135:27018,s209:27018" => {
        connectionId => 930971,
        err => undef,
        lastOp => 5760594915478208515,
        n => 2,
        ok => 1,
        updatedExisting => 'fix',
        wtime => 0,
      },
    },
    shards => [
      "offerStoreDE1/s118:27018,s126:27018,s131:27018",
      "offerStoreDE2/s120:27018,s127:27018,s136:27018",
      "offerStoreDE3/s117:27018,s124:27018,s129:27018",
      "offerStoreDE4/s115:27018,s125:27018,s132:27018",
      "offerStoreDE5/s128:27018,s135:27018,s209:27018",
    ],
    updatedExisting => 'fix',
  };
  $a->{shardRawGLE}{"offerStoreDE4/s115:27018,s125:27018,s132:27018"}{updatedExisting} = \${$a->{shardRawGLE}{"offerStoreDE1/s118:27018,s126:27018,s131:27018"}{updatedExisting}};
  $a->{shardRawGLE}{"offerStoreDE5/s128:27018,s135:27018,s209:27018"}{updatedExisting} = \${$a->{shardRawGLE}{"offerStoreDE1/s118:27018,s126:27018,s131:27018"}{updatedExisting}};
  $a->{updatedExisting} = \${$a->{shardRawGLE}{"offerStoreDE1/s118:27018,s126:27018,s131:27018"}{updatedExisting}};
  $a;
}
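One way to rule out orphaned copies on the shards that unexpectedly report n=1 is to run the count directly against the primary of each shard other than offerStoreDE5 (a sketch; hostnames are taken from the shard list above):

// e.g. connect with: mongo s118:27018/offerStore   (and likewise for the other primaries)
// expected result: 0 everywhere except on offerStoreDE5
db.offer.count({ _id: { $in: [1059301595, 1059301637] } })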
|
We can reproduce it at any time.
The Java driver's WriteResult.getN() method produces the same result:
final ServerAddress address = new ServerAddress("sx210", 27018); // router
//final ServerAddress address = new ServerAddress("s209", 27018); // shard5
final Mongo m = new Mongo(address);
m.setReadPreference(ReadPreference.PRIMARY);
final BasicDBObject query = new BasicDBObject();
query.put("_id", new BasicDBObject("$in", Lists.newArrayList(1059301595l, 1059301637l)));

final DBObject set = new BasicDBObject();
set.put("clusterId", 10071732130l);
final DBObject updates = new BasicDBObject("$set", set);
final DB db = m.getDB("offerStore");

final WriteResult wr = db.getCollection("offer").update(query, updates, false, true);
final int updated = wr.getN();

log.info("updated {}", Integer.valueOf(updated));
|
|
|
Great to hear.
|
|
Mongo v2.2.3 seems to solve this issue since I couldn't reproduce it! Thanks!
|
|
Yes, there are a few related issues which have probably been covered and fixed by now.
|
|
It might be related to this issue (and so it's probably already fixed in v2.3.2?):
https://jira.mongodb.org/browse/SERVER-4532
|
|
Is there any progress on this issue? Please let me know if you need more logs or other info. Thanks!
|
|
Correct, I don't get the same (wrong) results when I run the same update in the shell. My shell is version 2.0.1. All mongod and mongos processes run v2.2.0 with the --keyFile option.
Btw. db.getLastError() is always null in the shell, so I need to call db.runCommand( "getlasterror" ) instead.
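(In the shell, db.getLastError() returns only the "err" field, which is null for a successful write; the full document is also available via the helper shown below. A minimal sketch:)

db.getLastError()      // just the err string; null means the write succeeded
db.getLastErrorObj()   // the full getLastError document, same as db.runCommand( "getlasterror" )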
mongos> db.offer.update({_id:{$in:[1059301595, 1059301637]}}, {$set:{"clusterId":10071732130}}, false, true)
mongos> db.runCommand( "getlasterror" )
{
    "singleShard" : "offerStoreDE4/s115:27018,s125:27018,s132:27018",
    "updatedExisting" : true,
    "n" : 2,
    "lastOp" : {
        "t" : 1350653030000,
        "i" : 2
    },
    "connectionId" : 710211,
    "err" : null,
    "ok" : 1
}
mongos>
|
#Switching to Shard3, where both documents are located
connecting to: s125:27018/offerStore
PRIMARY> db.offer.update({_id:{$in:[1059301595, 1059301637]}}, {$set:{"clusterId":10071732130}}, false, true)
PRIMARY> db.runCommand( "getlasterror" )
{
    "updatedExisting" : true,
    "n" : 2,
    "lastOp" : {
        "t" : 1350653057000,
        "i" : 2
    },
    "connectionId" : 711789,
    "err" : null,
    "ok" : 1
}
PRIMARY>
|
#Switching back to the router
connecting to: sx210:27018/offerStore
mongos> db.offer.update({_id:{$in:[1059301595, 1059301637]}}, {$set:{"clusterId":10071732130}}, false, true)
mongos> db.runCommand( "getlasterror" )
{
    "shards" : [
        "offerStoreDE2/172.16.254.6:27018,s127:27018,s131:27018,s136:27018",
        "offerStoreDE3/s117:27018,s124:27018,s129:27018",
        "offerStoreDE4/s115:27018,s125:27018,s132:27018"
    ],
    "shardRawGLE" : {
        "offerStoreDE2/172.16.254.6:27018,s127:27018,s131:27018,s136:27018" : {
            "n" : 0,
            "lastOp" : {
                "t" : 1350652447000,
                "i" : 2
            },
            "connectionId" : 917842,
            "err" : null,
            "ok" : 1
        },
        "offerStoreDE3/s117:27018,s124:27018,s129:27018" : {
            "n" : 0,
            "lastOp" : {
                "t" : 1350651282000,
                "i" : 1
            },
            "connectionId" : 853752,
            "err" : null,
            "ok" : 1
        },
        "offerStoreDE4/s115:27018,s125:27018,s132:27018" : {
            "updatedExisting" : true,
            "n" : 2,
            "lastOp" : {
                "t" : 1350653074000,
                "i" : 2
            },
            "connectionId" : 710211,
            "err" : null,
            "ok" : 1
        }
    },
    "n" : 2,
    "updatedExisting" : true,
    "err" : null,
    "ok" : 1
}
mongos>
|
As you can see, n is always correctly 2.
When I execute the same update through the Java driver, n is 3 (and not 4 as in the earlier tests). This also happens when I use WriteConcern.SAFE:
{ "serverUsed" : "sx210/172.16.64.161:27018" ,
|
"shards" : [ "offerStoreDE2/172.16.254.6:27018,s127:27018,s131:27018,s136:27018" ,
|
"offerStoreDE3/s117:27018,s124:27018,s129:27018" ,
|
"offerStoreDE4/s115:27018,s125:27018,s132:27018"] ,
|
"shardRawGLE" : {
|
"offerStoreDE2/172.16.254.6:27018,s127:27018,s131:27018,s136:27018" :
|
{ "n" : 1 , "lastOp" : { "$ts" : 1350653346 , "$inc" : 2} , "connectionId" : 917845 , "err" : null , "ok" : 1.0} ,
|
"offerStoreDE3/s117:27018,s124:27018,s129:27018" :
|
{ "n" : 0 , "lastOp" : { "$ts" : 1350651185 , "$inc" : 5} , "connectionId" : 853759 , "err" : null , "ok" : 1.0} ,
|
"offerStoreDE4/s115:27018,s125:27018,s132:27018" :
|
{ "updatedExisting" : true , "n" : 2 , "lastOp" : { "$ts" : 1350653599 , "$inc" : 2} , "connectionId" : 710062 , "err" : null , "ok" : 1.0}} ,
|
"n" : 3 ,
|
"updatedExisting" : true ,
|
"err" : null , "ok" : 1.0}
|
|
|
The state of the balancer will change as different mongos instances take up the role, and take out the distributed lock.
The parts I'm trying to see together are the update + explain for the same query; the getLastError response from java doesn't show that unfortunately. That is why getting it all from the shell, using the same mongos, at the same time is important. It also means we can increase the logging on that mongos (even one not used by any other clients) to get better diagnostic information.
If you run the same update in the shell, you don't get the same results? I don't understand how that is possible; can you provide the shell version too, so we can compare it to your Java operation?
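One way to raise the verbosity on that mongos for such a run (a sketch; whether the runtime setParameter command is accepted by your mongos build is an assumption here, and restarting with -vv is the fallback):

// from a shell connected to that mongos
db.adminCommand({ setParameter: 1, logLevel: 2 })   // assumes runtime setParameter is supported
// otherwise restart that mongos with increased verbosity, e.g.: mongos -vv <usual options>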
|
|
I forgot to mention that we removed 2 of our 5 shards, since we have replaced our spindle disks with SSDs. So don't be surprised that the number of shards and the location of the chunks have changed with our new setup.
|
|
I can't reproduce it in the shell. However, using the Java driver, n is 4 even though only 2 docs were updated. Here is the getLastError result from Java:
{ "serverUsed" : "sx210/172.16.64.161:27018" ,
|
"shards" : [ "offerStoreDE2/172.16.254.6:27018,s127:27018,s131:27018,s136:27018" , "offerStoreDE3/s117:27018,s124:27018,s129:27018" , "offerStoreDE4/s115:27018,s125:27018,s132:27018"] ,
|
"shardRawGLE" : {
|
"offerStoreDE2/172.16.254.6:27018,s127:27018,s131:27018,s136:27018" :
|
{ "updatedExisting" : true , "n" : 1 , "lastOp" :
|
{ "$ts" : 1349963953 , "$inc" : 12} ,
|
"connectionId" : 233666 , "err" : null , "ok" : 1.0} ,
|
"offerStoreDE3/s117:27018,s124:27018,s129:27018" :
|
{ "updatedExisting" : true , "n" : 1 , "lastOp" : { "$ts" : 1349963925 , "$inc" : 135} ,
|
"connectionId" : 192501 , "err" : null , "ok" : 1.0} ,
|
"offerStoreDE4/s115:27018,s125:27018,s132:27018" :
|
{ "updatedExisting" : true , "n" : 2 , "lastOp" : { "$ts" : 1349964111 , "$inc" : 4} ,
|
"connectionId" : 48611 , "err" : null , "ok" : 1.0}
|
} ,
|
"n" : 4 ,
|
"updatedExisting" : true ,
|
"err" : null ,
|
"ok" : 1.0
|
}
|
The balancer was OFF:
mongos> db.locks.find().pretty();
{
    "_id" : "balancer",
    "process" : "sx177:27018:1349938171:1810986736",
    "state" : 0,
    "ts" : ObjectId("5076d9bc684ba0acbbae4b33"),
    "when" : ISODate("2012-10-11T14:37:48.364Z"),
    "who" : "sx177:27018:1349938171:1810986736:Balancer:1419796394",
    "why" : "doing balance round"
}
{
    "_id" : "offerStore.offer",
    "process" : "s125:27018:1349939144:1965402520",
    "state" : 0,
    "ts" : ObjectId("5076da98849c662dc808689b"),
    "when" : ISODate("2012-10-11T14:41:28.203Z"),
    "who" : "s125:27018:1349939144:1965402520:conn43976:1087234052",
    "why" : "split-{ _id: 1265194389 }"
}
|
mongos> use offerStore
switched to db offerStore
mongos> db.offer.find({_id:{$in:[1059301595, 1059301637]}}).explain()
{
    "clusteredType" : "ParallelSort",
    "shards" : {
        "offerStoreDE4/s115:27018,s125:27018,s132:27018" : [
            {
                "cursor" : "BtreeCursor _id_ multi",
                "isMultiKey" : false,
                "n" : 2,
                "nscannedObjects" : 2,
                "nscanned" : 3,
                "nscannedObjectsAllPlans" : 2,
                "nscannedAllPlans" : 3,
                "scanAndOrder" : false,
                "indexOnly" : false,
                "nYields" : 0,
                "nChunkSkips" : 0,
                "millis" : 6,
                "indexBounds" : {
                    "_id" : [
                        [ 1059301595, 1059301595 ],
                        [ 1059301637, 1059301637 ]
                    ]
                },
                "server" : "s125:27018"
            }
        ]
    },
    "cursor" : "BtreeCursor _id_ multi",
    "n" : 2,
    "nChunkSkips" : 0,
    "nYields" : 0,
    "nscanned" : 3,
    "nscannedAllPlans" : 3,
    "nscannedObjects" : 2,
    "nscannedObjectsAllPlans" : 2,
    "millisShardTotal" : 6,
    "millisShardAvg" : 6,
    "numQueries" : 1,
    "numShards" : 1,
    "indexBounds" : {
        "_id" : [
            [ 1059301595, 1059301595 ],
            [ 1059301637, 1059301637 ]
        ]
    },
    "millis" : 18
}
mongos>
|
After starting the balancer, it stayed in state 0. Doesn't that mean the balancer is OFF, even though stopped is set to false? Is that normal? (See the note after the config output below.)
mongos> use config
switched to db config
mongos> db.locks.find( { _id : "balancer" } )
{ "_id" : "balancer", "process" : "sx177:27018:1349938171:1810986736", "state" : 0, "ts" : ObjectId("5076d9bc684ba0acbbae4b33"), "when" : ISODate("2012-10-11T14:37:48.364Z"), "who" : "sx177:27018:1349938171:1810986736:Balancer:1419796394", "why" : "doing balance round" }
mongos> db.settings.update( { _id: "balancer" }, { $set : { stopped: false } } , true );
mongos> db.locks.find( { _id : "balancer" } )
{ "_id" : "balancer", "process" : "sx177:27018:1349938171:1810986736", "state" : 0, "ts" : ObjectId("5076dd0b684ba0acbbae4b34"), "when" : ISODate("2012-10-11T14:51:55.044Z"), "who" : "sx177:27018:1349938171:1810986736:Balancer:1419796394", "why" : "doing balance round" }
mongos> db.locks.find().pretty();
{
    "_id" : "balancer",
    "process" : "sx177:27018:1349938171:1810986736",
    "state" : 0,
    "ts" : ObjectId("5076dd11684ba0acbbae4b35"),
    "when" : ISODate("2012-10-11T14:52:01.130Z"),
    "who" : "sx177:27018:1349938171:1810986736:Balancer:1419796394",
    "why" : "doing balance round"
}
{
    "_id" : "offerStore.offer",
    "process" : "s125:27018:1349939144:1965402520",
    "state" : 0,
    "ts" : ObjectId("5076dced849c662dc808689d"),
    "when" : ISODate("2012-10-11T14:51:25.083Z"),
    "who" : "s125:27018:1349939144:1965402520:conn42809:218006004",
    "why" : "split-{ _id: 1265194389 }"
}
mongos> db.settings.find()
{ "_id" : "chunksize", "value" : 64 }
{ "_id" : "balancer", "stopped" : false }
|
|
|
Can you verify that the balancer is stopped when you run the update + explain?
mongos> use config;
mongos> db.locks.find().pretty();
The java query "Lists.newArrayList(1059301595l, 1059301637l)" and the explain "db.offer.find({_id:{$in:[1059301595, 1059301637]}}).explain()" have different ids. If you can reproduce this in just the shell it would be best.
|
|
Scott, I posted the explain via mongos on the 24th of September already (please see above). Isn't that what you asked for?
|
|
We really need the update results and explain from the same query at the same time via mongos. Can you please supply that?
Also, can you reproduce this when the balancer is stopped?
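For example, a single shell session against the same mongos along the following lines (a sketch using the ids from this report) would capture the update result and the explain together:

use offerStore
db.offer.update({ _id : { $in : [1059301595, 1059301637] } }, { $set : { clusterId : 10071732130 } }, false, true)
db.runCommand( "getlasterror" )
db.offer.find({ _id : { $in : [1059301595, 1059301637] } }).explain()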
|
|
The shard key is _id.
mongos> db.offer.find({_id:{$in:[1059301595, 1059301637]}}).explain()
{
    "clusteredType" : "ParallelSort",
    "shards" : {
        "offerStoreDE4/s115:27018,s125:27018,s132:27018" : [
            {
                "cursor" : "BtreeCursor _id_ multi",
                "isMultiKey" : false,
                "n" : 2,
                "nscannedObjects" : 2,
                "nscanned" : 3,
                "nscannedObjectsAllPlans" : 2,
                "nscannedAllPlans" : 3,
                "scanAndOrder" : false,
                "indexOnly" : false,
                "nYields" : 0,
                "nChunkSkips" : 0,
                "millis" : 20,
                "indexBounds" : {
                    "_id" : [
                        [ 1059301595, 1059301595 ],
                        [ 1059301637, 1059301637 ]
                    ]
                },
                "server" : "s115:27018"
            }
        ]
    },
    "cursor" : "BtreeCursor _id_ multi",
    "n" : 2,
    "nChunkSkips" : 0,
    "nYields" : 0,
    "nscanned" : 3,
    "nscannedAllPlans" : 3,
    "nscannedObjects" : 2,
    "nscannedObjectsAllPlans" : 2,
    "millisShardTotal" : 20,
    "millisShardAvg" : 20,
    "numQueries" : 1,
    "numShards" : 1,
    "indexBounds" : {
        "_id" : [
            [ 1059301595, 1059301595 ],
            [ 1059301637, 1059301637 ]
        ]
    },
    "millis" : 29
}
|
|
|
What is your shard key for that sharded collection?
Can you run an explain query with the same criteria and post it please?
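For reference, the shard key pattern is also recorded in the cluster metadata; a minimal sketch, run through mongos:

use config
db.collections.findOne({ _id : "offerStore.offer" })   // the "key" field is the shard key pattern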