Core Server / SERVER-5366

Balancer did not remove chunk from old shard

    • Type: Bug
    • Resolution: Duplicate
    • Priority: Major - P3
    • Fix Version/s: None
    • Affects Version/s: 2.0.3
    • Component/s: Sharding
    • Labels:
    • Environment:
      ubuntu 10.04 lts, x86_64
    • Operating System: ALL

      We have seen numerous occurrences of the balancer not removing a chunk from the old shard after it has been migrated to a new shard. This manifests as strange inconsistencies for queries that are broadcast to all shards.

      mongos> db.coll.find({"some_field":"x"}).count()
      14
      mongos> db.coll.find({"some_field":"x"}).next()
      Thu Mar 22 13:41:24 uncaught exception: error hasNext: false
      
      
      mongos> db.coll.find({"some_field":"x"}).explain()
      {
      	"clusteredType" : "ParallelSort",
      	"shards" : {
      		"shard2/10.176.163.134:27022,10.176.164.146:27021" : [
      			{
      				"cursor" : "BtreeCursor some_field_1",
      				"nscanned" : 14,
      				"nscannedObjects" : 14,
      				"n" : 0,
      				"millis" : 0,
      				"nYields" : 0,
      				"nChunkSkips" : 14,
      				"isMultiKey" : false,
      				"indexOnly" : false,
      				"indexBounds" : {
      					"some_field" : [
      						[
      							"x",
      							"x"
      						]
      					]
      				}
      			}
      		],
      		"shard3/10.177.210.46:27017,10.177.210.47:27017" : [
      			{
      				"cursor" : "BtreeCursor some_field_1",
      				"nscanned" : 0,
      				"nscannedObjects" : 0,
      				"n" : 0,
      				"millis" : 0,
      				"nYields" : 0,
      				"nChunkSkips" : 0,
      				"isMultiKey" : false,
      				"indexOnly" : false,
      				"indexBounds" : {
      					"some_field" : [
      						[
      							"x",
      							"x"
      						]
      					]
      				}
      			}
      		],
      		"shard4/10.176.64.155:27017,10.177.205.133:27017" : [
      			{
      				"cursor" : "BtreeCursor some_field_1",
      				"nscanned" : 0,
      				"nscannedObjects" : 0,
      				"n" : 0,
      				"millis" : 0,
      				"nYields" : 0,
      				"nChunkSkips" : 0,
      				"isMultiKey" : false,
      				"indexOnly" : false,
      				"indexBounds" : {
      					"some_field" : [
      						[
      							"x",
      							"x"
      						]
      					]
      				}
      			}
      		]
      	},
      	"n" : 0,
      	"nChunkSkips" : 14,
      	"nYields" : 0,
      	"nscanned" : 14,
      	"nscannedObjects" : 14,
      	"millisTotal" : 0,
      	"millisAvg" : 0,
      	"numQueries" : 3,
      	"numShards" : 3
      }
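
      The non-zero nChunkSkips on shard2 suggests that the 14 documents scanned there fall inside a chunk range which, according to the sharding metadata, no longer belongs to shard2, so they are filtered out of query results even though they are still physically present (which would also explain why count() still sees them). One way to compare against the metadata is to list the chunk ranges config.chunks attributes to each shard; the following is only a sketch, with "mydb.coll" standing in for the redacted namespace:

      // from mongos: list the chunk ranges recorded for the sharded collection
      // ("mydb.coll" is a placeholder for the actual namespace)
      var conf = db.getSiblingDB("config");
      conf.chunks.find({ ns: "mydb.coll" }, { min: 1, max: 1, shard: 1 }).sort({ min: 1 }).forEach(function (c) {
          print(tojson(c.min) + " -->> " + tojson(c.max) + " on : " + c.shard);
      });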
      

      Going to shard2 directly:

      PRIMARY> db.coll.find({"some_field":"x"}).count()
      14
      PRIMARY> db.coll.find({"some_field":"x"},{my_shard_id:true})
      { "_id" : 31576707, "my_shard_id" : 13181 }
      { "_id" : 31489421, "my_shard_id" : 13187 }
      { "_id" : 31596862, "my_shard_id" : 13179 }
      { "_id" : 31616772, "my_shard_id" : 13186 }
      { "_id" : 31565191, "my_shard_id" : 13193 }
      { "_id" : 31574087, "my_shard_id" : 13184 }
      { "_id" : 31468296, "my_shard_id" : 13179 }
      { "_id" : 31434373, "my_shard_id" : 13192 }
      { "_id" : 31629660, "my_shard_id" : 13192 }
      { "_id" : 31777042, "my_shard_id" : 13184 }
      { "_id" : 31626661, "my_shard_id" : 13179 }
      { "_id" : 31344196, "my_shard_id" : 13184 }
      { "_id" : 31786861, "my_shard_id" : 13192 }
      { "_id" : 31808323, "my_shard_id" : 13188 }
      

      And from mongos, the chunk covering this range:

      { "my_shard_id" : 13165 } -->> { "my_shard_id" : 13200 } on : shard4 { "t" : 68000, "i" : 0 }
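
      The same chunk entry can be pulled straight from config.chunks by looking up the chunk whose range covers one of the shard key values still sitting on shard2; again only a sketch, with "mydb.coll" as a placeholder namespace:

      // from mongos: find the chunk that covers my_shard_id 13181
      // ("mydb.coll" is a placeholder for the actual namespace)
      var conf = db.getSiblingDB("config");
      conf.chunks.findOne({
          ns: "mydb.coll",
          "min.my_shard_id": { $lte: 13181 },
          "max.my_shard_id": { $gt: 13181 }
      });
      // expected owner per the entry above: shard4, range [13165, 13200)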
      

      My only hypothesis for how this could have occurred is that the balancer migrated this chunk from shard2 to shard4 but did not remove it from shard2.
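
      If the config metadata is correct and the documents left on shard2 really are orphans of a completed migration, 2.0.x has no built-in command to clean them up, so the usual workaround is to delete the range manually on the old shard's primary. The following is only a sketch under that assumption ("mydb" is a placeholder for the redacted database name); the balancer should be stopped first and the chunk bounds double-checked:

      // from mongos: stop the balancer before touching data on the shards
      db.getSiblingDB("config").settings.update({ _id: "balancer" }, { $set: { stopped: true } }, true);

      // connected directly to shard2's PRIMARY (not through mongos); "mydb" is a
      // placeholder database name; chunk ranges are min-inclusive, max-exclusive
      db.getSiblingDB("mydb").coll.remove({ my_shard_id: { $gte: 13165, $lt: 13200 } });

      // from mongos: re-enable the balancer afterwards
      db.getSiblingDB("config").settings.update({ _id: "balancer" }, { $set: { stopped: false } });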

            Assignee: Greg Studer (greg_10gen)
            Reporter: Y. Wayne Huang (wayne530)
            Votes: 0
            Watchers: 0