Description
sh.stopBalancer() accepts a timeout and interval, and passes them to sh.waitForBalancer(), which in turn passes them to sh.waitForBalancerOff():
sh.stopBalancer = function( timeout, interval ) { |
sh.setBalancerState( false ) |
sh.waitForBalancer( false, timeout, interval ) |
}
|
|
|
sh.waitForBalancer = function( onOrNot, timeout, interval ){ |
|
// If we're waiting for the balancer to turn on or switch state or |
// go to a particular state |
if( onOrNot ){ |
// Just wait for the balancer lock to change, can't ensure we'll ever see it |
// actually locked |
sh.waitForDLock( "balancer", undefined, timeout, interval ) |
}
|
else { |
// Otherwise we need to wait until we're sure balancing stops |
sh.waitForBalancerOff( timeout, interval )
|
}
|
|
}
|
However, sh.waitForBalancerOff does not pass these values through to sh.waitForDLock, instead passing a hardcoded value of 15 minutes:
sh.waitForBalancerOff = function( timeout, interval ){ |
|
var pings = db.getSisterDB( "config" ).mongos.find().toArray() |
var activePings = [] |
for( var i = 0; i < pings.length; i++ ){ |
if( ! pings[i].waiting ) activePings.push( pings[i] ) |
}
|
|
print( "Waiting for active hosts..." ) |
|
activePings = sh.waitForPingChange( activePings, 60 * 1000 )
|
|
// After 1min, we assume that all hosts with unchanged pings are either |
// offline (this is enough time for a full errored balance round, if a network |
// issue, which would reload settings) or balancing, which we wait for next |
// Legacy hosts we always have to wait for |
|
print( "Waiting for the balancer lock..." ) |
|
// Wait for the balancer lock to become inactive |
// We can guess this is stale after 15 mins, but need to double-check manually |
try{ |
sh.waitForDLock( "balancer", false, 15 * 60 * 1000 ) |
}
|
catch( e ){ |
print( "Balancer still may be active, you must manually verify this is not the case using the config.changelog collection." ) |
throw Error(e); |
}
|
|
print( "Waiting again for active hosts after balancer is off..." ) |
|
// Wait a short time afterwards, to catch the host which was balancing earlier |
activePings = sh.waitForPingChange( activePings, 5 * 1000 )
|
|
// Warn about all the stale host pings remaining |
for( var i = 0; i < activePings.length; i++ ){ |
print( "Warning : host " + activePings[i]._id + " seems to have been offline since " + activePings[i].ping ) |
}
|
|
}
|
The 15 minute timeout should be a default which can be overridden, and the interval should be respected, i.e.:
diff --git a/src/mongo/shell/utils_sh.js b/src/mongo/shell/utils_sh.js
|
index d9c05a3..f9215bb 100644
|
--- a/src/mongo/shell/utils_sh.js
|
+++ b/src/mongo/shell/utils_sh.js
|
@@ -225,7 +225,7 @@ sh.waitForBalancerOff = function( timeout, interval ){
|
// Wait for the balancer lock to become inactive
|
// We can guess this is stale after 15 mins, but need to double-check manually
|
try{
|
- sh.waitForDLock( "balancer", false, 15 * 60 * 1000 )
|
+ sh.waitForDLock( "balancer", false, timeout || 15 * 60 * 1000, interval )
|
}
|
catch( e ){
|
print( "Balancer still may be active, you must manually verify this is not the case using the config.changelog collection." ) |