Details
-
Bug
-
Resolution: Done
-
Major - P3
-
None
-
2.5.0
-
None
-
ALL
Description
Currently not a real threat because the 2 different orders of lock acquisition don't cross paths. Here's the detailed report from the user (source: https://groups.google.com/forum/?hl=en-US&fromgroups=#!topic/mongodb-dev/g1BULoCq1ck):
I cherry-picked 2d0f6cc onto v2.2, and added some extra debug printing in
this branch:
https://github.com/leifwalsh/mongo/compare/mongodb:v2.2...SERVER-6972-backport
I also wrote a perl script to understand the locking and show the first
and second stacktraces for the first observation of each lock ordering:
https://gist.github.com/5732226
This script parses the output of `./mongo --nodb
jstests/sharding/remove2.js` and the first time a thread establishes an
ordering between locks A and B, it shows the most recent stack traces
where A was locked, and the current stacktrace where we're locking B.
Hopefully you can reproduce this if you try, but here is what I got:
https://gist.github.com/5732253
If I addr2line these, I get these stacks for the establishment of the
ordering ReplicaSetMonitor before DBConnectionPool (which I understand
from your past emails to be the invalid one):
ReplicaSetMonitor is taken at
mongo::printStackTrace(std::ostream&) at /home/leif/git/mongo-vanilla/src/mongo/util/stacktrace.cpp:39
mongo::MutexDebugger::entering(char const*) at /home/leif/git/mongo-vanilla/src/mongo/util/concurrency/mutexdebugger.h:77
mongo::mutex::scoped_lock::scoped_lock(mongo::mutex&) at /home/leif/git/mongo-vanilla/src/mongo/util/concurrency/mutex.h:108
mongo::ReplicaSetMonitor::remove(std::string const&, bool) at /home/leif/git/mongo-vanilla/src/mongo/client/dbclient_rs.cpp:338
mongo::dbgrid_cmds::RemoveShardCmd::run(std::string const&, mongo::BSONObj&, int, std::string&, mongo::BSONObjBuilder&, bool) at /home/leif/git/mongo-vanilla/src/mongo/s/commands_admin.cpp:1072
mongo::Command::runAgainstRegistered(char const*, mongo::BSONObj&, mongo::BSONObjBuilder&, int) at /home/leif/git/mongo-vanilla/src/mongo/s/commands_public.cpp:1665 (discriminator 1)
mongo::SingleStrategy::queryOp(mongo::Request&) at /home/leif/git/mongo-vanilla/src/mongo/s/strategy_single.cpp:55 (discriminator 1)
mongo::ShardStrategy::queryOp(mongo::Request&) at /home/leif/git/mongo-vanilla/src/mongo/s/strategy_shard.cpp:49
mongo::Request::process(int) at /home/leif/git/mongo-vanilla/src/mongo/s/request.cpp:140
mongo::ShardedMessageHandler::process(mongo::Message&, mongo::AbstractMessagingPort*, mongo::LastError*) at /home/leif/git/mongo-vanilla/src/mongo/s/server.cpp:108
mongo::pms::threadRun(mongo::MessagingPort*) at /home/leif/git/mongo-vanilla/src/mongo/util/net/message_server_port.cpp:86
and then DBConnectionPool is taken at
mongo::printStackTrace(std::ostream&) at /home/leif/git/mongo-vanilla/src/mongo/util/stacktrace.cpp:39
mongo::MutexDebugger::entering(char const*) at /home/leif/git/mongo-vanilla/src/mongo/util/concurrency/mutexdebugger.h:77
mongo::mutex::scoped_lock::scoped_lock(mongo::mutex&) at /home/leif/git/mongo-vanilla/src/mongo/util/concurrency/mutex.h:108
mongo::DBConnectionPool::removeHost(std::string const&) at /home/leif/git/mongo-vanilla/src/mongo/client/connpool.cpp:251
mongo::ReplicaSetMonitor::~ReplicaSetMonitor() at /home/leif/git/mongo-vanilla/src/mongo/client/dbclient_rs.cpp:258 (discriminator 1)
void boost::checked_delete<mongo::ReplicaSetMonitor>(mongo::ReplicaSetMonitor*) at /home/leif/git/mongo-vanilla/src/third_party/boost/boost/checked_delete.hpp:39 (discriminator 1)
boost::detail::sp_counted_impl_p<mongo::ReplicaSetMonitor>::dispose() at /home/leif/git/mongo-vanilla/src/third_party/boost/boost/smart_ptr/detail/sp_counted_impl.hpp:79
boost::detail::sp_counted_base::release() at /home/leif/git/mongo-vanilla/src/third_party/boost/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:146
boost::detail::shared_count::~shared_count() at /home/leif/git/mongo-vanilla/src/third_party/boost/boost/smart_ptr/detail/shared_count.hpp:309
boost::shared_ptr<mongo::ReplicaSetMonitor>::~shared_ptr() at /home/leif/git/mongo-vanilla/src/third_party/boost/boost/smart_ptr/shared_ptr.hpp:164
std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> >::~pair() at /usr/include/c++/4.8.1/bits/stl_pair.h:96 (discriminator 1)
__gnu_cxx::new_allocator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >::destroy(std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> >*) at /usr/include/c++/4.8.1/ext/new_allocator.h:133
std::_Rb_tree<std::string, std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> >, std::_Select1st<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >, std::less<std::string>, std::allocator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > > >::_M_destroy_node(std::_Rb_tree_node<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >*) at /usr/include/c++/4.8.1/bits/stl_tree.h:395 (discriminator 1)
std::_Rb_tree<std::string, std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> >, std::_Select1st<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >, std::less<std::string>, std::allocator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > > >::_M_erase_aux(std::_Rb_tree_const_iterator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >) at /usr/include/c++/4.8.1/bits/stl_tree.h:1744
std::_Rb_tree<std::string, std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> >, std::_Select1st<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >, std::less<std::string>, std::allocator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > > >::erase(std::_Rb_tree_const_iterator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >) at /usr/include/c++/4.8.1/bits/stl_tree.h:828
std::_Rb_tree<std::string, std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> >, std::_Select1st<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >, std::less<std::string>, std::allocator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > > >::_M_erase_aux(std::_Rb_tree_const_iterator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >, std::_Rb_tree_const_iterator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >) at /usr/include/c++/4.8.1/bits/stl_tree.h:1756 (discriminator 1)
std::_Rb_tree<std::string, std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> >, std::_Select1st<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >, std::less<std::string>, std::allocator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > > >::erase(std::_Rb_tree_iterator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >, std::_Rb_tree_iterator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >) at /usr/include/c++/4.8.1/bits/stl_tree.h:845
std::_Rb_tree<std::string, std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> >, std::_Select1st<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > >, std::less<std::string>, std::allocator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > > >::erase(std::string const&) at /usr/include/c++/4.8.1/bits/stl_tree.h:1769
std::map<std::string, boost::shared_ptr<mongo::ReplicaSetMonitor>, std::less<std::string>, std::allocator<std::pair<std::string const, boost::shared_ptr<mongo::ReplicaSetMonitor> > > >::erase(std::string const&) at /usr/include/c++/4.8.1/bits/stl_map.h:726
mongo::ReplicaSetMonitor::_remove_inlock(std::string const&, bool) at /home/leif/git/mongo-vanilla/src/mongo/client/dbclient_rs.cpp:344
It seems pretty simple. ReplicaSetMonitor::remove holds the
ReplicaSetMonitor mutex and erases a ReplicaSetMonitorPtr from the map,
which calls ~ReplicaSetMonitor. In the destructor, we call
DBConnectionPool::removeHost, which promptly grabs the DBConnectionPool
mutex.