Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-8787

mongos crashing when running splits + inserts on collection with hashed shard key

    • Type: Bug
    • Resolution: Duplicate
    • Priority: Major - P3
    • None
    • Affects Version/s: None
    • Component/s: Sharding
    • Labels:
      None
    • ALL

      Test below. FYI, the "2.4h" in the versionIterator used in the test just refers to the current build from master.

      // Reproduction script (mongo shell) for the mongos crash reported in this ticket.
      // Statement order is part of the repro; do not reorder or deduplicate calls.
      //
      // Cluster options passed to ShardingTest through its `other` field.
      // Per the ticket text, "2.4h" refers to the current build from master.
      var options = {
          // Both entries of the version iterator are "2.4h".
          mongosOptions : {binVersion:MongoRunner.versionIterator(["2.4h","2.4h"]),
                              logpath:"/tmp/stest"},
          configOptions:{binVersion:"2.4h", logpath:"/tmp/stest-config"},
          shardOptions:{binVersion:"2.4h", logpath:"/tmp/sttest-shards"},
          //rsOptions:{binVersion:allversions, logpath:"/tmp/sttest-rs"},
          rsOptions:{binVersion:MongoRunner.versionIterator(["2.4h","2.4h"]), logpath:"/tmp/sttest-rs", nopreallocj:""},
          separateConfig:true,
          sync:true,
          rs:true
      }
      
      // Start a cluster with 2 shards and 2 mongos processes using the options above.
      var st = new ShardingTest({shards:2, mongos:2, other:options})
      // Snapshot of config.shards read through st.s0; `shards` is not used again in this script.
      shards = st.s0.getDB("config").shards.find().toArray();
      
      // Launch one more mongos with binVersion "2.4" against the same config servers.
      // NOTE(review): `upgrade:""` presumably maps to the mongos --upgrade flag — confirm.
      var new_mongos = MongoRunner.runMongos({binVersion:"2.4", upgrade:"", configdb:st._configDB, port:MongoRunner.nextOpenPort()})
      
      // All remaining operations go through the newly started mongos.
      coll = new_mongos.getDB("foo").bar
      admin = new_mongos.getDB("admin")
      // `confdb` is assigned but not used in the visible script.
      var confdb = new_mongos.getDB("config")
      // Create the hashed index on _id, then shard the collection on that hashed key.
      coll.createIndex({_id:"hashed"})
      // `+ ""` coerces the shell objects to strings for the command arguments
      // (presumably the database name and the "foo.bar" namespace — confirm).
      printjson(admin.runCommand({ enableSharding : coll.getDB() + "" }));
      printjson(admin.runCommand({ shardCollection : coll + "", key : { _id : "hashed" } }));
      // The split at { _id : 0 } is issued twice in a row, then a split at { _id : 300 }.
      // NOTE(review): unclear whether the duplicate split is intentional; kept as reported
      // because it may be needed to trigger the crash.
      printjson(admin.runCommand({ split : coll + "", middle : { _id : 0 } }));    
      printjson(admin.runCommand({ split : coll + "", middle : { _id : 0 } }));    
      printjson(admin.runCommand({ split : coll + "", middle : { _id : 300 } }));
      // Insert 999 documents ({x:1} .. {x:999}); no _id is supplied in the inserted objects.
      for(var i=1;i<1000;i++){
          coll.insert({x:i})
      }
      

      This is crashing mongos every time for me. In the logs I get:

      m27000| Received signal 10
      m27000| Backtrace: 0x10040045b 0x7fff83f328ea 0x100b4fd40 0x10025f267 0x100260ae3 0x100244125 0x100249168 0x1002ccb7b 0x10023721c 0x10024ae18 0x100280ba4 0x10031c34f 0x100299b20 0x100348dd0 0x10031ac16 0x10001207d 0x1003ef21b 0x10044f603 0x7fff83f44742 0x7fff83f31181
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| WARNING: mongod wrote null bytes to output
      m27000| 0   mongos                              0x000000010040045b _ZN5mongo17printStackAndExitEi + 123
      m27000| 1   libsystem_c.dylib                   0x00007fff83f328ea _sigtramp + 26
      m27000| 2   mongos                              0x0000000100b4fd40 _ZN5mongo9ChunkType8ConfigNSE + 0
      m27000| 3   mongos                              0x000000010025f267 _ZN5mongo17ConfigDiffTrackerIN5boost10shared_ptrIKNS_5ChunkEEENS_5ShardEE19calculateConfigDiffERNS_23DBClientCursorInterfaceE + 2503
      m27000| 4   mongos                              0x0000000100260ae3 _ZN5mongo17ConfigDiffTrackerIN5boost10shared_ptrIKNS_5ChunkEEENS_5ShardEE19calculateConfigDiffESsRKSt3setINS_12ChunkVersionESt4lessIS9_ESaIS9_EE + 291
      m27000| 5   mongos                              0x0000000100244125 _ZN5mongo12ChunkManager5_loadERKSsRSt3mapINS_7BSONObjEN5boost10shared_ptrIKNS_5ChunkEEENS_10BSONObjCmpESaISt4pairIKS4_S9_EEERSt3setINS_5ShardESt4lessISI_ESaISI_EERS3_ISI_NS_12ChunkVersionESK_SaISB_IKSI_SO_EEENS6_IKS0_EE + 533
      m27000| 6   mongos                              0x0000000100249168 _ZN5mongo12ChunkManager18loadExistingRangesERKSs + 408
      m27000| 7   mongos                              0x00000001002ccb7b _ZN5mongo8DBConfig15getChunkManagerERKSsbb + 1211
      m27000| 8   mongos                              0x000000010023721c _ZNK5mongo12ChunkManager6reloadEb + 156
      m27000| 9   mongos                              0x000000010024ae18 _ZNK5mongo5Chunk13moveAndCommitERKNS_5ShardExbbRNS_7BSONObjE + 4344
      m27000| 10  mongos                              0x0000000100280ba4 _ZN5mongo11dbgrid_cmds12MoveChunkCmd3runERKSsRNS_7BSONObjEiRSsRNS_14BSONObjBuilderEb + 4740
      m27000| 11  mongos                              0x000000010031c34f _ZN5mongo7Command22execCommandClientBasicEPS0_RNS_11ClientBasicEiPKcRNS_7BSONObjERNS_14BSONObjBuilderEb + 911
      m27000| 12  mongos                              0x0000000100299b20 _ZN5mongo7Command20runAgainstRegisteredEPKcRNS_7BSONObjERNS_14BSONObjBuilderEi + 560
      m27000| 13  mongos                              0x0000000100348dd0 _ZN5mongo14SingleStrategy7queryOpERNS_7RequestE + 3136
      m27000| 14  mongos                              0x000000010031ac16 _ZN5mongo7Request7processEi + 534
      m27000| 15  mongos                              0x000000010001207d _ZN5mongo21ShardedMessageHandler7processERNS_7MessageEPNS_21AbstractMessagingPortEPNS_9LastErrorE + 109
      m27000| 16  mongos                              0x00000001003ef21b _ZN5mongo17PortMessageServer17handleIncomingMsgEPv + 1115
      m27000| 17  mongos                              0x000000010044f603 thread_proxy + 163
      m27000| 18  libsystem_c.dylib                   0x00007fff83f44742 _pthread_start + 327
      m27000| 19  libsystem_c.dylib                   0x00007fff83f31181 thread_start + 13
      ===
      

      Then, when trying to restart mongos after the crash, I get this message repeatedly:

      m27001| Thu Feb 28 15:32:33.832 [Balancer] distributed lock 'balancer/mikes-MacBook-Pro.local:27001:1362083432:16807' acquired, ts : 512fbee119c72217764dd190
      m27001| Thu Feb 28 15:32:33.902 [Balancer] warning: inconsistent chunks found when reloading foo.bar, previous version was 2|8||512fbca70058b6b230e713b3, this should be rare
      m27001| Thu Feb 28 15:32:33.902 [Balancer] warning: ChunkManager loaded an invalid config for foo.bar, trying again
      m27001| Thu Feb 28 15:32:33.914 [Balancer] warning: inconsistent chunks found when reloading foo.bar, previous version was 2|8||512fbca70058b6b230e713b3, this should be rare
      m27001| Thu Feb 28 15:32:33.914 [Balancer] warning: ChunkManager loaded an invalid config for foo.bar, trying again
      m27001| Thu Feb 28 15:32:33.935 [Balancer] warning: inconsistent chunks found when reloading foo.bar, previous version was 2|8||512fbca70058b6b230e713b3, this should be rare
      m27001| Thu Feb 28 15:32:33.935 [Balancer] warning: ChunkManager loaded an invalid config for foo.bar, trying again
      m27001| Thu Feb 28 15:32:33.966 [Balancer] Assertion: 13282:Couldn't load a valid config for foo.bar after 3 attempts. Please try again.
      m27001| 0x100401a9b 0x1003c2b39 0x1003c301c 0x1002492c7 0x1002c8f10 0x1002c9503 0x1002cbb4b 0x1002cc60a 0x1002fcb78 0x1002267ec 0x100228e2a 0x1003c5144 0x1003c7517 0x10044f603 0x7fff83f44742 0x7fff83f31181
      m27001|  0   mongos                              0x0000000100401a9b _ZN5mongo15printStackTraceERSo + 43
      m27001|  1   mongos                              0x00000001003c2b39 _ZN5mongo11msgassertedEiPKc + 217
      m27001|  2   mongos                              0x00000001003c301c _ZN5mongo11msgassertedEiRKSs + 12
      m27001|  3   mongos                              0x00000001002492c7 _ZN5mongo12ChunkManager18loadExistingRangesERKSs + 759
      m27001|  4   mongos                              0x00000001002c8f10 _ZN5mongo8DBConfig14CollectionInfo5shardEPNS_12ChunkManagerE + 80
      m27001|  5   mongos                              0x00000001002c9503 _ZN5mongo8DBConfig14CollectionInfoC2ERKNS_7BSONObjE + 435
      m27001|  6   mongos                              0x00000001002cbb4b _ZN5mongo8DBConfig5_loadEv + 2379
      m27001|  7   mongos                              0x00000001002cc60a _ZN5mongo8DBConfig4loadEv + 42
      m27001|  8   mongos                              0x00000001002fcb78 _ZN5mongo4Grid11getDBConfigESsbRKSs + 1336
      m27001|  9   mongos                              0x00000001002267ec _ZN5mongo8Balancer15_doBalanceRoundERNS_12DBClientBaseEPSt6vectorIN5boost10shared_ptrINS_11MigrateInfoEEESaIS7_EE + 16604
      m27001|  10  mongos                              0x0000000100228e2a _ZN5mongo8Balancer3runEv + 4394
      m27001|  11  mongos                              0x00000001003c5144 _ZN5mongo13BackgroundJob7jobBodyEN5boost10shared_ptrINS0_9JobStatusEEE + 292
      m27001|  12  mongos                              0x00000001003c7517 _ZN5boost6detail11thread_dataINS_3_bi6bind_tIvNS_4_mfi3mf1IvN5mongo13BackgroundJobENS_10shared_ptrINS7_9JobStatusEEEEENS2_5list2INS2_5valueIPS7_EENSD_ISA_EEEEEEE3runEv + 119
      m27001|  13  mongos                              0x000000010044f603 thread_proxy + 163
      m27001|  14  libsystem_c.dylib                   0x00007fff83f44742 _pthread_start + 327
      m27001|  15  libsystem_c.dylib                   0x00007fff83f31181 thread_start + 13
      
      

            Assignee:
            greg_10gen Greg Studer
            Reporter:
            mikeo@mongodb.com Michael O'Brien
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

              Created:
              Updated:
              Resolved: