Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-58356

Cannot kill the dataSize operation

    • Fully Compatible
    • ALL
    • Hide

      Testing:

       

      Create a significant amount of data so that you can run the datasize command and you have some time to attempt to kill the operation.

       
      Run the datasize command and quickly find the op to kill (next step)

       MongoDB Enterprise > db.runCommand({ dataSize: "database.collection" })
      { "estimate" : false, "size" : 9393026191, "numObjects" : 3293296, "millis" : 14353, "ok" : 1 }

       

      Find and kill the op (as you see the op does not get killed)

      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.killOp(1133128)
      { "info" : "attempting to kill op", "ok" : 1 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      { "opid" : 1133128 }
      MongoDB Enterprise > db.getSiblingDB("admin").aggregate( [{ $currentOp : { allUsers: true, idleSessions: true } },{$match: {"command.dataSize":  "database.collection"}},{$project:{opid:1}}] )
      MongoDB Enterprise >
       

      From the logs:

      {"t":{"$date":"2021-07-07T11:25:07.476+10:00"},"s":"I",  "c":"COMMAND",  "id":20482,   "ctx":"conn12","msg":"Going to kill op","attr":{"opId":1133128}}
      {"t":{"$date":"2021-07-07T11:25:07.477+10:00"},"s":"I",  "c":"COMMAND",  "id":20884,   "ctx":"conn12","msg":"Killed operation","attr":{"opId":1133128}}        >> This says killed, although the operation kept going
      {"t":{"$date":"2021-07-07T11:25:16.670+10:00"},"s":"W",  "c":"COMMAND",  "id":20525,   "ctx":"conn17","msg":"Failed to gather storage statistics for slow operation","attr":{"opId":1133128,"error":"lock acquire timeout"}}
      {"t":{"$date":"2021-07-07T11:25:16.671+10:00"},"s":"I",  "c":"COMMAND",  "id":51803,   "ctx":"conn17","msg":"Slow query","attr":{"type":"command","ns":"database.collection","appName":"MongoDB Shell","command":{"dataSize":"database.collection","lsid":{"id":{"$uuid":"12825971-559f-4a68-9022-86bc601ca7ac"}},"$db":"test"},"numYields":0,"reslen":91,"locks":{"ReplicationStateTransition":{"acquireCount":{"w":1}},"Global":{"acquireCount":{"r":1}},"Database":{"acquireCount":{"r":1}},"Collection":{"acquireCount":{"r":1}},"Mutex":{"acquireCount":{"r":1}}},"protocol":"op_msg","durationMillis":14353}}.  >> THIS indicate that the operation started around "2021-07-07T11:25:02 more or less"
      {"t":{"$date":"2021-07-07T11:25:25.269+10:00"},"s":"I",  "c":"NETWORK",  "id":22944,   "ctx":"conn17","msg":"Connection ended","attr":{"remote":"127.0.0.1:58530","connectionId":17,"connectionCount":2}}
      

        
       
       
      This is a small test so the difference in time between the operation was attempted to be killed and the operation completed is small. But for customers with a larger data set the only solution is to restart the node.

      Show
      Testing:   Create a significant amount of data so that you can run the datasize command and you have some time to attempt to kill the operation.   Run the datasize command and quickly find the op to kill (next step) MongoDB Enterprise > db.runCommand({ dataSize: "database.collection" }) { "estimate" : false , "size" : 9393026191, "numObjects" : 3293296, "millis" : 14353, "ok" : 1 }   Find and kill the op (as you see the op does not get killed) MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.killOp(1133128) { "info" : "attempting to kill op" , "ok" : 1 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) { "opid" : 1133128 } MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] ) MongoDB Enterprise > From the logs: { "t" :{ "$date" : "2021-07-07T11:25:07.476+10:00" }, "s" : "I" , "c" : "COMMAND" , "id" :20482, "ctx" : "conn12" , "msg" : "Going to kill op" , "attr" :{ "opId" :1133128}} { "t" :{ "$date" : "2021-07-07T11:25:07.477+10:00" }, "s" : "I" , "c" : "COMMAND" , "id" :20884, "ctx" : "conn12" , "msg" : "Killed operation" , "attr" :{ "opId" :1133128}} >> This says killed, although the operation kept going { "t" :{ "$date" : "2021-07-07T11:25:16.670+10:00" }, "s" : "W" , "c" : "COMMAND" , "id" :20525, "ctx" : "conn17" , "msg" : "Failed to gather storage statistics for slow operation" , "attr" :{ "opId" :1133128, "error" : "lock acquire timeout" }} { "t" :{ "$date" : "2021-07-07T11:25:16.671+10:00" }, "s" : "I" , "c" : "COMMAND" , "id" :51803, "ctx" : "conn17" , "msg" : "Slow query" , "attr" :{ "type" : "command" , "ns" : "database.collection" , "appName" : "MongoDB Shell" , "command" :{ "dataSize" : "database.collection" , "lsid" :{ "id" :{ "$uuid" : "12825971-559f-4a68-9022-86bc601ca7ac" }}, "$db" : "test" }, "numYields" :0, "reslen" :91, "locks" :{ "ReplicationStateTransition" :{ "acquireCount" :{ "w" :1}}, "Global" :{ "acquireCount" :{ "r" :1}}, "Database" :{ "acquireCount" :{ "r" :1}}, "Collection" :{ "acquireCount" :{ "r" :1}}, "Mutex" :{ "acquireCount" :{ "r" :1}}}, "protocol" : "op_msg" , "durationMillis" :14353}}. >> THIS indicate that the operation started around "2021-07-07T11:25:02 more or less" { "t" :{ "$date" : "2021-07-07T11:25:25.269+10:00" }, "s" : "I" , "c" : "NETWORK" , "id" :22944, "ctx" : "conn17" , "msg" : "Connection ended" , "attr" :{ "remote" : "127.0.0.1:58530" , "connectionId" :17, "connectionCount" :2}}        This is a small test so the difference in time between the operation was attempted to be killed and the operation completed is small. But for customers with a larger data set the only solution is to restart the node.
    • QE 2021-08-09, QE 2021-08-23, QE 2021-09-20, QE 2021-10-04

       Cannot kill the dataSize operation, see steps below to reproduce.

            Assignee:
            denis.grebennicov@mongodb.com Denis Grebennicov
            Reporter:
            ivan.grigolon@mongodb.com Ivan Grigolon
            Votes:
            2 Vote for this issue
            Watchers:
            14 Start watching this issue

              Created:
              Updated:
              Resolved: