I have a sharded cluster with one shard (a replica set of three nodes), and I am restoring data into it using percona-backup-mongodb.
The restore process was stuck on index creation for two days. I tried to restore a GridFS collection of 400 GB in size, and the index creation ran for 3 days and didn't finish. I watched the output of adminCommand and thought that it was because of the collection size. There was no information about locks:
rs0:PRIMARY> db.adminCommand(
... {
... currentOp: true,
... $or: [
... { op: "command", "command.createIndexes": { $exists: true } },
... { op: "none", "msg" : /^Index Build/ }
... ]
... }
... )
{
"inprog" : [
{
"type" : "op",
"host" : "kpak-mongo-gridfs-2:27017",
"desc" : "conn295769",
"connectionId" : 295769,
"client" : "127.0.0.1:59226",
"appName" : "mongorestore",
"clientMetadata" : {
"driver" : {
"name" : "mongo-go-driver",
"version" : "v1.7.0"
},
"os" : {
"type" : "linux",
"architecture" : "amd64"
},
"platform" : "go1.16.9",
"application" : {
"name" : "mongorestore"
}
},
"active" : true,
"currentOpTime" : "2022-07-04T14:44:56.860+03:00",
"effectiveUsers" : [
{
"user" : "pbmuser",
"db" : "admin"
}
],
"opid" : 362857456,
"lsid" : {
"id" : UUID("4aeb2829-b2dd-4bd2-8e4a-ea28c77ed31e"),
"uid" : BinData(0,"8L/kOoqHkvDRIRJTrmrrO3wwOr+ToO8WLvmn15Ql7G0=")
},
"secs_running" : NumberLong(236403),
"microsecs_running" : NumberLong("236403076334"),
"op" : "command",
"ns" : "file-store.fs.chunks",
"command" : {
"createIndexes" : "fs.chunks",
"indexes" : [
{
"key" : {
"files_id" : 1,
"n" : 1
},
"name" : "chunkFileIds",
"ns" : "file-store.fs.chunks"
}
],
"ignoreUnknownIndexOptions" : true,
"lsid" : {
"id" : UUID("4aeb2829-b2dd-4bd2-8e4a-ea28c77ed31e")
},
"$clusterTime" : {
"clusterTime" : Timestamp(1656698693, 534),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"$db" : "file-store",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"numYields" : 0,
"waitingForLatch" : {
"timestamp" : ISODate("2022-07-01T18:04:54.326Z"),
"captureName" : "FutureResolution"
},
"locks" : {
},
"waitingForLock" : false,
"lockStats" : {
"ParallelBatchWriterMode" : {
"acquireCount" : {
"r" : NumberLong(3)
}
},
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(4)
}
},
"Global" : {
"acquireCount" : {
"w" : NumberLong(4)
}
},
"Database" : {
"acquireCount" : {
"w" : NumberLong(3)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(1),
"w" : NumberLong(1),
"W" : NumberLong(1)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(3)
}
}
},
"waitingForFlowControl" : false,
"flowControlStats" : {
"acquireCount" : NumberLong(3),
"timeAcquiringMicros" : NumberLong(4)
}
},
{
"type" : "op",
"host" : "kpak-mongo-gridfs-2:27017",
"desc" : "conn295766",
"connectionId" : 295766,
"client" : "127.0.0.1:59214",
"appName" : "mongorestore",
"clientMetadata" : {
"driver" : {
"name" : "mongo-go-driver",
"version" : "v1.7.0"
},
"os" : {
"type" : "linux",
"architecture" : "amd64"
},
"platform" : "go1.16.9",
"application" : {
"name" : "mongorestore"
}
},
"active" : true,
"currentOpTime" : "2022-07-04T14:44:56.860+03:00",
"effectiveUsers" : [
{
"user" : "pbmuser",
"db" : "admin"
}
],
"opid" : 362857455,
"lsid" : {
"id" : UUID("559b3155-422e-453a-9df1-dc158688d89c"),
"uid" : BinData(0,"8L/kOoqHkvDRIRJTrmrrO3wwOr+ToO8WLvmn15Ql7G0=")
},
"secs_running" : NumberLong(236403),
"microsecs_running" : NumberLong("236403076648"),
"op" : "command",
"ns" : "file-store.fs.files",
"command" : {
"createIndexes" : "fs.files",
"indexes" : [
{
"key" : {
"filename" : 1,
"uploadDate" : 1
},
"name" : "fileNames",
"ns" : "file-store.fs.files"
}
],
"ignoreUnknownIndexOptions" : true,
"lsid" : {
"id" : UUID("559b3155-422e-453a-9df1-dc158688d89c")
},
"$clusterTime" : {
"clusterTime" : Timestamp(1656698693, 534),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"$db" : "file-store",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"numYields" : 0,
"waitingForLatch" : {
"timestamp" : ISODate("2022-07-01T18:04:54.206Z"),
"captureName" : "FutureResolution"
},
"locks" : {
},
"waitingForLock" : false,
"lockStats" : {
"ParallelBatchWriterMode" : {
"acquireCount" : {
"r" : NumberLong(3)
}
},
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(4)
}
},
"Global" : {
"acquireCount" : {
"w" : NumberLong(4)
}
},
"Database" : {
"acquireCount" : {
"w" : NumberLong(3)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(1),
"w" : NumberLong(1),
"W" : NumberLong(1)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(3)
}
}
},
"waitingForFlowControl" : false,
"flowControlStats" : {
"acquireCount" : NumberLong(3),
"timeAcquiringMicros" : NumberLong(2)
}
}
],
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000003")
},
"lastCommittedOpTime" : Timestamp(1656935091, 1),
"$configServerState" : {
"opTime" : {
"ts" : Timestamp(1656935096, 1),
"t" : NumberLong(1)
}
},
"$clusterTime" : {
"clusterTime" : Timestamp(1656935096, 1),
"signature" : {
"hash" : BinData(0,"9aQA7SE2bdYHsIVCKODI8WjxuCg="),
"keyId" : NumberLong("7073745711275180033")
}
},
"operationTime" : Timestamp(1656935091, 1)
}
Later I found in mongod logs that index creation hung after the phase "Index build: waiting for next action before completing final phase".
All nodes of the replica set were alive. Could you explain what the problem was? I am using MongoDB version 4.4.