(Not sure whether this is a bug or improvement request)
GridFSBucket.download_to_stream became very slow after upgrading to pymongo 4.x. Apparently caused by PYTHON-2824.
Issue reproduced on macOS and Linux, with MongoDB 4.4, 5.x and 6.x.
The slowdown is especially bad on non-default chunk sizes.
Test script:
import time
import random
import io
import pymongo
import gridfs
def tst(client, name, chunk_size_kb, data):
    """Time a single GridFS download of ``data`` for a given chunk size.

    ``name`` is used as the database name, the bucket name and the stored
    file name. The payload is uploaded, downloaded into an in-memory
    buffer and then deleted; only the download step is timed.

    Args:
        client: Connected ``pymongo.MongoClient``.
        name: Database / bucket / file name to use for the run.
        chunk_size_kb: GridFS chunk size in KiB.
        data: Payload bytes to store and read back.

    Returns:
        Download duration in seconds, measured with ``time.monotonic``.
    """
    bucket = gridfs.GridFSBucket(
        client[name], name, chunk_size_bytes=1024 * chunk_size_kb
    )
    id_ = bucket.upload_from_stream(name, data)
    try:
        buf = io.BytesIO()
        ts = time.monotonic()
        bucket.download_to_stream_by_name(name, buf)
        duration = time.monotonic() - ts
    finally:
        # Delete the uploaded file even when the download raises, so a
        # failed run does not leave the test file behind in the bucket.
        bucket.delete(id_)
    return duration
def main():
    """Run the download benchmark over a grid of object and chunk sizes.

    Prints the server and driver versions first, then one timing line per
    (object size, chunk size) combination.
    """
    client = pymongo.MongoClient()
    # The "=" f-string specifier echoes each expression next to its value.
    print(f"{client.server_info()['version']=} {pymongo.__version__=}")
    for object_size_mb in (1, 5, 20, 80):
        # random.randbytes requires Python 3.9+.
        data = random.randbytes(object_size_mb * 1024 * 1024)
        for chunk_size_kb in (128, 256, 512, 1024, 8192):
            duration = tst(client, "bucket-name", chunk_size_kb, data)
            print(f"{object_size_mb=:2}, {chunk_size_kb=:4}, {duration:.3f}s")
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Results with pymongo 3.12.1:
client.server_info()['version']='4.4.17' pymongo.__version__='3.12.1'
object_size_mb= 1, chunk_size_kb= 128, 0.003s
object_size_mb= 1, chunk_size_kb= 256, 0.002s
object_size_mb= 1, chunk_size_kb= 512, 0.003s
object_size_mb= 1, chunk_size_kb=1024, 0.002s
object_size_mb= 1, chunk_size_kb=8192, 0.002s
object_size_mb= 5, chunk_size_kb= 128, 0.014s
object_size_mb= 5, chunk_size_kb= 256, 0.007s
object_size_mb= 5, chunk_size_kb= 512, 0.006s
object_size_mb= 5, chunk_size_kb=1024, 0.006s
object_size_mb= 5, chunk_size_kb=8192, 0.012s
object_size_mb=20, chunk_size_kb= 128, 0.050s
object_size_mb=20, chunk_size_kb= 256, 0.021s
object_size_mb=20, chunk_size_kb= 512, 0.029s
object_size_mb=20, chunk_size_kb=1024, 0.021s
object_size_mb=20, chunk_size_kb=8192, 0.048s
object_size_mb=80, chunk_size_kb= 128, 0.181s
object_size_mb=80, chunk_size_kb= 256, 0.216s
object_size_mb=80, chunk_size_kb= 512, 0.235s
object_size_mb=80, chunk_size_kb=1024, 0.232s
object_size_mb=80, chunk_size_kb=8192, 0.292s
Results with pymongo 4.3.2:
client.server_info()['version']='4.4.17' pymongo.__version__='4.3.2'
object_size_mb= 1, chunk_size_kb= 128, 0.032s
object_size_mb= 1, chunk_size_kb= 256, 0.048s
object_size_mb= 1, chunk_size_kb= 512, 0.106s
object_size_mb= 1, chunk_size_kb=1024, 0.284s
object_size_mb= 1, chunk_size_kb=8192, 0.265s
object_size_mb= 5, chunk_size_kb= 128, 0.157s
object_size_mb= 5, chunk_size_kb= 256, 0.226s
object_size_mb= 5, chunk_size_kb= 512, 0.477s
object_size_mb= 5, chunk_size_kb=1024, 1.313s
object_size_mb= 5, chunk_size_kb=8192, 11.322s
object_size_mb=20, chunk_size_kb= 128, 0.582s
object_size_mb=20, chunk_size_kb= 256, 0.880s
object_size_mb=20, chunk_size_kb= 512, 1.876s
object_size_mb=20, chunk_size_kb=1024, 5.100s
object_size_mb=20, chunk_size_kb=8192, 69.239s
object_size_mb=80, chunk_size_kb= 128, 2.484s
object_size_mb=80, chunk_size_kb= 256, 3.623s
object_size_mb=80, chunk_size_kb= 512, 8.359s
object_size_mb=80, chunk_size_kb=1024, 20.651s
object_size_mb=80, chunk_size_kb=8192, 304.091s
Issue links:
- is related to
-
PYTHON-3508 GridOut.readline and GridOut.read are slow with large chunk sizes
- Closed