|
The Python team has a JSON library (https://github.com/mongodb-labs/python-bsonjs) built on top of libbson for better performance, and our benchmarks show that it’s about 10x faster than PyMongo’s built-in JSON encoder. However, a user just reported, and I’ve reproduced (PYTHON-3395), that on documents composed of large string fields PyMongo performs roughly 3–4x faster than libbson for both encoding and decoding.
For reference, here's an example:
"""Benchmark bsonjs (libbson) vs bson.json_util (pymongo)."""
|
import timeit
|
import bsonjs
|
import bson
|
from bson import json_util
|
|
# Sample document whose size is dominated by one 20,000-character string.
doc = {
    '_id': bson.ObjectId(),
    'string': 's' * 20000,
}

# BSON bytes of the document; input for the dumps_* benchmarks.
b = bson.encode(doc)

# JSON string of the document; input for the loads_* benchmarks.
j = json_util.dumps(doc)
|
|
def time(fn, iterations=25, repeat=5):
    """Time ``fn`` and return the best (lowest) total over ``repeat`` runs.

    Prints the name of the function being timed, followed by a summary
    line with the loop count, the number of runs and the best result.

    Args:
        fn: Zero-argument callable to benchmark; its ``__name__`` is printed.
        iterations: Number of calls to ``fn`` made in each timing run.
        repeat: Number of timing runs to perform. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        The smallest total time, in seconds, taken by any single run of
        ``iterations`` calls.

    Raises:
        ValueError: If ``repeat`` is less than 1.
    """
    # Without at least one run there is nothing to take the minimum of;
    # fail with a clear message instead of min()'s "empty sequence" error.
    if repeat < 1:
        raise ValueError('repeat must be at least 1')

    print('Timing: ' + fn.__name__)
    # The minimum is the least noisy figure: slower runs mostly reflect
    # interference from other activity on the machine.
    best = min(timeit.Timer(fn).repeat(repeat, number=iterations))
    print('{0} loops, best of {1}: {2}'.format(iterations, repeat, best))
    return best
|
|
def compare(bsonjs_stmt, json_util_stmt):
    """Time both callables and print how bsonjs fares against json_util.

    ``bsonjs_stmt`` is timed first, then ``json_util_stmt``. The printed
    ratio is the json_util time divided by the bsonjs time, so a value
    below 1 means bsonjs was the slower of the two.
    """
    elapsed_bsonjs = time(bsonjs_stmt)
    elapsed_json_util = time(json_util_stmt)
    speedup = elapsed_json_util / elapsed_bsonjs
    print('bsonjs is {0:.2f}x faster than json_util\n'.format(speedup))
|
|
def dumps_bsonjs():
    """Benchmark body: convert the BSON bytes ``b`` to JSON with bsonjs."""
    bsonjs.dumps(b)
|
|
def dumps_json_util():
    """Benchmark body: convert the BSON bytes ``b`` to JSON with PyMongo.

    The bytes are decoded to a Python document first and then serialised,
    so the input and output match those of ``dumps_bsonjs``.
    """
    decoded = bson.decode(b)
    json_util.dumps(decoded)
|
|
def loads_bsonjs():
    """Benchmark body: parse the JSON string ``j`` with bsonjs."""
    bsonjs.loads(j)
|
|
def loads_json_util():
    """Benchmark body: turn the JSON string ``j`` into BSON with PyMongo.

    The JSON is parsed into a Python document and then BSON-encoded.
    """
    parsed = json_util.loads(j)
    bson.encode(parsed)
|
|
def main():
    """Run the dumps comparison, then the loads comparison."""
    benchmark_pairs = (
        (dumps_bsonjs, dumps_json_util),
        (loads_bsonjs, loads_json_util),
    )
    for bsonjs_fn, json_util_fn in benchmark_pairs:
        compare(bsonjs_fn, json_util_fn)
|
|
# Run the benchmarks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
And the output:
$ python3.10 benchmark_str_perf.py
|
Timing: dumps_bsonjs
|
25 loops, best of 5: 0.00783308400423266
|
Timing: dumps_json_util
|
25 loops, best of 5: 0.002030832998570986
|
bsonjs is 0.26x faster than json_util
|
|
Timing: loads_bsonjs
|
25 loops, best of 5: 0.001949673009221442
|
Timing: loads_json_util
|
25 loops, best of 5: 0.000629648013273254
|
bsonjs is 0.32x faster than json_util
|
Replacing the large string with a short one (and adding a few small fields of other types) yields the expected perf improvement:
# NOTE(review): this variant uses ``datetime``, which is not in the import
# list shown for the script; presumably ``import datetime`` was added too.
doc = {
    '_id': bson.ObjectId(),
    'string': 's' * 10,
    'foo': [1, 2],
    'bar': {'hello': 'world'},
    'date': datetime.datetime(2009, 12, 9, 15),
}
|
...
|
$ python3.10 benchmark_str_perf.py
|
Timing: dumps_bsonjs
|
25 loops, best of 5: 0.00018512399401515722
|
Timing: dumps_json_util
|
25 loops, best of 5: 0.001294998000958003
|
bsonjs is 7.00x faster than json_util
|
|
Timing: loads_bsonjs
|
25 loops, best of 5: 0.00016003800556063652
|
Timing: loads_json_util
|
25 loops, best of 5: 0.0011928190069738775
|
bsonjs is 7.45x faster than json_util
|
CC: colby.pike@mongodb.com
|