|
As of 3.4 we already write only the new entries when $push appends to the end of the array (i.e. without otherwise changing the order or size of the array).
# $push + $each
|
db.getSiblingDB("test").foo.update({"_id" : ObjectId("59bb8e19d2c59520578f669b")},{$push:{a:{$each:[{range:10}, {range:2000}]}}})
|
|
# Oplog Entry
|
{ "ts" : Timestamp(1505464705, 1), "t" : NumberLong(74), "h" : NumberLong("-2552628992460001834"), "v" : 2, "op" : "u", "ns" : "test.foo", "o2" : { "_id" : ObjectId("59bb8e19d2c59520578f669b") }, "o" : { "$set" : { "a.5" : { "range" : 10 }, "a.6" : { "range" : 2000 } } } }
|
# ----------------
|
|
# just $push
|
db.getSiblingDB("test").foo.update({"_id" : ObjectId("59bb8e19d2c59520578f669b")},{$push:{a:{score:1000}}})
|
|
# Oplog entry
|
{ "ts" : Timestamp(1505464948, 2), "t" : NumberLong(74), "h" : NumberLong("-7585672497113090885"), "v" : 2, "op" : "u", "ns" : "test.foo", "o2" : { "_id" : ObjectId("59bb8e19d2c59520578f669b") }, "o" : { "$set" : { "a.7" : { "score" : 1000 } } } }
|
# --------------------
|
|
# $push + $each + $sort
|
db.getSiblingDB("test").foo.update({"_id" : ObjectId("59bb8e19d2c59520578f669b")},{$push:{a:{$each:[{range:10}],$sort:{range:1}}}})
|
|
# Oplog entry
|
{ "ts" : Timestamp(1505465098, 2), "t" : NumberLong(74), "h" : NumberLong("4916741289234778671"), "v" : 2, "op" : "u", "ns" : "test.foo", "o2" : { "_id" : ObjectId("59bb8e19d2c59520578f669b") }, "o" : { "$set" : { "a" : [ { "score" : 1000 }, { "score" : 1 }, { "score" : 0 }, { "score" : 1000 }, { "range" : 10 }, { "range" : 10 }, { "range" : 10 }, { "range" : 200 }, { "range" : 2000 } ] } } }
|
|
|
We already detect many cases where $push is a noop (and don't write any oplog entry) and when pushing to the end of the array we only write a single new entry into the oplog (or several in case where several values are pushed to the end of the array).
> Oplog entries for $push must duplicate the entire array in order to be idempotent
So this is the case only when the element is pushed into a position other than the last one in the array, when the order of the array is changed (via $sort), or when elements are removed from the array (via $pull or $slice).
|
|
Here's a python snippet to demonstrate the problem:
def testSubdocumentUpdate(each=True):
    """Show which oplog entries two consecutive ``$push`` updates produce.

    Inserts a fresh document into ``testDatabase.metricTest``, pushes two
    subdocuments onto its ``mouse`` array, then prints every oplog entry
    recorded for that document followed by the final stored document.

    Args:
        each: When true, each subdocument is wrapped in ``$each`` (a
            one-element list); when false, it is pushed directly.

    Returns:
        None. The results are printed.
    """
    # NOTE(review): insert()/update() are the legacy PyMongo collection
    # methods; newer PyMongo releases expect insert_one()/update_one().
    mong = pymongo.MongoClient(MONGO_HOST)
    db = mong.testDatabase
    objId = db.metricTest.insert({'viewId': 1})

    # Four comma-separated coordinates: '0.5,0.5,0.5,0.5'.
    xPos = ','.join(['0.5'] * 4)
    yPos = xPos

    # Two pushes, identical apart from the frame list, so the oplog entry
    # of the second push can be compared with that of the first.
    for frames in ('1,5,10,15', '20,25,30,35'):
        sample = {'x': xPos, 'y': yPos, 'f': frames}
        pushed = {'$each': [sample]} if each else sample
        db.metricTest.update({'_id': objId}, {'$push': {'mouse': pushed}})

    # Update entries in the oplog carry the target document's _id in 'o2'.
    for d in mong.local.oplog.rs.find({'o2._id': objId}):
        print(d)

    # Read back from the collection that was written to (metricTest); the
    # original queried metricPerfTest, which never contains this document.
    finalDoc = db.metricTest.find({'_id': objId}).next()
    print(finalDoc)
|
If `each` is True, the oplog contains a $set with all of the previous subdocuments included. If `each` is False, it contains a $set on <subdoc>.<index> with just the subdocument you've pushed.
If for example you have a document with a large number of subdocuments each subsequent push of multiple subdocuments will result in the entire previous set being included in the oplog. Under heavy load this can take down our replica set entirely.
|