|
Author:
{u'username': u'benjaminmurphy', u'name': u'Benjamin Murphy', u'email': u'benjamin_murphy@me.com'}
Message: SERVER-23922 Lookup correctly serializes an internalized match.
Branch: master
https://github.com/mongodb/mongo/commit/c1814956391a9c37f4f4e6cca93c08f7fcacc85d
|
|
I spent some more time with Ben looking into this issue. The problem is that DocumentSourceLookUp::serializeToArray() serializes the match expression absorbed into the $lookup after it has already descended on the "as" field. For example, we'll descend on the path "x" with the match expression {$match: {"x.b": "b"}} and execute it as {b: {$eq: "b"}} on the "from" collection. We should be serializing it back out {$match: {"x.b": {$eq: "b"}}}, but end up serializing it as {$match: {b: {$eq: "b"}}}. This issue affects debug builds of MongoDB and aggregations using $lookup in a sharded cluster.
Process 38648 stopped
|
* thread #2: tid = 0x1bf89c, 0x0000000100436db0 mongod`mongo::PipelineCommand::run(this=0x00000001030e4258, txn=0x0000000105b17e20, db=0x000000010bfcfa40, cmdObj=0x000000010bfcfa70, options=0, errmsg=0x000000010bfcfa58, result=0x000000010bfcfe68) + 2080 at pipeline_command.cpp:220, stop reason = step over
|
frame #0: 0x0000000100436db0 mongod`mongo::PipelineCommand::run(this=0x00000001030e4258, txn=0x0000000105b17e20, db=0x000000010bfcfa40, cmdObj=0x000000010bfcfa70, options=0, errmsg=0x000000010bfcfa58, result=0x000000010bfcfe68) + 2080 at pipeline_command.cpp:220
|
217 // aggregations rely on this ability. Skipping when inShard because this has
|
218 // already been through the transformation (and this unsets pCtx->inShard).
|
219 parsed = pPipeline->serialize().toBson();
|
-> 220 pPipeline = Pipeline::parseCommand(errmsg, parsed, pCtx);
|
221 verify(pPipeline);
|
222 }
|
223
|
(lldb) p parsed.toString(false, false)
|
(std::__1::string) $2 = "{ aggregate: \"a\", pipeline: [ { $lookup: { from: \"b\", as: \"x\", localField: \"a\", foreignField: \"a\" } }, { $unwind: { path: \"$x\" } }, { $match: { b: { $eq: \"b\" } } } ] }"
|
|
|
I can't reproduce this with builds for any platform other than OS X so far.
|
|
These are the binaries producing test failure for me:
https://s3.amazonaws.com/mciuploads/mongodb-mongo-master/osx-107-debug/046441de98dd72689397a4a06f8a68ef58a4c226/binaries/mongo-mongodb_mongo_master_osx_107_debug_046441de98dd72689397a4a06f8a68ef58a4c226_16_04_25_21_26_45.tgz
edit later Mac OS X binaries also reproduce the error.
|
|
My result with Max's test (I'm on Mac)
test2@local:33599(3.3.5-21-g046441d) > (function() { db.dropDatabase(); assert.writeOK(db.a.insert([ {_id: ObjectId("571ea21117443c201ac21e0c"), a: "a", b: "b1"}, {_id: ObjectId("571ea21717443c201ac21e0d"), a: "a", b: "b2"}, {_id: ObjectId("571ea22117443c201ac21e0e"), a: "a3", b: "b3"}, ])); assert.writeOK(db.b.insert([ {_id: ObjectId("571ea22c17443c201ac21e0f"), a: "a", b: "b"}, {_id: ObjectId("571ea23117443c201ac21e10"), a: "a", b: "c"}, ])); var optimized = db.a.aggregate([ {$lookup: { from: "b", localField: "a", foreignField: "a", as:"x" }}, {$unwind: "$x"}, {$match: {"x.b": "b"}}, ]).toArray(); var nonOptimized = db.a.aggregate([ {$lookup: { from: "b", localField: "a", foreignField: "a", as:"x" }}, {$unwind: "$x"}, {$limit: 1000}, {$match: {"x.b": "b"}}, ]).toArray(); assert.eq(optimized, nonOptimized); })();
|
assert: [[ ]] != [[
|
{
|
"_id" : ObjectId("571ea21117443c201ac21e0c"),
|
"a" : "a",
|
"b" : "b1",
|
"x" : {
|
"_id" : ObjectId("571ea22c17443c201ac21e0f"),
|
"a" : "a",
|
"b" : "b"
|
}
|
},
|
{
|
"_id" : ObjectId("571ea21717443c201ac21e0d"),
|
"a" : "a",
|
"b" : "b2",
|
"x" : {
|
"_id" : ObjectId("571ea22c17443c201ac21e0f"),
|
"a" : "a",
|
"b" : "b"
|
}
|
}
|
]] are not equal : undefined
|
doassert@src/mongo/shell/assert.js:15:14
|
assert.eq@src/mongo/shell/assert.js:51:5
|
@(shell):1:1020
|
@(shell):1:2
|
|
2016-04-26T08:47:31.835-0700 E QUERY [thread1] Error: [[ ]] != [[
|
{
|
"_id" : ObjectId("571ea21117443c201ac21e0c"),
|
"a" : "a",
|
"b" : "b1",
|
"x" : {
|
"_id" : ObjectId("571ea22c17443c201ac21e0f"),
|
"a" : "a",
|
"b" : "b"
|
}
|
},
|
{
|
"_id" : ObjectId("571ea21717443c201ac21e0d"),
|
"a" : "a",
|
"b" : "b2",
|
"x" : {
|
"_id" : ObjectId("571ea22c17443c201ac21e0f"),
|
"a" : "a",
|
"b" : "b"
|
}
|
}
|
]] are not equal : undefined :
|
doassert@src/mongo/shell/assert.js:15:14
|
assert.eq@src/mongo/shell/assert.js:51:5
|
@(shell):1:1020
|
@(shell):1:2
|
|
test2@local:33599(3.3.5-21-g046441d) > db.serverBuildInfo()
|
{
|
"version" : "3.3.5-21-g046441d",
|
"gitVersion" : "046441de98dd72689397a4a06f8a68ef58a4c226",
|
"modules" : [ ],
|
"allocator" : "system",
|
"javascriptEngine" : "mozjs",
|
"sysInfo" : "deprecated",
|
"versionArray" : [
|
3,
|
3,
|
6,
|
-100
|
],
|
"openssl" : {
|
"running" : "disabled",
|
"compiled" : "disabled"
|
},
|
"buildEnvironment" : {
|
"distmod" : "",
|
"distarch" : "x86_64",
|
"cc" : "gcc: Apple LLVM version 7.0.2 (clang-700.1.81)",
|
"ccflags" : "-fno-omit-frame-pointer -fPIC -fno-strict-aliasing -ggdb -pthread -Wall -Wsign-compare -Wno-unknown-pragmas -Winvalid-pch -Werror -O2 -Wno-unused-local-typedefs -Wno-unused-function -Wno-unused-private-field -Wno-deprecated-declarations -Wno-tautological-constant-out-of-range-compare -Wno-unused-const-variable -Wno-missing-braces -Wno-inconsistent-missing-override -Wno-potentially-evaluated-expression -mmacosx-version-min=10.7 -fno-builtin-memcmp",
|
"cxx" : "g++: Apple LLVM version 7.0.2 (clang-700.1.81)",
|
"cxxflags" : "-Wnon-virtual-dtor -Woverloaded-virtual -stdlib=libc++ -std=c++11",
|
"linkflags" : "-fPIC -pthread -Wl,-bind_at_load -mmacosx-version-min=10.7 -stdlib=libc++",
|
"target_arch" : "x86_64",
|
"target_os" : "osx"
|
},
|
"bits" : 64,
|
"debug" : true,
|
"maxBsonObjectSize" : 16777216,
|
"storageEngines" : [
|
"devnull",
|
"ephemeralForTest",
|
"mmapv1",
|
"wiredTiger"
|
],
|
"ok" : 1
|
}
|
test2@local:33599(3.3.5-21-g046441d) >
|
|
|
I had no other indexes when it happened.
I did add some indexes to see if that would change behavior but it didn't. I will try to figure out the difference between my reproducer and yours.
|
|
asya, I put together the following test script based on the documents inserted and aggregation pipelines shown above. However, I cannot reproduce the issue you described when running MongoDB version 3.3.5-23-g632dfe3.
Are you able to reliably reproduce this issue, and if so, are there any additional indexes that you have created on either the test.a or test.b collections?
(function() {
|
db.dropDatabase();
|
|
assert.writeOK(db.a.insert([
|
{_id: ObjectId("571ea21117443c201ac21e0c"), a: "a", b: "b1"},
|
{_id: ObjectId("571ea21717443c201ac21e0d"), a: "a", b: "b2"},
|
{_id: ObjectId("571ea22117443c201ac21e0e"), a: "a3", b: "b3"},
|
]));
|
|
assert.writeOK(db.b.insert([
|
{_id: ObjectId("571ea22c17443c201ac21e0f"), a: "a", b: "b"},
|
{_id: ObjectId("571ea23117443c201ac21e10"), a: "a", b: "c"},
|
]));
|
|
var optimized = db.a.aggregate([
|
{$lookup: {
|
from: "b",
|
localField: "a",
|
foreignField: "a",
|
as:"x"
|
}},
|
{$unwind: "$x"},
|
{$match: {"x.b": "b"}},
|
]).toArray();
|
|
var nonOptimized = db.a.aggregate([
|
{$lookup: {
|
from: "b",
|
localField: "a",
|
foreignField: "a",
|
as:"x"
|
}},
|
{$unwind: "$x"},
|
{$limit: 1000},
|
{$match: {"x.b": "b"}},
|
]).toArray();
|
|
assert.eq(optimized, nonOptimized);
|
})();
|
|
"version" : "3.3.5-21-g046441d",
|
"gitVersion" : "046441de98dd72689397a4a06f8a68ef58a4c226",
|
|
> db.a.find()
|
{ "_id" : ObjectId("571ea21117443c201ac21e0c"), "a" : "a", "b" : "b1" }
|
{ "_id" : ObjectId("571ea21717443c201ac21e0d"), "a" : "a", "b" : "b2" }
|
{ "_id" : ObjectId("571ea22117443c201ac21e0e"), "a" : "a3", "b" : "b3" }
|
> db.b.find()
|
{ "_id" : ObjectId("571ea22c17443c201ac21e0f"), "a" : "a", "b" : "b" }
|
{ "_id" : ObjectId("571ea23117443c201ac21e10"), "a" : "a", "b" : "c" }
|
> db.a.aggregate({$lookup:{from:"b",localField:"a",foreignField:"a",as:"x"}})
|
{ "_id" : ObjectId("571ea21117443c201ac21e0c"), "a" : "a", "b" : "b1", "x" : [ { "_id" : ObjectId("571ea22c17443c201ac21e0f"), "a" : "a", "b" : "b" }, { "_id" : ObjectId("571ea23117443c201ac21e10"), "a" : "a", "b" : "c" } ] }
|
{ "_id" : ObjectId("571ea21717443c201ac21e0d"), "a" : "a", "b" : "b2", "x" : [ { "_id" : ObjectId("571ea22c17443c201ac21e0f"), "a" : "a", "b" : "b" }, { "_id" : ObjectId("571ea23117443c201ac21e10"), "a" : "a", "b" : "c" } ] }
|
{ "_id" : ObjectId("571ea22117443c201ac21e0e"), "a" : "a3", "b" : "b3", "x" : [ ] }
|
> db.a.aggregate({$lookup:{from:"b",localField:"a",foreignField:"a",as:"x"}},{$match:{"x.b":"b"}})
|
{ "_id" : ObjectId("571ea21117443c201ac21e0c"), "a" : "a", "b" : "b1", "x" : [ { "_id" : ObjectId("571ea22c17443c201ac21e0f"), "a" : "a", "b" : "b" }, { "_id" : ObjectId("571ea23117443c201ac21e10"), "a" : "a", "b" : "c" } ] }
|
{ "_id" : ObjectId("571ea21717443c201ac21e0d"), "a" : "a", "b" : "b2", "x" : [ { "_id" : ObjectId("571ea22c17443c201ac21e0f"), "a" : "a", "b" : "b" }, { "_id" : ObjectId("571ea23117443c201ac21e10"), "a" : "a", "b" : "c" } ] }
|
> db.a.aggregate({$lookup:{from:"b",localField:"a",foreignField:"a",as:"x"}},{$match:{"x.b":"b"}},{$unwind:"$x"})
|
{ "_id" : ObjectId("571ea21117443c201ac21e0c"), "a" : "a", "b" : "b1", "x" : { "_id" : ObjectId("571ea22c17443c201ac21e0f"), "a" : "a", "b" : "b" } }
|
{ "_id" : ObjectId("571ea21117443c201ac21e0c"), "a" : "a", "b" : "b1", "x" : { "_id" : ObjectId("571ea23117443c201ac21e10"), "a" : "a", "b" : "c" } }
|
{ "_id" : ObjectId("571ea21717443c201ac21e0d"), "a" : "a", "b" : "b2", "x" : { "_id" : ObjectId("571ea22c17443c201ac21e0f"), "a" : "a", "b" : "b" } }
|
{ "_id" : ObjectId("571ea21717443c201ac21e0d"), "a" : "a", "b" : "b2", "x" : { "_id" : ObjectId("571ea23117443c201ac21e10"), "a" : "a", "b" : "c" } }
|
> db.a.aggregate({$lookup:{from:"b",localField:"a",foreignField:"a",as:"x"}},{$unwind:"$x"},{$match:{"x.b":"b"}})
|
// no results! add $limit to block optimization of $match
|
> db.a.aggregate({$lookup:{from:"b",localField:"a",foreignField:"a",as:"x"}},{$unwind:"$x"},{$limit:1000},{$match:{"x.b":"b"}})
|
{ "_id" : ObjectId("571ea21117443c201ac21e0c"), "a" : "a", "b" : "b1", "x" : { "_id" : ObjectId("571ea22c17443c201ac21e0f"), "a" : "a", "b" : "b" } }
|
{ "_id" : ObjectId("571ea21717443c201ac21e0d"), "a" : "a", "b" : "b2", "x" : { "_id" : ObjectId("571ea22c17443c201ac21e0f"), "a" : "a", "b" : "b" } }
|
Increasing debugging makes it seem like the extra $match condition may be applied to source collection ("a" rather than "b" but it's hard to tell).
Explain for problematic query
db.a.aggregate([{$lookup:{from:"b",localField:"a",foreignField:"a",as:"x"}},{$unwind:"$x"},{$match:{"x.b":"b"}}],{explain:true})
|
{
|
"waitedMS" : NumberLong(0),
|
"stages" : [
|
{
|
"$cursor" : {
|
"query" : {
|
|
},
|
"queryPlanner" : {
|
"plannerVersion" : 1,
|
"namespace" : "test.a",
|
"indexFilterSet" : false,
|
"parsedQuery" : {
|
|
},
|
"winningPlan" : {
|
"stage" : "COLLSCAN",
|
"direction" : "forward"
|
},
|
"rejectedPlans" : [ ]
|
}
|
}
|
},
|
{
|
"$lookup" : {
|
"from" : "b",
|
"as" : "x",
|
"localField" : "a",
|
"foreignField" : "a",
|
"unwinding" : {
|
"preserveNullAndEmptyArrays" : false
|
},
|
"matching" : {
|
"b" : {
|
"$eq" : "b"
|
}
|
}
|
}
|
}
|
],
|
"ok" : 1
|
}
|
Logs show:
Canonical query:
|
ns=test.aTree: b == "b"
|
Sort: {}
|
Proj: {}
|
=============================
|
2016-04-25T16:12:50.793-0700 D QUERY [conn1] Index 0 is kp: { _id: 1 } unique name: '_id_' io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.a" }
|
2016-04-25T16:12:50.793-0700 D QUERY [conn1] Predicate over field 'b'
|
2016-04-25T16:12:50.793-0700 D QUERY [conn1] Rated tree:
|
b == "b" || First: notFirst: full path: b
|
2016-04-25T16:12:50.793-0700 D QUERY [conn1] Planner: outputted 0 indexed solutions.
|
2016-04-25T16:12:50.793-0700 D QUERY [conn1] Planner: outputting a collscan:
|
COLLSCAN
|
---ns = test.a
|
---filter = b == "b"
|
---fetched = 1
|
---sortedByDiskLoc = 0
|
---getSort = []
|
2016-04-25T16:12:50.793-0700 D QUERY [conn1] Only one plan is available; it will be run but will not be cached. query: { b: { $eq: "b" } } sort: {} projection: {}, planSummary: COLLSCAN
|
which seems to be applying the condition to the wrong collection (a instead of b)
|
Generated at Thu Feb 08 04:04:50 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.