Post-$lookup $match arrayness is not checked

    • Type: Bug
    • Resolution: Unresolved
    • Priority: Major - P3
    • None
    • Affects Version/s: None
    • Component/s: None
    • Query Optimization
    • ALL
    • Hide
      /**
       * Repro for a join-optimization tassert on a post-$lookup $match.
       *
       * The $lookup join keys are scalar, but the later $match compares dotted paths that traverse
       * arrays in the joined documents. Join optimization adds that $match equality to the join graph
       * without rechecking path arrayness, so CE treats the paths as scalar.
       *
       * Flow of the script:
       *   1. Start a standalone mongod with join optimization and path arrayness enabled.
       *   2. Insert one document per collection; `left.arr` and `right.arr` are arrays of
       *      sub-documents, so `arr.v` is a path through an array.
       *   3. Run the pipeline with join optimization disabled and check the expected result.
       *   4. Re-enable join optimization and run the same pipeline, which is expected to fail.
       *
       * @tags: [
       *   requires_fcv_90,
       *   requires_sbe,
       * ]
       */
      
      // NOTE(review): presumably tells the test harness that a core dump from this test is expected
      // and should be cleaned up -- confirm against the harness.
      TestData.cleanUpCoreDumpsFromExpectedCrash = true;

      // Dedicated server with join optimization and path arrayness switched on. The reorder mode
      // and join method are pinned so the repro always goes through the same planning path.
      // NOTE(review): "HJ" presumably selects hash join -- confirm.
      const conn = MongoRunner.runMongod({
          setParameter: {
              featureFlagPathArrayness: true,
              internalEnableJoinOptimization: true,
              internalEnablePathArrayness: true,
              internalJoinReorderMode: "bottomUp",
              internalJoinMethod: "HJ",
          },
      });
      assert(conn);
      const testDB = conn.getDB(jsTestName());
      const base = testDB.base;
      const left = testDB.left;
      const right = testDB.right;

      // One base document that joins to exactly one document in each foreign collection via the
      // scalar keys `lk` and `rk`. Both foreign documents store `arr` as an array of sub-documents,
      // so the dotted path `arr.v` traverses an array.
      assert.commandWorked(base.insertOne({_id: 0, lk: 1, rk: 1}));
      assert.commandWorked(left.insertOne({_id: "left", lk: 1, arr: [{v: 1}]}));
      assert.commandWorked(right.insertOne({_id: "right", rk: 1, arr: [{v: 1}]}));
      
      // Indexes are required so the path-arrayness API can prove the lookup join keys are scalar.
      assert.commandWorked(base.createIndex({lk: 1, rk: 1}));
      assert.commandWorked(left.createIndex({lk: 1}));
      assert.commandWorked(right.createIndex({rk: 1}));

      // Two $lookup + $unwind pairs on the scalar keys, followed by a $match whose $expr equality
      // compares `left.arr.v` with `right.arr.v` -- the array-traversing paths this repro targets.
      const pipeline = [
          {$lookup: {from: left.getName(), localField: "lk", foreignField: "lk", as: "left"}},
          {$unwind: "$left"},
          {$lookup: {from: right.getName(), localField: "rk", foreignField: "rk", as: "right"}},
          {$unwind: "$right"},
          {$match: {$expr: {$eq: ["$left.arr.v", "$right.arr.v"]}}},
          {$project: {_id: 0, "left._id": 1, "right._id": 1}},
      ];
      
      // Sanity check the naive plan: with join optimization turned off at runtime, the pipeline
      // must return the single joined document.
      assert.commandWorked(testDB.adminCommand({setParameter: 1, internalEnableJoinOptimization: false}));
      const naive = base.aggregate(pipeline).toArray();
      assert.eq(
          [{left: {_id: "left"}, right: {_id: "right"}}],
          naive,
          "Naive $lookup should match the two dotted paths through arrays",
      );
      
      // Turn join optimization back on and run the same pipeline. runCommand is used so the raw
      // command response is captured in `cmdRes` and checked explicitly below.
      assert.commandWorked(testDB.adminCommand({setParameter: 1, internalEnableJoinOptimization: true}));
      const cmdRes = testDB.runCommand({aggregate: base.getName(), pipeline, cursor: {}});
      
      // WILL FAIL: 11158502 "Encountered unexpected array in NDV computation"
      assert.commandWorked(cmdRes);
      
      // Shut the server down, accepting an abort exit code.
      // NOTE(review): presumably the tripped tassert makes the server abort on shutdown -- confirm.
      // This line is not reached if the assertion above throws.
      MongoRunner.stopMongod(conn, null, {allowedExitCode: MongoRunner.EXIT_ABORT});
       
      Show
      /**
       * Repro for a join-optimization tassert on a post-$lookup $match.
       *
       * The $lookup join keys are scalar, but the later $match compares dotted paths that traverse
       * arrays in the joined documents. Join optimization adds that $match equality to the join graph
       * without rechecking path arrayness, so CE treats the paths as scalar.
       *
       * @tags: [
       *   requires_fcv_90,
       *   requires_sbe,
       * ]
       */

      // NOTE(review): presumably marks a core dump from this test as expected -- confirm.
      TestData.cleanUpCoreDumpsFromExpectedCrash = true;

      // Dedicated server with join optimization and path arrayness enabled; reorder mode and join
      // method are pinned so planning is deterministic for the repro.
      const conn = MongoRunner.runMongod({
          setParameter: {
              featureFlagPathArrayness: true,
              internalEnableJoinOptimization: true,
              internalEnablePathArrayness: true,
              internalJoinReorderMode: "bottomUp",
              internalJoinMethod: "HJ",
          },
      });
      assert(conn);
      const testDB = conn.getDB(jsTestName());
      const base = testDB.base;
      const left = testDB.left;
      const right = testDB.right;

      // Scalar join keys (`lk`, `rk`); `arr` is an array of sub-documents in both foreign
      // collections, so `arr.v` is a path through an array.
      assert.commandWorked(base.insertOne({_id: 0, lk: 1, rk: 1}));
      assert.commandWorked(left.insertOne({_id: "left", lk: 1, arr: [{v: 1}]}));
      assert.commandWorked(right.insertOne({_id: "right", rk: 1, arr: [{v: 1}]}));

      // Indexes are required so the path-arrayness API can prove the lookup join keys are scalar.
      assert.commandWorked(base.createIndex({lk: 1, rk: 1}));
      assert.commandWorked(left.createIndex({lk: 1}));
      assert.commandWorked(right.createIndex({rk: 1}));

      // Joins on the scalar keys, then an equality $match on the array-traversing paths.
      const pipeline = [
          {$lookup: {from: left.getName(), localField: "lk", foreignField: "lk", as: "left"}},
          {$unwind: "$left"},
          {$lookup: {from: right.getName(), localField: "rk", foreignField: "rk", as: "right"}},
          {$unwind: "$right"},
          {$match: {$expr: {$eq: ["$left.arr.v", "$right.arr.v"]}}},
          {$project: {_id: 0, "left._id": 1, "right._id": 1}},
      ];

      // Sanity check the naive plan.
      assert.commandWorked(testDB.adminCommand({setParameter: 1, internalEnableJoinOptimization: false}));
      const naive = base.aggregate(pipeline).toArray();
      assert.eq(
          [{left: {_id: "left"}, right: {_id: "right"}}],
          naive,
          "Naive $lookup should match the two dotted paths through arrays",
      );

      // Same pipeline with join optimization re-enabled; the raw response is checked below.
      assert.commandWorked(testDB.adminCommand({setParameter: 1, internalEnableJoinOptimization: true}));
      const cmdRes = testDB.runCommand({aggregate: base.getName(), pipeline, cursor: {}});

      // WILL FAIL: 11158502 "Encountered unexpected array in NDV computation"
      assert.commandWorked(cmdRes);

      // Accept an abort exit code on shutdown. Not reached if the assertion above throws.
      MongoRunner.stopMongod(conn, null, {allowedExitCode: MongoRunner.EXIT_ABORT});
    • None
    • None
    • None
    • None
    • None
    • None
    • None

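      Join optimization can incorrectly treat dotted paths that traverse arrays as scalar when an equality on those paths is added to the join graph from a post-$lookup $match, because path arrayness is not rechecked for the $match predicate.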

            Assignee:
            Unassigned
            Reporter:
            Max Verbinnen
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

              Created:
              Updated: