[CSHARP-4600] LINQ3: slower performance with generated join query Created: 05/Apr/23  Updated: 21/Apr/23

Status: Backlog
Project: C# Driver
Component/s: LINQ3
Affects Version/s: 2.19.1
Fix Version/s: None

Type: Bug Priority: Unknown
Reporter: Brandon N/A Assignee: Unassigned
Resolution: Unresolved Votes: 0
Labels: triage
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified


 Description   

Summary

For LINQ3 the generated aggregation for one of the queries we have using joins results in a slow query. The query is just searching for a document and including some related data (might make more sense after looking at the example).

Please provide the version of the driver. If applicable, please provide the MongoDB server version and topology (standalone, replica set, or sharded cluster).

Driver 2.19.1

How to Reproduce

Example app:

using System.Diagnostics;
using MongoDB.Driver;
using MongoDB.Driver.Linq;
 
var Query = async (LinqProvider provider) =>
{
    var connectionstring = "mongodb://docker:mongopw@localhost:49153";
    var settings = MongoClientSettings.FromConnectionString(connectionstring);
    settings.LinqProvider = provider;
    var client = new MongoClient(settings);
    var db = client.GetDatabase("db1");
 
    await db.DropCollectionAsync("collectionA");
    await db.DropCollectionAsync("collectionB");
    await db.DropCollectionAsync("collectionC");
 
    var collectionA = db.GetCollection<ClassA>("collectionA");
    var collectionB = db.GetCollection<ClassB>("collectionB");
    var collectionC = db.GetCollection<ClassC>("collectionC");
 
    var numItems = 5000;
    var Adocuments = Enumerable.Range(0, numItems).Select((index) => new ClassA { Id = $"A_{index}" }).ToList();
    var Bdocuments = Enumerable.Range(0, numItems).Select((index) => new ClassB { Id = $"B_{index}", ClassAId = Adocuments[index].Id }).ToList();
    var Cdocuments = Enumerable.Range(0, numItems).Select((index) => new ClassC { Id = $"C_{index}", ClassBId = Bdocuments[index].Id }).ToList();
 
    await collectionA.InsertManyAsync(Adocuments);
    await collectionB.InsertManyAsync(Bdocuments);
    await collectionC.InsertManyAsync(Cdocuments);
 
    var id = Adocuments[0].Id;
 
    var query = from a in collectionA.AsQueryable()
           join b in collectionB.AsQueryable() on a.Id equals b.ClassAId
           join c in collectionC.AsQueryable() on b.Id equals c.ClassBId
           where a.Id == id
           select new { a.Id };
 
    Console.WriteLine(query);
 
    var stopwatch = new Stopwatch();
    stopwatch.Start();
 
    await query.SingleAsync();
 
    stopwatch.Stop();
    Console.WriteLine(stopwatch.Elapsed);
};
 
await Query(LinqProvider.V2);
await Query(LinqProvider.V3);
 
class ClassA
{
    public string Id { get; set; }
}
 
class ClassB
{
    public string Id { get; set; }
    public string ClassAId { get; set; }
}
 
class ClassC
{
    public string Id { get; set; }
    public string ClassBId { get; set; }
}

 

Additional Background

LINQ2 Aggregation:

[
  {
    "$lookup": {
      "from": "collectionB",
      "localField": "_id",
      "foreignField": "ClassAId",
      "as": "b"
    }
  },
  { "$unwind": "$b" },
  {
    "$lookup": {
      "from": "collectionC",
      "localField": "b._id",
      "foreignField": "ClassBId",
      "as": "c"
    }
  },
  { "$unwind": "$c" },
  { "$match": { "_id": "A_0" } },
  { "$project": { "Id": "$_id", "_id": 0 } }
]

LINQ3 Aggregation:

[
  { "$project": { "_outer": "$$ROOT", "_id": 0 } },
  {
    "$lookup": {
      "from": "collectionB",
      "localField": "_outer._id",
      "foreignField": "ClassAId",
      "as": "_inner"
    }
  },
  { "$unwind": "$_inner" },
  { "$project": { "a": "$_outer", "b": "$_inner", "_id": 0 } },
  { "$project": { "_outer": "$$ROOT", "_id": 0 } },
  {
    "$lookup": {
      "from": "collectionC",
      "localField": "_outer.b._id",
      "foreignField": "ClassBId",
      "as": "_inner"
    }
  },
  { "$unwind": "$_inner" },
  {
    "$project": {
      "<>h__TransparentIdentifier0": "$_outer",
      "c": "$_inner",
      "_id": 0
    }
  },
  { "$match": { "<>h__TransparentIdentifier0.a._id": "A_0" } },
  { "$project": { "Id": "$<>h__TransparentIdentifier0.a._id", "_id": 0 } }
]

LINQ2 explain

{
  "explainVersion": "1",
  "stages": [
    {
      "$cursor": {
        "queryPlanner": {
          "namespace": "db1.collectionA",
          "indexFilterSet": false,
          "parsedQuery": {
            "_id": {
              "$eq": "A_0"
            }
          },
          "queryHash": "64D746CC",
          "planCacheKey": "77980A9F",
          "maxIndexedOrSolutionsReached": false,
          "maxIndexedAndSolutionsReached": false,
          "maxScansToExplodeReached": false,
          "winningPlan": {
            "stage": "PROJECTION_DEFAULT",
            "transformBy": {
              "_id": 1,
              "b._id": 1
            },
            "inputStage": {
              "stage": "IDHACK"
            }
          },
          "rejectedPlans": []
        }
      }
    },
    {
      "$lookup": {
        "from": "collectionB",
        "as": "b",
        "localField": "_id",
        "foreignField": "ClassAId",
        "unwinding": {
          "preserveNullAndEmptyArrays": false
        }
      }
    },
    {
      "$lookup": {
        "from": "collectionC",
        "as": "c",
        "localField": "b._id",
        "foreignField": "ClassBId",
        "unwinding": {
          "preserveNullAndEmptyArrays": false
        }
      }
    },
    {
      "$project": {
        "Id": "$_id",
        "_id": false
      }
    }
  ],
  "serverInfo": {
    "host": "1b4a207c02de",
    "port": 27017,
    "version": "6.0.5",
    "gitVersion": "c9a99c120371d4d4c52cbb15dac34a36ce8d3b1d"
  },
  "serverParameters": {
    "internalQueryFacetBufferSizeBytes": 104857600,
    "internalQueryFacetMaxOutputDocSizeBytes": 104857600,
    "internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
    "internalDocumentSourceGroupMaxMemoryBytes": 104857600,
    "internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
    "internalQueryProhibitBlockingMergeOnMongoS": 0,
    "internalQueryMaxAddToSetBytes": 104857600,
    "internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600
  },
  "command": {
    "aggregate": "collectionA",
    "pipeline": [
      {
        "$lookup": {
          "from": "collectionB",
          "localField": "_id",
          "foreignField": "ClassAId",
          "as": "b"
        }
      },
      {
        "$unwind": "$b"
      },
      {
        "$lookup": {
          "from": "collectionC",
          "localField": "b._id",
          "foreignField": "ClassBId",
          "as": "c"
        }
      },
      {
        "$unwind": "$c"
      },
      {
        "$match": {
          "_id": "A_0"
        }
      },
      {
        "$project": {
          "Id": "$_id",
          "_id": 0
        }
      }
    ],
    "cursor": {},
    "$db": "db1"
  },
  "ok": 1
}

LINQ3 explain:

{
  "explainVersion": "1",
  "stages": [
    {
      "$cursor": {
        "queryPlanner": {
          "namespace": "db1.collectionA",
          "indexFilterSet": false,
          "parsedQuery": {},
          "queryHash": "17830885",
          "planCacheKey": "17830885",
          "maxIndexedOrSolutionsReached": false,
          "maxIndexedAndSolutionsReached": false,
          "maxScansToExplodeReached": false,
          "winningPlan": {
            "stage": "PROJECTION_DEFAULT",
            "transformBy": {
              "_outer": "$$ROOT",
              "_id": false
            },
            "inputStage": {
              "stage": "COLLSCAN",
              "direction": "forward"
            }
          },
          "rejectedPlans": []
        }
      }
    },
    {
      "$lookup": {
        "from": "collectionB",
        "as": "_inner",
        "localField": "_outer._id",
        "foreignField": "ClassAId",
        "unwinding": {
          "preserveNullAndEmptyArrays": false
        }
      }
    },
    {
      "$project": {
        "a": "$_outer",
        "b": "$_inner",
        "_id": false
      }
    },
    {
      "$project": {
        "_outer": "$$ROOT",
        "_id": false
      }
    },
    {
      "$match": {
        "_outer.a._id": {
          "$eq": "A_0"
        }
      }
    },
    {
      "$lookup": {
        "from": "collectionC",
        "as": "_inner",
        "localField": "_outer.b._id",
        "foreignField": "ClassBId",
        "unwinding": {
          "preserveNullAndEmptyArrays": false
        }
      }
    },
    {
      "$project": {
        "<>h__TransparentIdentifier0": "$_outer",
        "c": "$_inner",
        "_id": false
      }
    },
    {
      "$project": {
        "Id": "$<>h__TransparentIdentifier0.a._id",
        "_id": false
      }
    }
  ],
  "serverInfo": {
    "host": "1b4a207c02de",
    "port": 27017,
    "version": "6.0.5",
    "gitVersion": "c9a99c120371d4d4c52cbb15dac34a36ce8d3b1d"
  },
  "serverParameters": {
    "internalQueryFacetBufferSizeBytes": 104857600,
    "internalQueryFacetMaxOutputDocSizeBytes": 104857600,
    "internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
    "internalDocumentSourceGroupMaxMemoryBytes": 104857600,
    "internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
    "internalQueryProhibitBlockingMergeOnMongoS": 0,
    "internalQueryMaxAddToSetBytes": 104857600,
    "internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600
  },
  "command": {
    "aggregate": "collectionA",
    "pipeline": [
      {
        "$project": {
          "_outer": "$$ROOT",
          "_id": 0
        }
      },
      {
        "$lookup": {
          "from": "collectionB",
          "localField": "_outer._id",
          "foreignField": "ClassAId",
          "as": "_inner"
        }
      },
      {
        "$unwind": "$_inner"
      },
      {
        "$project": {
          "a": "$_outer",
          "b": "$_inner",
          "_id": 0
        }
      },
      {
        "$project": {
          "_outer": "$$ROOT",
          "_id": 0
        }
      },
      {
        "$lookup": {
          "from": "collectionC",
          "localField": "_outer.b._id",
          "foreignField": "ClassBId",
          "as": "_inner"
        }
      },
      {
        "$unwind": "$_inner"
      },
      {
        "$project": {
          "<>h__TransparentIdentifier0": "$_outer",
          "c": "$_inner",
          "_id": 0
        }
      },
      {
        "$match": {
          "<>h__TransparentIdentifier0.a._id": "A_0"
        }
      },
      {
        "$project": {
          "Id": "$<>h__TransparentIdentifier0.a._id",
          "_id": 0
        }
      }
    ],
    "cursor": {},
    "$db": "db1"
  },
  "ok": 1
}

Comparing the explain output, it seems like the linq2 aggregation is able to be optimized to match the id before projecting?

Workarounds:

  • Manually write the aggregation
  • Query collections separately


 Comments   
Comment by Robert Stam [ 05/Apr/23 ]

Thanks for reporting this. It is very interesting. Thanks for the amount of detailed data you have collected.

LINQ3 uses the `_outer` temporary field primarily as an aid to understanding the translated MQL. I did not anticipate it would have such an impact on performance (at least in a case like yours).

I will investigate the impact of removing the `_outer` temporary variable.

Generated at Wed Feb 07 21:48:44 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.