Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-94908

Group stage in aggregation pipeline acts as a project stage

    • Type: Icon: Bug Bug
    • Resolution: Done
    • Priority: Icon: Blocker - P1 Blocker - P1
    • None
    • Affects Version/s: 6.0.17
    • Component/s: None
    • None
    • ALL
    • Hide

      Script to reproduce on 6.0.17 mongo:

      • Populate a collection with a field "Foo" as a UUID()
      • Prepare first 1000 "Foo" values in a variable foo1kIds (we don't care about order or specific value or naming)
      • Prepare first 2000 "Foo" values in a variable foo2kIds (we don't care about order or specific value or naming)
      • Run an aggregation pipeline where you match `"Foo" : { $in: foo1kIds }` => Result OK
      • Run an aggregation pipeline where you match `"Foo" : { $in: foo2kIds }` => Result KO (incorrect: documents come back without the `_id` wrapper and without `Count`)

      Script:
      ```

      for (var i = 0; i < 5000; i++) { db.agg_col.insertOne(

      { "_id": UUID(), "Foo": UUID() }

      ); }

      var foo1kIds = db.getCollection("agg_col").find({}, { "Foo" : 1.0, "_id" : 0.0 }).limit(1000).map(function(el) { return el.Foo }).toArray();

      var foo2kIds = db.getCollection("agg_col").find({}, { "Foo" : 1.0, "_id" : 0.0 }).limit(2000).map(function(el) { return el.Foo }).toArray();

      db.getCollection("agg_col").aggregate( [ { "$match" : { "$and" : [ { "Foo" :

      { "$in" : foo1kIds }

      } ] } },  { "$group" : { "_id" :

      { "Foo" : "$Foo" }

      , "Count": {$sum:1} } } ] );

      db.getCollection("agg_col").aggregate( [ { "$match" : { "$and" : [ { "Foo" :

      { "$in" : foo2kIds }

      } ] } },  { "$group" : { "_id" :

      { "Foo" : "$Foo" }

      , "Count": {$sum:1} } } ] );

      ```

      Result for first aggregate pipeline:
      ```

      [
        {
          _id: { Foo: UUID('f21e9088-e326-4ebe-bbfd-e1138bb1e6d5') },
          Count: 1
        },
        {
          _id: { Foo: UUID('10743196-f9bd-4099-a035-1616483aba31') },
          Count: 1
        },
        {
          _id: { Foo: UUID('fb39e5e0-75b6-4212-8c20-4b0b75f1059a') },
          Count: 1
        },

      ...

      ]
      ```

      Result for second aggregate pipeline (expected the same shape as the first result, but the `_id` wrapper and the `Count` field are missing):
      ```

      [
        { Foo: UUID('e63f352d-c63e-4edc-9009-7a83ab823993') },
        { Foo: UUID('6bf89563-d5a7-492f-95c4-1362630a7213') },
        { Foo: UUID('2b8d3083-e8c0-4962-8d03-3e8186333090') },

      ...

      ]
      ```
       
      Reproduced on mongo:6.0.17 image:
      ```
      "Id": "sha256:2835e1541e708cfe27676f8a61a1ca93a1d21f0dbdd03fb30d2780bd7a64ef0f",
      "RepoTags": [ "mongo:6.0.17" ],
      "RepoDigests": [ "mongo@sha256:5426e97e68aa428bcbdcaad0ec21cb3c8f0f473a9665fc051fb3c8feed31331b" ]
      ```

      See attachment for reproduced walkthrough

      Show
      Script to reproduce on 6.0.17 mongo: Populate a collection with a field "Foo" as a UUID() Prepare first 1000 "Foo" values in a variable foo1kIds (we don't care about order or specific value or naming) Prepare first 2000 "Foo" values in a variable foo2kIds (we don't care about order or specific value or naming) Run an aggregation pipeline where you match `"Foo" : { $in: foo1kIds }` => Result OK Run an aggregation pipeline where you match `"Foo" : { $in: foo2kIds }` => Result KO Script: ``` for (var i = 0; i < 5000; i++) { db.agg_col.insertOne( { "_id": UUID(), "Foo": UUID() } ); } var foo1kIds = db.getCollection("agg_col").find({}, { "Foo" : 1.0, "_id" : 0.0 }).limit(1000).map(function(el) { return el.Foo }).toArray(); var foo2kIds = db.getCollection("agg_col").find({}, { "Foo" : 1.0, "_id" : 0.0 }).limit(2000).map(function(el) { return el.Foo }).toArray(); db.getCollection("agg_col").aggregate( [ { "$match" : { "$and" : [ { "Foo" : { "$in" : foo1kIds } } ] } },  { "$group" : { "_id" : { "Foo" : "$Foo" } , "Count": {$sum:1} } } ] ); db.getCollection("agg_col").aggregate( [ { "$match" : { "$and" : [ { "Foo" : { "$in" : foo2kIds } } ] } },  { "$group" : { "_id" : { "Foo" : "$Foo" } , "Count": {$sum:1} } } ] ); ``` Result for first aggregate pipeline: ``` [   {     _id: \{ Foo: UUID('f21e9088-e326-4ebe-bbfd-e1138bb1e6d5') } ,     Count: 1   },   {     _id: \{ Foo: UUID('10743196-f9bd-4099-a035-1616483aba31') } ,     Count: 1   },   {     _id: \{ Foo: UUID('fb39e5e0-75b6-4212-8c20-4b0b75f1059a') } ,     Count: 1   }, ... ] ``` Result for second aggregate pipeline (should be similar) ``` [   { Foo: UUID('e63f352d-c63e-4edc-9009-7a83ab823993') },   { Foo: UUID('6bf89563-d5a7-492f-95c4-1362630a7213') },   { Foo: UUID('2b8d3083-e8c0-4962-8d03-3e8186333090') }, ... 
] ```   Reproduced on mongo:6.0.17 image: ``` "Id": "sha256:2835e1541e708cfe27676f8a61a1ca93a1d21f0dbdd03fb30d2780bd7a64ef0f", "RepoTags": [ "mongo:6.0.17" ], "RepoDigests": [ "mongo@sha256:5426e97e68aa428bcbdcaad0ec21cb3c8f0f473a9665fc051fb3c8feed31331b" ] ``` See attachment for reproduced walkthrough

      The `$group` stage in an aggregation pipeline behaves like a `$project` stage over the fields defined inside "_id" (no grouping key wrapper, no accumulators) when the preceding `$match` stage returns more than roughly 1100-1200 documents.  

        

      Appeared on our Prod Environment using Mongo 6.0.17 on Ubuntu 22.04, reproduced in Docker mongo:6.0.17.  

        

      Our emergency workaround: downgrade to 6.0.16.  

       

      Note: 6.0.16 and 6.0.18-rc0 do not have this behavior.  

            Assignee:
            chris.kelly@mongodb.com Chris Kelly
            Reporter:
            thue@idecsi.com Thibault Hue
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: