Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-45185

$graphLookup's internal cache handles null/missing incorrectly, resulting in incorrect query results

    • Query Execution
    • ALL
    • Hide
      const documentList =
       [
          {_id: 1037, "array": [[], ] }, // 187
          {_id: 1055, "obj": { "obj": {_id: 1058, "str": null, "array": [[]], }, }, }, // 190
      ];
      
      const aggregation = [
              {$sort: {
                      "nonexistentfield": -1,
                      "obj.obj.array": 1
                  }
              },
              {$graphLookup: {from: "fuzzer_coll_lookup", startWith: {$sqrt: "$obj.obj.num"
                      }, connectFromField: "obj.array", connectToField: "obj.obj.str", as: "array", maxDepth: 5
                  }
              },
          ];
      

      4.3 result set:

      [
      	{
      		"_id" : 1037,
      		"array" : [
      			{
      				"_id" : 1055,
      				"obj" : {
      					"obj" : {
      						"_id" : 1058,
      						"str" : null,
      						"array" : [
      							[ ]
      						]
      					}
      				}
      			},
      			{
      				"_id" : 1037,
      				"array" : [
      					[ ]
      				]
      			}
      		]
      	},
      	{
      		"_id" : 1055,
      		"obj" : {
      			"obj" : {
      				"_id" : 1058,
      				"str" : null,
      				"array" : [
      					[ ]
      				]
      			}
      		},
      		"array" : [
      			{
      				"_id" : 1055,
      				"obj" : {
      					"obj" : {
      						"_id" : 1058,
      						"str" : null,
      						"array" : [
      							[ ]
      						]
      					}
      				}
      			}
      		]
      	}
      ]
      

      4.2 result set:

      [
      	{
      		"_id" : 1055,
      		"obj" : {
      			"obj" : {
      				"_id" : 1058,
      				"str" : null,
      				"array" : [
      					[ ]
      				]
      			}
      		},
      		"array" : [
      			{
      				"_id" : 1037,
      				"array" : [
      					[ ]
      				]
      			},
      			{
      				"_id" : 1055,
      				"obj" : {
      					"obj" : {
      						"_id" : 1058,
      						"str" : null,
      						"array" : [
      							[ ]
      						]
      					}
      				}
      			}
      		]
      	},
      	{
      		"_id" : 1037,
      		"array" : [
      			{
      				"_id" : 1055,
      				"obj" : {
      					"obj" : {
      						"_id" : 1058,
      						"str" : null,
      						"array" : [
      							[ ]
      						]
      					}
      				}
      			}
      		]
      	}
      ]
      
      
      Show
      const documentList = [ {_id: 1037, "array": [[], ] }, // 187 {_id: 1055, "obj": { "obj": {_id: 1058, "str": null, "array": [[]], }, }, }, // 190 ]; const aggregation = [ {$sort: { "nonexistentfield": -1, "obj.obj.array": 1 } }, {$graphLookup: {from: "fuzzer_coll_lookup", startWith: {$sqrt: "$obj.obj.num" }, connectFromField: "obj.array", connectToField: "obj.obj.str", as: "array", maxDepth: 5 } }, ]; 4.3 result set: [ { "_id" : 1037, "array" : [ { "_id" : 1055, "obj" : { "obj" : { "_id" : 1058, "str" : null, "array" : [ [ ] ] } } }, { "_id" : 1037, "array" : [ [ ] ] } ] }, { "_id" : 1055, "obj" : { "obj" : { "_id" : 1058, "str" : null, "array" : [ [ ] ] } }, "array" : [ { "_id" : 1055, "obj" : { "obj" : { "_id" : 1058, "str" : null, "array" : [ [ ] ] } } } ] } ] 4.2 result set: [ { "_id" : 1055, "obj" : { "obj" : { "_id" : 1058, "str" : null, "array" : [ [ ] ] } }, "array" : [ { "_id" : 1037, "array" : [ [ ] ] }, { "_id" : 1055, "obj" : { "obj" : { "_id" : 1058, "str" : null, "array" : [ [ ] ] } } } ] }, { "_id" : 1037, "array" : [ { "_id" : 1055, "obj" : { "obj" : { "_id" : 1058, "str" : null, "array" : [ [ ] ] } } } ] } ]

      When the "connect from" value is null and it matches a document whose "connect to" field is missing, that matched document is not correctly inserted into DocumentSourceGraphLookup::_cache. This can produce incorrect query results when a subsequent "connect from" value is also null, because the lookup is then served from the incorrectly populated cache.

      This issue was originally detected by the multiversion agg fuzzer, because the incorrect result sets can manifest differently in different server versions. A simpler repro and a detailed description of the issue are given in separate comments on this ticket.

      Original description

      I've attached a somewhat minimal agg-fuzzer repro and a file with 4.3/4.2 explain outputs. The results for 4.3/4.2 contain the same values, but they are being assigned to different fields.

        1. explainOutput.log
          47 kB
        2. graphLookupErr.js
          41 kB

            Assignee:
            backlog-query-execution [DO NOT USE] Backlog - Query Execution
            Reporter:
            vlad.rachev@mongodb.com Vlad Rachev (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            8 Start watching this issue

              Created:
              Updated:
              Resolved: