Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-93380

$unionWith contains executionStats info even when in the merging pipeline

    • Type: Icon: Bug Bug
    • Resolution: Unresolved
    • Priority: Icon: Minor - P4 Minor - P4
    • None
    • Affects Version/s: None
    • Component/s: None
    • Query Optimization
    • ALL
    • Hide

      Copy below into a test file

      // Start a cluster with two shards and a single mongos.
      const st = new ShardingTest({shards: 2, mongos: 1});
      const mongos = st.s;
      // presumably jsTestName() yields the name of the running test file -- confirm.
      const dbName = jsTestName();
      const testDB = mongos.getDB(dbName);
      // Handles for the base collection (sharded further down) and for the collection
      // that the $unionWith stage reads from.
      const shardedBaseColl = testDB.getCollection("base_sharded");
      const unshardedUnionColl = testDB.getCollection("unshardedUnionColl");
      // Ensure db's primary shard is shard1
      assert.commandWorked(
          mongos.getDB("admin").runCommand({enableSharding: dbName, primaryShard: st.shard1.shardName}));
      
          // Documents loaded into the base collection. The shardColl() call later in this
          // script is given {_id: 101}, which falls between these two documents.
          const baseCollDocs = [
              {"_id": 100, "localField": "cakes", "weird": false},
              // Split between first and second shard will be here.
              {"_id": 101, "localField": "cakes and kale", "weird": true},
          ];
          
          // Documents loaded into the collection that $unionWith reads from.
          const unionCollDocs = [
              {"_id": 0, "title": "cakes"},
              {"_id": 1, "title": "cookies and cakes"},
              {"_id": 2, "title": "vegetables"},
          ];
          
          /**
           * Replaces the contents of `coll` with `docs`.
           *
           * Drops the collection, queues every document on an unordered bulk operation,
           * executes it, and asserts via assert.commandWorked() that the write succeeded.
           *
           * @param coll - collection handle providing drop() and initializeUnorderedBulkOp().
           * @param docs - iterable of documents to insert.
           */
          function loadData(coll, docs) {
              coll.drop();
              const bulkOp = coll.initializeUnorderedBulkOp();
              for (const entry of docs) {
                  bulkOp.insert(entry);
              }
              const writeResult = bulkOp.execute();
              assert.commandWorked(writeResult);
          }
          
          // Populate both collections before the base collection is sharded.
          loadData(shardedBaseColl, baseCollDocs);
          loadData(unshardedUnionColl, unionCollDocs);
      
      // Shard base collection.
      // NOTE(review): presumably the arguments are shard key {_id: 1}, split point {_id: 101},
      // and the chunk to move ({_id: 101}) -- confirm against ShardingTest.shardColl.
      st.shardColl(shardedBaseColl, {_id: 1}, {_id: 101}, {_id: 101});
      
      
      // Explain, at "executionStats" verbosity, an aggregation on the sharded base collection
      // whose only stage is a $unionWith over the other collection; the sub-pipeline
      // projects out _id.
      let result = shardedBaseColl.explain("executionStats").aggregate([{
          $unionWith: {
              coll: unshardedUnionColl.getName(),
              pipeline: [
                  {
                      $project: {
                          "_id": 0,
                      }
                  }
              ]
          }
      }]);
      
      // Log the explain output so it can be inspected.
      jsTestLog(result);
      /** snippet from result -- executionStats contained in mergerPart which is contrary to how it normally works
       * "mergerPart" : [
       			{
       				"$mergeCursors" : {...}
       			},
       			{
       				"$unionWith" : {
       					"coll" : "unshardedUnionColl",
       					"pipeline" : {
       						"splitPipeline" : null,
       						"shards" : {
       							"unionWith_replicate-rs1" : {
       								"host" : "ip-10-122-6-95:20044",
       								"explainVersion" : "1",
       								"queryPlanner" : {
       									"namespace" : "unionWith_replicate.unshardedUnionColl",
       									"parsedQuery" : {
      
       									},
       									"indexFilterSet" : false,
       									"queryHash" : "8F2383EE",
       									"planCacheKey" : "7DF350EE",
       									"optimizationTimeMillis" : 0,
       									"optimizedPipeline" : true,
       									"maxIndexedOrSolutionsReached" : false,
       									"maxIndexedAndSolutionsReached" : false,
       									"maxScansToExplodeReached" : false,
       									"prunedSimilarIndexes" : false,
       									"winningPlan" : {
       										"isCached" : false,
       										"stage" : "PROJECTION_SIMPLE",
       										"transformBy" : {
       											"_id" : false
       										},
       										"inputStage" : {
       											"stage" : "COLLSCAN",
       											"direction" : "forward"
       										}
       									},
       									"rejectedPlans" : [ ]
       								},
       								"executionStats" : {
       									"executionSuccess" : true,
       									"nReturned" : 3,
       									"executionTimeMillis" : 0,
       									"totalKeysExamined" : 0,
       									"totalDocsExamined" : 3,
       									"executionStages" : {
       										"isCached" : false,
       										"stage" : "PROJECTION_SIMPLE",
       										"nReturned" : 3,
       										"executionTimeMillisEstimate" : 0,
       										"works" : 4,
       										"advanced" : 3,
       										"needTime" : 0,
       										"needYield" : 0,
       										"saveState" : 0,
       										"restoreState" : 0,
       										"isEOF" : 1,
       										"transformBy" : {
       											"_id" : false
       										},
       										"inputStage" : {
       											"stage" : "COLLSCAN",
       											"nReturned" : 3,
       											"executionTimeMillisEstimate" : 0,
       											"works" : 4,
       											"advanced" : 3,
       											"needTime" : 0,
       											"needYield" : 0,
       											"saveState" : 0,
       											"restoreState" : 0,
       											"isEOF" : 1,
       											"direction" : "forward",
       											"docsExamined" : 3
       										}
       									}
       								}
       							}
       						}
      */
      // Shut down the ShardingTest cluster created at the top of the script.
      st.stop();
      
      Show
      Copy below into a test file const st = new ShardingTest({shards: 2, mongos: 1}); const mongos = st.s; const dbName = jsTestName(); const testDB = mongos.getDB(dbName); const shardedBaseColl = testDB.getCollection( "base_sharded" ); const unshardedUnionColl = testDB.getCollection( "unshardedUnionColl" ); // Ensure db's primary shard is shard1 assert .commandWorked( mongos.getDB( "admin" ).runCommand({enableSharding: dbName, primaryShard: st.shard1.shardName})); const baseCollDocs = [ { "_id" : 100, "localField" : "cakes" , "weird" : false }, // Split between first and second shard will be here. { "_id" : 101, "localField" : "cakes and kale" , "weird" : true }, ]; const unionCollDocs = [ { "_id" : 0, "title" : "cakes" }, { "_id" : 1, "title" : "cookies and cakes" }, { "_id" : 2, "title" : "vegetables" }, ]; function loadData(coll, docs) { coll.drop(); var bulk = coll.initializeUnorderedBulkOp(); for ( const doc of docs) { bulk.insert(doc); } assert .commandWorked(bulk.execute()); } loadData(shardedBaseColl, baseCollDocs); loadData(unshardedUnionColl, unionCollDocs); // Shard base collection. 
st.shardColl(shardedBaseColl, {_id: 1}, {_id: 101}, {_id: 101}); let result = shardedBaseColl.explain( "executionStats" ).aggregate([{ $unionWith: { coll: unshardedUnionColl.getName(), pipeline: [ { $project: { "_id" : 0, } } ] } }]); jsTestLog(result); /** snippet from result -- executionStats contained in mergerPart which is contrary to how it normally works * "mergerPart" : [ { "$mergeCursors" : {...} }, { "$unionWith" : { "coll" : "unshardedUnionColl" , "pipeline" : { "splitPipeline" : null , "shards" : { "unionWith_replicate-rs1" : { "host" : "ip-10-122-6-95:20044" , "explainVersion" : "1" , "queryPlanner" : { "namespace" : "unionWith_replicate.unshardedUnionColl" , "parsedQuery" : { }, "indexFilterSet" : false , "queryHash" : "8F2383EE" , "planCacheKey" : "7DF350EE" , "optimizationTimeMillis" : 0, "optimizedPipeline" : true , "maxIndexedOrSolutionsReached" : false , "maxIndexedAndSolutionsReached" : false , "maxScansToExplodeReached" : false , "prunedSimilarIndexes" : false , "winningPlan" : { "isCached" : false , "stage" : "PROJECTION_SIMPLE" , "transformBy" : { "_id" : false }, "inputStage" : { "stage" : "COLLSCAN" , "direction" : "forward" } }, "rejectedPlans" : [ ] }, "executionStats" : { "executionSuccess" : true , "nReturned" : 3, "executionTimeMillis" : 0, "totalKeysExamined" : 0, "totalDocsExamined" : 3, "executionStages" : { "isCached" : false , "stage" : "PROJECTION_SIMPLE" , "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "transformBy" : { "_id" : false }, "inputStage" : { "stage" : "COLLSCAN" , "nReturned" : 3, "executionTimeMillisEstimate" : 0, "works" : 4, "advanced" : 3, "needTime" : 0, "needYield" : 0, "saveState" : 0, "restoreState" : 0, "isEOF" : 1, "direction" : "forward" , "docsExamined" : 3 } } } } } */ st.stop();

      When explain is run with "executionStats" verbosity, the $unionWith stage executes its subpipeline even when the stage is in a merging pipeline. This doesn't match the general explain behavior for the merging pipeline (only queryPlanner info; see https://jira.mongodb.org/browse/SPM-3100) and doesn't seem to be expected behavior.

      If $unionWith is part of the merging pipeline, the verbosity passed into $unionWith's serialize function is "queryPlanner", and that verbosity is then passed on to preparePipelineAndExplain().

      However, the preparePipelineAndExplain() functions for sharded clusters never set the verbosity of the expCtx of the pipeline to the verbosity passed in. So if the expCtx has "executionStats" verbosity, the subpipeline is fully executed and has stats in the explain output even when "queryPlanner" behavior was expected.

      $unionWith is the only stage that calls preparePipelineAndExplain() in its serialize function, so it is the only one affected by this.

            Assignee:
            Unassigned Unassigned
            Reporter:
            erin.zhu@mongodb.com Erin Zhu
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

              Created:
              Updated: