Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-42565

Aggregations and find commands sort missing fields differently

    • Minor Change
    • ALL
    • v4.2, v4.0, v3.6
    • Hide
      python3 buildscripts/resmoke.py --storageEngine wiredTiger --suites aggregation missing_sort_key.js
      
      missing_sort_key.js
      // Repro script: runs the same two-key descending sort ({a: -1, uniquer: -1}) once through
      // the find command and once through an aggregation $sort stage, then prints both result
      // sets so their ordering can be compared by eye.
      'use strict';
      // Start from an empty collection so earlier runs cannot affect the output.
      db.my_coll.drop();
      
      // Eight documents covering four shapes of the sort field `a`, two of each:
      // explicit undefined (1, 8), explicit null (2, 7), empty array (3, 6), and
      // field absent altogether (4, 5). `uniquer` differs in every document, so as the
      // second sort key it breaks ties between documents whose `a` keys compare equal.
      assert.commandWorked(db.my_coll.insert([
          {_id: 1, uniquer: 1, a: undefined},
          {_id: 2, uniquer: 2, a: null},
          {_id: 3, uniquer: 3, a: []},
          {_id: 4, uniquer: 4},
          {_id: 5, uniquer: 5},
          {_id: 6, uniquer: 6, a: []},
          {_id: 7, uniquer: 7, a: null},
          {_id: 8, uniquer: 8, a: undefined},
      ]));
      
      // find version of the sort. The projection requests the `sortKey` metadata via $meta
      // so the printed find results include it under the `sortKey` field.
      const findCmd = {
          find: "my_coll",
          sort: {a: -1, uniquer: -1},
          projection: {sortKey: {$meta: 'sortKey'}},
      };
      
      // Aggregation version of the same sort. Note that, unlike findCmd, this pipeline does
      // not project the sort key, so only document order can be compared in its output.
      const aggCmd = {
          aggregate: "my_coll",
          pipeline: [{$sort: {a: -1, uniquer: -1}}],
          cursor: {},
      };
      
      // Run each command, wrap its response in a DBCommandCursor, and collect the results
      // into an array before printing.
      const findRes = db.runCommand(findCmd);
      const res1 = new DBCommandCursor(db, findRes).toArray();
      const aggRes = db.runCommand(aggCmd);
      const res2 = new DBCommandCursor(db, aggRes).toArray();
      print('find command results');
      print(tojson(res1));
      print('agg command results');
      print(tojson(res2));
      
      Show
      python3 buildscripts/resmoke.py --storageEngine wiredTiger --suites aggregation missing_sort_key.js missing_sort_key.js 'use strict' ; db.my_coll.drop(); assert.commandWorked(db.my_coll.insert([ {_id: 1, uniquer: 1, a: undefined}, {_id: 2, uniquer: 2, a: null }, {_id: 3, uniquer: 3, a: []}, {_id: 4, uniquer: 4}, {_id: 5, uniquer: 5}, {_id: 6, uniquer: 6, a: []}, {_id: 7, uniquer: 7, a: null }, {_id: 8, uniquer: 8, a: undefined}, ])); const findCmd = { find: "my_coll" , sort: {a: -1, uniquer: -1}, projection: {sortKey: {$meta: 'sortKey' }}, }; const aggCmd = { aggregate: "my_coll" , pipeline: [{$sort: {a: -1, uniquer: -1}}], cursor: {}, }; const findRes = db.runCommand(findCmd); const res1 = new DBCommandCursor(db, findRes).toArray(); const aggRes = db.runCommand(aggCmd); const res2 = new DBCommandCursor(db, aggRes).toArray(); print( 'find command results' ); print(tojson(res1)); print( 'agg command results' ); print(tojson(res2));
    • Query 2019-08-26, Query 2019-10-21, Query 2019-11-04

      An aggregation pipeline's $sort and a find command's sort treat missing fields differently. A find command evaluates a missing field as equivalent to null, while an aggregation pipeline evaluates a missing field as equivalent to undefined. As a consequence, find commands and aggregation pipelines do not guarantee the same sort order when at least one document in the collection is missing at least one of the fields being sorted on. This behavior likely arises from the difference between the fast and slow methods for extracting a sortKey, as the following debug output for a single document shows:

      @@@ (fast) fast sort key for {_id: "missing 1", uniquer: 4} is [MISSING, 4]
      @@@ (fast) slow sort key for {_id: "missing 1", uniquer: 4} is [null, 4]
      

      Diff that adds the "@@@" debug logging of the fast and slow sort keys in DocumentSourceSort::extractSortKey:
      diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp
      index fe32168c17..4d9fde0afe 100644
      --- a/src/mongo/db/pipeline/document_source_sort.cpp
      +++ b/src/mongo/db/pipeline/document_source_sort.cpp
      @@ -27,6 +27,8 @@
        *    it in the license file.
        */
       
      +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
      +
       #include "mongo/platform/basic.h"
       
       #include "mongo/db/pipeline/document_source_sort.h"
      @@ -41,6 +43,7 @@
       #include "mongo/db/query/collation/collation_index_key.h"
       #include "mongo/platform/overflow_arithmetic.h"
       #include "mongo/s/query/document_source_merge_cursors.h"
      +#include "mongo/util/log.h"
       
       namespace mongo {
       
      @@ -358,6 +361,11 @@ std::pair<Value, Document> DocumentSourceSort::extractSortKey(Document&& doc) co
       
           auto fastKey = extractKeyFast(doc);
           if (fastKey.isOK()) {
      +        log() << "@@@ (fast) fast sort key for " << doc << " is " << fastKey.getValue();
      +        auto tempInMemorySortKey =
      +            deserializeSortKey(_sortExecutor->sortPattern().size(), extractKeyWithArray(doc));
      +        log() << "@@@ (fast) slow sort key for " << doc << " is " << tempInMemorySortKey;
      +
               inMemorySortKey = std::move(fastKey.getValue());
               if (pExpCtx->needsMerge) {
                   serializedSortKey =
      @@ -368,9 +376,12 @@ std::pair<Value, Document> DocumentSourceSort::extractSortKey(Document&& doc) co
               // sort key, which is an object with empty field names. We then need to convert this BSON
               // representation into the corresponding array of keys as a Value. BSONObj {'': 1, '': [2,
               // 3]} becomes Value [1, [2, 3]].
      +        log() << "@@@ (slow) fast sort key for " << doc
      +              << " couldn't be extracted: " << fastKey.getStatus();
               serializedSortKey = extractKeyWithArray(doc);
               inMemorySortKey =
                   deserializeSortKey(_sortExecutor->sortPattern().size(), *serializedSortKey);
      +        log() << "@@@ (slow) slow sort key for " << doc << " is " << inMemorySortKey;
           }
       
           MutableDocument toBeSorted(std::move(doc));
      

            Assignee:
            justin.seyster@mongodb.com Justin Seyster
            Reporter:
            claire.childs@mongodb.com Claire Childs (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            16 Start watching this issue

              Created:
              Updated:
              Resolved: