[SERVER-63260] Fix the issue that $group returns different results for in-memory group and spilled group when using $avg Created: 03/Feb/22  Updated: 29/Oct/23  Resolved: 15/Mar/22

Status: Closed
Project: Core Server
Component/s: None
Affects Version/s: None
Fix Version/s: 6.0.0-rc0

Type: Bug Priority: Major - P3
Reporter: Mihai Andrei Assignee: Yoon Soo Kim
Resolution: Fixed Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Issue Links:
Related
related to SERVER-62940 DocumentSourceGroup returns different... Closed
Backwards Compatibility: Fully Compatible
Operating System: ALL
Sprint: QE 2022-03-21
Participants:

 Description   

See SERVER-62940 for details. This ticket is to investigate whether $avg suffers from the same issue as $sum and, if so, to implement a fix. This is likely the case: when the running total for $avg is a Decimal, we convert the DoubleDoubleSummation to a Decimal and lose precision in the same way that $sum does.



 Comments   
Comment by Githook User [ 15/Mar/22 ]

Author:

{'name': 'Yoonsoo Kim', 'email': 'yoonsoo.kim@mongodb.com', 'username': 'yun-soo'}

Message: SERVER-63260 Fix incorrect $avg result when merging partial results
Branch: master
https://github.com/mongodb/mongo/commit/a9b05b292e6d92ad08244578b7d48a6ea7506b90

Comment by Yoon Soo Kim [ 04/Feb/22 ]

Confirmed that $avg also has the same issue.

Here's a repro for the sharded $sum/$avg issue. This repro should be run on an optimized (non-debug) build: on a debug build the classic engine deliberately spills data for testing purposes and therefore always produces a wrong result.

(function() {
'use strict';
 
// Starts a two-shard cluster whose shards are launched with the SBE $group pushdown feature flag
// requested via setParameter.
const st = new ShardingTest({shards: 2, shardOptions: {setParameter: "featureFlagSBEGroupPushdown=true"}});

// 'db' goes through the ShardingTest handle itself; the other two handles talk to each shard
// directly so that per-shard server parameters can be read and changed.
const db = st.getDB("sharding");
const dbAtShard0 = st.shard0.getDB("sharding");
const dbAtShard1 = st.shard1.getDB("sharding");

// Confirms on every shard that the feature flag actually took effect before relying on it.
for (const shardDb of [dbAtShard0, dbAtShard1]) {
    const flagReply = assert.commandWorked(
        shardDb.adminCommand({getParameter: 1, featureFlagSBEGroupPushdown: 1}));
    assert(flagReply.featureFlagSBEGroupPushdown.value);
}

// Sharding must be enabled on the database before any of its collections can be sharded.
assert.commandWorked(st.s0.adminCommand({enableSharding: db.getName()}));
 
/**
 * Runs 'pipeline' against 'coll' twice -- first with both shards forced onto the classic engine,
 * then with that override switched off so the shards may use SBE -- and logs the first batch of
 * each run via jsTestLog() so that the two outputs can be compared.
 *
 * Both shards are left with internalQueryForceClassicEngine set to false when this returns.
 *
 * @param {Object} coll - collection handle the aggregate command is issued through (the callers
 *     in this script pass handles obtained from the ShardingTest's 'db').
 * @param {Array<Object>} pipeline - aggregation pipeline stages to run.
 * @returns {{classicalRes: Array, sbeRes: Array}} first batch of the classic-engine run and of
 *     the SBE run, in that order.
 */
const runShardedGroupOnBothEngine = (coll, pipeline) => {
    // Flips the engine override on every shard, failing loudly if any shard rejects it.
    const setForceClassicEngine = (forceClassic) => {
        for (const shardDb of [dbAtShard0, dbAtShard1]) {
            assert.commandWorked(shardDb.adminCommand(
                {setParameter: 1, internalQueryForceClassicEngine: forceClassic}));
        }
    };

    // Issues the aggregate and returns its first batch. The reply is validated with
    // assert.commandWorked() so that a failed aggregate reports the server's error instead of
    // a TypeError from dereferencing a missing 'cursor' field.
    const aggregateFirstBatch = () => {
        const reply = assert.commandWorked(
            coll.runCommand({aggregate: coll.getName(), pipeline: pipeline, cursor: {}}));
        return reply.cursor.firstBatch;
    };

    // Collects the classic engine's result first; it serves as the expected result.
    setForceClassicEngine(true);
    const classicalRes = aggregateFirstBatch();

    jsTestLog("classic result ---");
    jsTestLog(classicalRes);

    // Switches the shards over to SBE and repeats the very same aggregate.
    setForceClassicEngine(false);
    const sbeRes = aggregateFirstBatch();

    jsTestLog("SBE result ---");
    jsTestLog(sbeRes);

    // Hand both batches back so that callers can compare them programmatically as well.
    return {classicalRes, sbeRes};
};
 
/**
 * Drops 'coll' and then shards its namespace on a hashed _id key through st.s0, asserting that
 * the shardCollection command succeeds.
 *
 * @param {Object} coll - collection handle to reset and shard.
 * @returns {Object} the same 'coll' handle, so the call can be used inline in a declaration.
 */
const prepareCollection = (coll) => {
    // Start from a clean slate.
    coll.drop();

    // Makes sure that the collection is sharded.
    const shardCollectionCmd = {shardCollection: coll.getFullName(), key: {_id: "hashed"}};
    assert.commandWorked(st.s0.adminCommand(shardCollectionCmd));

    return coll;
};
 
// Hash-sharded collection
const coll = prepareCollection(db.partial_sum);
// Unsharded collection
const coll2 = db.partial_sum2;

// Builds the four documents for group key 'k'. The two large doubles (+1e34 and -1e34) cancel
// mathematically, so the exact total of 'n' per key is 0.11 and the exact average is 0.0275.
// A fresh array of fresh objects is produced on every call so that no document object is shared
// between inserts.
const makeDocsForKey = (k) => [
    {k: k, n: 1e+34},
    {k: k, n: NumberDecimal("0.1")},
    {k: k, n: NumberDecimal("0.01")},
    {k: k, n: -1e+34},
];

// Loads the same data set for keys 0..2 into both collections, alternating sharded/unsharded
// for each key.
for (const k of [0, 1, 2]) {
    for (const target of [coll, coll2]) {
        assert.commandWorked(target.insert(makeDocsForKey(k)));
    }
}
 
// Builds a two-stage pipeline: the first $group applies 'accumulator' to "$n" per "$k" and
// stores it under 'field'; the second $group collapses equal accumulator values into a single
// output document, so a correct run yields exactly one document.
const groupThenCollapse = (accumulator, field) => [
    {$group: {_id: "$k", [field]: {[accumulator]: "$n"}}},
    {$group: {_id: `$${field}`}},
];

// $sum on the hash-sharded collection: the two engines disagree on ordering and both return a
// spurious 0 alongside the expected 0.11.
runShardedGroupOnBothEngine(coll, groupThenCollapse("$sum", "s"));
// classic: [ { "_id" : NumberDecimal("0.11") }, { "_id" : NumberDecimal("0") } ]
// SBE: [ { "_id" : NumberDecimal("0") }, { "_id" : NumberDecimal("0.11") } ]

// $sum on the unsharded collection: both engines return the single expected value.
runShardedGroupOnBothEngine(coll2, groupThenCollapse("$sum", "s"));
// classic: [ { "_id" : NumberDecimal("0.11") } ]
// SBE: [ { "_id" : NumberDecimal("0.11") } ]

// $avg on the hash-sharded collection: same symptom as $sum.
runShardedGroupOnBothEngine(coll, groupThenCollapse("$avg", "a"));
// classic: [ { "_id" : NumberDecimal("0.0275") }, { "_id" : NumberDecimal("0") } ]
// SBE: [ { "_id" : NumberDecimal("0") }, { "_id" : NumberDecimal("0.0275") } ]

// $avg on the unsharded collection: both engines return the single expected value.
runShardedGroupOnBothEngine(coll2, groupThenCollapse("$avg", "a"));
// classic: [ { "_id" : NumberDecimal("0.0275") } ]
// SBE: [ { "_id" : NumberDecimal("0.0275") } ]

// Shuts the cluster down.
st.stop();
}());

Generated at Thu Feb 08 05:57:18 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.