[SERVER-84910] Investigate why remaining SSB queries don't use column index Created: 24/Sep/22  Updated: 12/Jan/24  Resolved: 18/Oct/22

Status: Closed
Project: Core Server
Component/s: None
Affects Version/s: None
Fix Version/s: None

Type: Task Priority: Major - P3
Reporter: Ian Boros Assignee: Mohammad Dashti (Inactive)
Resolution: Fixed Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Issue Links:
Related
Sprint: QE 2022-10-17, QE 2022-10-31
Participants:

 Description   

This task is to determine why the remaining SSB benchmarks don't use the column index. It may be that some expressions are not supported, or a stage cannot be pushed down, etc. A short comment on the ticket describing what's preventing the column index from being used is all that's necessary here.

Currently, ssb.q2.2 and ssb.q4.1 cannot use column-store-index predicate push-down.

Here are the steps to reproduce the issue:

1- You can compile the code like this:

activate && ./buildscripts/scons.py --ssl -j50 --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --cache=nolinked --modules="" ICECC=icecc CCACHE=ccache --dbg=off --ninja generate-ninja --link-model=static && ninja -j500 install-devcore

2- Run the server:

mongod --wiredTigerCacheSizeGB 16 --oplogSize 100000 --syncdelay 3600 --setParameter ttlMonitorEnabled=false --setParameter diagnosticDataCollectionEnabled=false --setParameter maxIndexBuildMemoryUsageMegabytes=16000 --setParameter featureFlagSbeFull=true --setParameter featureFlagColumnstoreIndexes=true

3- Download dump.zip
4- Unzip contents to reveal dump folder
5- Load the data like this:

cd ~/ssb
mongorestore --gzip dump
cd ~/mongo
mongo ssb --eval 'db.order_lineitem.createIndex({"$**": "columnstore"})'

6- Create a JS script named ssb_test.js:

(function() {
"use strict";
 
load("./measure-time.js");
 
load("jstests/libs/sbe_util.js");  // For "checkSBEEnabled.""
 
const REPEAT = 5;
const MEASURE_PERF = false;
 
const columnstoreEnabled =
    checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]);
if (!columnstoreEnabled) {
    jsTestLog("Columnstore index is NOT active.");
    return;
} else {
    jsTestLog("Columnstore index is active.");
}
 
let currIndexes = db.order_lineitem.getIndexes()
assert.eq(currIndexes.length, 2);
assert(currIndexes[0].name == "_id_" || currIndexes[1].name == "_id_");
assert(currIndexes[0].name == "$**_columnstore" || currIndexes[1].name == "$**_columnstore");
 
const queries = {
    "ssb.Q2.2": {
        "pipeline": [
            {
                $match: {
                    'part.p_brand':
                        {$in: ['Brand#21', 'Brand#22', 'Brand#15', 'Brand#41', 'Brand#41']}
                }
            },
            {
                $lookup: {
                    from: 'supplier',
                    localField: 'l_suppkey',
                    foreignField: 's_suppkey',
                    'as': 'supplier'
                }
            },
            {$match: {'supplier.nation.region.r_name': 'ASIA'}},
            {$addFields: {supplier: {$first: '$supplier'}}},
            {
                $group: {
                    _id: {order_year: {$year: '$order.o_orderdate'}, brand: '$part.p_brand'},
                    revenue: {$sum: {$multiply: ['$l_extendedprice', '$l_discount']}}
                }
            }
        ]
    },
    "ssb.Q4.1": {
        "pipeline": [
            {$match: {'part.p_mfgr': {$in: ['Manufacturer#1', 'Manufacturer#2']}}},
            {
                $lookup: {
                    from: 'supplier',
                    localField: 'l_suppkey',
                    foreignField: 's_suppkey',
                    'as': 'supplier'
                }
            },
            {$addFields: {supplier: {$first: '$supplier'}}},
            {$match: {'supplier.nation.region.r_name': 'AMERICA'}},
            {
                $lookup: {
                    from: 'customer',
                    localField: 'order.o_custkey',
                    foreignField: 'c_custkey',
                    'as': 'customer'
                }
            },
            {$addFields: {customer: {$first: '$customer'}}},
            {$match: {'customer.nation.region.r_name': 'AMERICA'}},
            {
                $group: {
                    _id: {
                        order_year: {$year: '$order.o_orderdate'},
                        c_nation: '$customer.nation.n_name'
                    },
                    revenue: {
                        $sum: {
                            $subtract: [
                                '$l_extendedprice',
                                {$multiply: ['$l_extendedprice', '$l_discount']}
                            ]
                        }
                    }
                }
            },
            {$addFields: {order_year: '$_id.order_year', customer_nation: '$_id.c_nation'}}
        ]
    },
};
 
for (const [qName, qAgg] of Object.entries(queries)) {
    print("--------------------------------------------------------------------------------------");
    jsTestLog(`Handling ${qName}:`);
    print(`${qName}.pipeline:`)
    printjson(qAgg.pipeline)
    if (qAgg.options) {
        print(`${qName}.options:`)
        printjson(qAgg.options)
    }
    let explain = qAgg.options ? db.order_lineitem.explain().aggregate(qAgg.pipeline, qAgg.options)
                               : db.order_lineitem.explain().aggregate(qAgg.pipeline);
    print(`${qName} uses COLUMN_SCAN? ` + JSON.stringify(explain).includes('COLUMN_SCAN'));
    printjson(explain);
    if (MEASURE_PERF) {
        let durations = [];
        for (let i = 0; i < REPEAT; ++i) {
            jsTestLog(`${qName} iteration ${i}`);
            const dur = qAgg.options
                ? measureTime(() => db.order_lineitem.aggregate(qAgg.pipeline, qAgg.options))
                : measureTime(() => db.order_lineitem.aggregate(qAgg.pipeline));
            jsTestLog(`${qName} took ${dur}ms`);
            durations.push(dur);
        }
        jsTestLog(`experiment,${qName},${durations}`);
    }
}
})();

7- Run this script like this:

mongo ssb ./ssb_test.js

The output of this program shows the SBE plans for each SSB query.



 Comments   
Comment by Mohammad Dashti (Inactive) [ 18/Oct/22 ]

BTW, ssb.q1.3 can be rewritten using $addFields as follows (to avoid $expr) to enable the column-store predicate push-down usage:

db.order_lineitem.aggregate([
    {
        $addFields:
            {order_year: {$year: '$order.o_orderdate'}, order_week: {$week: '$order.o_orderdate'}}
    },
    {
        $match: {
            $and: [
                {order_year: 1994},
                {order_week: 6},
                {l_discount: {$gte: 0.04}},
                {l_discount: {$lte: 0.06}},
                {l_quantity: {$gte: 26}},
                {l_quantity: {$lt: 33}}
            ]
        }
    },
    {$group: {_id: 1, revenue: {$sum: {$multiply: ['$l_extendedprice', '$l_discount']}}}}
])

Comment by Mohammad Dashti (Inactive) [ 18/Oct/22 ]

Confirmed the above comment, i.e., after adding support for $in predicare push-down on on the mdashti/SERVER-69885 branch, both ssb.q2.2 and ssb.q4.1 queries are now using COLUMN_SCAN and push down their predicates.

However, in between master@9529d985675 (examined in WRITING-11919) and the most recent version of master@be9cb19df, now ssb.q1.3 is not using COLUMN_SCAN anymore, due to the usage of $expr in its definition:

"ssb.Q1.3": {
    "pipeline": [
        {
            $match: {
                $and: [
                    {$expr: {$eq: [{$year: '$order.o_orderdate'}, 1994]}},
                    {$expr: {$eq: [{$week: '$order.o_orderdate'}, 6]}},
                    {l_discount: {$gte: 0.04}},
                    {l_discount: {$lte: 0.06}},
                    {l_quantity: {$gte: 26}},
                    {l_quantity: {$lt: 33}}
                ]
            }
        },
        {$group: {_id: 1, revenue: {$sum: {$multiply: ['$l_extendedprice', '$l_discount']}}}}
    ]
}

Comment by Mohammad Dashti (Inactive) [ 13/Oct/22 ]

Looking at the ssb.q2.2 and ssb.q4.1 queries, both have $in, which is not going to be supported before SERVER-69885 lands on the master branch. I'm going to check on the mdashti/SERVER-69885 branch to see if the push-down happens after this change or not.

"ssb.Q2.2": {
    "pipeline": [
        {
            $match: {
                'part.p_brand':
                    {$in: ['Brand#21', 'Brand#22', 'Brand#15', 'Brand#41', 'Brand#41']}
            }
        },
        {
            $lookup: {
                from: 'supplier',
                localField: 'l_suppkey',
                foreignField: 's_suppkey',
                'as': 'supplier'
            }
        },
        {$match: {'supplier.nation.region.r_name': 'ASIA'}},
        {$addFields: {supplier: {$first: '$supplier'}}},
        {
            $group: {
                _id: {order_year: {$year: '$order.o_orderdate'}, brand: '$part.p_brand'},
                revenue: {$sum: {$multiply: ['$l_extendedprice', '$l_discount']}}
            }
        }
    ]
},
"ssb.Q4.1": {
    "pipeline": [
        {$match: {'part.p_mfgr': {$in: ['Manufacturer#1', 'Manufacturer#2']}}},
        {
            $lookup: {
                from: 'supplier',
                localField: 'l_suppkey',
                foreignField: 's_suppkey',
                'as': 'supplier'
            }
        },
        {$addFields: {supplier: {$first: '$supplier'}}},
        {$match: {'supplier.nation.region.r_name': 'AMERICA'}},
        {
            $lookup: {
                from: 'customer',
                localField: 'order.o_custkey',
                foreignField: 'c_custkey',
                'as': 'customer'
            }
        },
        {$addFields: {customer: {$first: '$customer'}}},
        {$match: {'customer.nation.region.r_name': 'AMERICA'}},
        {
            $group: {
                _id: {
                    order_year: {$year: '$order.o_orderdate'},
                    c_nation: '$customer.nation.n_name'
                },
                revenue: {
                    $sum: {
                        $subtract: [
                            '$l_extendedprice',
                            {$multiply: ['$l_extendedprice', '$l_discount']}
                        ]
                    }
                }
            }
        },
        {$addFields: {order_year: '$_id.order_year', customer_nation: '$_id.c_nation'}}
    ]
}

Generated at Thu Feb 08 06:56:19 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.