-
Type:
Task
-
Resolution: Fixed
-
Priority:
Major - P3
-
None
-
Affects Version/s: None
-
Component/s: None
-
None
-
QE 2022-10-17, QE 2022-10-31
-
None
-
None
-
None
-
None
-
None
-
None
-
None
This task is to determine why the remaining SSB benchmarks don't use the column index. It may be that some expressions are not supported, or a stage cannot be pushed down, etc. A short comment on the ticket describing what's preventing the column index from being used is all that's necessary here.
Currently, ssb.q2.2 and ssb.q4.1 cannot use column-store-index predicate push-down.
Here are the steps to reproduce the issue:
1- You can compile the code like this:
activate && ./buildscripts/scons.py --ssl -j50 --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --cache=nolinked --modules="" ICECC=icecc CCACHE=ccache --dbg=off --ninja generate-ninja --link-model=static && ninja -j500 install-devcore
2- Run the server:
mongod --wiredTigerCacheSizeGB 16 --oplogSize 100000 --syncdelay 3600 --setParameter ttlMonitorEnabled=false --setParameter diagnosticDataCollectionEnabled=false --setParameter maxIndexBuildMemoryUsageMegabytes=16000 --setParameter featureFlagSbeFull=true --setParameter featureFlagColumnstoreIndexes=true
3- Download dump.zip
4- Unzip contents to reveal dump folder
5- Load the data like this:
cd ~/ssb
mongorestore --gzip dump
cd ~/mongo
mongo ssb --eval 'db.order_lineitem.createIndex({"$**": "columnstore"})'
6- Create a JS script named ssb_test.js:
(function() {
"use strict";
load("./measure-time.js");
load("jstests/libs/sbe_util.js"); // For "checkSBEEnabled.""
const REPEAT = 5;
const MEASURE_PERF = false;
const columnstoreEnabled =
checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]);
if (!columnstoreEnabled) {
jsTestLog("Columnstore index is NOT active.");
return;
} else {
jsTestLog("Columnstore index is active.");
}
let currIndexes = db.order_lineitem.getIndexes()
assert.eq(currIndexes.length, 2);
assert(currIndexes[0].name == "_id_" || currIndexes[1].name == "_id_");
assert(currIndexes[0].name == "$**_columnstore" || currIndexes[1].name == "$**_columnstore");
const queries = {
"ssb.Q2.2": {
"pipeline": [
{
$match: {
'part.p_brand':
{$in: ['Brand#21', 'Brand#22', 'Brand#15', 'Brand#41', 'Brand#41']}
}
},
{
$lookup: {
from: 'supplier',
localField: 'l_suppkey',
foreignField: 's_suppkey',
'as': 'supplier'
}
},
{$match: {'supplier.nation.region.r_name': 'ASIA'}},
{$addFields: {supplier: {$first: '$supplier'}}},
{
$group: {
_id: {order_year: {$year: '$order.o_orderdate'}, brand: '$part.p_brand'},
revenue: {$sum: {$multiply: ['$l_extendedprice', '$l_discount']}}
}
}
]
},
"ssb.Q4.1": {
"pipeline": [
{$match: {'part.p_mfgr': {$in: ['Manufacturer#1', 'Manufacturer#2']}}},
{
$lookup: {
from: 'supplier',
localField: 'l_suppkey',
foreignField: 's_suppkey',
'as': 'supplier'
}
},
{$addFields: {supplier: {$first: '$supplier'}}},
{$match: {'supplier.nation.region.r_name': 'AMERICA'}},
{
$lookup: {
from: 'customer',
localField: 'order.o_custkey',
foreignField: 'c_custkey',
'as': 'customer'
}
},
{$addFields: {customer: {$first: '$customer'}}},
{$match: {'customer.nation.region.r_name': 'AMERICA'}},
{
$group: {
_id: {
order_year: {$year: '$order.o_orderdate'},
c_nation: '$customer.nation.n_name'
},
revenue: {
$sum: {
$subtract: [
'$l_extendedprice',
{$multiply: ['$l_extendedprice', '$l_discount']}
]
}
}
}
},
{$addFields: {order_year: '$_id.order_year', customer_nation: '$_id.c_nation'}}
]
},
};
for (const [qName, qAgg] of Object.entries(queries)) {
print("--------------------------------------------------------------------------------------");
jsTestLog(`Handling ${qName}:`);
print(`${qName}.pipeline:`)
printjson(qAgg.pipeline)
if (qAgg.options) {
print(`${qName}.options:`)
printjson(qAgg.options)
}
let explain = qAgg.options ? db.order_lineitem.explain().aggregate(qAgg.pipeline, qAgg.options)
: db.order_lineitem.explain().aggregate(qAgg.pipeline);
print(`${qName} uses COLUMN_SCAN? ` + JSON.stringify(explain).includes('COLUMN_SCAN'));
printjson(explain);
if (MEASURE_PERF) {
let durations = [];
for (let i = 0; i < REPEAT; ++i) {
jsTestLog(`${qName} iteration ${i}`);
const dur = qAgg.options
? measureTime(() => db.order_lineitem.aggregate(qAgg.pipeline, qAgg.options))
: measureTime(() => db.order_lineitem.aggregate(qAgg.pipeline));
jsTestLog(`${qName} took ${dur}ms`);
durations.push(dur);
}
jsTestLog(`experiment,${qName},${durations}`);
}
}
})();
7- Run this script like this:
mongo ssb ./ssb_test.js
The output of this program shows the SBE plans for each SSB query.