#!/bin/bash
#
# Rebuilds testdb.big_collection on a MongoDB deployment:
#   1. drops the existing collection,
#   2. regenerates its contents with mongo-mangler.py,
#   3. creates the first <nindexes> indexes from the list below,
#   4. reshards the collection on a hashed card_number key.
# Steps 2-4 are wrapped in `time` so their durations are reported.
#
# Usage: <script> <hostport> <docs> <nindexes>
#   hostport  connection target; handed to `mongo` and to mongo-mangler's -m
#   docs      value handed to mongo-mangler's -s option
#             (presumably the number of documents to generate - TODO confirm)
#   nindexes  how many of the predefined indexes to create (0..16; larger
#             values are capped at the number of predefined indexes)

set -u

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the command-line synopsis to stderr and abort.
usage() {
  printf 'Usage: %s <hostport> <docs> <nindexes>\n' "${0##*/}" >&2
  exit 2
}

# Echo a trace line, then run the mongo shell over TLS (hostname validation
# disabled) with the given arguments. Returns the mongo shell's exit status.
runmongo() {
  echo "mongo" "$@"
  mongo --tls --tlsAllowInvalidHostnames "$@"
}

# Echo a trace line, then run mongo-mangler.py (expected in the current
# directory) with the given arguments. Returns python3's exit status.
runmangler() {
  # NOTE(review): the trace prints "--url", but that token is not passed to
  # mongo-mangler.py below; kept as-is so the script's output is unchanged.
  echo "mangler" "--url" "$@"
  python3 mongo-mangler.py "$@"
}

# Candidate index key specifications, in the order they are created.
declare -a indexes
indexes=(
  '{"name.last": 1, "name.first": 1, "name.middle":1}'
  '{"age" : 1}'
  '{"race" : 1}'
  '{"marital_status": 1}'
  '{"legal_status": 1}'
  '{"dependent_count": 1}'
  '{"income_category": 1}'
  '{"home_ownership": 1}'
  '{"employment_status": 1}'
  '{"employment_industry": 1}'
  '{"employment_position": 1}'
  '{"employment_length": 1}'
  '{"credit_score": 1}'
  '{"card_start": 1}'
  '{"card_expiry": 1}'
  '{"rank": 1}'
)

# Extra trailing arguments are tolerated, as they were before.
(( $# >= 3 )) || usage

hostport="$1"
docs="$2"
nindexes="$3"

[[ -n "$hostport" && -n "$docs" ]] || usage
[[ "$nindexes" =~ ^[0-9]+$ ]] \
  || die "nindexes must be a non-negative integer, got: '$nindexes'"

runmongo "$hostport" --eval 'db.getSiblingDB("testdb").big_collection.drop()' \
  || die "dropping testdb.big_collection failed"

# create the data
time runmangler -m "$hostport" -o 'testdb' -t 'big_collection' -s "$docs" -p './pipeline_garaudy.js' \
  || die "data generation failed"

# create the indexes: join the first $nindexes specs with commas
indexspecs=""
remaining=$(( 10#$nindexes ))   # force base 10 so "08" is not parsed as octal
for index in "${indexes[@]}"; do
  (( remaining-- > 0 )) || break
  indexspecs+="${indexspecs:+,}$index"
done

if [[ -n "$indexspecs" ]]; then
  time runmongo "$hostport" --eval "db.getSiblingDB('testdb').big_collection.createIndexes([$indexspecs])" \
    || die "index creation failed"
else
  # An empty index list would only produce a server-side error, so skip it.
  echo "no indexes requested; skipping createIndexes"
fi

# mangler starts sharded by ID, we reshard by card number
time runmongo "$hostport" --eval 'db.adminCommand({reshardCollection: "testdb.big_collection", key: {card_number: "hashed"}})' \
  || die "resharding failed"