pdbench/census/run_queries.sh
2016-10-06 19:10:22 -04:00

126 lines
3.9 KiB
Bash

# RUN QUERIES
# define scenarios
DB=postgres
USER=postgres
DATA_DIR=data
QUERIES_DIR=queries
declare sizes_s=( 100K 500K 750K 1M 5M 7.5M 10M 12.5M )
declare sizes=( 100000 500000 750000 1000000 5000000 7500000 10000000 12491667 )
declare noise=( 0.00005 0.0001 0.0005 0.001 )
declare queries=( q1.sql q2.sql q3.sql q4.sql q5.sql q6.sql )
# create tables for storing results statistics
psql -d $DB -U $USER -c 'create table results(t timestamp, scenario varchar, r_size int, f_size int, c_size int,
num_components int, comp_size1 int, comp_size2 int,
comp_size3 int, comp_size4_and_more int);'
psql -d $DB -U $USER -c 'create table q_results(t timestamp, scenario varchar,
query varchar, r_size int, f_size int, c_size int,
num_components int, comp_size1 int, comp_size2 int,
comp_size3 int, comp_size4_and_more int);'
for (( i = 0; i < 8; i++ ))
do
SIZE=${sizes[$i]}
SIZE_S=${sizes_s[$i]}
for (( j = 0; j < 4; j++ ))
do
NOISE=${noise[$j]}
SCENARIO=n${SIZE_S}x${NOISE}
echo
echo '---------------------------------------------'
echo $SCENARIO
echo 'Time: ' `date`
echo '---------------------------------------------'
# drop tables
psql --quiet -d $DB -U $USER -c 'drop table rt cascade;'
psql --quiet -d $DB -U $USER -c 'drop table ft cascade;'
psql --quiet -d $DB -U $USER -c 'drop table ct cascade;'
psql --quiet -d $DB -U $USER -c 'drop table wt cascade;'
# load data
DUMP_FILE=$SCENARIO.dump.gz
echo 'Load data...'
/usr/bin/time -f "%e sec" gunzip -c $DATA_DIR/$DUMP_FILE | psql --quiet -d $DB -U $USER
echo 'done.'
#TODO: vacuum + analyze
echo 'Vacuum + analyze ...'
psql --quiet -d $DB -U $USER -c 'vacuum analyze;'
echo 'done.'
# gather statistics about the data
echo 'Gather statistics...'
psql --quiet -d $DB -U $USER -f $QUERIES_DIR/statistics.sql
psql --quiet -d $DB -U $USER -c "select stat('$SCENARIO') as input_statistics;"
echo 'done.'
# run queries
for (( k = 1; k <= 6; k++ ))
do
echo "Query $k"
echo "---------"
QUERY=${queries[$k-1]}
CLEAN_QUERY="clean_$QUERY"
ONEWORLD_QUERY="oneworld_$QUERY"
ONEWORLD_CLEAN_QUERY="oneworld_$CLEAN_QUERY"
if [ $k -eq 5 ] # rename columns for the join in Q5
then
echo 'renaming columns'
psql -d $DB -U $USER -c 'alter table r2 rename POWSTATE to POWSTATE1;'
psql -d $DB -U $USER -c 'alter table r3 rename POWSTATE to POWSTATE2;'
fi
if [ $l -eq 1 ] # only run the one-world query the first time for the given size
then
# clean old oneworld results
psql -d $DB -U $USER -f $QUERIES_DIR/$ONEWORLD_CLEAN_QUERY
if [ $k -eq 5 ] # rename columns for the join in Q5
then
echo 'renaming columns'
psql -d $DB -U $USER -c 'alter table rr2 rename POWSTATE to POWSTATE1;'
psql -d $DB -U $USER -c 'alter table rr3 rename POWSTATE to POWSTATE2;'
fi
# run oneworld query
echo "Query $k one world"
echo "---------"
/usr/bin/time -f "%e sec" psql -d $DB -U $USER -f $QUERIES_DIR/$ONEWORLD_QUERY
fi
# clean old results
psql --quiet -d $DB -U $USER -f $QUERIES_DIR/$CLEAN_QUERY
# load pg/plSQL scripts for joins
if [ $k -eq 3 ];
then
psql -d $DB -U $USER -f $QUERIES_DIR/fun-q3.sql;
elif [ $k -eq 5 ];
then
psql -d $DB -U $USER -f $QUERIES_DIR/fun-q5.sql;
fi
# run query
echo "Query $k world-set, run $l"
echo "-------------------------"
/usr/bin/time -f "%e sec" psql -d $DB -U $USER -f $QUERIES_DIR/$QUERY
# gather statistics about the results
if [ $l -eq 1 ]
then
R="r$k"
F="f$k"
C="c$k"
psql -d $DB -U $USER -f $QUERIES_DIR/statistics.sql
psql -d $DB -U $USER -c "select result_stat('$SCENARIO', '$k', '$R', '$F', '$C') as result_statistics;"
fi
done
done
done