# pdbench/census/generate_data.sh
#
# Listing metadata: 74 lines, 2.5 KiB, Bash

# GENERATE DATA
#
# Scenario definitions.
#
# Connection settings handed to psql / pg_dump (-d / -U), and the directory
# that receives the gzip-compressed dumps.
# NOTE(review): USER is also the conventional login-name environment
# variable; assigning it here overrides that value for the rest of the script.
DB="postgres"
USER="postgres"
DATA_DIR="data"

# sizes_s and sizes are parallel arrays: entry i of sizes_s is the label used
# in the scenario name, entry i of sizes is the matching upper bound on tid
# used when rt is built from pums.
declare -a sizes_s=(
  100K 500K 750K 1M
  5M 7.5M 10M 12.5M
)
declare -a sizes=(
  100000 500000 750000 1000000
  5000000 7500000 10000000 12491667
)

# Noise values passed to the DataNoise tool, one scenario per value.
declare -a noise=(
  0.00005 0.0001 0.0005 0.001
  0.005 0.01 0.05 0.1
)
# Generate one benchmark data set per (size, noise) scenario.
#
# For every scenario the loop below
#   1. drops the rt/ft/ct/wt tables left over from a previous scenario,
#   2. rebuilds rt from the pums tuples with tid <= SIZE and indexes it,
#   3. runs the DataNoise Java tool and pipes its output into psql,
#   4. dumps rt/ft/ct/wt to $DATA_DIR/<scenario>.or.dump.gz,
#   5. runs the chase (Chase/prsel.sql, index creation, Chase Java tool),
#   6. dumps rt/ft/ct/wt again to $DATA_DIR/<scenario>.dump.gz.
#
# Globals read: DB, USER, DATA_DIR, sizes, sizes_s, noise.
# Globals written: SIZE, SIZE_S, NOISE, SCENARIO (current scenario).
# Individual step failures do not abort the run; the script carries on with
# the next step, as it always has.

# Run psql against the benchmark database; all arguments are passed through.
run_psql() {
  psql -d "$DB" -U "$USER" "$@"
}

# Dump rt, ft, ct and wt (without ownership information) as gzip.
# Arguments: $1 - path of the .gz file to write
# Returns:   0 on success, 1 if pg_dump or gzip reported a failure
dump_relations() {
  local target=$1
  local -a rc
  pg_dump -U "$USER" -O -t rt -t ft -t ct -t wt "$DB" | gzip > "$target"
  # Must be captured immediately: any further command resets PIPESTATUS.
  rc=("${PIPESTATUS[@]}")
  if (( rc[0] != 0 || rc[1] != 0 )); then
    # Without this check a failed pg_dump still leaves a valid-looking,
    # empty archive behind because gzip itself succeeds.
    printf 'warning: dump to %s failed (pg_dump=%s, gzip=%s)\n' \
      "$target" "${rc[0]}" "${rc[1]}" >&2
    return 1
  fi
}

# The dump redirections below fail if the output directory is missing.
mkdir -p -- "$DATA_DIR"

# NOTE(review): only the first four noise values are used although the noise
# array holds more entries -- kept as in the original; confirm it is intended.
noise_count=4

for (( i = 0; i < ${#sizes[@]}; i++ )); do
  SIZE=${sizes[i]}
  SIZE_S=${sizes_s[i]}

  for (( j = 0; j < noise_count; j++ )); do
    NOISE=${noise[j]}
    SCENARIO="n${SIZE_S}x${NOISE}"

    echo
    echo '---------------------------------------------'
    echo "$SCENARIO"
    echo 'Time: ' "$(date)"
    echo '---------------------------------------------'

    # drop tables ("if exists" avoids errors when a table is not there yet,
    # e.g. on the very first scenario)
    for tbl in rt ft ct wt; do
      run_psql -q -c "drop table if exists ${tbl} cascade;"
    done

    # create tables
    run_psql -q -c "create table rt AS SELECT * FROM pums WHERE tid <= ${SIZE};"
    run_psql -q -c 'create index rtididx on rt(tid);'

    # introduce noise: whatever the DataNoise tool prints is executed by psql
    echo "Generating Noise: $SCENARIO ... "
    /usr/bin/time -f "%e sec" \
      java -classpath DataNoise/bin:DataNoise/jdbc3.jar Main \
      rt ft ct "$SIZE" "$NOISE" DataNoise/settings.xml \
      | run_psql

    # dump or-set relations before chase
    echo "Dumping or-set relations..."
    dump_relations "$DATA_DIR/$SCENARIO.or.dump.gz"

    # chase
    echo
    echo "Chasing..."

    # make sure temporary results are cleaned
    for tbl in eqrel eqmap eqcomp eqworld invalid; do
      run_psql -c "drop table if exists ${tbl};"
    done

    run_psql -f Chase/prsel.sql
    run_psql -c 'create index ftididx on ft(tid);'
    run_psql -c 'create index fcididx on ft(cid);'
    run_psql -c 'create index fhididx on ft(hid);'
    run_psql -c 'create index ccididx on ct(cid);'
    run_psql -c 'create index chididx on ct(hid);'

    /usr/bin/time -f "%e sec" \
      java -classpath Chase/bin:Chase/jdbc3.jar Main \
      rt ft ct Chase/dependencies.xml

    # dump tables
    echo
    echo "Dumping chased relations..."
    dump_relations "$DATA_DIR/$SCENARIO.dump.gz"
  done
done