refactor the organization of tooling.

macrotesting
Nick Brown 2022-07-01 17:07:02 -04:00
parent 256f0cf978
commit 73cb9049e6
Signed by: bicknrown
GPG Key ID: 47AF495B3DCCE9C3
27 changed files with 49 additions and 32 deletions

.gitignore

@@ -111,3 +111,15 @@ spark-warehouse/
# For Node.js
node_modules
# build flags
.compileSetup
.package
.data
.graphSetup
# output data
timing/output/*
timing/*.tar
plotting/output/*
plotting/.venv

Makefile

@@ -2,56 +2,60 @@ SPARKSHELL = bin/spark-shell
SBT = build/sbt
SBT_FLAGS = -Pscala-2.13
FLAGS = .flags
.PHONY: cleanbuild cleandata cleanflags cleanvenv cleangraphs cleanall compile interactive data
all: setup compile package data graphs
all: .compileSetup .package .data graph
cleanbuild:
$(SBT) $(SBT_FLAGS) clean
cleandata:
rm -rf scripts/timing/output
rm -rf ./timing/output
cleanflags:
rm -rf .flags/
rm -rf .data .package .compileSetup .graphSetup
cleanvenv:
rm -rf ./scripts/plotting/.venv/
rm -rf ./plotting/.venv/
cleangraphs:
rm -rf scripts/plotting/output
rm -rf ./plotting/output
cleanall:
$(MAKE) cleanflags cleanvenv cleangraphs cleanbuild
$(FLAGS):
mkdir $(FLAGS)
$(FLAGS)/setup: | $(FLAGS)
.compileSetup:
./dev/change-scala-version.sh 2.13
cd scripts/plotting && python3 -m venv .venv &&\
source .venv/bin/activate &&\
python3 -m pip install -r requierments.txt
touch $@
setup: $(FLAGS)/setup
.graphSetup:
cd ./plotting && python3 -m venv .venv &&\
source .venv/bin/activate &&\
python3 -m pip install -r requirements.txt
touch $@
compile: setup
compile: .compileSetup
$(SBT) $(SBT_FLAGS) compile
package: setup
.package: .compileSetup
$(SBT) $(SBT_FLAGS) package
touch $@
interactive: setup
package:
$(MAKE) .package
interactive: .compileSetup
$(SBT) $(SBT_FLAGS)
data: setup
cd scripts/timing && ./tpchBench.sh
.data: .package
cd ./timing && ./tpchBench.sh
touch $@
graph: setup
cd ./scripts/plotting/ &&\
mkdir output &&\
data:
$(MAKE) .data
graph: .data .graphSetup
cd ./plotting/ &&\
mkdir -p output &&\
source .venv/bin/activate &&\
python3 plotting.py
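
The rewritten targets replace the `.flags/` directory with per-target stamp files (`.compileSetup`, `.package`, `.data`, `.graphSetup`): each recipe does its work once, then `touch $@` records completion so make treats the target as up to date on later runs. A minimal sketch of the same run-once pattern in Python (the `run_once` helper and its usage are hypothetical, for illustration only):

import pathlib
import subprocess

def run_once(stamp: pathlib.Path, command: list[str]) -> None:
    # Skip the command if the stamp file already exists,
    # the way make skips a target whose stamp is up to date.
    if stamp.exists():
        return
    subprocess.run(command, check=True)
    stamp.touch()  # the Python equivalent of `touch $@`

# Hypothetical usage mirroring the .compileSetup target:
run_once(pathlib.Path(".compileSetup"),
         ["./dev/change-scala-version.sh", "2.13"])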

plotting/plotting.py

@@ -1,4 +1,5 @@
from collections import defaultdict
import time
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
@@ -30,10 +31,10 @@ import re
# #
#############################
# where you could have an arbitrary number of sub directories and data files with arbitrary names
dataOutputDirectory = "./scripts/timing/output/"
dataOutputDirectory = "../timing/output/"
# graph output directory
graphOutputDirectory = "./scripts/plotting/output/"
graphOutputDirectory = "../plotting/output/"
# regular expression string to match the JSON data string
reDataString = '{"data":\D.*}}'
@@ -45,7 +46,7 @@ reQueryString = r'Query.(?P<number>\d\d|\d)."'
numberOfRuns = -1
# graph output names
stackedGraphOutputFile = "stackedGraph.pdf"
stackedGraphOutputFile = "stackedGraph-" + str(time.strftime("%F__%H_%M_%S")) + ".pdf"
# storage for totals, then the averages of the data from runs
queryDataDict = defaultdict(dict)
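
The graph file name now embeds a timestamp via `time.strftime`, so repeated runs no longer overwrite the previous `stackedGraph.pdf`. A quick sketch of what the expression produces (note that `%F` is a C-library shorthand for `%Y-%m-%d` and is not available on every platform; the output value below is illustrative):

import time

# "%F__%H_%M_%S" yields e.g. "2022-07-01__17_07_02";
# "%Y-%m-%d" is the portable spelling of "%F".
name = "stackedGraph-" + time.strftime("%Y-%m-%d__%H_%M_%S") + ".pdf"
print(name)  # e.g. stackedGraph-2022-07-01__17_07_02.pdf

Incidentally, `time.strftime` already returns a string, so the `str(...)` wrapper in the new line is redundant but harmless.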

plotting/requirements.txt

@@ -1,4 +1,2 @@
numpy
pathlib
matplotlib
pandas

timing/tpchBench.sh

@@ -6,17 +6,17 @@ trap break INT
# paths.
OUTPUT="./output/"
SHELLCOMMANDS="./shellCommands/"
SPARKSHELL="../../bin/spark-shell"
SPARKSHELL="../bin/spark-shell"
RUNS=5
# sanity checks.
if [ -f "../../bin/spark-shell" ]; then
if [ -f "$SPARKSHELL" ]; then
echo "spark-shell found!"
else
echo "spark-shell not found! ../../bin/spark-shell"
echo "Please run from the \`scripts/timing\` folder"
echo "spark-shell not found! $SPARKSHELL"
echo "Please run from the \`./timing\` folder"
exit 1
fi
@@ -42,3 +42,5 @@ for run in $(seq 1 $RUNS); do
cat $file | $SPARKSHELL | tee ${OUTPUT}run${run}/$(basename "$file" .scala).txt
done
done
tar -cvf output_$(date +%F__%H_%M_%S).tar ./output
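
The new final step archives the results as `output_<timestamp>.tar`, reusing the same `%F__%H_%M_%S` stamp format as the graph names. The equivalent archive can be produced from Python with the standard-library `tarfile` module (a sketch assuming the script's `./output` directory exists; names are illustrative):

import tarfile
import time

# Mirrors: tar -cvf output_$(date +%F__%H_%M_%S).tar ./output
archive_name = "output_" + time.strftime("%Y-%m-%d__%H_%M_%S") + ".tar"
with tarfile.open(archive_name, "w") as tar:
    tar.add("./output")  # recursively adds the timing results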