refactor on the organization of tooling.
parent
256f0cf978
commit
73cb9049e6
|
@ -111,3 +111,15 @@ spark-warehouse/
|
|||
|
||||
# For Node.js
|
||||
node_modules
|
||||
|
||||
# build flags
|
||||
.compileSetup
|
||||
.package
|
||||
.data
|
||||
.graphSetup
|
||||
|
||||
# output data
|
||||
timing/output/*
|
||||
timing/*.tar
|
||||
plotting/output/*
|
||||
plotting/.venv
|
||||
|
|
50
Makefile
50
Makefile
|
@ -2,56 +2,60 @@ SPARKSHELL = bin/spark-shell
|
|||
SBT = build/sbt
|
||||
SBT_FLAGS = -Pscala-2.13
|
||||
|
||||
FLAGS = .flags
|
||||
|
||||
.PHONY: cleanbuild cleandata cleanflags cleanvenv cleangraphs cleanall compile interactive data
|
||||
|
||||
all: setup compile package data graphs
|
||||
all: .compileSetup .package .data graph
|
||||
|
||||
cleanbuild:
|
||||
$(SBT) $(SBT_FLAGS) clean
|
||||
|
||||
cleandata:
|
||||
rm -rf scripts/timing/output
|
||||
rm -rf ./timing/output
|
||||
|
||||
cleanflags:
|
||||
rm -rf .flags/
|
||||
rm -rf .data .package .compileSetup .graphSetup
|
||||
|
||||
cleanvenv:
|
||||
rm -rf ./scripts/plotting/.venv/
|
||||
rm -rf ./plotting/.venv/
|
||||
|
||||
cleangraphs:
|
||||
rm -rf scripts/plotting/output
|
||||
rm -rf ./plotting/output
|
||||
|
||||
cleanall:
|
||||
$(MAKE) cleanflags cleanvenv cleangraphs cleanbuild
|
||||
|
||||
$(FLAGS):
|
||||
mkdir $(FLAGS)
|
||||
|
||||
$(FLAGS)/setup: | $(FLAGS)
|
||||
.compileSetup:
|
||||
./dev/change-scala-version.sh 2.13
|
||||
cd scripts/plotting && python3 -m venv .venv &&\
|
||||
source .venv/bin/activate &&\
|
||||
python3 -m pip install -r requierments.txt
|
||||
touch $@
|
||||
|
||||
setup: $(FLAGS)/setup
|
||||
.graphSetup:
|
||||
cd ./plotting && python3 -m venv .venv &&\
|
||||
source .venv/bin/activate &&\
|
||||
python3 -m pip install -r requirements.txt
|
||||
touch $@
|
||||
|
||||
compile: setup
|
||||
compile: .compileSetup
|
||||
$(SBT) $(SBT_FLAGS) compile
|
||||
|
||||
package: setup
|
||||
.package: .compileSetup
|
||||
$(SBT) $(SBT_FLAGS) package
|
||||
touch $@
|
||||
|
||||
interactive: setup
|
||||
package:
|
||||
$(MAKE) .package
|
||||
|
||||
interactive: .compileSetup
|
||||
$(SBT) $(SBT_FLAGS)
|
||||
|
||||
data: setup
|
||||
cd scripts/timing && ./tpchBench.sh
|
||||
.data: .package
|
||||
cd ./timing && ./tpchBench.sh
|
||||
touch $@
|
||||
|
||||
graph: setup
|
||||
cd ./scripts/plotting/ &&\
|
||||
mkdir output &&\
|
||||
data:
|
||||
$(MAKE) .data
|
||||
|
||||
graph: .data .graphSetup
|
||||
cd ./plotting/ &&\
|
||||
mkdir -p output &&\
|
||||
source .venv/bin/activate &&\
|
||||
python3 plotting.py
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from collections import defaultdict
|
||||
import time
|
||||
import json
|
||||
import matplotlib as mpl
|
||||
import matplotlib.pyplot as plt
|
||||
|
@ -30,10 +31,10 @@ import re
|
|||
# #
|
||||
#############################
|
||||
# where you could have an arbitrary number of sub directories and data files with arbitrary names
|
||||
dataOutputDirectory = "./scripts/timing/output/"
|
||||
dataOutputDirectory = "../timing/output/"
|
||||
|
||||
# graph output directory
|
||||
graphOutputDirectory = "./scripts/plotting/output/"
|
||||
graphOutputDirectory = "../plotting/output/"
|
||||
|
||||
# regular expression string to match the JSON data string
|
||||
reDataString = '{"data":\D.*}}'
|
||||
|
@ -45,7 +46,7 @@ reQueryString = r'Query.(?P<number>\d\d|\d)."'
|
|||
numberOfRuns = -1
|
||||
|
||||
# graph output names
|
||||
stackedGraphOutputFile = "stackedGraph.pdf"
|
||||
stackedGraphOutputFile = "stackedGraph-" + str(time.strftime("%F__%H_%M_%S")) + ".pdf"
|
||||
|
||||
# storage for totals, then the averages of the data from runs
|
||||
queryDataDict = defaultdict(dict)
|
|
@ -1,4 +1,2 @@
|
|||
numpy
|
||||
pathlib
|
||||
matplotlib
|
||||
pandas
|
|
@ -6,17 +6,17 @@ trap break INT
|
|||
# paths.
|
||||
OUTPUT="./output/"
|
||||
SHELLCOMMANDS="./shellCommands/"
|
||||
SPARKSHELL="../../bin/spark-shell"
|
||||
SPARKSHELL="../bin/spark-shell"
|
||||
|
||||
RUNS=5
|
||||
|
||||
# sanity checks.
|
||||
|
||||
if [ -f "../../bin/spark-shell" ]; then
|
||||
if [ -f "$SPARKSHELL" ]; then
|
||||
echo "spark-shell found!"
|
||||
else
|
||||
echo "spark-shell not found! ../../bin/spark-shell"
|
||||
echo "Please run from the \`scripts/timing\` folder"
|
||||
echo "spark-shell not found! $SPARKSHELL"
|
||||
echo "Please run from the \`./timing\` folder"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -42,3 +42,5 @@ for run in $(seq 1 $RUNS); do
|
|||
cat $file | $SPARKSHELL | tee ${OUTPUT}run${run}/$(basename "$file" .scala).txt
|
||||
done
|
||||
done
|
||||
|
||||
tar -cvf output_$(date +%F__%H_%M_%S).tar ./output
|
Loading…
Reference in New Issue