refactor the organization of tooling.

macrotesting
Nick Brown 2022-07-01 17:07:02 -04:00
parent 256f0cf978
commit 73cb9049e6
Signed by: bicknrown
GPG Key ID: 47AF495B3DCCE9C3
27 changed files with 49 additions and 32 deletions

.gitignore

@@ -111,3 +111,15 @@ spark-warehouse/
# For Node.js
node_modules
# build flags
.compileSetup
.package
.data
.graphSetup
# output data
timing/output/*
timing/*.tar
plotting/output/*
plotting/.venv

Makefile

@@ -2,56 +2,60 @@ SPARKSHELL = bin/spark-shell
SBT = build/sbt
SBT_FLAGS = -Pscala-2.13
FLAGS = .flags
.PHONY: cleanbuild cleandata cleanflags cleanvenv cleangraphs cleanall compile interactive data
all: setup compile package data graphs
all: .compileSetup .package .data graph
cleanbuild:
$(SBT) $(SBT_FLAGS) clean
cleandata:
rm -rf scripts/timing/output
rm -rf ./timing/output
cleanflags:
rm -rf .flags/
rm -rf .data .package .compileSetup .graphSetup
cleanvenv:
rm -rf ./scripts/plotting/.venv/
rm -rf ./plotting/.venv/
cleangraphs:
rm -rf scripts/plotting/output
rm -rf ./plotting/output
cleanall:
$(MAKE) cleanflags cleanvenv cleangraphs cleanbuild
$(FLAGS):
mkdir $(FLAGS)
$(FLAGS)/setup: | $(FLAGS)
.compileSetup:
./dev/change-scala-version.sh 2.13
cd scripts/plotting && python3 -m venv .venv &&\
source .venv/bin/activate &&\
python3 -m pip install -r requierments.txt
touch $@
setup: $(FLAGS)/setup
.graphSetup:
cd ./plotting && python3 -m venv .venv &&\
source .venv/bin/activate &&\
python3 -m pip install -r requirements.txt
touch $@
compile: setup
compile: .compileSetup
$(SBT) $(SBT_FLAGS) compile
package: setup
.package: .compileSetup
$(SBT) $(SBT_FLAGS) package
touch $@
interactive: setup
package:
$(MAKE) .package
interactive: .compileSetup
$(SBT) $(SBT_FLAGS)
data: setup
cd scripts/timing && ./tpchBench.sh
.data: .package
cd ./timing && ./tpchBench.sh
touch $@
graph: setup
cd ./scripts/plotting/ &&\
mkdir output &&\
data:
$(MAKE) .data
graph: .data .graphSetup
cd ./plotting/ &&\
mkdir -p output &&\
source .venv/bin/activate &&\
python3 plotting.py
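
The rewritten targets replace the `.flags/` directory with per-target stamp files (`.compileSetup`, `.package`, `.data`, `.graphSetup`): each recipe does its work once, then `touch $@` records completion so make treats the target as up to date on later runs. A minimal sketch of the same run-once pattern in Python (the `run_once` helper and its usage are hypothetical, for illustration only):

import pathlib
import subprocess

def run_once(stamp: pathlib.Path, command: list[str]) -> None:
    # Skip the command if the stamp file already exists,
    # the way make skips a target whose stamp is up to date.
    if stamp.exists():
        return
    subprocess.run(command, check=True)
    stamp.touch()  # the Python equivalent of `touch $@`

# Hypothetical usage mirroring the .compileSetup target:
run_once(pathlib.Path(".compileSetup"),
         ["./dev/change-scala-version.sh", "2.13"])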

plotting/plotting.py

@@ -1,4 +1,5 @@
from collections import defaultdict
import time
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
@@ -30,10 +31,10 @@ import re
# #
#############################
# where you could have an arbitrary number of sub directories and data files with arbitrary names
dataOutputDirectory = "./scripts/timing/output/"
dataOutputDirectory = "../timing/output/"
# graph output directory
graphOutputDirectory = "./scripts/plotting/output/"
graphOutputDirectory = "../plotting/output/"
# regular expression string to match the JSON data string
reDataString = '{"data":\D.*}}'
@@ -45,7 +46,7 @@ reQueryString = r'Query.(?P<number>\d\d|\d)."'
numberOfRuns = -1
# graph output names
stackedGraphOutputFile = "stackedGraph.pdf"
stackedGraphOutputFile = "stackedGraph-" + str(time.strftime("%F__%H_%M_%S")) + ".pdf"
# storage for totals, then the averages of the data from runs
queryDataDict = defaultdict(dict)
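
The graph file name now embeds a timestamp via `time.strftime`, so repeated runs no longer overwrite the previous `stackedGraph.pdf`. A quick sketch of what the expression produces (note that `%F` is a C-library shorthand for `%Y-%m-%d` and is not available on every platform; the output value below is illustrative):

import time

# "%F__%H_%M_%S" yields e.g. "2022-07-01__17_07_02";
# "%Y-%m-%d" is the portable spelling of "%F".
name = "stackedGraph-" + time.strftime("%Y-%m-%d__%H_%M_%S") + ".pdf"
print(name)  # e.g. stackedGraph-2022-07-01__17_07_02.pdf

Incidentally, `time.strftime` already returns a string, so the `str(...)` wrapper in the new line is redundant but harmless.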

plotting/requirements.txt

@@ -1,4 +1,2 @@
numpy
pathlib
matplotlib
pandas

timing/tpchBench.sh

@@ -6,17 +6,17 @@ trap break INT
# paths.
OUTPUT="./output/"
SHELLCOMMANDS="./shellCommands/"
SPARKSHELL="../../bin/spark-shell"
SPARKSHELL="../bin/spark-shell"
RUNS=5
# sanity checks.
if [ -f "../../bin/spark-shell" ]; then
if [ -f "$SPARKSHELL" ]; then
echo "spark-shell found!"
else
echo "spark-shell not found! ../../bin/spark-shell"
echo "Please run from the \`scripts/timing\` folder"
echo "spark-shell not found! $SPARKSHELL"
echo "Please run from the \`./timing\` folder"
exit 1
fi
@@ -42,3 +42,5 @@ for run in $(seq 1 $RUNS); do
cat $file | $SPARKSHELL | tee ${OUTPUT}run${run}/$(basename "$file" .scala).txt
done
done
tar -cvf output_$(date +%F__%H_%M_%S).tar ./output
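
The new final step archives the results as `output_<timestamp>.tar`, reusing the same `%F__%H_%M_%S` stamp format as the graph names. The equivalent archive can be produced from Python with the standard-library `tarfile` module (a sketch assuming the script's `./output` directory exists; names are illustrative):

import tarfile
import time

# Mirrors: tar -cvf output_$(date +%F__%H_%M_%S).tar ./output
archive_name = "output_" + time.strftime("%Y-%m-%d__%H_%M_%S") + ".tar"
with tarfile.open(archive_name, "w") as tar:
    tar.add("./output")  # recursively adds the timing results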