Initial experiments from Mike... the x-axes aren't identical in both cases though

2023-03-28 16:20:22 -04:00 · 2023-03-28 16:20:22 -04:00 · f3f21f0d5a
parent 33e2e379ec
commit f3f21f0d5a
7 changed files with 112 additions and 0 deletions
--- a/results/desktop-init_formulas.pdf
+++ b/results/desktop-init_formulas.pdf
--- a/results/desktop-init_formulas.png
+++ b/results/desktop-init_formulas.png
--- a/results/desktop-init_spreadsheet.pdf
+++ b/results/desktop-init_spreadsheet.pdf
--- a/results/desktop-init_spreadsheet.png
+++ b/results/desktop-init_spreadsheet.png
--- a/results/desktop-update_one.pdf
+++ b/results/desktop-update_one.pdf
--- a/results/desktop-update_one.png
+++ b/results/desktop-update_one.png
--- a/results/gen_graph.py
+++ b/results/gen_graph.py
@ -0,0 +1,112 @@
+import re
+#import matplotlib as mpl
+import matplotlib.pyplot as plt
+#import numpy as np
+
+def read_dataspread(testbed):
+    """Parse the DataSpread timing log for one testbed.
+
+    Reads ``{testbed}-dataspread.log`` (relative to the current directory)
+    and keeps every line that begins with
+    ``<label>@<rows>: <stage>: <time>``.  Lines that do not match are
+    skipped silently.
+
+    Args:
+        testbed: Platform name; used as the log-file prefix and stored as
+            the first field of every record.
+
+    Returns:
+        A list of ``(testbed, "dataspread", rows, stage, time)`` tuples in
+        file order, where ``rows`` is an int, ``stage`` is lower-cased and
+        ``time`` is a float.
+
+    Raises:
+        OSError: If the log file cannot be opened.
+        ValueError: If a matched time is not a valid float (e.g. ``1.2.3``).
+    """
+    # The optional exponent keeps a time written in scientific notation
+    # (e.g. ``5.0E-4``) from being silently truncated to its mantissa.
+    pattern = re.compile(
+        r"(.+)@(\d+): ([^:]+): ([0-9.]+(?:[eE][-+]?[0-9]+)?)"
+    )
+
+    records = []
+    with open(f"{testbed}-dataspread.log") as f:
+        # Iterate the file lazily rather than loading every line up front.
+        for line in f:
+            match = pattern.match(line)
+            if match is None:
+                continue
+            records.append((
+                testbed,
+                "dataspread",
+                int(match.group(2)),
+                match.group(3).lower(),
+                float(match.group(4)),
+            ))
+    return records
+
+
+def read_vizier(testbed):
+    """Parse the Vizier timing log for one testbed.
+
+    Reads ``{testbed}-vizier.log`` (relative to the current directory) and
+    keeps every line that begins with
+    ``<label>@<rows>/<true|false>: <stage>: <time>``.  The ``true``/``false``
+    flag selects the system name: ``"vizier-batch"`` for ``true``,
+    ``"vizier"`` otherwise.  Lines that do not match are echoed to stdout
+    and otherwise ignored.
+
+    Args:
+        testbed: Platform name; used as the log-file prefix and stored as
+            the first field of every record.
+
+    Returns:
+        A list of ``(testbed, system, rows, stage, time)`` tuples in file
+        order, where ``rows`` is an int, ``stage`` is lower-cased and
+        ``time`` is a float.
+
+    Raises:
+        OSError: If the log file cannot be opened.
+        ValueError: If a matched time is not a valid float (e.g. ``1.2.3``).
+    """
+    # The optional exponent keeps a time written in scientific notation
+    # (e.g. ``5.0E-4``) from being silently truncated to its mantissa.
+    pattern = re.compile(
+        r"(.*)@(\d+)/(true|false): ([^:]+): ([0-9.]+(?:[eE][-+]?[0-9]+)?)"
+    )
+
+    records = []
+    with open(f"{testbed}-vizier.log") as f:
+        # Iterate the file lazily rather than loading every line up front.
+        for line in f:
+            match = pattern.match(line)
+            if match is None:
+                # Surface anything the pattern did not recognise.
+                print(line)
+                continue
+            system = "vizier-batch" if match.group(3) == "true" else "vizier"
+            records.append((
+                testbed,
+                system,
+                int(match.group(2)),
+                match.group(4).lower(),
+                float(match.group(5)),
+            ))
+    return records
+
+
+# Schema:
+# 0. testbed platform: 'desktop' or 'laptop'
+# 1. system: 'vizier', 'vizier-batch', or 'dataspread'
+# 2. data-size: int (number of rows of lineitem)
+# 3. test-stage: 
+#     'init spreadsheet' - time to load the spreadsheet
+#     'init formulas' - time to start processing formulas
+#     'monitoring overhead' - vizier specific... not relevant
+#     'update one' - time to update one cell
+#     'update all' - time to update an entire column (not used)
+# 4. time: float (number of seconds)
+
+# Load every log into one flat list of records (see the schema above).
+# The readers run in the listed order, so records keep that order too.
+data = [
+    *read_vizier("desktop"),
+    *read_vizier("laptop"),
+    *read_dataspread("desktop"),
+    *read_dataspread("laptop"),
+]
+
+# Distinct test-stage names (field 3) found across all logs.
+stages = {record[3] for record in data}
+
+print(data)
+print(stages)
+
+
+def plot_one(testbed, stage):
+    """Plot time against data size for one testbed and test stage.
+
+    Draws one line per system (``vizier``, ``vizier-batch``, ``dataspread``)
+    from the module-level ``data`` records, with points ordered by data
+    size, and writes the figure to ``{testbed}-{stage}.pdf`` and
+    ``{testbed}-{stage}.png`` in the current directory (spaces in ``stage``
+    become underscores in the file name).
+
+    Args:
+        testbed: Platform to select (record field 0).
+        stage: Test-stage name to select (record field 3); also used as
+            the y-axis label.
+    """
+    fig, ax = plt.subplots()
+    try:
+        # ax.set_title(f"{stage} ({testbed})")
+        ax.set_ylabel(f"{stage} (s)")
+        ax.set_xlabel("Data Size (number of rows)")
+
+        for system in ["vizier", "vizier-batch", "dataspread"]:
+            # (data size, time) pairs for this series, ordered by size.
+            points = sorted(
+                (
+                    (record[2], record[4])
+                    for record in data
+                    if record[0] == testbed
+                    and record[1] == system
+                    and record[3] == stage
+                ),
+                key=lambda pt: pt[0],
+            )
+            ax.plot(
+                [size for size, _ in points],
+                [seconds for _, seconds in points],
+                label=system,
+            )
+        ax.legend()
+
+        file_stage = stage.replace(" ", "_")
+        fig.savefig(f"{testbed}-{file_stage}.pdf")
+        fig.savefig(f"{testbed}-{file_stage}.png")
+    finally:
+        # pyplot keeps every figure registered until it is closed; release
+        # it so repeated calls do not accumulate open figures.
+        plt.close(fig)
+
+
+# Render one desktop figure per stage of interest.
+for desktop_stage in ("init spreadsheet", "init formulas", "update one"):
+    plot_one("desktop", desktop_stage)