Initial experiments from Mike... the x-axes aren't identical in both cases though

main
Oliver Kennedy 2023-03-28 16:20:22 -04:00
parent 33e2e379ec
commit f3f21f0d5a
Signed by: okennedy
GPG Key ID: 3E5F9B3ABD3FDB60
7 changed files with 112 additions and 0 deletions

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

112
results/gen_graph.py Normal file
View File

@ -0,0 +1,112 @@
import re
#import matplotlib as mpl
import matplotlib.pyplot as plt
#import numpy as np
def read_dataspread(testbed):
    """Parse the DataSpread timing log for one testbed.

    Opens ``{testbed}-dataspread.log`` and keeps every line that begins with
    ``<label>@<rows>: <stage>: <seconds>``. Lines that do not match are
    skipped silently.

    Args:
        testbed: Prefix of the log file name; also stored as field 0 of
            every returned record.

    Returns:
        A list of ``(testbed, "dataspread", rows, stage, seconds)`` tuples in
        file order, where ``rows`` is an int, ``stage`` is lower-cased, and
        ``seconds`` is a float.

    Raises:
        OSError: If the log file cannot be opened.
        ValueError: If the matched number is not a valid float (e.g. "1.2.3").
    """
    # The optional exponent keeps a value such as "1.5E-4" from being
    # silently truncated to its mantissa ("1.5") by the prefix match.
    pattern = re.compile(
        r"(.+)@(\d+): ([^:]+): ([0-9.]+(?:[eE][-+]?[0-9]+)?)"
    )

    records = []
    with open(f"{testbed}-dataspread.log") as f:
        # Iterate the file lazily rather than materialising it via readlines().
        for line in f:
            match = pattern.match(line)
            if match is None:
                continue
            records.append((
                testbed,
                "dataspread",
                int(match.group(2)),
                match.group(3).lower(),
                float(match.group(4)),
            ))
    return records
def read_vizier(testbed):
    """Parse the Vizier timing log for one testbed.

    Opens ``{testbed}-vizier.log`` and keeps every line that begins with
    ``<label>@<rows>/<true|false>: <stage>: <seconds>``. Lines that do not
    match are printed to stdout and otherwise ignored.

    Args:
        testbed: Prefix of the log file name; also stored as field 0 of
            every returned record.

    Returns:
        A list of ``(testbed, system, rows, stage, seconds)`` tuples in file
        order. ``system`` is ``"vizier-batch"`` when the flag after the slash
        is ``true`` and ``"vizier"`` otherwise; ``rows`` is an int, ``stage``
        is lower-cased, and ``seconds`` is a float.

    Raises:
        OSError: If the log file cannot be opened.
        ValueError: If the matched number is not a valid float (e.g. "1.2.3").
    """
    # The optional exponent keeps a value such as "2.0E-3" from being
    # silently truncated to its mantissa ("2.0") by the prefix match.
    pattern = re.compile(
        r"(.*)@(\d+)/(true|false): ([^:]+): ([0-9.]+(?:[eE][-+]?[0-9]+)?)"
    )

    records = []
    with open(f"{testbed}-vizier.log") as f:
        # Iterate the file lazily rather than materialising it via readlines().
        for line in f:
            match = pattern.match(line)
            if match is None:
                # Echo lines that could not be parsed so they stay visible.
                print(line)
                continue
            records.append((
                testbed,
                "vizier-batch" if match.group(3) == "true" else "vizier",
                int(match.group(2)),
                match.group(4).lower(),
                float(match.group(5)),
            ))
    return records
# Schema:
#   0. testbed platform: 'desktop' or 'laptop'
#   1. system: 'vizier', 'vizier-batch', or 'dataspread'
#   2. data-size: int (number of rows of lineitem)
#   3. test-stage:
#        'init spreadsheet'    - time to load the spreadsheet
#        'init formulas'       - time to start processing formulas
#        'monitoring overhead' - vizier specific... not relevant
#        'update one'          - time to update one cell
#        'update all'          - time to update an entire column (not used)
#   4. time: float (number of seconds)
# Concatenate the records of all four logs into one flat list. The reads
# happen left to right: both Vizier logs first, then both DataSpread logs.
data = (
    read_vizier("desktop")
    + read_vizier("laptop")
    + read_dataspread("desktop")
    + read_dataspread("laptop")
)

# Distinct test-stage names (field 3) found across all records.
stages = {row[3] for row in data}

print(data)
print(stages)
def plot_one(testbed, stage):
    """Plot time against data size for one testbed and test stage.

    Draws one line per system ('vizier', 'vizier-batch', 'dataspread') from
    the module-level ``data`` records and saves the figure as
    ``{testbed}-{stage}.pdf`` and ``{testbed}-{stage}.png``, with spaces in
    ``stage`` replaced by underscores in the file names.

    Args:
        testbed: Value compared against field 0 of each record.
        stage: Value compared against field 3 of each record; also used for
            the y-axis label and the output file names.
    """
    # `data` is only read here, so no `global` declaration is required.
    fig, ax = plt.subplots()
    # ax.set_title(f"{stage} ({testbed})")
    ax.set_ylabel(f"{stage} (s)")
    ax.set_xlabel("Data Size (number of rows)")
    for system in ["vizier", "vizier-batch", "dataspread"]:
        # (data size, seconds) pairs for this system, ordered by data size.
        points = sorted(
            (
                (record[2], record[4])
                for record in data
                if record[0] == testbed
                and record[1] == system
                and record[3] == stage
            ),
            key=lambda pt: pt[0],
        )
        ax.plot(
            [size for size, _ in points],
            [seconds for _, seconds in points],
            label=system,
        )
    ax.legend()
    file_stage = stage.replace(" ", "_")
    fig.savefig(f"{testbed}-{file_stage}.pdf")
    fig.savefig(f"{testbed}-{file_stage}.png")
    # Figures created through pyplot stay registered until closed; release
    # this one now that both files have been written.
    plt.close(fig)
# Render the three desktop plots, one per stage, in this order.
for desktop_stage in ("init spreadsheet", "init formulas", "update one"):
    plot_one("desktop", desktop_stage)