179 lines
4.6 KiB
Python
179 lines
4.6 KiB
Python
import re
|
|
#import matplotlib as mpl
|
|
import matplotlib.pyplot as plt
|
|
#import numpy as np
|
|
|
|
def read_dataspread(testbed, experiment):
    """Parse the DataSpread timing log for one testbed/experiment pair.

    Reads ``{testbed}-dataspread-{experiment}.log`` relative to the current
    working directory.  Every line that starts with
    ``<prefix>@<size>: <stage>: <number>`` yields one record; lines that do
    not match are skipped silently.

    Args:
        testbed: Testbed name; part of the file name and copied into each
            record.
        experiment: Experiment name; part of the file name and copied into
            each record.

    Returns:
        A list of ``(testbed, "dataspread", size, stage, time, experiment)``
        tuples in file order, where ``size`` is an ``int``, ``stage`` is the
        lower-cased stage label and ``time`` is a ``float``.

    Raises:
        OSError: If the log file cannot be opened.
        ValueError: If the captured number is not a valid float
            (for example ``"1.2.3"``).
    """
    # The optional exponent suffix keeps numbers written in scientific
    # notation (e.g. "2.5E-4") intact.  Without it only the mantissa is
    # captured, because re.match does not require the whole line to match.
    pattern = re.compile(
        r"(.+)@(\d+): ([^:]+): ([0-9.]+(?:[eE][-+]?[0-9]+)?)"
    )

    records = []
    with open(f"{testbed}-dataspread-{experiment}.log") as f:
        for line in f:
            match = pattern.match(line)
            if match is None:
                continue
            records.append((
                testbed,
                "dataspread",
                int(match.group(2)),
                match.group(3).lower(),
                float(match.group(4)),
                experiment,
            ))
    return records
|
|
|
|
|
|
def read_vizier(testbed, experiment):
    """Parse the Vizier timing log for one testbed/experiment pair.

    Reads ``{testbed}-vizier-{experiment}.log`` relative to the current
    working directory.  Every line that starts with
    ``<prefix>@<size>/<true|false>: <stage>: <number>`` yields one record.
    The ``true``/``false`` flag selects the system name: ``"vizier-batch"``
    for ``true``, ``"vizier"`` otherwise.  Lines that do not match are
    printed to stdout (as a diagnostic) and otherwise ignored.

    Args:
        testbed: Testbed name; part of the file name and copied into each
            record.
        experiment: Experiment name; part of the file name and copied into
            each record.

    Returns:
        A list of ``(testbed, system, size, stage, time, experiment)`` tuples
        in file order, where ``size`` is an ``int``, ``stage`` is the
        lower-cased stage label and ``time`` is a ``float``.

    Raises:
        OSError: If the log file cannot be opened.
        ValueError: If the captured number is not a valid float
            (for example ``"1.2.3"``).
    """
    # The optional exponent suffix keeps numbers written in scientific
    # notation (e.g. "1.0E-3") intact.  Without it only the mantissa is
    # captured, because re.match does not require the whole line to match.
    pattern = re.compile(
        r"(.*)@(\d+)/(true|false): ([^:]+): ([0-9.]+(?:[eE][-+]?[0-9]+)?)"
    )

    records = []
    with open(f"{testbed}-vizier-{experiment}.log") as f:
        for line in f:
            match = pattern.match(line)
            if match is None:
                # Echo unparsed lines so unexpected log content is visible.
                print(line)
                continue
            records.append((
                testbed,
                "vizier-batch" if match.group(3) == "true" else "vizier",
                int(match.group(2)),
                match.group(4).lower(),
                float(match.group(5)),
                experiment,
            ))
    return records
|
|
|
|
|
|
# Schema:
# 0. testbed platform: 'desktop' or 'laptop'
# 1. system: 'vizier', 'vizier-batch', or 'dataspread'
# 2. data-size: int (number of rows of lineitem)
# 3. test-stage:
#      'init spreadsheet' - time to load the spreadsheet
#      'init formulas' - time to start processing formulas
#      'monitoring overhead' - Vizier-specific; not relevant
#      'update one' - time to update one cell
#      'update all' - time to update an entire column (not used)
#      'init' - derived after loading: 'init spreadsheet' + 'init formulas'
# 4. time: float (number of seconds)
# 5. experiment: 'varystart', 'varysize', 'varystartandsize'
|
|
|
|
# Flat list of every desktop record.  For each experiment the Vizier log is
# read first, then the DataSpread log, so record order follows that sequence.
data = [
    record
    for experiment in ("varystart", "varysize", "varystartandsize")
    for reader in (read_vizier, read_dataspread)
    for record in reader("desktop", experiment)
]
|
|
|
|
# Distinct stage labels (field 3) and data sizes (field 2) present in `data`.
stages = {record[3] for record in data}
sizes = {record[2] for record in data}
|
|
|
|
# X-axis caption for each experiment name.
experiment_xlabels = dict(
    varystart="First visible row",
    varysize="Number of rows",
    varystartandsize="Number of rows",
)

# Legend text for each system.  Insertion order matters: it is the order in
# which plot_one draws the series.
system_labels = dict([
    ("vizier", "Vizier"),
    ("vizier-batch", "Vizier (Simulated Batching)"),
    ("dataspread", "DataSpread"),
])
|
|
|
|
|
|
# Stages whose times are added together into a combined initialisation cost.
init_fields = ["init spreadsheet", "init formulas"]

# Maps (testbed, system, data-size, experiment) to the summed time of the
# stages listed in init_fields.
init_costs = {}
for record in data:
    if record[3] not in init_fields:
        continue
    # Every schema field except the stage (3) and the time (4).
    key = record[:3] + (record[5],)
    print(key)
    init_costs[key] = init_costs.get(key, 0) + record[4]
|
|
|
|
# Append one synthetic "init" record per init_costs entry, carrying the
# summed initialisation time in the regular six-field record layout.
data.extend(
    (testbed, system, size, "init", total, experiment)
    for (testbed, system, size, experiment), total in init_costs.items()
)

# Debug output: the full record list and the set of stage labels.
print(data)
print(stages)
|
|
|
|
def plot_one(testbed, stage, experiment):
    """Plot one stage's times against data size and save the figure.

    Draws one line per entry of the module-level ``system_labels`` mapping,
    using the records in the module-level ``data`` list whose testbed, stage
    and experiment fields equal the arguments.  Points are ordered by data
    size and both axes are logarithmic.  The figure is written to
    ``{testbed}-{stage}-{experiment}.pdf`` and ``.png`` in the current
    working directory, with spaces in ``stage`` replaced by underscores,
    and is then closed.

    Args:
        testbed: Value matched against record field 0.
        stage: Value matched against record field 3; also used in the
            y-axis label and the output file names.
        experiment: Value matched against record field 5; also the key
            into ``experiment_xlabels`` for the x-axis label.

    Raises:
        KeyError: If ``experiment`` is not a key of ``experiment_xlabels``.
    """
    fig, ax = plt.subplots(
        figsize=(4, 2),
        constrained_layout=True,
    )
    try:
        ax.set_ylabel(f"{stage} (s)")
        ax.set_xlabel(experiment_xlabels[experiment])
        ax.set_xscale("log")
        ax.set_yscale("log")

        for system, label in system_labels.items():
            # (data-size, time) pairs for this series, ordered by data size.
            points = sorted(
                (
                    (record[2], record[4])
                    for record in data
                    if record[0] == testbed
                    and record[1] == system
                    and record[3] == stage
                    and record[5] == experiment
                ),
                key=lambda pt: pt[0],
            )
            ax.plot(
                [pt[0] for pt in points],
                [pt[1] for pt in points],
                label=label,
            )
        ax.legend()

        # File names use underscores instead of the spaces in stage labels.
        stage_slug = stage.replace(" ", "_")
        fig.savefig(f"{testbed}-{stage_slug}-{experiment}.pdf")
        fig.savefig(f"{testbed}-{stage_slug}-{experiment}.png")
    finally:
        # pyplot keeps every figure it created until it is closed; release
        # this one so repeated calls do not accumulate open figures.
        plt.close(fig)
|
|
|
|
|
|
|
|
|
|
# Render every (experiment, stage) combination for the desktop testbed,
# experiment by experiment, with the stages in a fixed order.
for experiment in ("varystart", "varysize", "varystartandsize"):
    for stage in ("init spreadsheet", "init formulas", "init", "update one"):
        plot_one("desktop", stage, experiment)
|