Starting updated tour talk
This commit is contained in:
parent
1d80613c10
commit
836ddd5ccd
95
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/Rakefile
Executable file
95
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/Rakefile
Executable file
|
@ -0,0 +1,95 @@
|
|||
require 'rubygems'
|
||||
require 'gnuplot'
|
||||
|
||||
$:.push(".")
|
||||
require 'util'
|
||||
require 'plot'
|
||||
require 'csvx'
|
||||
|
||||
# Evaluation modes, in the column order used for every timing row. The first
# entry is the baseline that plot_timing_bar_plot normalizes the others by.
$modes = [
  :deterministic,
  :classic,
  :partition,
  :inline,
  :hybrid
]

# Query identifiers as they appear in data.csv, mapped to their plot labels.
$query_names = {
  q1_noagg: "Q1",
  q3_noagg: "Q3",
  q5_noagg: "Q5",
  q9_noagg: "Q9"
}

# plot_output :pdf, size: "5in,2.5in", fsize: "12"
plot_output(:aqua)
|
||||
# auto_open_plots :true
|
||||
|
||||
# $plot_auto_open = true
|
||||
|
||||
# Converts a timing cell from data.csv into seconds.
#
# time - String such as "2m41.632s", "?" or "Timeout".
#
# Returns minutes * 60 + seconds for "<m>m<s>s" values, 0 for a value that
# contains "?", and the sentinel 1000000 for a value that contains "Timeout".
# Raises RuntimeError for anything else.
def to_seconds(time)
  case time
  when /(?<minutes>[0-9]+)m(?<seconds>[0-9.]+)s/
    parts = Regexp.last_match
    parts[:minutes].to_i * 60 + parts[:seconds].to_f
  when /\?/
    0
  when /Timeout/
    1000000
  else
    raise "Unknown time value '#{time}'"
  end
end
|
||||
|
||||
# Looks up each entry of +order+ in +hash+ and returns the values in that
# order. Keys that are absent contribute whatever hash[key] returns (nil for
# a plain Hash).
def sort_by_cols(order, hash)
  order.each_with_object([]) do |col, picked|
    picked << hash[col]
  end
end
|
||||
|
||||
# Timing table built from data.csv: keyed by "<db>_<scale>" symbol, then by
# query symbol; each leaf is the list of timings in seconds ordered like
# $modes. Array#reduce here is the grouping version defined in util.rb.
csv_rows = File.csv("data.csv", separator: / *, */)

keyed_rows = csv_rows.map do |db, q, sf, mode, time|
  [(db+"_"+sf).to_sym, [q.to_sym, [mode.to_sym, to_seconds(time)]]]
end

$data = keyed_rows.reduce do |_db, db_trials|
  db_trials.reduce do |_q, q_trials|
    sort_by_cols($modes, q_trials.to_h)
  end
end
|
||||
|
||||
# Draws a clustered bar plot of query timings for one database, with every
# non-baseline mode expressed as a percentage of the baseline (first) timing.
#
# gp      - the plot object; receives yrange/key/ylabel and is passed on to
#           draw_clustered_bar_plot.
# db      - key into $data selecting the dataset (e.g. :sqlite_100m).
# details - optional Hash; :yrange overrides the default "[0:300]".
def plot_timing_bar_plot(gp, db, details = {})
  clusters = $query_names.keys.sort
  data = sort_by_cols(clusters, $data[db]).map do |timings|
    # Destructure rather than shift: shift would remove the baseline from the
    # rows stored in $data and corrupt any later plot of the same dataset.
    det_t, *others = timings
    others.map { |t| t / det_t * 100 }
  end
  gp.yrange details.fetch(:yrange, "[0:300]")
  gp.key "font \"Times-Roman,10\" opaque box top left"
  gp.ylabel "% of Deterministic Time"
  draw_clustered_bar_plot(gp,
    data: data,
    # The baseline mode has no bar of its own, so its label is dropped.
    dataset_labels: $modes.map { |m| m.to_s.capitalize }[1..-1],
    group_labels: clusters.map { |c| $query_names[c] },
    box_style: lambda { |i| "boxes fill solid #{(i.to_f/6)+0.25} lc #{$pretty_styles[4-i][:lt]}" }
  )
end
|
||||
|
||||
# One plot task per dataset. Each row is: task name, key into $data, and the
# extra options handed to plot_timing_bar_plot. Every task depends on this
# Rakefile.
[
  ['sqlite100m', :sqlite_100m, {}],
  ['sqlite1g',   :sqlite_1g,   {}],
  ['dbx100m',    :oracle_100m, { yrange: "[0:400]" }],
  ['dbx1g',      :oracle_1g,   { yrange: "[0:1400]" }]
].each do |task_name, db, details|
  plot task_name => "Rakefile" do |gp|
    plot_timing_bar_plot(gp, db, details)
  end
end

task :all => ['sqlite100m', 'sqlite1g', 'dbx100m', 'dbx1g']

task :default => :all
|
46
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/csvx.rb
Executable file
46
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/csvx.rb
Executable file
|
@ -0,0 +1,46 @@
|
|||
class String
  # Splits one CSV line into its fields.
  #
  # sep - Regexp or String the line is split on (default: a bare comma).
  #
  # A fragment that starts with a double quote is glued back together with
  # the fragments after it until one ends with a closing quote; the outer
  # quotes are then stripped. Fragments are rejoined with a literal ","
  # even when +sep+ matched something else, so a custom separator inside a
  # quoted field is not reproduced verbatim.
  #
  # Returns an Array of Strings.
  # Raises RuntimeError ("Unterminated quote") if a quoted field never closes.
  def from_csv(sep = /,/)
    # A limit of -1 keeps trailing empty fields; plain split drops them.
    fields = chomp.split(sep, -1)
    idx = 0
    while idx < fields.length
      if fields[idx].start_with?('"')
        # A lone '"' fragment is only an opening quote; it must not also
        # count as the closing one.
        until fields[idx].length > 1 && fields[idx].end_with?('"')
          raise "Unterminated quote" if idx + 1 >= fields.length
          fields[idx] = fields[idx] + "," + fields[idx + 1]
          fields.delete_at(idx + 1)
        end
        fields[idx] = fields[idx].sub(/^"/, "").sub(/"$/, "")
      end
      idx += 1
    end
    fields
  end
end
|
||||
|
||||
class Array
  # Parses every element (converted with to_s) as one CSV line using
  # String#from_csv and returns the list of parsed rows.
  def from_csv
    map { |line| line.to_s.chomp.from_csv }
  end

  # Writes the rows to the file at path +f+, one line per row with the cells
  # joined by commas. Cells are written as-is (no quoting or escaping).
  # Returns self.
  def to_csv(f)
    # The block variable used to be named f as well, shadowing the path.
    File.open(f, "w+") { |out| each { |row| out.puts(row.join(',')) } }
  end
end
|
||||
|
||||
class IO
  # Reads the remaining lines of the stream as CSV.
  #
  # args - Hash of options:
  #        :header    - when truthy, the first line supplies column names and
  #                     every row is returned as a Hash (default: false).
  #        :separator - passed to String#from_csv (default: /,/).
  #
  # Returns an Array of rows (Arrays, or Hashes when :header is set). An
  # empty stream yields [] instead of raising EOFError.
  def from_csv(args = {})
    header = args.fetch(:header, false)
    separator = args.fetch(:separator, /,/)

    keys = nil
    if header
      first_line = gets
      return [] if first_line.nil?
      # Only spaces around the header line are trimmed, as before.
      keys = first_line.chomp.sub(/ *$/, "").sub(/^ */, "").from_csv(separator)
    end

    rows = map { |line| line.to_s.chomp.from_csv(separator) }
    header ? rows.map { |row| keys.zip(row).to_h } : rows
  end
end
|
||||
|
||||
class File
  class << self
    # Opens the file at path +f+, parses it with IO#from_csv (forwarding
    # +args+ unchanged) and returns the parsed rows. The file is closed when
    # parsing finishes.
    def csv(f, args = {})
      open(f) { |io| io.from_csv(args) }
    end
  end
end
|
80
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/data.csv
Executable file
80
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/data.csv
Executable file
|
@ -0,0 +1,80 @@
|
|||
sqlite,q1_noagg,100m,deterministic,0m16.125s
|
||||
sqlite,q1_noagg,100m,classic ,0m16.163s
|
||||
sqlite,q1_noagg,100m,partition ,0m16.226s
|
||||
sqlite,q1_noagg,100m,inline ,0m16.417s
|
||||
sqlite,q1_noagg,100m,hybrid ,0m16.830s
|
||||
sqlite,q1_noagg,1g ,deterministic,2m41.632s
|
||||
sqlite,q1_noagg,1g ,classic ,2m55.999s
|
||||
sqlite,q1_noagg,1g ,partition ,3m2.070s
|
||||
sqlite,q1_noagg,1g ,inline ,2m54.776s
|
||||
sqlite,q1_noagg,1g ,hybrid ,2m55.679s
|
||||
sqlite,q3_noagg,100m,deterministic,0m1.743s
|
||||
sqlite,q3_noagg,100m,classic ,Timeout
|
||||
sqlite,q3_noagg,100m,partition ,Timeout
|
||||
sqlite,q3_noagg,100m,inline ,0m2.048s
|
||||
sqlite,q3_noagg,100m,hybrid ,0m2.012s
|
||||
sqlite,q3_noagg,1g ,deterministic,0m4.687s
|
||||
sqlite,q3_noagg,1g ,classic ,Timeout
|
||||
sqlite,q3_noagg,1g ,partition ,Timeout
|
||||
sqlite,q3_noagg,1g ,inline ,0m7.572s
|
||||
sqlite,q3_noagg,1g ,hybrid ,0m7.992s
|
||||
sqlite,q5_noagg,100m,deterministic,0m1.542s
|
||||
sqlite,q5_noagg,100m,classic ,Timeout
|
||||
sqlite,q5_noagg,100m,partition ,Timeout
|
||||
sqlite,q5_noagg,100m,inline ,0m2.676s
|
||||
sqlite,q5_noagg,100m,hybrid ,0m3.597s
|
||||
sqlite,q5_noagg,1g ,deterministic,0m6.696s
|
||||
sqlite,q5_noagg,1g ,classic ,Timeout
|
||||
sqlite,q5_noagg,1g ,partition ,Timeout
|
||||
sqlite,q5_noagg,1g ,inline ,0m11.351s
|
||||
sqlite,q5_noagg,1g ,hybrid ,0m57.637s
|
||||
sqlite,q9_noagg,100m,deterministic,0m3.037s
|
||||
sqlite,q9_noagg,100m,classic ,Timeout
|
||||
sqlite,q9_noagg,100m,partition ,Timeout
|
||||
sqlite,q9_noagg,100m,inline ,12m22.873s
|
||||
sqlite,q9_noagg,100m,hybrid ,0m6.037s
|
||||
sqlite,q9_noagg,1g ,deterministic,0m38.967s
|
||||
sqlite,q9_noagg,1g ,classic ,Timeout
|
||||
sqlite,q9_noagg,1g ,partition ,Timeout
|
||||
sqlite,q9_noagg,1g ,inline ,Timeout
|
||||
sqlite,q9_noagg,1g ,hybrid ,1m9.280s
|
||||
oracle,q1_noagg,100m,deterministic,0m19.716s
|
||||
oracle,q1_noagg,100m,classic ,0m23.760s
|
||||
oracle,q1_noagg,100m,partition ,0m21.517s
|
||||
oracle,q1_noagg,100m,inline ,0m20.570s
|
||||
oracle,q1_noagg,100m,hybrid ,0m21.685s
|
||||
oracle,q3_noagg,100m,deterministic,0m1.887s
|
||||
oracle,q3_noagg,100m,classic ,Timeout
|
||||
oracle,q3_noagg,100m,partition ,Timeout
|
||||
oracle,q3_noagg,100m,inline ,0m2.831s
|
||||
oracle,q3_noagg,100m,hybrid ,0m2.482s
|
||||
oracle,q5_noagg,100m,deterministic,0m2.165s
|
||||
oracle,q5_noagg,100m,classic ,Timeout
|
||||
oracle,q5_noagg,100m,partition ,Timeout
|
||||
oracle,q5_noagg,100m,inline ,0m3.738s
|
||||
oracle,q5_noagg,100m,hybrid ,0m5.722s
|
||||
oracle,q9_noagg,100m,deterministic,0m3.883s
|
||||
oracle,q9_noagg,100m,classic ,Timeout
|
||||
oracle,q9_noagg,100m,partition ,Timeout
|
||||
oracle,q9_noagg,100m,inline ,8m0.466s
|
||||
oracle,q9_noagg,100m,hybrid ,0m10.610s
|
||||
oracle,q1_noagg,1g ,deterministic,3m29.131s
|
||||
oracle,q1_noagg,1g ,classic ,3m32.163s
|
||||
oracle,q1_noagg,1g ,partition ,3m45.280s
|
||||
oracle,q1_noagg,1g ,inline ,3m39.893s
|
||||
oracle,q1_noagg,1g ,hybrid ,3m23.962s
|
||||
oracle,q3_noagg,1g ,deterministic,0m5.437s
|
||||
oracle,q3_noagg,1g ,classic ,Timeout
|
||||
oracle,q3_noagg,1g ,partition ,Timeout
|
||||
oracle,q3_noagg,1g ,inline ,0m3.738s
|
||||
oracle,q3_noagg,1g ,hybrid ,0m5.722s
|
||||
oracle,q5_noagg,1g ,deterministic,0m5.092s
|
||||
oracle,q5_noagg,1g ,classic ,Timeout
|
||||
oracle,q5_noagg,1g ,partition ,Timeout
|
||||
oracle,q5_noagg,1g ,inline ,0m22.339s
|
||||
oracle,q5_noagg,1g ,hybrid ,Timeout
|
||||
oracle,q9_noagg,1g ,deterministic,0m28.182s
|
||||
oracle,q9_noagg,1g ,classic ,Timeout
|
||||
oracle,q9_noagg,1g ,partition ,Timeout
|
||||
oracle,q9_noagg,1g ,inline ,Timeout
|
||||
oracle,q9_noagg,1g ,hybrid ,5m8.406s
|
|
0
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/dbx100m.pdf
Executable file
0
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/dbx100m.pdf
Executable file
0
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/dbx1g.pdf
Executable file
0
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/dbx1g.pdf
Executable file
213
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/plot.rb
Executable file
213
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/plot.rb
Executable file
|
@ -0,0 +1,213 @@
|
|||
|
||||
# Gnuplot terminal name used for every plot task.
$plot_terminal = "aqua"
# Extra terminal options (Hash). Initialised here so plot_terminal works even
# when plot_output was never called.
$plot_terminal_opts = {}
# File suffix appended to the task name to form the output file; nil means
# no output file is set.
$plot_suffix = nil
# When true, generated .pdf/.png files are opened after plotting.
$plot_auto_open = false
# The Gnuplot::Plot currently being built (set by plot, read by row_data).
$current_plot = nil
|
||||
|
||||
# Selects the gnuplot terminal used by subsequent plot tasks.
#
# output   - :aqua, :pdf or :png.
# settings - Hash of extra terminal options, stored in $plot_terminal_opts.
#
# Raises ArgumentError for any other output. Previously an unknown value was
# ignored while the settings were still replaced, leaving the old terminal
# paired with options meant for another one.
def plot_output(output, settings = {})
  terminal, suffix =
    case output
    when :aqua then ["aqua", nil]
    when :pdf  then ["pdf", ".pdf"]
    when :png  then ["png", ".png"]
    else raise ArgumentError, "Unsupported plot output: #{output.inspect}"
    end

  $plot_terminal = terminal
  $plot_suffix = suffix
  $plot_terminal_opts = settings
end
|
||||
|
||||
# Builds the gnuplot terminal specification string.
#
# setting_overrides - Hash merged over $plot_terminal_opts for this call.
#
# Returns $plot_terminal alone when there are no options, otherwise the
# terminal followed by the options flattened to "key value key value ...".
def plot_terminal(setting_overrides = {})
  # $plot_terminal_opts is only assigned by plot_output; treat unset as empty.
  settings = ($plot_terminal_opts || {}).merge(setting_overrides)
  return $plot_terminal + "" if settings.size < 1

  $plot_terminal + " " + settings.to_a.flatten.join(" ")
end
|
||||
|
||||
# Default line styles, one Hash per series: :lt is the line colour
# specification, :lw the line width and :pt the point type.
$pretty_styles = [
  ["#A00000", 1],
  ["#00A000", 6],
  ["#5060D0", 2],
  ["#F25900", 9]
].map do |color, point_type|
  {
    :lt => "rgb \"#{color}\"",
    # :fs => "rgb \"#{color}\"",
    :lw => 2,
    :pt => point_type
  }
end
|
||||
|
||||
# Renders style number +idx+ from $pretty_styles as a gnuplot option string.
#
# opts - Hash of overrides. Keys present here win over the defaults (and are
#        listed first); a key explicitly set to nil is omitted from the output.
#
# Returns a String such as "lt rgb \"#A00000\" lw 2 pt 1".
def pretty_style(idx, opts = {})
  defaults = $pretty_styles[idx]
  combined = opts.merge(defaults) { |_key, override, _default| override }
  combined.reject { |_key, value| value.nil? }.
    map { |key, value| [key, value].join(" ") }.
    join(" ")
end
|
||||
|
||||
# Applies a publication-style look to +plot+: PDF terminal, grey axes and
# dashed grid, selectable borders, optional log axes and the line styles from
# $pretty_styles.
#
# plot - object receiving the gnuplot settings (terminal, style, grid, ...).
# opts - Hash of optional settings: :fontface, :fontsize, :linewidth,
#        :fontscale, :sizex, :sizey, :bordercolor, :gridcolor, :logx, :logy
#        and :border (Array of :bottom, :left, :top, :right, :all).
#
# Raises RuntimeError for an unknown :border entry.
def pretty_plot(plot, opts = {})
  # plot based on Brighten Godfrey's blog post:
  # http://youinfinitesnake.blogspot.com/2011/02/attractive-scientific-plots-with.html

  plot.terminal [
    "pdf",
    "font \"#{opts.fetch(:fontface, "Times-Roman")},#{opts.fetch(:fontsize, 10)}\"",
    "linewidth #{opts.fetch(:linewidth, 4)} rounded",
    "fontscale #{opts.fetch(:fontscale, 1.0)}",
    "size #{opts.fetch(:sizex, 5)}in,#{opts.fetch(:sizey, 3)}in"
  ].join(" ")

  # Line style for axes
  plot.style "line 80 lc #{opts.fetch(:bordercolor, "rgb \"#808080\"")}"

  # Line style for grid
  plot.style "line 81 lt 0" # dashed
  plot.style "line 81 lc #{opts.fetch(:gridcolor, "rgb \"#808080\"")}" # grey

  plot.grid "back linestyle 81"

  border_bits =
    opts.fetch(:border, [:left, :bottom]).map do |b|
      case b
      when :bottom then 1
      when :left   then 2
      when :top    then 4
      when :right  then 8
      when :all    then 1 + 2 + 4 + 8
      else raise "Invalid border type : #{b}"
      end
    end
  # The sides form a bitmask, so combine with OR: adding would give an
  # out-of-range value for repeated or overlapping entries such as
  # [:all, :left].
  border_groups = border_bits.inject(0) { |mask, bit| mask | bit }

  # By default the top and right borders are removed; they make it harder to
  # see plotted lines near the border. The border is drawn in grey as well.
  plot.border "#{border_groups} back linestyle 80"
  plot.xtics "nomirror"
  plot.ytics "nomirror"

  if opts.fetch(:logx, false)
    plot.logscal "x"
    plot.mxtics "10" # Makes logscale look good.
  end
  if opts.fetch(:logy, false)
    plot.logscal "y"
    plot.mytics "10" # Makes logscale look good.
  end

  # Line styles: try to pick pleasing colors, rather than strictly primary
  # colors or hard-to-see colors like gnuplot's default yellow. Make the
  # lines thick so they're easy to see in small plots in papers.
  $pretty_styles.each_index { |x| plot.style "line #{x+1} #{pretty_style(x)}" }

  plot.key "bottom right"
end
|
||||
|
||||
# Turns automatic opening of generated plot files on (default) or off by
# storing +new_val+ in $plot_auto_open. Returns the stored value.
def auto_open_plots(new_val = true)
  $plot_auto_open = new_val
end
|
||||
|
||||
# Adds a dataset to the plot currently being built ($current_plot).
#
# data - row-oriented Array; it is transposed with Array#unzip before being
#        wrapped in a Gnuplot::DataSet.
#
# Yields the new Gnuplot::DataSet so the caller can configure it.
def row_data(data)
  plot_datasets = $current_plot.data
  columns = data.unzip
  plot_datasets << Gnuplot::DataSet.new(columns) { |ds| yield ds }
end
|
||||
|
||||
|
||||
# Declares a rake task that renders one gnuplot figure.
#
# args - anything accepted by rake's task: a name (String/Symbol) or a
#        { name => dependencies } Hash.
#
# When the task runs, a Gnuplot::Plot is opened with the configured terminal
# (see plot_terminal). If $plot_suffix is set, output goes to
# "<task name><suffix>". The plot is yielded to the caller's block and, when
# $plot_auto_open is set for a .pdf/.png output, opened afterwards.
def plot(args = {})
  task(args) do
    task_name =
      case args
      when Hash then args.keys[0]
      when Symbol, String then args.to_s
      end

    Gnuplot.open do |gp|
      Gnuplot::Plot.new(gp) do |plot|
        $current_plot = plot

        plot.terminal plot_terminal
        plot.output "#{task_name}#{$plot_suffix}" if $plot_suffix && task_name
        yield plot
      end
    end

    if $plot_auto_open && [".pdf", ".png"].include?($plot_suffix)
      # Argument-list form: the file name is passed to `open` verbatim rather
      # than being interpreted by a shell (spaces, quotes, etc.).
      system("open", "#{task_name}#{$plot_suffix}")
    end
  end
end
|
||||
|
||||
# Declares a plot task (see plot) that draws one line per data column.
#
# The caller's block receives the plot and returns either the row-oriented
# data itself or a Hash with:
#   :data  - row-oriented data (required).
#   :xaxis - x values; defaults to the first column of :data.
#   :keys  - one title per line; defaults to no titles.
#   :with  - gnuplot style, a String for all lines or one entry per line
#            (default "linespoints").
#
# Raises RuntimeError when :data or the x axis is missing.
def line_plot(args = {})
  plot(args) do |plot|
    data_elements = yield(plot)
    data_elements = { :data => data_elements } unless data_elements.is_a? Hash

    # Check before transposing: the old check ran after unzip had already
    # been called on the value, so it could never fire.
    raise "Missing data!" if data_elements[:data].nil?

    data = data_elements[:data].unzip
    xaxis = data_elements.fetch(:xaxis) { data.shift }
    keys = data_elements.fetch(:keys) { data.map { nil } }
    withs = data_elements.fetch(:with, "linespoints")
    withs = data.map { withs } unless withs.is_a? Array

    raise "Missing X Axis!" if xaxis.nil?

    data.zip(keys, withs).each do |line, key, with|
      dataset = Gnuplot::DataSet.new([xaxis, line]) do |ds|
        ds.title = key unless key.nil?
        ds.with = with unless with.nil?
      end
      plot.data << dataset
    end
  end
end
|
||||
|
||||
# Adds a clustered bar chart to +plot+.
#
# args - Hash of settings:
#   :data              - required; one row per group, one value per dataset.
#   :dataset_labels    - title of each dataset (key entries).
#   :group_labels      - x-axis label of each group.
#   :base_offset       - x position the layout starts from (default 0).
#   :interbar_offset   - x distance between bars of one group (default 18).
#   :intergroup_offset - extra gap between groups (default: interbar_offset).
#   :margins           - gap before the first bar (default: intergroup_offset).
#   :bar_width         - box width (default 10).
#   :tic_commands      - text appended to the xtics setting (default "").
#   :label_offset      - extra shift applied to the group labels (default 0).
#   :box_style         - callable mapping the 1-based dataset number to a
#                        gnuplot "with" string (default: fill pattern N).
def draw_clustered_bar_plot plot, args = {}
  series = args.fetch(:data).unzip
  base_offset = args.fetch(:base_offset, 0)
  interbar_offset = args.fetch(:interbar_offset, 18)
  intergroup_offset = args.fetch(:intergroup_offset, interbar_offset)
  margins = args.fetch(:margins, intergroup_offset)
  bar_width = args.fetch(:bar_width, 10)
  tic_commands = args.fetch(:tic_commands, "")
  label_offset = args.fetch(:label_offset, 0)
  box_style = args.fetch(:box_style,
                         lambda { |i| "boxes fill pattern #{i}" })

  plot.grid "noxtics"
  series_start = base_offset + margins
  group_size = interbar_offset * series.length + intergroup_offset
  plot.boxwidth bar_width.to_s

  style_number = 0
  series.zip(args[:dataset_labels]).each do |bars, title|
    # Each dataset starts one bar further right; within a dataset the bars
    # are one full group apart.
    cursor = series_start - group_size
    series_start += interbar_offset

    x_positions = bars.map { |_bar| cursor += group_size }
    plot.data << Gnuplot::DataSet.new([x_positions, bars]) do |ds|
      ds.title = title
      ds.with = box_style.call(style_number += 1)
    end
  end

  label_offset += (group_size + intergroup_offset - margins) / 2
  tic_position = base_offset - label_offset
  tics = args[:group_labels].map do |label|
    tic_position += group_size
    "\"#{label}\" #{tic_position}"
  end
  plot.xtics "(#{tics.join(", ")}) scale 0 #{tic_commands}"

  plot.xrange "[-10:#{tic_position + label_offset + margins - intergroup_offset}]"
end
|
||||
|
||||
# Draws a plain (single dataset) bar chart by delegating to
# draw_clustered_bar_plot with one bar per group and the key switched off.
#
# args - Hash; :data is a flat list of values and :labels their x-axis
#        labels. Any other entries are forwarded unchanged. The caller's
#        Hash is not modified.
def draw_bar_plot plot, args
  plot.key "off"
  clustered_args = args.merge(
    :data => args[:data].map { |value| [value] },
    :dataset_labels => [""],
    :group_labels => args[:labels]
  )

  draw_clustered_bar_plot plot, clustered_args
end
|
||||
|
0
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/sqlite100m.pdf
Executable file
0
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/sqlite100m.pdf
Executable file
0
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/sqlite1g.pdf
Executable file
0
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/sqlite1g.pdf
Executable file
394
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/util.rb
Executable file
394
slides/talks/2018-1-Tour-Mimir/data/CT-InPractice/util.rb
Executable file
|
@ -0,0 +1,394 @@
|
|||
# Splits a string into tokens up front and hands them out one at a time.
class Tokenizer
  # string       - text to tokenize.
  # token        - pattern passed to String#scan to produce the tokens.
  # input_source - optional object responding to lineno; when given, parse
  #                errors report its line number instead of the input text.
  def initialize(string, token, input_source = nil)
    @string = string
    @input_source = input_source
    @tokens = string.scan(token)
    @last = nil
  end

  # Yields and consumes tokens until none remain or the block returns a
  # falsy value (the token that stopped the scan is consumed too).
  def scan
    until @tokens.empty?
      break unless yield(@tokens.shift)
    end
  end

  # Returns the next token without consuming it, or nil when exhausted.
  def peek
    @tokens.first
  end

  # Consumes and returns the next token (nil when exhausted); the result is
  # also remembered as #last.
  def next
    @last = @tokens.shift
  end

  # Returns the token most recently returned by #next.
  def last
    @last
  end

  # True while unconsumed tokens remain.
  def more?
    !@tokens.empty?
  end

  # Flattens the remaining tokens (useful when the pattern has groups and
  # scan produced nested arrays). Returns the flattened list.
  def flatten
    @tokens = @tokens.flatten
  end

  # Consumes the next token and checks it.
  #
  # token  - String the token must equal, or Array it must be a member of.
  #          For any other type nothing is consumed or checked.
  # errstr - optional custom error text.
  #
  # Returns #last. Raises RuntimeError (via raise_error) on a mismatch.
  def assert_next(token, errstr = nil)
    if token.is_a?(String)
      raise_error(errstr || "Expected '#{token}' but found '#{last}'") unless self.next == token
    elsif token.is_a?(Array)
      raise_error(errstr || "Expected '#{token.join("','")}' but found '#{last}'") unless token.include?(self.next)
    end
    last
  end

  # Raises a RuntimeError "Parse Error: ..." annotated with the input
  # source's line number, or with the tokenized string when there is no
  # input source.
  def raise_error(errstr)
    location = @input_source ? "line #{@input_source.lineno}" : @string
    raise "Parse Error: #{errstr} (#{location})"
  end

  # Consumes tokens up to and including +token+ (or to the end) and returns
  # the tokens that came before it.
  def tokens_up_to(token)
    collected = []
    collected.push(last) while more? && self.next != token
    collected
  end
end
|
||||
|
||||
# Convenience extensions to Array used by the plotting and CSV helpers.
#
# NOTE: to_h, count, sum, reduce, partition, grep and select share their
# names with core Array/Enumerable methods. Where possible the standard call
# forms are still honoured (see each method) so other libraries keep working.
class Array
  # Maps over the array, yielding (index, element).
  def map_index
    (0...length).map { |i| yield(i, self[i]) }
  end

  # Builds a Hash from [key, value] entries. An entry that is not a pair
  # becomes a key with a nil value. With a block, the block's result for
  # each entry is used as the pair.
  def to_h
    each_with_object({}) do |entry, table|
      key, value = block_given? ? yield(entry) : entry
      table[key] = value
    end
  end

  # Transposes an array of rows into an array of columns. Unlike transpose,
  # rows may have different lengths; missing cells become nil.
  def unzip
    width = map { |row| row.length }.max || 0
    (0...width).map { |col| map { |row| row[col] } }
  end

  # Without arguments returns the number of elements; with an argument or
  # block behaves like the standard Enumerable#count.
  def count(*args, &block)
    return size if args.empty? && block.nil?
    super
  end

  # Adds up the elements with +, starting from +init+. With a block, adds up
  # the block's result for each element.
  def sum(init = 0)
    total = init
    each { |item| total += block_given? ? yield(item) : item }
    total
  end

  # Arithmetic mean as a Float (NaN for an empty array).
  def avg
    sum.to_f / length.to_f
  end

  # Root-mean-square of the elements.
  def rms_avg
    Math.sqrt(map { |x| x.to_f * x.to_f }.avg)
  end

  # Population standard deviation: sqrt(|mean^2 - mean of squares|).
  def stddev
    Math.sqrt((avg ** 2 - (map { |i| i.to_f ** 2 }.avg)).abs)
  end

  # Groups [key, value] entries by key.
  #
  # Without a block returns { key => [values...] }. With a block, each group
  # is replaced by reducer.call(key, values).
  #
  # CAUTION: the block form is NOT Enumerable#reduce. Calls that pass
  # arguments (reduce(:+), reduce(0) { ... }) are forwarded to inject so the
  # standard forms with arguments still work.
  def reduce(*args, &reducer)
    return inject(*args, &reducer) unless args.empty?

    groups = {}
    each do |key, value|
      groups[key] = [] unless groups.has_key?(key)
      groups[key].push(value)
    end
    return groups if reducer.nil?

    groups.to_a.map { |key, values| [key, reducer.call(key, values)] }.to_h
  end

  # Round-robin partition into K arrays
  def subdivide(k)
    buckets = (0...k).map { [] }
    each_with_index { |item, position| buckets[position % k].push(item) }
    buckets
  end

  # Inorder partition into groups of K elements. Called without k (block
  # only) this behaves like the standard Enumerable#partition.
  def partition(k = nil, &block)
    return super(&block) if k.nil?

    (0...(size / k.to_f).ceil).map do |i|
      self[k * i...[k * (i + 1), size].min]
    end
  end

  # Zips the member arrays together: [[a, b], [c, d]] => [[a, c], [b, d]].
  # Returns [] for an empty array.
  def zip_members
    return [] if empty?
    self[0].zip(*(self[1..-1]))
  end

  # Returns the elements matching +pattern+ (tested with pattern =~ element).
  # With a block, returns the block's result for each MatchData instead.
  def grep(pattern, &block)
    if block.nil?
      each_with_object([]) { |item, hits| hits.push(item) if pattern =~ item }
    else
      each_with_object([]) do |item, hits|
        match = pattern.match(item)
        hits.push(block.call(match)) if match
      end
    end
  end

  # Sliding windows of +window_size+ consecutive elements.
  #
  # Returns the list of windows, or the block's result for each window. An
  # array no longer than the window yields a single window holding all of
  # it. The block receives the live buffer, which is reused between calls.
  # (Windows used to come back wrapped in an extra array in the long case
  # only; both cases now return the same shape.)
  def window(window_size = 10, &block)
    if length <= window_size
      return [block.nil? ? clone : block.call(self)]
    end

    buffer = []
    each_with_object([]) do |item, windows|
      buffer.push(item)
      buffer.shift if buffer.length > window_size
      next if buffer.length < window_size
      windows.push(block.nil? ? buffer.clone : block.call(buffer))
    end
  end

  # Left fold: accum = yield(accum, element) for each element.
  def fold(accum = nil)
    inject(accum) { |memo, item| yield memo, item }
  end

  # Returns +num_samples+ elements spread evenly over the array, or self
  # when the array is already that small. The receiver is not modified.
  # (The previous body referenced an undefined variable and always raised.)
  def pick_samples_evenly(num_samples)
    return self if length <= num_samples
    (0...num_samples).map { |i| self[i * length / num_samples] }
  end

  # Renders an array of rows as a text table.
  #
  # headers - optional list of column titles (left-justified, followed by a
  #           dashed rule). Cells are centred in their column.
  def to_table(headers = nil)
    header_rows = headers.nil? ? [] : [headers]
    widths = (header_rows + self).
      map { |row| row.map { |cell| cell.to_s.length } }.
      unzip.
      map { |col| col.compact.max }

    lines = []
    unless headers.nil?
      titles = headers.zip(widths).map { |cell, width| cell.to_s.ljust(width) }
      lines << " " + titles.join(" | ")
      total_width = widths.inject(0) { |acc, width| acc + width }
      lines << "-" * (total_width + 2 + (widths.length - 1) * 3)
    end
    each do |row|
      cells = row.zip(widths).map { |cell, width| cell.to_s.center(width) }
      lines << " " + cells.join(" | ")
    end
    lines.join("\n")
  end

  # True when the block is truthy for every element.
  def for_all
    all? { |v| yield v }
  end

  # Yields every non-empty prefix: [a], [a, b], ...
  def each_prefix
    each_index { |i| yield self[0..i] }
  end

  # Elements for which the block is truthy (nil/false elements included).
  def select
    return to_enum(:select) unless block_given?
    each_with_object([]) { |item, kept| kept << item if yield(item) }
  end

  # For an array of hashes/pair lists, returns { key => [value per member] }
  # with nil where a member lacks the key.
  def cogroup
    grouped = Hash.new { |h, k| h[k] = [nil] * size }
    each_index do |i|
      self[i].each { |k, v| grouped[k][i] = v }
    end
    grouped
  end

  # Return every cnt'th element of the array.
  def every(cnt, start = 0)
    (0..(((size - 1 - start) / cnt).to_i)).map { |i| self[i * cnt + start] }
  end

  # Create batches of up to size cnt.
  def batch(cnt)
    (0..(((size - 1) / cnt).to_i)).map { |i| self[(i * cnt)...((i + 1) * cnt)] }
  end

  # Concatenates the arrays returned by the block for each element.
  def flatmap
    each_with_object([]) { |item, flat| flat.concat(yield(item)) }
  end

  # Calls project(*keys) on every element.
  def project(*keys)
    map { |x| x.project(*keys) }
  end

  # Sorted copy with consecutive duplicates (by !=) removed.
  def unique
    previous = nil
    sort.each_with_object([]) do |item, distinct|
      next unless item != previous
      previous = item
      distinct << item
    end
  end

  # Counts elements per bin of width +bin_width+.
  #
  # Returns [[bin_start, count], ...] in ascending order, covering every bin
  # from the one holding the minimum to the one past the maximum. Bins use
  # floor so negative values land in the bin that starts below them.
  def histogram(bin_width = 5)
    return [] if empty?

    min_val = (min - min % bin_width).to_i
    max_val = (max - max % bin_width + bin_width).to_i

    tallies = Hash.new(0)
    each { |x| tallies[(x.to_f / bin_width).floor * bin_width] += 1 }

    (min_val..max_val).to_a.every(bin_width).map { |bin| [bin, tallies[bin]] }
  end

  # Running totals: [1, 2, 3] => [1, 3, 6].
  def cumulative_sum
    tot = 0
    map { |x| tot += x }
  end
end
|
||||
|
||||
# Convenience extensions to Hash used by the plotting helpers.
class Hash
  # Returns the keys that are also present in +other+.
  def intersect(other)
    keys.find_all { |k| other.has_key?(k) }
  end

  # Lays out a { label => [values...] } hash as grouped bars.
  #
  # bar     - width of one bar.
  # set_sep - gap between groups.
  # bar_sep - gap between bars of one group.
  #
  # Returns [xtics string, transposed [x, value] points (via Array#unzip),
  # xrange string].
  def bar_graph_dataset(bar = 0.5, set_sep = 1.0, bar_sep = 0.2)
    curr_width = 0
    tics = collect do |human, data|
      next_delta = data.length * bar + (data.length - 1) * bar_sep
      curr_width += next_delta + set_sep
      "\"#{human}\" #{curr_width - next_delta / 2}"
    end

    curr_width = 0
    points = values.collect do |data|
      curr_width += set_sep - bar_sep
      data.collect do |point|
        curr_width += bar_sep + bar
        [curr_width - bar / 2, point]
      end
    end.unzip

    ["(#{tics.join(', ')})", points, "[0:#{curr_width + set_sep}]"]
  end

  # Returns [[key, value], ...] ordered by key.
  def to_sorted_a
    keys.sort.map { |k| [k, self[k]] }
  end

  # Rebuilds a nested hash, replacing every non-Hash value with the block's
  # result. The block receives (path, value), where path is the list of keys
  # leading to the value, including +prefix+.
  def map_leaves(prefix = [])
    keys.to_a.map do |k|
      path = prefix + [k]
      value = self[k]
      mapped =
        if value.is_a? Hash
          value.map_leaves(path) { |inner_path, leaf| yield(inner_path, leaf) }
        else
          # The leaf value is self[k]; this used to pass an undefined `v`.
          yield(path, value)
        end
      [k, mapped]
    end.to_h
  end

  # Returns the values stored under +keys+, in that order.
  def project(*keys)
    keys.map { |k| self[k] }
  end

  # Joins two hashes on their keys.
  #
  # h     - the other Hash.
  # outer - :full, :left, :right, or anything else for an inner join.
  #
  # Returns { key => [own value, h's value] }; the missing side is whatever
  # the hash returns for an absent key (nil for plain hashes).
  def join(h, outer = :no)
    joined_keys =
      case outer
      when :full  then keys + h.keys.find_all { |k| not has_key? k }
      when :left  then keys
      when :right then h.keys
      else intersect(h)
      end
    joined_keys.map { |k| [k, [self[k], h[k]]] }.to_h
  end
end
|
||||
|
||||
class Float
  # Rounds the number to +n+ significant figures.
  #
  # Zero is returned unchanged. The magnitude is taken from the absolute
  # value so negative numbers work too (Math.log10 rejects negatives).
  def sig_figs(n)
    return self if self == 0.0

    mult = 10.0 ** (Math.log10(abs).ceil.to_f - n.to_i.to_f)
    (self / mult).round * mult
  end
end
|
||||
|
||||
class IO
  # Reads every remaining line, yielding each one as it is read, and returns
  # the list of lines.
  def tee_readlines
    each_with_object([]) do |line, lines|
      yield line
      lines.push line
    end
  end

  # With a block only: returns the lines for which the block is truthy.
  # With arguments (e.g. a pattern) this defers to the standard
  # Enumerable#grep, which the block-only definition used to make
  # unreachable.
  def grep(*args, &block)
    return super unless args.empty?

    each_with_object([]) { |line, kept| kept.push(line) if yield line }
  end
end
|
||||
|
||||
class Integer
  # Formats a byte count with a binary unit, e.g. 4096 => "4.0 KB".
  #
  # The unit switches once the value reaches twice the unit size (2048 is
  # the first value shown in KB), matching the existing threshold. The
  # number is rounded with Float#sig_figs(4). Negative values are formatted
  # by magnitude with a leading "-".
  def to_bytestring
    return "-#{(-self).to_bytestring}" if self < 0

    # "TB" was missing, which mislabelled everything from terabytes upward.
    scales = ["B", "KB", "MB", "GB", "TB", "PB", "EB"]
    # 0 and 1 would ask for log(0); they are plain bytes.
    depth = self < 2 ? 0 : (Math.log(self / 2) / (10.0 * Math.log(2))).to_i
    depth = scales.length - 1 if depth >= scales.length
    "#{(self.to_f / (1024.0**(depth))).to_f.sig_figs(4)} #{scales[depth]}"
  end

  # Rolls self dice with +die+ sides each; returns the list of results
  # (each between 1 and die).
  def d(die)
    (0...self).map { rand(die) + 1 }
  end
end
|
||||
|
||||
class String
  # Returns the string itself when +num+ equals 1, otherwise the string with
  # an "s" appended.
  def pluralize(num)
    num == 1 ? self : self + "s"
  end
end
|
||||
|
||||
class Dir
  # Runs the block with +d+ as the working directory and returns the block's
  # result. The previous working directory is restored afterwards, also when
  # the block raises.
  def Dir.in_dir(d)
    old_d = Dir.getwd
    Dir.chdir d
    begin
      yield
    ensure
      Dir.chdir old_d
    end
  end
end
|
BIN
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/PDB.pdf
Executable file
BIN
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/PDB.pdf
Executable file
Binary file not shown.
102
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/Rakefile
Executable file
102
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/Rakefile
Executable file
|
@ -0,0 +1,102 @@
|
|||
$:.push "."
|
||||
|
||||
require "gnuplot"
|
||||
require "util.rb"
|
||||
require "plot.rb"
|
||||
require "csvx.rb"
|
||||
|
||||
# Benchmark records: one Hash per CSV row, keyed by the header columns.
data = File.csv("data.csv", header: true)

plot_output(:aqua)
# $plot_auto_open = true

# Strategies in the order their bars are drawn; strategies without records
# for a group are filtered out per plot.
all_strategies = %w[
  Mimir-Mat
  Mimir-Inline
  Mimir-Sample
  Mimir-Partition
  MCDB-Mimir
  SQLite-Det
  MayBMS-PGSQL
  MayBMS-SQLite
]
|
||||
|
||||
# One plot task per benchmark group (the part of the query name before the
# first "-"), each drawing one cluster of bars per query with one bar per
# strategy. Every group is also made a prerequisite of the default task.
#
# NOTE(review): Array#where and bar_plot_position are not defined in the
# util.rb/plot.rb visible alongside this file; presumably this directory's
# own copies provide them - confirm.
data.map { |r| [r["Query"].split(/-/)[0], r] }
  .reduce
  .each do |group, records|

  plot group => ["data.csv", "Rakefile"] do |plot|

    # Only strategies that have at least one record in this group.
    strategies =
      all_strategies
        .where { |s| records.index { |r| r["Strategy"] == s } }
    queries =
      records
        .map { |r| r["Query"] }
        .uniq.sort
    lookup = records.map { |r| [[r["Strategy"], r["Query"]], r["Time"]] }.to_h

    pretty_plot(plot, border: [:all])
    plot.key "left top opaque"
    plot.ylabel "Time (s)"
    # Largest numeric time in the group; TIMEOUT/UNSUPPORTED are skipped.
    max_y =
      records.map { |r| r["Time"] }
        .where { |r| /[\-0-9.]+/ =~ r }
        .map { |r| r.to_f }
        .max

    case group
    when "TPCH"
      max_y = 300
      plot.key "center top outside maxcols 3 maxrows 2"
    when "PDB"
      max_y = 45
      plot.key "center top outside maxcols 3 maxrows 3"
    end
    plot.yrange "[0:#{max_y}]"

    labels = []

    draw_clustered_bar_plot(plot,
      data: (queries.map.with_index { |q, qi|
        strategies.map.with_index { |s, si|
          time = lookup[[s, q]]
          x_pos = bar_plot_position(si, qi, strategies.length) + 1
          font = "Helvetica-Bold,6"
          case time
          when nil
            labels.push "'?? Missing ??' at #{x_pos},6 font '#{font}' rotate by 90"
            # Give the missing bar a numeric height; without this the value
            # of the branch was the labels array itself.
            0
          when "TIMEOUT"
            labels.push "'TIME OUT' at #{x_pos},#{max_y * 0.80} font '#{font}' rotate by 90 front"
            # Taller than the y range so the bar runs off the top.
            max_y * 1.2
          when "UNSUPPORTED"
            labels.push "'UNSUPPORTED' at #{x_pos},5 font '#{font}' rotate by 90 front tc ls #{si+1}"
            1
          else
            time = time.to_f
            if time > max_y
              # The bar is clipped by the y range, so print the real value.
              labels.push "'[ #{time.to_i}s ]' at #{x_pos},#{max_y * 0.80} font 'Helvetica-Bold,8' rotate by 90 front"
            end
            time
          end
        }
      }),
      dataset_labels: strategies,
      group_labels: queries,
      bar_width: 15
    )
    labels.each { |l| plot.label l }

    # Vertical separators between neighbouring query clusters.
    (1...queries.length).each { |qi|
      x_pos = bar_plot_position(0, qi, strategies.length) - 18
      plot.arrow("from #{x_pos},0 to #{x_pos},#{max_y} nohead lc rgb \"#808080\"")
    }

  end

  task :default => group

end
|
BIN
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/TPCH.pdf
Executable file
BIN
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/TPCH.pdf
Executable file
Binary file not shown.
64
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/csvx.rb
Executable file
64
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/csvx.rb
Executable file
|
@ -0,0 +1,64 @@
|
|||
class String
  # Splits one CSV line into its fields, character by character.
  #
  # sep - pattern identifying a separator character; tested against each
  #       single character with ===, so only one-character separators are
  #       recognised (default: a comma). It used to be ignored.
  #
  # A field may be wrapped in double quotes, in which case it may contain
  # separators, and "" inside it stands for one literal quote. The string is
  # not chomped; callers strip the line ending first.
  #
  # Returns an Array of Strings (always at least one field).
  # Raises RuntimeError when a quote appears where it cannot be interpreted.
  def from_csv(sep = /,/)
    quote = "\""
    separator = lambda { |ch| !ch.nil? && sep === ch }
    fields = [[]]
    in_quotes = false
    c = chars
    i = 0
    while i < c.length
      ch = c[i]
      following = c[i + 1]
      if ch == quote
        if in_quotes
          # Inside a quoted field the state decides what a quote means. The
          # old code looked at whether the field was still empty first, so
          # an empty quoted field ("") re-opened the quote and swallowed the
          # separator after it.
          if following == quote
            fields[-1].push(quote)
            i += 1
          elsif following.nil?
            in_quotes = false
          elsif separator.call(following)
            fields.push([])
            in_quotes = false
            i += 1 # skip the separator that ends the quoted field
          else
            raise "Invalid CSV Line (misplaced quote at #{i}; #{following}): #{self}"
          end
        elsif fields[-1].empty?
          in_quotes = true
        elsif following == quote
          # Doubled quote in an unquoted field: kept as one literal quote.
          fields[-1].push(quote)
          i += 1
        else
          raise "Invalid CSV Line (misplaced quote at #{i}; #{following}): #{self}"
        end
      elsif separator.call(ch)
        if in_quotes
          fields[-1].push(ch)
        else
          fields.push([])
        end
      else
        fields[-1].push(ch)
      end
      i += 1
    end
    fields.map { |col| col.join }
  end
end
|
||||
|
||||
class Array
  # Parses every element (converted with to_s) as one CSV line using
  # String#from_csv and returns the list of parsed rows.
  def from_csv
    map { |line| line.to_s.chomp.from_csv }
  end

  # Writes the rows to the file at path +f+, one line per row with the cells
  # joined by commas. Cells are written as-is (no quoting or escaping).
  # Returns self.
  def to_csv(f)
    # The block variable used to be named f as well, shadowing the path.
    File.open(f, "w+") { |out| each { |row| out.puts(row.join(',')) } }
  end
end
|
||||
|
||||
class IO
  # Reads the remaining lines of the stream as CSV.
  #
  # args - Hash of options:
  #        :header    - when truthy, the first line supplies column names and
  #                     every row is returned as a Hash (default: false).
  #        :separator - passed to String#from_csv (default: /,/).
  #
  # Returns an Array of rows (Arrays, or Hashes when :header is set). An
  # empty stream yields [] instead of raising EOFError.
  def from_csv(args = {})
    header = args.fetch(:header, false)
    separator = args.fetch(:separator, /,/)

    keys = nil
    if header
      first_line = gets
      return [] if first_line.nil?
      # Only spaces around the header line are trimmed, as before.
      keys = first_line.chomp.sub(/ *$/, "").sub(/^ */, "").from_csv(separator)
    end

    rows = map { |line| line.to_s.chomp.from_csv(separator) }
    header ? rows.map { |row| keys.zip(row).to_h } : rows
  end
end
|
||||
|
||||
class File
  class << self
    # Opens the file at path +f+, parses it with IO#from_csv (forwarding
    # +args+ unchanged) and returns the parsed rows. The file is closed when
    # parsing finishes.
    def csv(f, args = {})
      open(f) { |io| io.from_csv(args) }
    end
  end
end
|
57
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/data.csv
Executable file
57
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/data.csv
Executable file
|
@ -0,0 +1,57 @@
|
|||
Strategy,Query,Time
|
||||
Mimir-Mat,PDB-1,25.98781968615949
|
||||
Mimir-Mat,PDB-2,20.71622445844114
|
||||
Mimir-Mat,PDB-3,41.98619099296629
|
||||
MayBMS-PGSQL,PDB-1,23.439012999999996
|
||||
MayBMS-PGSQL,PDB-2,13.000651999999999
|
||||
MayBMS-PGSQL,PDB-3,20.2954832
|
||||
MayBMS-SQLite,PDB-1,22.1345477
|
||||
MayBMS-SQLite,PDB-2,7.291376699999999
|
||||
MayBMS-SQLite,PDB-3,29.1511957
|
||||
Mimir-Inline,TPCH-1,16.040970255620778
|
||||
Mimir-Inline,TPCH-3,19.171183695830404
|
||||
Mimir-Inline,TPCH-5,43.3495686205104
|
||||
Mimir-Inline,TPCH-9,98.61139780338854
|
||||
Mimir-Mat,TPCH-1,33.623222251608965
|
||||
Mimir-Mat,TPCH-3,4.8350385190919045
|
||||
Mimir-Mat,TPCH-5,11.789478918723763
|
||||
Mimir-Mat,TPCH-9,28.924315941147505
|
||||
Mimir-Sample,TPCH-1,119.61607021316885
|
||||
Mimir-Sample,TPCH-3,162.00108394436538
|
||||
Mimir-Sample,TPCH-5,258.74168805666267
|
||||
Mimir-Sample,TPCH-9,TIMEOUT
|
||||
Mimir-Partition,TPCH-1,UNSUPPORTED
|
||||
Mimir-Partition,TPCH-3,UNSUPPORTED
|
||||
Mimir-Partition,TPCH-5,UNSUPPORTED
|
||||
Mimir-Partition,TPCH-9,UNSUPPORTED
|
||||
--MayBMS-PGSQL,TPCH-1,UNSUPPORTED
|
||||
--MayBMS-PGSQL,TPCH-3,UNSUPPORTED
|
||||
--MayBMS-PGSQL,TPCH-5,UNSUPPORTED
|
||||
--MayBMS-PGSQL,TPCH-9,UNSUPPORTED
|
||||
--MayBMS-SQLite,TPCH-1,UNSUPPORTED
|
||||
--MayBMS-SQLite,TPCH-3,UNSUPPORTED
|
||||
--MayBMS-SQLite,TPCH-5,UNSUPPORTED
|
||||
--MayBMS-SQLite,TPCH-9,UNSUPPORTED
|
||||
MCDB-Mimir,TPCH-1,14.65919488966465
|
||||
MCDB-Mimir,TPCH-3,TIMEOUT
|
||||
MCDB-Mimir,TPCH-5,TIMEOUT
|
||||
MCDB-Mimir,TPCH-9,TIMEOUT
|
||||
SQLite-Det,PDB-1,9.521
|
||||
SQLite-Det,PDB-2,7.59
|
||||
SQLite-Det,PDB-3,31.22
|
||||
SQLite-Det,TPCH-1,19.561
|
||||
SQLite-Det,TPCH-3,22.835
|
||||
SQLite-Det,TPCH-5,33.308
|
||||
SQLite-Det,TPCH-9,51.125
|
||||
Mimir-Inline,PDB-1,TIMEOUT
|
||||
Mimir-Inline,PDB-2,30.827455023303628
|
||||
Mimir-Inline,PDB-3,TIMEOUT
|
||||
Mimir-Sample,PDB-1,TIMEOUT
|
||||
Mimir-Sample,PDB-2,242.5666234549135
|
||||
Mimir-Sample,PDB-3,TIMEOUT
|
||||
Mimir-Partition,PDB-1,TIMEOUT
|
||||
Mimir-Partition,PDB-2,TIMEOUT
|
||||
Mimir-Partition,PDB-3,TIMEOUT
|
||||
MCDB-Mimir,PDB-1,TIMEOUT
|
||||
MCDB-Mimir,PDB-2,TIMEOUT
|
||||
MCDB-Mimir,PDB-3,TIMEOUT
|
|
227
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/plot.rb
Executable file
227
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/plot.rb
Executable file
|
@ -0,0 +1,227 @@
|
|||
|
||||
# Gnuplot terminal name handed to plot.terminal ("aqua", "pdf" or "png").
$plot_terminal = "aqua"
# Extra terminal settings recorded by plot_output. Initialised here so that
# plot_terminal never reads an unset (nil) global.
$plot_terminal_opts = {}
# File suffix appended to the task name for file-based terminals; nil means
# no output file is set.
$plot_suffix = nil
# When true, generated .pdf/.png files are opened after plotting.
$plot_auto_open = false
# The Gnuplot::Plot currently being built (set by plot, read by row_data).
$current_plot = nil
|
||||
|
||||
# Selects the gnuplot terminal used by subsequent plots.
#
# output   - :aqua, :pdf or :png. Any other value leaves the current
#            terminal and suffix untouched.
# settings - stored in $plot_terminal_opts (always replaced).
#
# Returns the settings that were stored.
def plot_output(output, settings = {})
  known_terminals = {
    :aqua => ["aqua", nil],
    :pdf  => ["pdf", ".pdf"],
    :png  => ["png", ".png"]
  }
  if known_terminals.key?(output)
    $plot_terminal, $plot_suffix = known_terminals[output]
  end
  $plot_terminal_opts = settings
end
|
||||
|
||||
# Builds the gnuplot terminal specification: the terminal name followed by
# the global terminal options combined with per-call settings.
#
# plot     - unused; kept so existing callers keep working.
# settings - per-call options. When both the global options and settings
#            are Hashes, settings win on duplicate keys; otherwise the two
#            are concatenated as lists.
#
# Returns a String such as "pdf size 5in,2.5in fsize 12".
def plot_terminal(plot, settings)
  base = $plot_terminal_opts || {}
  settings ||= {}
  # Hash has no + operator, so hashes must be merged rather than added.
  combined =
    if base.is_a?(Hash) && settings.is_a?(Hash)
      base.merge(settings)
    else
      base.to_a + settings.to_a
    end
  return $plot_terminal + "" if combined.size < 1

  $plot_terminal + " " + combined.to_a.flatten.join(" ")
end
|
||||
|
||||
# Default line styles, one Hash per style index: :lt is the line colour,
# :lw the line width and :pt the point type. Each row below is
# [colour, point type]; the width is 2 for every style.
$pretty_styles = [
  ["#A00000", 1],
  ["#00A000", 6],
  ["#5060D0", 2],
  ["#F25900", 9]
].map do |color, point_type|
  { :lt => "rgb \"#{color}\"", :lw => 2, :pt => point_type }
end
|
||||
|
||||
# Renders style number idx from $pretty_styles as a gnuplot option string.
#
# opts - overrides; a key present in opts wins over the default style.
#        A key whose value is nil is left out of the result entirely.
#        The caller's hash is not modified.
#
# Returns "key value" pairs joined by spaces, opts keys first.
def pretty_style(idx, opts = {})
  merged = opts.clone
  merged.update($pretty_styles[idx]) { |_key, given, _default| given }
  merged.
    reject { |_key, value| value.nil? }.
    map { |key, value| [key, value].join(" ") }.
    join(" ")
end
|
||||
|
||||
# Applies a publication-style look to a gnuplot plot object.
# Styling follows Brighten Godfrey's blog post:
# http://youinfinitesnake.blogspot.com/2011/02/attractive-scientific-plots-with.html
#
# plot - object receiving the gnuplot settings (terminal, style, grid, ...).
# opts - optional overrides: :fontface, :fontsize, :linewidth, :fontscale,
#        :sizex, :sizey, :bordercolor, :gridcolor, :border (list of
#        :bottom/:left/:top/:right/:all), :logx, :logy.
#
# Raises RuntimeError for an unknown :border entry.
def pretty_plot(plot, opts = {})
  font = "font \"#{opts.fetch(:fontface, "Times-Roman")},#{opts.fetch(:fontsize, 10)}\""
  line_width = "linewidth #{opts.fetch(:linewidth, 4)} rounded"
  font_scale = "fontscale #{opts.fetch(:fontscale, 1.0)}"
  page_size = "size #{opts.fetch(:sizex, 5)}in,#{opts.fetch(:sizey, 3)}in"
  plot.terminal "pdf #{font} #{line_width} #{font_scale} #{page_size}"

  # Line style 80 is used for the axes.
  plot.style "line 80 lc #{opts.fetch(:bordercolor, "rgb \"#808080\"")}"

  # Line style 81 is used for the grid: dashed and grey.
  plot.style "line 81 lt 0"
  plot.style "line 81 lc #{opts.fetch(:gridcolor, "rgb \"#808080\"")}"

  plot.grid "back linestyle 81"

  # gnuplot encodes the drawn border sides as a sum of these flags.
  border_flags = { :bottom => 1, :left => 2, :top => 4, :right => 8, :all => 1 + 2 + 4 + 8 }
  border_groups = opts.fetch(:border, [:left, :bottom]).map { |side|
    border_flags.fetch(side) { raise "Invalid border type : #{side}" }
  }.sum

  # By default only the left and bottom borders remain: the others make it
  # harder to see lines plotted near the edge. Grey keeps it unobtrusive.
  plot.border "#{border_groups} back linestyle 80"
  plot.xtics "nomirror"
  plot.ytics "nomirror"

  # Minor tics at 10 make a log scale look good.
  [[:logx, "x", :mxtics], [:logy, "y", :mytics]].each do |flag, axis, minor_tics|
    next unless opts.fetch(flag, false)
    plot.logscal axis
    plot.public_send(minor_tics, "10")
  end

  # Line styles 1..n: pleasing colours instead of strict primaries or
  # gnuplot's hard-to-see default yellow, with thick lines that stay
  # readable in small plots.
  $pretty_styles.each_index do |n|
    plot.style "line #{n + 1} #{pretty_style(n)}"
  end

  plot.key "bottom right"
end
|
||||
|
||||
# Turns automatic opening of generated plot files on (default) or off by
# storing new_val in $plot_auto_open. Returns the stored value.
def auto_open_plots(new_val = true)
  $plot_auto_open = new_val
end
|
||||
|
||||
# Adds a dataset to the plot currently being built ($current_plot).
#
# data - array of rows; it is converted to columns with Array#unzip before
#        being wrapped in a Gnuplot::DataSet.
#
# The new dataset is yielded to the caller's block for configuration.
def row_data(data)
  datasets = $current_plot.data
  datasets << Gnuplot::DataSet.new(data.unzip) { |ds| yield ds }
end
|
||||
|
||||
|
||||
# Defines a task (via `task(args)`) that renders one gnuplot plot.
#
# args - forwarded to `task`. For a Hash the first key is used as the
#        task name; for a Symbol or String its string form is used.
#
# Inside the task a Gnuplot::Plot is created, stored in $current_plot,
# given the global terminal and, when $plot_suffix is set, an output file
# named "<task name><suffix>". The plot is then yielded to the caller.
# When $plot_auto_open is set and the suffix is .pdf or .png, the file is
# opened with the `open` command afterwards.
def plot(args = {})
  task(args) do
    task_name =
      if args.is_a?(Hash)
        args.keys[0]
      elsif args.is_a?(Symbol) || args.is_a?(String)
        args.to_s
      end

    Gnuplot.open do |gp|
      Gnuplot::Plot.new(gp) do |plot|
        $current_plot = plot
        plot.terminal $plot_terminal
        plot.output "#{task_name}#{$plot_suffix}" if $plot_suffix && task_name
        yield plot
      end
    end

    if $plot_auto_open && [".pdf", ".png"].include?($plot_suffix)
      system("open #{task_name}#{$plot_suffix}")
    end
  end
end
|
||||
|
||||
# Defines a plot task (see `plot`) that draws one line per data column.
#
# The caller's block receives the plot and returns either the row data
# itself or a Hash with:
#   :data  - array of rows (required); converted to columns with unzip
#   :xaxis - x values; defaults to the first data column, which is then
#            removed from the plotted columns
#   :keys  - one title per plotted column; defaults to no titles
#   :with  - a gnuplot "with" style, or an Array with one per column;
#            defaults to "linespoints"
#
# Raises RuntimeError ("Missing data!" / "Missing X Axis!") when the data
# or the x axis is missing.
def line_plot(args = {})
  plot(args) do |plot|
    data_elements = yield(plot)
    data_elements = { :data => data_elements } unless data_elements.is_a? Hash

    # Validate before dereferencing so a missing :data is reported with the
    # intended message instead of a NoMethodError on nil.
    rows = data_elements[:data]
    raise "Missing data!" if rows.nil?

    data = rows.unzip
    xaxis = data_elements.fetch(:xaxis) { data.shift }
    raise "Missing X Axis!" if xaxis.nil?

    keys = data_elements.fetch(:keys) { data.map { nil } }
    withs = data_elements.fetch(:with, "linespoints")
    withs = data.map { withs } unless withs.is_a? Array

    data.zip(keys, withs).each do |line, key, with|
      dataset = Gnuplot::DataSet.new([xaxis, line]) do |ds|
        ds.title = key unless key.nil?
        ds.with = with unless with.nil?
      end
      plot.data << dataset
    end
  end
end
|
||||
|
||||
# Computes an x position for a bar in a clustered bar plot.
#
# The result is
#   base_offset + margins + interbar_offset * group
#     + (interbar_offset * num_elements + intergroup_offset) * element
#
# args - optional layout overrides: :base_offset (0), :interbar_offset
#        (18), :intergroup_offset (defaults to the interbar offset) and
#        :margins (defaults to the intergroup offset).
def bar_plot_position(group, element, num_elements, args = {})
  interbar = args.fetch(:interbar_offset, 18)
  intergroup = args.fetch(:intergroup_offset, interbar)
  left_edge = args.fetch(:base_offset, 0) + args.fetch(:margins, intergroup)
  cluster_span = interbar * num_elements + intergroup

  left_edge + (interbar * group) + (cluster_span * element)
end
|
||||
|
||||
# Adds a clustered bar chart to a gnuplot plot.
#
# args[:data] holds one row per cluster; each column becomes one dataset
# (after Array#unzip) titled from args[:dataset_labels]. Cluster labels
# come from args[:group_labels] and are placed as x tics.
#
# Optional layout keys: :base_offset (0), :interbar_offset (18),
# :intergroup_offset (interbar offset), :margins (intergroup offset),
# :bar_width (10), :tic_commands (""), :label_offset (0) and :box_style,
# a callable mapping the 1-based dataset number to a gnuplot "with" style.
def draw_clustered_bar_plot plot, args = {}
  series = args.fetch(:data).unzip
  base_offset = args.fetch(:base_offset, 0)
  interbar_offset = args.fetch(:interbar_offset, 18)
  intergroup_offset = args.fetch(:intergroup_offset, interbar_offset)
  margins = args.fetch(:margins, intergroup_offset)
  bar_width = args.fetch(:bar_width, 10)
  tic_commands = args.fetch(:tic_commands, "")
  label_offset = args.fetch(:label_offset, 0)
  box_style = args.fetch(:box_style,
                         lambda { |i| "boxes fill pattern #{i}" })

  plot.grid "noxtics"
  group_size = interbar_offset * series.length + intergroup_offset
  plot.boxwidth bar_width.to_s

  # x position of the current dataset's bar within the first cluster.
  first_bar = base_offset + margins
  pattern = 0
  series.zip(args[:dataset_labels]).each do |dataset, dataset_title|
    position = first_bar - group_size
    first_bar += interbar_offset

    # One bar per cluster, each a full cluster width after the previous.
    indices = dataset.map { position += group_size }
    bars = Gnuplot::DataSet.new([indices, dataset]) do |ds|
      ds.title = dataset_title
      ds.with = box_style.call(pattern += 1)
    end
    plot.data << bars
  end

  # Place each cluster label relative to the start of its cluster.
  label_offset += (group_size + intergroup_offset - margins) / 2
  tic_position = base_offset - label_offset
  tics = args[:group_labels].map do |label|
    "\"#{label}\" #{tic_position += group_size}"
  end
  plot.xtics "(#{tics.join(", ")}) scale 0 #{tic_commands}"

  plot.xrange "[-10:#{tic_position + label_offset + margins - intergroup_offset}]"
end
|
||||
|
||||
# Draws a plain (non-clustered) bar chart by delegating to
# draw_clustered_bar_plot with a single unnamed dataset.
#
# args[:data] is a flat list of bar heights and args[:labels] the matching
# bar labels; all other keys are passed through unchanged. The legend is
# switched off. The caller's args hash is not modified.
def draw_bar_plot plot, args
  plot.key "off"
  clustered = args.merge(
    :data => args[:data].map { |d| [d] },
    :dataset_labels => [""],
    :group_labels => args[:labels]
  )

  draw_clustered_bar_plot plot, clustered
end
|
||||
|
471
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/util.rb
Executable file
471
slides/talks/2018-1-Tour-Mimir/data/PushdownVis/util.rb
Executable file
|
@ -0,0 +1,471 @@
|
|||
# A simple forward-only token stream over a string.
class Tokenizer
  # The token most recently returned by #next (nil before the first call
  # and once the stream is exhausted).
  attr_reader :last

  # string       - text to tokenize
  # token        - pattern handed to String#scan to produce the tokens
  # input_source - optional object responding to #lineno; when given,
  #                error messages report its line number instead of
  #                echoing the string
  def initialize(string, token, input_source = nil)
    @string = string
    @input_source = input_source
    @tokens = string.scan(token)
    @last = nil
  end

  # Consumes tokens one at a time, yielding each; stops after the first
  # token for which the block returns a falsy value.
  def scan
    until @tokens.empty?
      break unless yield @tokens.shift
    end
  end

  # Returns the next token without consuming it (nil when exhausted).
  def peek
    @tokens.first
  end

  # Consumes and returns the next token (nil when exhausted).
  def next
    @last = @tokens.shift
  end

  # True while unconsumed tokens remain.
  def more?
    !@tokens.empty?
  end

  # Flattens nested token arrays (as produced by patterns with capture
  # groups) into a single list of tokens.
  def flatten
    @tokens = @tokens.flatten
  end

  # Consumes the next token and checks it against an expectation.
  #
  # token  - a String (must equal the next token) or an Array of accepted
  #          tokens; any other type consumes nothing.
  # errstr - optional message replacing the default "Expected ..." text.
  #
  # Returns the last consumed token; raises a parse error on mismatch.
  def assert_next(token, errstr = nil)
    if token.is_a?(String)
      found = self.next
      raise_error(errstr || "Expected '#{token}' but found '#{found}'") unless found == token
    elsif token.is_a?(Array)
      found = self.next
      raise_error(errstr || "Expected '#{token.join("','")}' but found '#{found}'") unless token.include?(found)
    end
    last
  end

  # Raises a RuntimeError "Parse Error: <errstr> (<context>)", where the
  # context is the input source's line number or the tokenized string.
  def raise_error(errstr)
    context = @input_source ? "line #{@input_source.lineno}" : @string
    raise "Parse Error: #{errstr} (#{context})"
  end

  # Consumes tokens up to and including the first one equal to `token`
  # and returns the tokens that came before it.
  def tokens_up_to(token)
    collected = []
    while more?
      break if self.next == token
      collected.push(last)
    end
    collected
  end
end
|
||||
|
||||
# Collection helpers added to the core Array class.
#
# Several methods here replace built-in Array/Enumerable methods with
# different semantics (to_h, count, sum, reduce, grep, select); the notes
# on each method describe what this version does.
class Array
  # Like map, but the block receives (index, element).
  def map_index
    each_with_index.map { |item, i| yield(i, item) }
  end

  # Builds a Hash from [key, value] elements. An element that is not an
  # array becomes a key with a nil value.
  def to_h
    each_with_object({}) { |(k, v), ret| ret[k] = v }
  end

  # Transposes an array of rows into an array of columns. Rows may have
  # different lengths; missing cells are filled with nil.
  def unzip
    columns = []
    each_index do |row_no|
      row = self[row_no]
      # New columns are pre-filled with nil for all earlier rows.
      columns.push(Array.new(row_no)) while columns.length < row.length
      columns.each_index { |col_no| columns[col_no][row_no] = row[col_no] }
    end
    columns
  end

  # Number of elements (takes no argument or block).
  def count
    size
  end

  # Sum of the elements using plain left-to-right addition starting at 0.
  def sum
    inject(0) { |total, item| total + item }
  end

  # Arithmetic mean as a Float.
  def avg
    sum.to_f / length.to_f
  end

  # Product of the elements, starting at 1.
  def prod
    inject(1) { |total, item| total * item }
  end

  # Root of the mean of the squared elements.
  def rms_avg
    Math.sqrt(map { |x| x.to_f ** 2 }.avg)
  end

  # Root-mean-square error over [x, y] pairs.
  def rms_err
    Math.sqrt(map { |x, y| (x.to_f - y.to_f) ** 2 }.avg)
  end

  # Standard deviation computed as sqrt(|mean^2 - mean of squares|).
  def stddev
    Math.sqrt((avg ** 2 - (map { |i| i.to_f ** 2 }.avg)).abs)
  end

  # Groups [key, value] elements by key.
  #
  # Without a block returns { key => [values...] }. With a block, each
  # group is replaced by the block's result for (key, values).
  # NOTE: this is a group-by, not the fold of Enumerable#reduce.
  def reduce(&reducer)
    groups = {}
    each do |k, v|
      (groups[k] ||= []).push(v)
    end
    return groups if reducer.nil?

    groups.map { |k, vs| [k, reducer.call(k, vs)] }.to_h
  end

  # Round-robin partition into K arrays
  def subdivide(k)
    buckets = (0...k).map { [] }
    each_with_index { |item, n| buckets[n % k].push(item) }
    buckets
  end

  # Inorder partition into groups of K elements
  def take_groups(k)
    group_count = (size / k.to_f).ceil
    (0...group_count).map do |i|
      self[k * i...[k * (i + 1), size].min]
    end
  end

  # Zips the first member array with all the remaining member arrays.
  def zip_members
    first.zip(*drop(1))
  end

  # Without a block: the elements for which `pattern =~ element` matches.
  # With a block: the block's result for the MatchData of every match.
  def grep(pattern, &block)
    if block.nil?
      each_with_object([]) { |l, ret| ret.push(l) if pattern =~ l }
    else
      each_with_object([]) do |l, ret|
        match = pattern.match(l)
        ret.push(block.call(match)) if match
      end
    end
  end

  # Slides a window of window_size elements over the array.
  #
  # With a block, returns the block's result for every full window (or for
  # the whole array when it is not longer than the window).
  # NOTE(review): without a block the short case returns [copy of self]
  # while the long case wraps every window in an extra array ([[w], ...]);
  # this asymmetry is preserved from the original - confirm it is intended.
  def window(window_size = 10, &block)
    if length <= window_size
      return [block.nil? ? self.clone : block.call(self)]
    end

    ret = []
    w = []
    each do |item|
      w.push(item)
      w.shift if w.length > window_size
      next unless w.length >= window_size
      ret.push(block.nil? ? [w.clone] : block.call(w))
    end
    ret
  end

  # Left fold: the block receives (accumulator, element).
  def fold(accum = nil)
    inject(accum) { |acc, i| yield acc, i }
  end

  # Thins the array IN PLACE to at most num_samples evenly spaced elements
  # (always keeping the first) and returns self. Arrays that are already
  # short enough are returned unchanged.
  #
  # The step is length / num_samples rounded up, so the number of kept
  # elements never exceeds num_samples.
  def pick_samples_evenly(num_samples)
    return self if length <= num_samples

    keep_step = (length / num_samples.to_f).ceil
    position = -1
    delete_if { position += 1; (position % keep_step) != 0 }
  end

  # Renders the rows as a text table, one line per row, columns separated
  # by " | ". Cells are centred in their column; the optional headers are
  # left-aligned and followed by a line of dashes.
  def to_table(headers = nil)
    sized_rows = headers.nil? ? self : [headers] + self
    row_sizes = sized_rows.
      map { |row| row.map { |c| c.to_s.length } }.
      unzip.
      map { |col| col.compact.max }

    lines = []
    unless headers.nil?
      lines.push(" " + headers.zip(row_sizes).map { |col, width| col.ljust(width) }.join(" | "))
      lines.push("-" * (row_sizes.sum + 2 + (row_sizes.length - 1) * 3))
    end
    each do |row|
      lines.push(" " + row.zip(row_sizes).map { |col, width| col.to_s.center(width) }.join(" | "))
    end
    lines.join("\n")
  end

  # For an array of hashes with differing keys: returns
  # [sorted distinct keys, rows of values in that key order].
  def tabulate_schemaless_records
    keys = map { |r| r.keys }.flatten.unique.sort

    [keys, map { |r| keys.map { |k| r[k] } }]
  end

  # True when the block is truthy for every element.
  def for_all
    all? { |v| yield v }
  end

  # Yields every non-empty prefix, shortest first.
  def each_prefix
    each_index { |i| yield take(i + 1) }
  end

  # Elements for which the block is truthy (nil elements are dropped).
  def select
    map { |x| x if yield x }.compact
  end

  # For an array of hashes: { key => [value from hash 0, hash 1, ...] },
  # with nil where a hash lacks the key.
  def cogroup
    ret = Hash.new { |h, k| h[k] = [nil] * size }
    each_with_index do |member, i|
      member.each { |k, v| ret[k][i] = v }
    end
    ret
  end

  # Return every cnt'th element of the array.
  def every(cnt, start = 0)
    last_step = ((size - 1 - start) / cnt).to_i
    (0..last_step).map { |i| self[i * cnt + start] }
  end

  # Create batches of up to size cnt.
  def batch(cnt)
    last_batch = ((size - 1) / cnt).to_i
    (0..last_batch).map { |i| self[(i * cnt)...((i + 1) * cnt)] }
  end

  # Concatenates the arrays returned by the block for each element.
  def flatmap
    inject([]) { |acc, i| acc + yield(i) }
  end

  # Calls project(*keys) on every element.
  def project(*keys)
    map { |x| x.project(*keys) }
  end

  # Sorted distinct elements (adjacent elements compared with !=).
  def unique
    last = nil
    sort.
      map { |c| last = c if c != last }.
      compact
  end

  # Counts the elements per bin of width bin_width.
  # Returns sorted [bin start, count] pairs, including empty bins between
  # the smallest and largest element.
  def histogram(bin_width = 5)
    min_val = (min - min % bin_width).to_i
    max_val = (max - max % bin_width + bin_width).to_i

    empty_bins = (min_val..max_val).to_a.every(bin_width).
      map { |x| [x, 0] }.
      to_h
    observed = map { |x| (x.to_f / bin_width).to_i * bin_width }.
      reduce { |k, v| v.count }

    empty_bins.join(observed, :left).
      map { |bin, cnt| [bin, cnt.compact.sum] }.
      sort { |a, b| a[0] <=> b[0] }
  end

  # Running totals of the elements.
  def cumulative_sum
    tot = 0
    map { |x| tot += x }
  end

  # Returns a new array with val inserted at position idx (clamped to the
  # ends of the array).
  def splice(val, idx)
    return [val] + self if idx <= 0
    return self + [val] if idx >= length
    self[0...idx] + [val] + self[idx..-1]
  end

  # All orderings (permutations) of the elements.
  def all_sorts
    return [[]] if empty?
    return [self] if length == 1

    hd = self[0]
    self[1..-1].all_sorts.map do |rest|
      (0..rest.length).map { |i| rest.splice(hd, i) }
    end.flatten(1)
  end

  # Joins this array of rows with another array of rows; every result is
  # the concatenation (row + other_row) of a matching pair.
  #
  # args[:eq] - column index used for an equality join; either one index
  #             for both sides or [index here, index in other].
  # Without :eq the block decides whether rows i and j match.
  def merge(other, args = {})
    if args.has_key?(:eq)
      # Work on locals so the caller's options hash is left untouched.
      a, b = args[:eq].is_a?(Array) ? args[:eq] : [args[:eq], args[:eq]]
      idx = Hash.new { |h, k| h[k] = [] }
      each { |i| idx[i[a]].push i }
      # Probe the index with the OTHER side's join column.
      other.map { |j| idx.fetch(j[b], []).map { |i| i + j } }.flatten(1)
    else
      map { |i|
        other.map { |j|
          i + j if yield i, j
        }.compact
      }.flatten(1)
    end
  end

  # Elements for which the block is truthy (nil elements are dropped).
  def where
    map { |i| i if yield i }.compact
  end
end
|
||||
|
||||
# Helpers added to the core Hash class.
class Hash
  # Keys of this hash that are also keys of other.
  def intersect(other)
    keys.find_all { |k| other.has_key?(k) }
  end

  # Lays out a { label => [values...] } hash as a grouped bar chart.
  #
  # bar     - width of one bar
  # set_sep - gap between groups
  # bar_sep - gap between bars inside a group
  #
  # Returns [tics string, points (transposed with Array#unzip), x range
  # string].
  def bar_graph_dataset(bar = 0.5, set_sep = 1.0, bar_sep = 0.2)
    curr_width = 0
    tics = collect do |human, data|
      next_delta = data.length * bar + (data.length - 1) * bar_sep
      curr_width += next_delta + set_sep
      # The label sits at the centre of its group.
      "\"#{human}\" #{curr_width - next_delta / 2}"
    end

    curr_width = 0
    points = values.collect do |data|
      curr_width += set_sep - bar_sep
      data.collect do |point|
        curr_width += bar_sep + bar
        [curr_width - bar / 2, point]
      end
    end.unzip

    ["(#{tics.join(', ')})", points, "[0:#{curr_width + set_sep}]"]
  end

  # [key, value] pairs ordered by key.
  def to_sorted_a
    keys.sort.map { |k| [k, self[k]] }
  end

  # Rebuilds a nested hash with every leaf (non-Hash value) replaced by
  # the block's result. The block receives (path, value), where path is
  # the list of keys leading to the leaf, starting with prefix.
  def map_leaves(prefix = [])
    keys.to_a.map do |k|
      value = self[k]
      path = prefix + [k]
      mapped =
        if value.is_a? Hash
          value.map_leaves(path) { |ik, v| yield(ik, v) }
        else
          yield(path, value)
        end
      [k, mapped]
    end.to_h
  end

  # Values for the given keys, in the order requested.
  def project(*keys)
    keys.map { |k| self[k] }
  end

  # Joins two hashes on their keys: { key => [own value, h's value] }.
  #
  # outer - :full (all keys), :left (own keys), :right (h's keys); any
  #         other value keeps only keys present in both.
  # A side that lacks a key contributes whatever `[]` returns (normally
  # nil).
  def join(h, outer = :no)
    joined_keys =
      case outer
      when :full then keys + h.keys.find_all { |k| not has_key? k }
      when :left then keys
      when :right then h.keys
      else intersect(h)
      end
    joined_keys.map { |k| [k, [self[k], h[k]]] }.to_h
  end

  # Flattens nested hashes into a single level with Symbol keys.
  #
  # Nested keys are built as prefix + sep + key (sep omitted when nil);
  # top-level keys are only converted with to_sym.
  def flatten_tree(sep = nil, prefix = nil)
    map { |k, v|
      unless prefix.nil?
        k = sep + k.to_s unless sep.nil?
        k = prefix.to_s + k.to_s
      end
      case v
      when Hash then v.flatten_tree(sep, k).to_a
      else [[k.to_sym, v]]
      end
    }.flatten(1).to_h
  end
end
|
||||
|
||||
class Float
  # Rounds to n significant figures.
  #
  # Zero, NaN and the infinities are returned unchanged. The magnitude is
  # taken from the absolute value, so negative numbers round the same way
  # as their positive counterparts.
  def sig_figs(n)
    return self if zero? || !finite?

    # Power of ten that leaves n digits in front of the decimal point.
    mult = 10.0 ** (Math.log10(abs).ceil.to_f - n.to_i.to_f)
    (self / mult).round * mult
  end
end
|
||||
|
||||
class IO
  # Reads all remaining lines, yielding each one to the block as it is
  # read, and returns the lines as an Array.
  def tee_readlines
    each_with_object([]) do |line, seen|
      yield line
      seen.push line
    end
  end

  # Returns the remaining lines for which the block is truthy.
  # (Takes a block rather than a pattern.)
  def grep
    each_with_object([]) do |line, kept|
      kept.push(line) if yield line
    end
  end
end
|
||||
|
||||
class File
  # Opens inFile for reading and outFile for writing (with the given
  # mode, "w+" by default) and yields both handles as (input, output).
  # Both files are closed when the block finishes; the block's value is
  # returned.
  def self.stream(inFile, outFile, mode = "w+")
    File.open(inFile) do |source|
      File.open(outFile, mode) { |sink| yield(source, sink) }
    end
  end
end
|
||||
|
||||
class Integer
  # Formats a byte count with a binary unit, e.g. 2048 -> "2.0 KB".
  #
  # The value is shown in the largest unit for which it is at least 2
  # (so 1024..2047 are still shown in bytes), rounded to 4 significant
  # figures with Float#sig_figs. Negative values are formatted by
  # magnitude with a leading "-". Values beyond the largest unit are
  # expressed in that unit.
  def to_bytestring
    return "-#{(-self).to_bytestring}" if self < 0

    scales = ["B", "KB", "MB", "GB", "TB", "PB", "EB"]
    # floor(log2(self / 2) / 10), computed with integer arithmetic so that
    # 0 and 1 (which have no usable logarithm) and exact powers of two are
    # handled precisely.
    half = self / 2
    depth = half.zero? ? 0 : (half.bit_length - 1) / 10
    depth = scales.length - 1 if depth >= scales.length
    "#{(self.to_f / (1024.0**depth)).to_f.sig_figs(4)} #{scales[depth]}"
  end

  # Rolls this many dice with `die` sides each; returns the rolls (each
  # between 1 and die).
  def d(die)
    (0...self).map { rand(die) + 1 }
  end
end
|
||||
|
||||
class String
  # Returns the string itself when num equals 1, otherwise the string with
  # an "s" appended.
  def pluralize(num)
    return self if num == 1

    self + "s"
  end
end
|
||||
|
||||
class Dir
  # Runs the block with d as the working directory and returns the
  # block's value. The previous working directory is restored afterwards,
  # even when the block raises.
  def Dir.in_dir(d)
    old_d = Dir.getwd
    Dir.chdir d
    begin
      yield
    ensure
      Dir.chdir old_d
    end
  end
end
|
||||
|
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue