Starting updated tour talk

This commit is contained in:
Oliver Kennedy 2018-01-19 01:18:16 -05:00
parent 1d80613c10
commit 836ddd5ccd
17 changed files with 1805 additions and 1184 deletions

View file

@ -0,0 +1,95 @@
require 'rubygems'
require 'gnuplot'
$:.push(".")
require 'util'
require 'plot'
require 'csvx'
# Evaluation strategies in CSV column order; :deterministic is the baseline
# that plot_timing_bar_plot normalizes the other strategies against.
$modes = [:deterministic,:classic,:partition,:inline,:hybrid]
# Map from CSV query identifiers to human-readable cluster labels.
$query_names = {
  :q1_noagg => "Q1",
  :q3_noagg => "Q3",
  :q5_noagg => "Q5",
  :q9_noagg => "Q9"
}
# plot_output :pdf, size: "5in,2.5in", fsize: "12"
plot_output :aqua
# auto_open_plots :true
# $plot_auto_open = true
# Convert a shell `time`-style duration string ("2m41.632s") into seconds.
# "?" maps to 0 (missing measurement); "Timeout" maps to a large sentinel.
# Raises on anything unrecognized.
def to_seconds(time)
  if (duration = /(?<mins>[0-9]+)m(?<secs>[0-9.]+)s/.match(time))
    duration[:mins].to_i * 60 + duration[:secs].to_f
  elsif time =~ /\?/
    0
  elsif time =~ /Timeout/
    1000000
  else
    raise "Unknown time value '#{time}'"
  end
end
# Project `hash` down to the values of `order`'s keys, in that order.
# Missing keys yield nil entries.
def sort_by_cols(order, hash)
  order.collect { |column| hash[column] }
end
# Load data.csv (db, query, scale-factor, strategy, time) and pivot it into
#   { :<db>_<sf> => { :<query> => [seconds, ordered by $modes] } }
# NOTE: `.reduce` here is NOT Enumerable#reduce — util.rb redefines
# Array#reduce as a group-by that yields (key, grouped_values).
$data =
  File.csv("data.csv", separator: / *, */).
    map { |db,q,sf,mode,time|
      [ (db+"_"+sf).to_sym,
        [q.to_sym,
          [mode.to_sym, to_seconds(time)]]]
    }.reduce { |db, db_trials|
      db_trials.reduce { |q, q_trials|
        sort_by_cols($modes, q_trials.to_h)
      }
    }
# Draw one clustered bar chart for database/scale-factor `db`: one cluster
# per query, one bar per non-deterministic strategy, bar heights expressed
# as a percentage of the deterministic baseline time.
def plot_timing_bar_plot(gp, db, details = {})
  clusters = $query_names.keys.sort;
  data = sort_by_cols(
    clusters,
    $data[db]
  ).map {|timings|
    # timings is in $modes order: first entry is the deterministic baseline.
    det_t = timings.shift
    timings.map { |t| t / det_t * 100}
  }
  gp.yrange details.fetch(:yrange, "[0:300]")
  gp.key "font \"Times-Roman,10\" opaque box top left"
  gp.ylabel "% of Deterministic Time"
  draw_clustered_bar_plot(gp,
    data: data,
    dataset_labels:
      $modes.map {|m| m.to_s.capitalize}[1..-1],
    group_labels:
      clusters.map {|c| $query_names[c]},
    box_style:
      lambda {|i| "boxes fill solid #{(i.to_f/6)+0.25} lc #{$pretty_styles[4-i][:lt]}"}
  )
end
# One Rake task per chart; the Rakefile itself is a dependency so edits
# trigger a re-render. The dbx plots need taller y-ranges (see data.csv).
plot 'sqlite100m' => "Rakefile" do |gp|
  plot_timing_bar_plot(gp, :sqlite_100m)
end
plot 'sqlite1g' => "Rakefile" do |gp|
  plot_timing_bar_plot(gp, :sqlite_1g)
end
plot 'dbx100m' => "Rakefile" do |gp|
  plot_timing_bar_plot(gp, :oracle_100m,
    yrange: "[0:400]"
  )
end
plot 'dbx1g' => "Rakefile" do |gp|
  plot_timing_bar_plot(gp, :oracle_1g,
    yrange: "[0:1400]"
  )
end
task :all => ['sqlite100m', 'sqlite1g', 'dbx100m', 'dbx1g']
task :default => :all

View file

@ -0,0 +1,46 @@
class String
  # Minimal CSV line parser: split on `sep`, then glue back together any
  # fields that were split inside double quotes, and strip those quotes.
  def from_csv(sep = /,/)
    ret = chomp.split(sep)
    idx = 0;
    while idx < ret.length do
      if ret[idx][0] == "\""[0]
        # Field opened with a quote: absorb following fields until the
        # closing quote appears.
        # NOTE(review): the re-join uses a literal "," even when `sep` is a
        # custom regexp (e.g. / *, */), so spacing inside quoted fields is
        # not preserved exactly — confirm no caller relies on it.
        while ret[idx][-1] != "\""[0]
          raise "Unterminated quote" if idx+1 >= ret.length
          ret[idx] = ret[idx]+","+ret[idx+1]
          ret.delete_at(idx+1)
        end
        ret[idx] = ret[idx].sub(/^"/, "").sub(/"$/, "")
      end
      idx += 1
    end
    ret
  end
end
class Array
  # CSV helpers over collections of lines / rows.
  def from_csv
    # Parse each element as one CSV line (delegates to String#from_csv).
    map { |line| line.to_s.chomp.from_csv }
  end
  def to_csv(f)
    # Write each row to file `f` as a comma-joined line (truncates f).
    File.open(f, "w+") do |out|
      each { |row| out.puts(row.join(',')) }
    end
  end
end
class IO
  # Parse the stream as CSV. With header: true the first line is consumed
  # as column names and every row becomes a {column => value} Hash;
  # otherwise rows stay Arrays. :separator is forwarded to String#from_csv.
  def from_csv(args = {})
    header = args.fetch(:header, false)
    separator = args.fetch(:separator, /,/)
    # Header line is trimmed of leading/trailing spaces before splitting.
    keys = readline.chomp.
      sub(/ *$/, "").sub(/^ */,"").
      from_csv(separator) if header;
    map { |l| l.to_s.chomp.from_csv(separator) }.
      map { |a| if header then keys.zip(a).to_h else a end }
  end
end
class File
  # Parse the CSV file at path `f`; options (:header, :separator) are
  # forwarded to IO#from_csv. The file handle is closed by the block form.
  def File.csv(f, args = {})
    File.open(f) {|io| io.from_csv(args) }
  end
end

View file

@ -0,0 +1,80 @@
sqlite,q1_noagg,100m,deterministic,0m16.125s
sqlite,q1_noagg,100m,classic ,0m16.163s
sqlite,q1_noagg,100m,partition ,0m16.226s
sqlite,q1_noagg,100m,inline ,0m16.417s
sqlite,q1_noagg,100m,hybrid ,0m16.830s
sqlite,q1_noagg,1g ,deterministic,2m41.632s
sqlite,q1_noagg,1g ,classic ,2m55.999s
sqlite,q1_noagg,1g ,partition ,3m2.070s
sqlite,q1_noagg,1g ,inline ,2m54.776s
sqlite,q1_noagg,1g ,hybrid ,2m55.679s
sqlite,q3_noagg,100m,deterministic,0m1.743s
sqlite,q3_noagg,100m,classic ,Timeout
sqlite,q3_noagg,100m,partition ,Timeout
sqlite,q3_noagg,100m,inline ,0m2.048s
sqlite,q3_noagg,100m,hybrid ,0m2.012s
sqlite,q3_noagg,1g ,deterministic,0m4.687s
sqlite,q3_noagg,1g ,classic ,Timeout
sqlite,q3_noagg,1g ,partition ,Timeout
sqlite,q3_noagg,1g ,inline ,0m7.572s
sqlite,q3_noagg,1g ,hybrid ,0m7.992s
sqlite,q5_noagg,100m,deterministic,0m1.542s
sqlite,q5_noagg,100m,classic ,Timeout
sqlite,q5_noagg,100m,partition ,Timeout
sqlite,q5_noagg,100m,inline ,0m2.676s
sqlite,q5_noagg,100m,hybrid ,0m3.597s
sqlite,q5_noagg,1g ,deterministic,0m6.696s
sqlite,q5_noagg,1g ,classic ,Timeout
sqlite,q5_noagg,1g ,partition ,Timeout
sqlite,q5_noagg,1g ,inline ,0m11.351s
sqlite,q5_noagg,1g ,hybrid ,0m57.637s
sqlite,q9_noagg,100m,deterministic,0m3.037s
sqlite,q9_noagg,100m,classic ,Timeout
sqlite,q9_noagg,100m,partition ,Timeout
sqlite,q9_noagg,100m,inline ,12m22.873s
sqlite,q9_noagg,100m,hybrid ,0m6.037s
sqlite,q9_noagg,1g ,deterministic,0m38.967s
sqlite,q9_noagg,1g ,classic ,Timeout
sqlite,q9_noagg,1g ,partition ,Timeout
sqlite,q9_noagg,1g ,inline ,Timeout
sqlite,q9_noagg,1g ,hybrid ,1m9.280s
oracle,q1_noagg,100m,deterministic,0m19.716s
oracle,q1_noagg,100m,classic ,0m23.760s
oracle,q1_noagg,100m,partition ,0m21.517s
oracle,q1_noagg,100m,inline ,0m20.570s
oracle,q1_noagg,100m,hybrid ,0m21.685s
oracle,q3_noagg,100m,deterministic,0m1.887s
oracle,q3_noagg,100m,classic ,Timeout
oracle,q3_noagg,100m,partition ,Timeout
oracle,q3_noagg,100m,inline ,0m2.831s
oracle,q3_noagg,100m,hybrid ,0m2.482s
oracle,q5_noagg,100m,deterministic,0m2.165s
oracle,q5_noagg,100m,classic ,Timeout
oracle,q5_noagg,100m,partition ,Timeout
oracle,q5_noagg,100m,inline ,0m3.738s
oracle,q5_noagg,100m,hybrid ,0m5.722s
oracle,q9_noagg,100m,deterministic,0m3.883s
oracle,q9_noagg,100m,classic ,Timeout
oracle,q9_noagg,100m,partition ,Timeout
oracle,q9_noagg,100m,inline ,8m0.466s
oracle,q9_noagg,100m,hybrid ,0m10.610s
oracle,q1_noagg,1g ,deterministic,3m29.131s
oracle,q1_noagg,1g ,classic ,3m32.163s
oracle,q1_noagg,1g ,partition ,3m45.280s
oracle,q1_noagg,1g ,inline ,3m39.893s
oracle,q1_noagg,1g ,hybrid ,3m23.962s
oracle,q3_noagg,1g ,deterministic,0m5.437s
oracle,q3_noagg,1g ,classic ,Timeout
oracle,q3_noagg,1g ,partition ,Timeout
oracle,q3_noagg,1g ,inline ,0m3.738s
oracle,q3_noagg,1g ,hybrid ,0m5.722s
oracle,q5_noagg,1g ,deterministic,0m5.092s
oracle,q5_noagg,1g ,classic ,Timeout
oracle,q5_noagg,1g ,partition ,Timeout
oracle,q5_noagg,1g ,inline ,0m22.339s
oracle,q5_noagg,1g ,hybrid ,Timeout
oracle,q9_noagg,1g ,deterministic,0m28.182s
oracle,q9_noagg,1g ,classic ,Timeout
oracle,q9_noagg,1g ,partition ,Timeout
oracle,q9_noagg,1g ,inline ,Timeout
oracle,q9_noagg,1g ,hybrid ,5m8.406s
1 sqlite q1_noagg 100m deterministic 0m16.125s
2 sqlite q1_noagg 100m classic 0m16.163s
3 sqlite q1_noagg 100m partition 0m16.226s
4 sqlite q1_noagg 100m inline 0m16.417s
5 sqlite q1_noagg 100m hybrid 0m16.830s
6 sqlite q1_noagg 1g deterministic 2m41.632s
7 sqlite q1_noagg 1g classic 2m55.999s
8 sqlite q1_noagg 1g partition 3m2.070s
9 sqlite q1_noagg 1g inline 2m54.776s
10 sqlite q1_noagg 1g hybrid 2m55.679s
11 sqlite q3_noagg 100m deterministic 0m1.743s
12 sqlite q3_noagg 100m classic Timeout
13 sqlite q3_noagg 100m partition Timeout
14 sqlite q3_noagg 100m inline 0m2.048s
15 sqlite q3_noagg 100m hybrid 0m2.012s
16 sqlite q3_noagg 1g deterministic 0m4.687s
17 sqlite q3_noagg 1g classic Timeout
18 sqlite q3_noagg 1g partition Timeout
19 sqlite q3_noagg 1g inline 0m7.572s
20 sqlite q3_noagg 1g hybrid 0m7.992s
21 sqlite q5_noagg 100m deterministic 0m1.542s
22 sqlite q5_noagg 100m classic Timeout
23 sqlite q5_noagg 100m partition Timeout
24 sqlite q5_noagg 100m inline 0m2.676s
25 sqlite q5_noagg 100m hybrid 0m3.597s
26 sqlite q5_noagg 1g deterministic 0m6.696s
27 sqlite q5_noagg 1g classic Timeout
28 sqlite q5_noagg 1g partition Timeout
29 sqlite q5_noagg 1g inline 0m11.351s
30 sqlite q5_noagg 1g hybrid 0m57.637s
31 sqlite q9_noagg 100m deterministic 0m3.037s
32 sqlite q9_noagg 100m classic Timeout
33 sqlite q9_noagg 100m partition Timeout
34 sqlite q9_noagg 100m inline 12m22.873s
35 sqlite q9_noagg 100m hybrid 0m6.037s
36 sqlite q9_noagg 1g deterministic 0m38.967s
37 sqlite q9_noagg 1g classic Timeout
38 sqlite q9_noagg 1g partition Timeout
39 sqlite q9_noagg 1g inline Timeout
40 sqlite q9_noagg 1g hybrid 1m9.280s
41 oracle q1_noagg 100m deterministic 0m19.716s
42 oracle q1_noagg 100m classic 0m23.760s
43 oracle q1_noagg 100m partition 0m21.517s
44 oracle q1_noagg 100m inline 0m20.570s
45 oracle q1_noagg 100m hybrid 0m21.685s
46 oracle q3_noagg 100m deterministic 0m1.887s
47 oracle q3_noagg 100m classic Timeout
48 oracle q3_noagg 100m partition Timeout
49 oracle q3_noagg 100m inline 0m2.831s
50 oracle q3_noagg 100m hybrid 0m2.482s
51 oracle q5_noagg 100m deterministic 0m2.165s
52 oracle q5_noagg 100m classic Timeout
53 oracle q5_noagg 100m partition Timeout
54 oracle q5_noagg 100m inline 0m3.738s
55 oracle q5_noagg 100m hybrid 0m5.722s
56 oracle q9_noagg 100m deterministic 0m3.883s
57 oracle q9_noagg 100m classic Timeout
58 oracle q9_noagg 100m partition Timeout
59 oracle q9_noagg 100m inline 8m0.466s
60 oracle q9_noagg 100m hybrid 0m10.610s
61 oracle q1_noagg 1g deterministic 3m29.131s
62 oracle q1_noagg 1g classic 3m32.163s
63 oracle q1_noagg 1g partition 3m45.280s
64 oracle q1_noagg 1g inline 3m39.893s
65 oracle q1_noagg 1g hybrid 3m23.962s
66 oracle q3_noagg 1g deterministic 0m5.437s
67 oracle q3_noagg 1g classic Timeout
68 oracle q3_noagg 1g partition Timeout
69 oracle q3_noagg 1g inline 0m3.738s
70 oracle q3_noagg 1g hybrid 0m5.722s
71 oracle q5_noagg 1g deterministic 0m5.092s
72 oracle q5_noagg 1g classic Timeout
73 oracle q5_noagg 1g partition Timeout
74 oracle q5_noagg 1g inline 0m22.339s
75 oracle q5_noagg 1g hybrid Timeout
76 oracle q9_noagg 1g deterministic 0m28.182s
77 oracle q9_noagg 1g classic Timeout
78 oracle q9_noagg 1g partition Timeout
79 oracle q9_noagg 1g inline Timeout
80 oracle q9_noagg 1g hybrid 5m8.406s

View file

@ -0,0 +1,213 @@
# Global plotting state: gnuplot terminal name, output-file suffix (nil
# means "render to screen, no file"), auto-open toggle, and the plot
# currently under construction (used by row_data).
$plot_terminal = "aqua"
$plot_suffix = nil;
$plot_auto_open = false;
$current_plot = nil;
# Select the gnuplot terminal and matching output-file suffix, and record
# extra terminal settings. Unknown `output` symbols leave the terminal
# globals untouched (settings are still recorded).
def plot_output(output, settings = {})
  terminals = {
    :aqua => ["aqua", nil],
    :pdf  => ["pdf", ".pdf"],
    :png  => ["png", ".png"]
  }
  if terminals.has_key?(output)
    $plot_terminal, $plot_suffix = terminals[output]
  end
  $plot_terminal_opts = settings
end
# Build the gnuplot "set terminal" argument string from the configured
# terminal plus $plot_terminal_opts, with `setting_overrides` winning.
def plot_terminal(setting_overrides = {})
  merged = $plot_terminal_opts.merge setting_overrides
  suffix = merged.size < 1 ? "" : " " + merged.to_a.flatten.join(" ")
  $plot_terminal + suffix
end
# Shared gnuplot line styles (line color :lt, width :lw, point type :pt);
# indexed by pretty_style and installed as "line 1..4" by pretty_plot.
$pretty_styles = [
  { :lt => "rgb \"#A00000\"",
    # :fs => "rgb \"#A00000\"",
    :lw => 2,
    :pt => 1
  },
  { :lt => "rgb \"#00A000\"",
    # :fs => "rgb \"#00A000\"",
    :lw => 2,
    :pt => 6
  },
  { :lt => "rgb \"#5060D0\"",
    # :fs => "rgb \"#5060D0\"",
    :lw => 2,
    :pt => 2
  },
  { :lt => "rgb \"#F25900\"",
    # :fs => "rgb \"#F25900\"",
    :lw => 2,
    :pt => 9
  }
];
# Render $pretty_styles[idx] as a gnuplot style-option string. Keys already
# present in `opts` win over the defaults; nil-valued entries are dropped.
def pretty_style(idx, opts = {})
  merged = opts.clone
  $pretty_styles[idx].each do |key, val|
    merged[key] = val unless merged.has_key? key
  end
  merged.reject { |_, val| val.nil? }.
    map { |pair| pair.to_a.join(" ") }.
    join(" ")
end
# Configure `plot` with publication-quality defaults: Times/pdf terminal,
# grey dashed grid, selected borders only, unmirrored tics, optional log
# axes, and the $pretty_styles palette installed as line styles 1..4.
# Options: :fontface, :fontsize, :linewidth, :fontscale, :sizex/:sizey,
# :bordercolor, :gridcolor, :border ([:left,:bottom,:top,:right,:all]),
# :logx, :logy.
def pretty_plot(plot, opts = {})
  # plot based on Brighten Godfrey's blog post:
  # http://youinfinitesnake.blogspot.com/2011/02/attractive-scientific-plots-with.html
  plot.terminal [
    "pdf",
    "font \"#{opts.fetch(:fontface, "Times-Roman")},#{opts.fetch(:fontsize, 10)}\"",
    "linewidth #{opts.fetch(:linewidth, 4)} rounded",
    "fontscale #{opts.fetch(:fontscale, 1.0)}",
    "size #{opts.fetch(:sizex, 5)}in,#{opts.fetch(:sizey, 3)}in"
  ].join(" ")
  # Line style for axes
  plot.style "line 80 lc #{opts.fetch(:bordercolor, "rgb \"#808080\"")}"
  # Line style for grid
  plot.style "line 81 lt 0" # dashed
  plot.style "line 81 lc #{opts.fetch(:gridcolor, "rgb \"#808080\"")}" # grey
  plot.grid "back linestyle 81"
  # gnuplot encodes border selection as a bitmask.
  border_groups =
    opts.fetch(:border, [:left, :bottom]).map do |b|
      case b
      when :bottom then 1
      when :left then 2
      when :top then 4
      when :right then 8
      when :all then 1+2+4+8
      else raise "Invalid border type : #{b}"
      end
    end.sum
  plot.border "#{border_groups} back linestyle 80" # Remove border on top and right. These
    # borders are useless and make it harder
    # to see plotted lines near the border.
    # Also, put it in grey; no need for so much emphasis on a border.
  plot.xtics "nomirror"
  plot.ytics "nomirror"
  if(opts.fetch(:logx, false)) then
    plot.logscal "x"
    plot.mxtics "10" # Makes logscale look good.
  end
  if(opts.fetch(:logy, false)) then
    plot.logscal "y"
    plot.mytics "10" # Makes logscale look good.
  end
  # Line styles: try to pick pleasing colors, rather
  # than strictly primary colors or hard-to-see colors
  # like gnuplot's default yellow. Make the lines thick
  # so they're easy to see in small plots in papers.
  $pretty_styles.each_index { |x| plot.style "line #{x+1} #{pretty_style(x)}" }
  plot.key "bottom right"
end
# Toggle automatic opening of rendered plot files after a plot task runs.
def auto_open_plots(new_val = true)
  $plot_auto_open = new_val
end
# Append one dataset (row-major `data`, transposed via Array#unzip from
# util.rb) to the plot currently under construction; the block receives
# the new Gnuplot::DataSet for styling.
def row_data(data)
  $current_plot.data << Gnuplot::DataSet.new(data.unzip) { |ds| yield ds }
end
# Declare a Rake task that renders a gnuplot chart. When a file-producing
# terminal (pdf/png) is selected, the task name doubles as the output file
# name; with auto-open enabled the result is opened via the `open` command
# (macOS).
def plot(args = {})
  task(args) do
    # Rake hands us either {name => deps} or a bare task name.
    task_name = case args
                when Hash then args.keys[0]
                when Symbol,String then args.to_s
                end
    Gnuplot.open do |gp|
      Gnuplot::Plot.new(gp) do |plot|
        $current_plot = plot;
        plot.terminal plot_terminal
        if $plot_suffix and task_name then
          plot.output "#{task_name}#{$plot_suffix}"
        end
        yield plot;
      end
    end
    # Only auto-open when a file was actually produced.
    if $plot_auto_open and [".pdf", ".png"].include? $plot_suffix
      system("open #{task_name}#{$plot_suffix}")
    end
  end
end
# Convenience wrapper over `plot` for line charts. The block returns either
# a row-major data table (first column = x axis) or a hash with :data plus
# optional :xaxis, :keys (series titles) and :with (gnuplot style) entries.
def line_plot(args = {})
  plot(args) do |plot|
    data_elements = yield(plot)
    data_elements = { :data => data_elements } unless data_elements.is_a? Hash;
    data = data_elements[:data].unzip;  # transpose rows into per-series columns
    xaxis = data_elements.fetch(:xaxis) { data.shift };
    keys = data_elements.fetch(:keys) { data.map { nil; } }
    withs = data_elements.fetch(:with, "linespoints");
    withs = data.map { withs } unless withs.is_a? Array;
    raise "Missing data!" if data.nil?;
    raise "Missing X Axis!" if xaxis.nil?;
    data.zip(keys, withs).each do |line, key, with|
      plot.data << Gnuplot::DataSet.new([xaxis, line]) do |ds|
        ds.title = key unless key.nil?
        ds.with = with unless with.nil?
      end
    end
  end
end
# Render a clustered bar chart: args[:data] is row-major (one row per
# group/cluster, one column per dataset/series). x positions are computed in
# abstract units from :interbar_offset / :intergroup_offset / :margins;
# group labels become custom xtics centered under each cluster.
def draw_clustered_bar_plot plot, args = {}
  data = args.fetch(:data).unzip;  # transpose to one array per dataset
  base_offset = args.fetch(:base_offset, 0);
  interbar_offset = args.fetch(:interbar_offset, 18);
  intergroup_offset = args.fetch(:intergroup_offset, interbar_offset);
  margins = args.fetch(:margins, intergroup_offset);
  bar_width = args.fetch(:bar_width, 10);
  tic_commands = args.fetch(:tic_commands, "");
  label_offset = args.fetch(:label_offset, 0);
  box_style = args.fetch(:box_style,
                         lambda { |i| "boxes fill pattern #{i}" });
  plot.grid "noxtics"
  group_offset = base_offset + margins
  # Total width of one cluster: one bar slot per dataset plus the gap.
  group_size = interbar_offset * data.length + intergroup_offset;
  plot.boxwidth bar_width.to_s;
  pattern = 0;
  data.zip(args[:dataset_labels]).each do |dataset, dataset_title|
    # Each dataset's bars are group_size apart, shifted by one bar slot
    # per dataset.
    offset = group_offset - group_size;
    group_offset += interbar_offset;
    indices = dataset.map { |i| offset += group_size; }
    plot.data << Gnuplot::DataSet.new([indices,dataset]) do |ds|
      ds.title = dataset_title
      ds.with = box_style.call(pattern += 1);
    end
  end
  # Center one xtic label under each cluster.
  label_offset += (group_size+intergroup_offset-margins)/2
  group_offset = base_offset - label_offset;
  plot.xtics "(#{args[:group_labels].map do |label|
    "\"#{label}\" #{group_offset += group_size}";
  end.join(", ")}) scale 0 #{tic_commands}";
  plot.xrange "[-10:#{group_offset+label_offset+margins-intergroup_offset}]"
end
# Degenerate clustered bar plot: every value becomes its own single-bar
# group (labelled via args[:labels]) and the legend is suppressed.
def draw_bar_plot plot, args
  plot.key "off"
  opts = args.clone
  opts[:data] = opts[:data].map { |value| [value] }
  opts[:dataset_labels] = [""]
  opts[:group_labels] = opts[:labels]
  draw_clustered_bar_plot plot, opts
end

View file

@ -0,0 +1,394 @@
# Simple regexp-driven token stream over a string, with one-token lookahead
# and error reporting tied to an optional input source (for line numbers).
class Tokenizer
  def initialize(string, token, input_source = nil)
    @tokens = string.scan(token);    # all matches extracted up-front
    @last = nil;                     # most recently consumed token
    @input_source = input_source;    # only used for error line numbers
    @string = string;
  end
  # Repeatedly yield and consume tokens; stop when the block returns falsy
  # or the stream is exhausted.
  def scan
    while @tokens.size > 0
      if !(yield @tokens.shift) then break; end
    end
  end
  # Look at the next token without consuming it (nil when exhausted).
  def peek
    if @tokens.size > 0 then @tokens[0]
    else nil; end
  end
  # Consume and return the next token (nil when exhausted); also remembered
  # as `last`.
  def next
    @last =
      if @tokens.size > 0 then @tokens.shift
      else nil; end
  end
  def last
    @last;
  end
  def more?
    @tokens.size > 0;
  end
  # Flatten grouped scan results (when the token regexp has capture groups).
  def flatten
    @tokens = @tokens.flatten;
  end
  # Consume the next token and raise unless it equals `token` (String) or
  # is a member of `token` (Array). Returns the consumed token.
  def assert_next(token, errstr = nil)
    case token
    when String then raise_error(errstr || "Expected '#{token}' but found '#{last}'") unless self.next == token
    when Array then raise_error(errstr || "Expected '#{token.join("','")}' but found '#{last}'") unless token.include? self.next;
    end
    self.last;
  end
  # Raise a parse error annotated with the input line number (when an input
  # source was provided) or the raw string otherwise.
  def raise_error(errstr);
    errstr = "#{errstr} (line #{@input_source.lineno})" if @input_source;
    errstr = "#{errstr} (#{@string})" unless @input_source;
    raise "Parse Error: #{errstr}";
  end
  # Consume tokens through the first occurrence of `token`, returning the
  # tokens that preceded it.
  def tokens_up_to(token)
    ret = Array.new;
    while (more? && (self.next != token))
      ret.push(last);
    end
    ret;
  end
end
# Grab-bag of Array extensions used throughout the plotting scripts.
# Several methods deliberately shadow core/Enumerable methods (to_h, count,
# reduce, partition, select, grep); the Rakefiles rely on the overridden
# semantics, so they must not be "fixed" back to the stdlib behavior.
class Array
  # map, yielding (index, element).
  def map_index
    (0...length).to_a.map { |i| yield(i, self[i]) }
  end
  # [[k, v], ...] -> Hash.
  def to_h
    ret = Hash.new;
    each { |k,v| ret[k] = v; }
    return ret;
  end
  # Transpose rows into columns; short rows leave nil holes.
  def unzip
    ret = Array.new;
    each_index do |i|
      ret.push Array.new(i) while ret.length < self[i].length
      ret.each_index do |j|
        ret[j][i] = self[i][j]
      end
    end
    return ret;
  end
  # NOTE: shadows core Array#count and ignores its argument/block forms.
  def count
    size
  end
  def sum
    ret = 0;
    each { |item| ret += item }
    return ret;
  end
  def avg
    sum.to_f / length.to_f
  end
  def rms_avg
    Math.sqrt(map { |x| x.to_f * x.to_f }.avg)
  end
  def stddev
    Math.sqrt((avg ** 2 - (map{|i| i.to_f ** 2}.avg)).abs)
  end
  # NOTE: shadows Enumerable#reduce on purpose: treats self as [key, value]
  # pairs and groups by key. Without a block: {key => [values]}; with a
  # block: {key => reducer.call(key, values)}.
  def reduce(&reducer)
    ret = Hash.new;
    each do |k,v|
      ret[k] = Array.new unless ret.has_key? k;
      ret[k].push(v);
    end
    if reducer.nil? then ret
    else
      ret.to_a.collect do |k,vs|
        [ k, reducer.call(k, vs) ]
      end.to_h
    end
  end
  # Round-robin partition into K arrays
  def subdivide(k)
    cnt = 0;
    ret = (0...k).map {|i| Array.new };
    each { |i| ret[cnt % k].push i; cnt += 1; };
    ret;
  end
  # Inorder partition into groups of K elements (shadows Enumerable#partition)
  def partition(k)
    (0...(size / k.to_f).ceil).map do |i|
      self[k*i...[k*(i+1), size].min]
    end
  end
  # Zip the member arrays together: [[a,b],[c,d]] -> [[a,c],[b,d]].
  def zip_members
    self[0].zip(*(self[1..-1]))
  end
  # Shadows Enumerable#grep: without a block, keeps elements matching
  # `pattern`; with a block, maps each MatchData through the block.
  def grep(pattern, &block)
    ret = [];
    if block.nil?
    then each { |l| ret.push(l) if pattern =~ l; }
    else each { |l| match = pattern.match(l);
                    ret.push(block.call(match)) if match; }
    end
    ret
  end
  # Sliding windows of `window_size`; each full window is passed to the
  # block (or wrapped in an array when no block is given).
  def window(window_size = 10, &block)
    if length <= window_size then
      if block.nil? then return [self.clone];
      else return [block.call(self)];
      end
    else
      ret = Array.new;
      w = Array.new;
      each do |item|
        w.push(item);
        w.shift if w.length > window_size;
        if w.length >= window_size then
          ret.push(if block.nil? then [w.clone] else block.call(w) end)
        end
      end
      ret
    end
  end
  # Left fold with an explicit accumulator.
  def fold(accum = nil)
    each { |i| accum = yield accum, i }
    accum
  end
  # Thin self down (in place, via delete_if) to roughly num_samples evenly
  # spaced elements.
  # FIX: the original referenced an undefined `keep_step` (NameError) and
  # its predicate deleted the evenly spaced elements instead of keeping
  # them; we now keep every keep_steps-th element.
  def pick_samples_evenly(num_samples)
    return self if(self.length <= num_samples);
    keep_steps = (self.length / num_samples).to_i
    step = 0;
    self.delete_if { step += 1; (step % keep_steps) != 0 }
  end
  # Render rows (plus optional headers) as a fixed-width ASCII table.
  def to_table(headers = nil)
    row_sizes =
      ((headers.nil? ? [] : [headers]) + self).
        map { |row| row.map { |c| c.to_s.length } }.
        unzip.
        map { |col| col.compact.max }
    ( unless headers.nil? then
        [ " " + headers.zip(row_sizes).map do |col, exp_size|
            col + (if col.size < exp_size then
                     (" " * (exp_size - col.size))
                   else "" end)
          end.join(" | "),
          ("-" * (row_sizes.sum + 2 + (row_sizes.length - 1) * 3))
        ]
      else [] end +
      map do |row|
        " " + row.zip(row_sizes).map do |col, exp_size|
          col = col.to_s
          if col.size < exp_size
          then col.center(exp_size)
          else col
          end
        end.join(" | ")
      end
    ).join("\n")
  end
  # True iff the block is truthy for every element.
  def for_all
    each { |v| return false unless yield v }
    true;
  end
  # Yield self[0..0], self[0..1], ..., self[0..-1].
  def each_prefix
    each_index do |i|
      yield self[0..i];
    end
  end
  # NOTE: shadows core Array#select (same result for truthy blocks).
  def select
    map { |x| x if yield x }.compact
  end
  # Align member hashes by key: {k => [h0[k], h1[k], ...]}, nil for misses.
  def cogroup
    ret = Hash.new { |h,k| h[k] = [nil] * size }
    each_index do |i|
      self[i].each do |k, v|
        ret[k][i] = v
      end
    end
    ret
  end
  # Return every cnt'th element of the array.
  def every(cnt, start = 0)
    (0..(((size-1-start)/cnt).to_i)).map { |i| self[i*cnt+start] }
  end
  # Create batches of up to size cnt.
  def batch(cnt)
    (0..(((size-1)/cnt).to_i)).map { |i| self[(i*cnt)...((i+1)*cnt)] }
  end
  # Map each element to an array and concatenate the results.
  def flatmap
    ret = []
    each { |i| ret = ret + yield(i) }
    ret
  end
  def project(*keys)
    map { |x| x.project(*keys) }
  end
  # Sorted de-duplication (elements must be comparable).
  def unique
    last = nil
    sort.
      map { |c| last = c if c != last }.
      compact
  end
  # Bucket numeric values into bin_width-wide bins -> [[bin, count], ...].
  # Relies on the Hash#join extension defined elsewhere in this file.
  def histogram(bin_width = 5)
    min_val = (min - min % bin_width).to_i
    max_val = (max - max % bin_width + bin_width).to_i
    (min_val..max_val).to_a.every(bin_width).
      map { |x| [x, 0] }.
      to_h.
      join(map { |x| (x.to_f / bin_width).to_i * bin_width }.
             reduce { |k,v| v.count },
           :left
      ).
      map { |bin, cnt| [bin, cnt.compact.sum] }.
      sort { |a, b| a[0] <=> b[0] }
  end
  # Running total: [1,2,3] -> [1,3,6].
  def cumulative_sum
    tot = 0;
    map { |x| tot += x }
  end
end
# Hash extensions used by the plotting scripts.
class Hash
  # Keys present in both self and `other`.
  def intersect(other)
    keys.find_all { |k| other.has_key?(k) }
  end
  # Lay out {label => [values]} as grouped bars: returns the xtics string,
  # the [x, y] point columns (via Array#unzip), and the xrange string.
  def bar_graph_dataset(bar = 0.5, set_sep = 1.0, bar_sep = 0.2)
    curr_width = 0;
    tics = collect do |human,data|
      next_delta = data.length * bar + (data.length - 1) * bar_sep;
      curr_width += next_delta + set_sep;
      "\"#{human}\" #{curr_width - next_delta / 2}"
    end
    curr_width = 0;
    points = values.collect do |data|
      curr_width += set_sep - bar_sep
      data.collect do |point|
        curr_width += bar_sep + bar;
        [curr_width - bar / 2, point]
      end
    end.unzip;
    return ["(#{tics.join(', ')})" , points, "[0:#{curr_width+set_sep}]"];
  end
  # [[k, v], ...] sorted by key.
  def to_sorted_a
    keys.sort.map do |k|
      [k, self[k]]
    end
  end
  # Recursively map leaf values of a nested hash, yielding (key_path, value).
  # FIX: the leaf branch yielded an undefined `v` (NameError at runtime);
  # it now yields the leaf value self[k].
  def map_leaves(prefix = [])
    keys.to_a.map do |k|
      [ k,
        if self[k].is_a? Hash
        then self[k].map_leaves(prefix+[k]) { |ik,v| yield(ik, v) }
        else yield(prefix+[k], self[k])
        end
      ]
    end.to_h
  end
  # Values of `keys`, in order.
  def project(*keys)
    keys.map { |k| self[k] }
  end
  # Relational join on keys: {k => [self[k], h[k]]}. `outer` selects the key
  # set: :full (union), :left, :right, or anything else for inner join.
  def join(h, outer = :no)
    case outer
    when :full then
      keys + h.keys.find_all { |k| not has_key? k }
    when :left then
      keys
    when :right then
      h.keys
    else
      intersect(h)
    end.
      map { |k| [k, [self[k], h[k]]] }.to_h
  end
end
class Float
  # Round to `n` significant figures; zero passes through unchanged.
  def sig_figs(n)
    return self if zero?
    scale = 10.0 ** (Math.log10(self).ceil.to_f - n.to_i.to_f)
    (self / scale).round * scale
  end
end
class IO
  # Yield each line to the block while also collecting it; returns the array
  # of all lines read (a "tee" over the stream).
  def tee_readlines
    ret = [];
    each { |l| yield l; ret.push l }
    ret
  end
  # NOTE: shadows Enumerable#grep — here it keeps the lines for which the
  # block is truthy (no pattern argument).
  def grep
    map {|x| x if yield x}.compact
  end
end
class Integer
  # Render the integer as a human-readable byte count ("1.024 KB"), using
  # 4 significant figures (via the Float#sig_figs extension).
  def to_bytestring
    return "-#{(-self).to_bytestring}" if self < 0
    units = ["B", "KB", "MB", "GB", "PB", "EB"]
    scale = (Math.log(self / 2) / (10.0 * Math.log(2))).to_i
    scale = units.length - 1 if scale >= units.length
    "#{(self.to_f / (1024.0 ** scale)).to_f.sig_figs(4)} #{units[scale]}"
  end
  # Roll `self` dice with `die` faces; returns the individual rolls.
  def d(die)
    Array.new(self) { rand(die) + 1 }
  end
end
class String
  # Naive pluralization: append "s" unless num is exactly 1.
  def pluralize(num)
    num == 1 ? self : self + "s"
  end
end
class Dir
  # Run the block with the working directory temporarily set to `d`;
  # returns the block's value.
  # FIX: the original did not restore the working directory when the block
  # raised; the restore now happens in an ensure clause.
  def Dir.in_dir(d)
    old_d = Dir.getwd
    Dir.chdir d
    begin
      yield
    ensure
      Dir.chdir old_d
    end
  end
end

Binary file not shown.

View file

@ -0,0 +1,102 @@
$:.push "."
require "gnuplot"
require "util.rb"
require "plot.rb"
require "csvx.rb"
# Load the benchmark results and build one Rake task per query family
# ("TPCH", "PDB"), each rendering a clustered bar chart of runtimes.
data = File.csv("data.csv", header: true)
plot_output :aqua
# $plot_auto_open = true
# Bar/legend order for the strategies; filtered per-group below.
all_strategies =
  [
    "Mimir-Mat",
    "Mimir-Inline",
    "Mimir-Sample",
    "Mimir-Partition",
    "MCDB-Mimir",
    "SQLite-Det",
    "MayBMS-PGSQL",
    "MayBMS-SQLite",
  ]
# Group rows by query-family prefix ("TPCH-3" -> "TPCH").
# NOTE: `.reduce` is util.rb's Array#reduce override (a group-by), not
# Enumerable#reduce.
data.map { |r| [r["Query"].split(/-/)[0], r] }
  .reduce
  .each do |group, records|
    plot group => ["data.csv", "Rakefile"] do |plot|
      # Keep only strategies with at least one row in this group.
      # NOTE(review): Array#where is not core Ruby and is not defined in the
      # utility files in this commit — presumably select/find_all; verify it
      # exists at runtime.
      strategies =
        all_strategies
          .where { |s| records.index { |r| r["Strategy"] == s } }
      queries =
        records
          .map { |r| r["Query"] }
          .uniq.sort
      # (strategy, query) -> raw time string (may be TIMEOUT/UNSUPPORTED).
      lookup = records.map { |r| [[r["Strategy"], r["Query"]], r["Time"]] }.to_h
      strategies  # NOTE(review): no-op expression — looks like leftover debugging.
      pretty_plot(plot, border: [:all])
      plot.key "left top opaque"
      plot.ylabel "Time (s)"
      # Default y ceiling: largest numeric time; overridden per group below.
      max_y =
        records.map { |r| r["Time"] }
          .where { |r| /[\-0-9.]+/ =~ r }
          .map { |r| r.to_f }
          .max
      case group
      when "TPCH"
        max_y = 300
        plot.key "center top outside maxcols 3 maxrows 2"
      when "PDB"
        max_y = 45
        plot.key "center top outside maxcols 3 maxrows 3"
      end
      plot.yrange "[0:#{max_y}]"
      labels = []
      draw_clustered_bar_plot(plot,
        # Bar heights; TIMEOUT / UNSUPPORTED / off-scale cells get sentinel
        # heights plus a rotated annotation at the bar's x position.
        # NOTE(review): the `nil` case returns the labels array rather than
        # a numeric height — presumably a sentinel (like 6) was intended;
        # verify.
        data: (queries.map.with_index { |q,qi|
          strategies.map.with_index { |s,si|
            time = lookup[[s,q]]
            x_pos = bar_plot_position(si, qi, strategies.length)+1
            font = "Helvetica-Bold,6"
            case time
            when nil then
              labels.push "'?? Missing ??' at #{x_pos},6 font '#{font}' rotate by 90"
            when "TIMEOUT" then
              labels.push "'TIME OUT' at #{x_pos},#{max_y * 0.80} font '#{font}' rotate by 90 front"
              max_y * 1.2
            when "UNSUPPORTED"
              labels.push "'UNSUPPORTED' at #{x_pos},5 font '#{font}' rotate by 90 front tc ls #{si+1}"
              1
            else
              time = time.to_f
              # Off-scale bars get their true value printed inside the bar.
              if time > max_y
                labels.push "'[ #{time.to_i}s ]' at #{x_pos},#{max_y * 0.80} font 'Helvetica-Bold,8' rotate by 90 front"
              end
              time
            end
          }
        }),
        dataset_labels: strategies,
        group_labels: queries,
        bar_width: 15
      )
      labels.each { |l| plot.label l }
      # Vertical separator lines between query clusters.
      (1...queries.length).each { |qi|
        x_pos = bar_plot_position(0, qi, strategies.length) - 18
        plot.arrow("from #{x_pos},0 to #{x_pos},#{max_y} nohead lc rgb \"#808080\"")
      }
    end
    task :default => group
  end

Binary file not shown.

View file

@ -0,0 +1,64 @@
class String
  # Character-by-character CSV line parser with quoted fields and escaped
  # quotes ("") supported.
  # NOTE(review): the `sep` parameter is accepted but never used — the
  # delimiter is hard-coded to "," below, so callers passing a custom
  # :separator (e.g. via IO#from_csv) still get comma splitting; verify
  # before relying on custom separators.
  def from_csv(sep = /,/)
    ret = [[]]               # list of fields, each a list of characters
    c = chars
    quote = "\""
    comma = ","
    i = 0
    expecting_quote = false  # true while inside a quoted field
    while i < c.length
      if c[i] == quote
        if ret[-1].empty? then expecting_quote = true
        elsif c[i+1] == quote then i += 1; ret[-1].push(quote)  # "" escape
        elsif expecting_quote and (c[i+1] == comma) then
          # Closing quote followed by the delimiter: field is complete.
          ret.push([])
          expecting_quote = false
          i += 1
        elsif expecting_quote and (c[i+1] == nil) then
          # Closing quote at end of line.
          expecting_quote = false
        else
          raise "Invalid CSV Line (misplaced quote at #{i}; #{c[i+1]}): #{self}"
        end
      elsif c[i] == comma
        if expecting_quote
          ret[-1].push(comma)   # delimiter inside quotes is literal
        else
          ret.push([])
          expecting_quote = false
        end
      else
        ret[-1].push(c[i])
      end
      i += 1
    end
    ret.map { |col| col.join}
  end
end
class Array
  # CSV helpers over collections of lines / rows.
  def from_csv
    # Parse each element as one CSV line (delegates to String#from_csv).
    map { |line| line.to_s.chomp.from_csv }
  end
  def to_csv(f)
    # Write each row to file `f` as a comma-joined line (truncates f).
    File.open(f, "w+") do |out|
      each { |row| out.puts(row.join(',')) }
    end
  end
end
class IO
  # Parse the stream as CSV. With header: true the first line is consumed
  # as column names and every row becomes a {column => value} Hash;
  # otherwise rows stay Arrays. :separator is forwarded to String#from_csv
  # (but see the NOTE there: this file's parser ignores it).
  def from_csv(args = {})
    header = args.fetch(:header, false)
    separator = args.fetch(:separator, /,/)
    # Header line is trimmed of leading/trailing spaces before splitting.
    keys = readline.chomp.
      sub(/ *$/, "").sub(/^ */,"").
      from_csv(separator) if header;
    map { |l| l.to_s.chomp.from_csv(separator) }.
      map { |a| if header then keys.zip(a).to_h else a end }
  end
end
class File
  # Parse the CSV file at path `f`; options (:header, :separator) are
  # forwarded to IO#from_csv. The file handle is closed by the block form.
  def File.csv(f, args = {})
    File.open(f) {|io| io.from_csv(args) }
  end
end

View file

@ -0,0 +1,57 @@
Strategy,Query,Time
Mimir-Mat,PDB-1,25.98781968615949
Mimir-Mat,PDB-2,20.71622445844114
Mimir-Mat,PDB-3,41.98619099296629
MayBMS-PGSQL,PDB-1,23.439012999999996
MayBMS-PGSQL,PDB-2,13.000651999999999
MayBMS-PGSQL,PDB-3,20.2954832
MayBMS-SQLite,PDB-1,22.1345477
MayBMS-SQLite,PDB-2,7.291376699999999
MayBMS-SQLite,PDB-3,29.1511957
Mimir-Inline,TPCH-1,16.040970255620778
Mimir-Inline,TPCH-3,19.171183695830404
Mimir-Inline,TPCH-5,43.3495686205104
Mimir-Inline,TPCH-9,98.61139780338854
Mimir-Mat,TPCH-1,33.623222251608965
Mimir-Mat,TPCH-3,4.8350385190919045
Mimir-Mat,TPCH-5,11.789478918723763
Mimir-Mat,TPCH-9,28.924315941147505
Mimir-Sample,TPCH-1,119.61607021316885
Mimir-Sample,TPCH-3,162.00108394436538
Mimir-Sample,TPCH-5,258.74168805666267
Mimir-Sample,TPCH-9,TIMEOUT
Mimir-Partition,TPCH-1,UNSUPPORTED
Mimir-Partition,TPCH-3,UNSUPPORTED
Mimir-Partition,TPCH-5,UNSUPPORTED
Mimir-Partition,TPCH-9,UNSUPPORTED
--MayBMS-PGSQL,TPCH-1,UNSUPPORTED
--MayBMS-PGSQL,TPCH-3,UNSUPPORTED
--MayBMS-PGSQL,TPCH-5,UNSUPPORTED
--MayBMS-PGSQL,TPCH-9,UNSUPPORTED
--MayBMS-SQLite,TPCH-1,UNSUPPORTED
--MayBMS-SQLite,TPCH-3,UNSUPPORTED
--MayBMS-SQLite,TPCH-5,UNSUPPORTED
--MayBMS-SQLite,TPCH-9,UNSUPPORTED
MCDB-Mimir,TPCH-1,14.65919488966465
MCDB-Mimir,TPCH-3,TIMEOUT
MCDB-Mimir,TPCH-5,TIMEOUT
MCDB-Mimir,TPCH-9,TIMEOUT
SQLite-Det,PDB-1,9.521
SQLite-Det,PDB-2,7.59
SQLite-Det,PDB-3,31.22
SQLite-Det,TPCH-1,19.561
SQLite-Det,TPCH-3,22.835
SQLite-Det,TPCH-5,33.308
SQLite-Det,TPCH-9,51.125
Mimir-Inline,PDB-1,TIMEOUT
Mimir-Inline,PDB-2,30.827455023303628
Mimir-Inline,PDB-3,TIMEOUT
Mimir-Sample,PDB-1,TIMEOUT
Mimir-Sample,PDB-2,242.5666234549135
Mimir-Sample,PDB-3,TIMEOUT
Mimir-Partition,PDB-1,TIMEOUT
Mimir-Partition,PDB-2,TIMEOUT
Mimir-Partition,PDB-3,TIMEOUT
MCDB-Mimir,PDB-1,TIMEOUT
MCDB-Mimir,PDB-2,TIMEOUT
MCDB-Mimir,PDB-3,TIMEOUT
1 Strategy Query Time
2 Mimir-Mat PDB-1 25.98781968615949
3 Mimir-Mat PDB-2 20.71622445844114
4 Mimir-Mat PDB-3 41.98619099296629
5 MayBMS-PGSQL PDB-1 23.439012999999996
6 MayBMS-PGSQL PDB-2 13.000651999999999
7 MayBMS-PGSQL PDB-3 20.2954832
8 MayBMS-SQLite PDB-1 22.1345477
9 MayBMS-SQLite PDB-2 7.291376699999999
10 MayBMS-SQLite PDB-3 29.1511957
11 Mimir-Inline TPCH-1 16.040970255620778
12 Mimir-Inline TPCH-3 19.171183695830404
13 Mimir-Inline TPCH-5 43.3495686205104
14 Mimir-Inline TPCH-9 98.61139780338854
15 Mimir-Mat TPCH-1 33.623222251608965
16 Mimir-Mat TPCH-3 4.8350385190919045
17 Mimir-Mat TPCH-5 11.789478918723763
18 Mimir-Mat TPCH-9 28.924315941147505
19 Mimir-Sample TPCH-1 119.61607021316885
20 Mimir-Sample TPCH-3 162.00108394436538
21 Mimir-Sample TPCH-5 258.74168805666267
22 Mimir-Sample TPCH-9 TIMEOUT
23 Mimir-Partition TPCH-1 UNSUPPORTED
24 Mimir-Partition TPCH-3 UNSUPPORTED
25 Mimir-Partition TPCH-5 UNSUPPORTED
26 Mimir-Partition TPCH-9 UNSUPPORTED
27 --MayBMS-PGSQL TPCH-1 UNSUPPORTED
28 --MayBMS-PGSQL TPCH-3 UNSUPPORTED
29 --MayBMS-PGSQL TPCH-5 UNSUPPORTED
30 --MayBMS-PGSQL TPCH-9 UNSUPPORTED
31 --MayBMS-SQLite TPCH-1 UNSUPPORTED
32 --MayBMS-SQLite TPCH-3 UNSUPPORTED
33 --MayBMS-SQLite TPCH-5 UNSUPPORTED
34 --MayBMS-SQLite TPCH-9 UNSUPPORTED
35 MCDB-Mimir TPCH-1 14.65919488966465
36 MCDB-Mimir TPCH-3 TIMEOUT
37 MCDB-Mimir TPCH-5 TIMEOUT
38 MCDB-Mimir TPCH-9 TIMEOUT
39 SQLite-Det PDB-1 9.521
40 SQLite-Det PDB-2 7.59
41 SQLite-Det PDB-3 31.22
42 SQLite-Det TPCH-1 19.561
43 SQLite-Det TPCH-3 22.835
44 SQLite-Det TPCH-5 33.308
45 SQLite-Det TPCH-9 51.125
46 Mimir-Inline PDB-1 TIMEOUT
47 Mimir-Inline PDB-2 30.827455023303628
48 Mimir-Inline PDB-3 TIMEOUT
49 Mimir-Sample PDB-1 TIMEOUT
50 Mimir-Sample PDB-2 242.5666234549135
51 Mimir-Sample PDB-3 TIMEOUT
52 Mimir-Partition PDB-1 TIMEOUT
53 Mimir-Partition PDB-2 TIMEOUT
54 Mimir-Partition PDB-3 TIMEOUT
55 MCDB-Mimir PDB-1 TIMEOUT
56 MCDB-Mimir PDB-2 TIMEOUT
57 MCDB-Mimir PDB-3 TIMEOUT

View file

@ -0,0 +1,227 @@
# Global plotting state: gnuplot terminal name, output-file suffix (nil
# means "render to screen, no file"), auto-open toggle, and the plot
# currently under construction (used by row_data).
$plot_terminal = "aqua"
$plot_suffix = nil;
$plot_auto_open = false;
$current_plot = nil;
# Select the gnuplot terminal and matching output-file suffix, and record
# extra terminal settings. Unknown `output` symbols leave the terminal
# globals untouched (settings are still recorded).
def plot_output(output, settings = {})
  terminals = {
    :aqua => ["aqua", nil],
    :pdf  => ["pdf", ".pdf"],
    :png  => ["png", ".png"]
  }
  if terminals.has_key?(output)
    $plot_terminal, $plot_suffix = terminals[output]
  end
  $plot_terminal_opts = settings
end
# Build the gnuplot "set terminal" argument string from the configured
# terminal plus $plot_terminal_opts, with `settings` winning on conflicts.
# (`plot` is accepted for call-site compatibility but unused.)
# FIX: the original used `$plot_terminal_opts + settings`, but Hash has no
# `+` operator (NoMethodError at runtime); the other copy of this file
# correctly uses Hash#merge, restored here.
def plot_terminal(plot, settings)
  settings = $plot_terminal_opts.merge settings
  $plot_terminal+(
    if settings.size < 1 then "" else
      " " + settings.to_a.flatten.join(" ")
    end
  )
end
# Shared gnuplot line styles (line color :lt, width :lw, point type :pt);
# indexed by pretty_style and installed as "line 1..4" by pretty_plot.
$pretty_styles = [
  { :lt => "rgb \"#A00000\"",
    :lw => 2,
    :pt => 1
  },
  { :lt => "rgb \"#00A000\"",
    :lw => 2,
    :pt => 6
  },
  { :lt => "rgb \"#5060D0\"",
    :lw => 2,
    :pt => 2
  },
  { :lt => "rgb \"#F25900\"",
    :lw => 2,
    :pt => 9
  }
];
# Render $pretty_styles[idx] as a gnuplot style-option string. Keys already
# present in `opts` win over the defaults; nil-valued entries are dropped.
def pretty_style(idx, opts = {})
  merged = opts.clone
  $pretty_styles[idx].each do |key, val|
    merged[key] = val unless merged.has_key? key
  end
  merged.reject { |_, val| val.nil? }.
    map { |pair| pair.to_a.join(" ") }.
    join(" ")
end
# Configure `plot` with publication-quality defaults (fonts, grey dashed
# grid, selected borders, unmirrored tics, optional log axes, and the
# $pretty_styles palette as line styles 1..4). Options: :fontface,
# :fontsize, :linewidth, :fontscale, :sizex/:sizey, :bordercolor,
# :gridcolor, :border ([:left,:bottom,:top,:right,:all]), :logx, :logy.
def pretty_plot(plot, opts = {})
  # plot based on Brighten Godfrey's blog post:
  # http://youinfinitesnake.blogspot.com/2011/02/attractive-scientific-plots-with.html
  plot.terminal [
    "pdf",
    "font \"#{opts.fetch(:fontface, "Times-Roman")},#{opts.fetch(:fontsize, 10)}\"",
    "linewidth #{opts.fetch(:linewidth, 4)} rounded",
    "fontscale #{opts.fetch(:fontscale, 1.0)}",
    "size #{opts.fetch(:sizex, 5)}in,#{opts.fetch(:sizey, 3)}in"
  ].join(" ")
  # Line style for axes
  plot.style "line 80 lc #{opts.fetch(:bordercolor, "rgb \"#808080\"")}"
  # Line style for grid
  plot.style "line 81 lt 0" # dashed
  plot.style "line 81 lc #{opts.fetch(:gridcolor, "rgb \"#808080\"")}" # grey
  plot.grid "back linestyle 81"
  # gnuplot encodes border selection as a bitmask.
  border_groups =
    opts.fetch(:border, [:left, :bottom]).map do |b|
      case b
      when :bottom then 1
      when :left then 2
      when :top then 4
      when :right then 8
      when :all then 1+2+4+8
      else raise "Invalid border type : #{b}"
      end
    end.sum
  plot.border "#{border_groups} back linestyle 80" # Remove border on top and right. These
    # borders are useless and make it harder
    # to see plotted lines near the border.
    # Also, put it in grey; no need for so much emphasis on a border.
  plot.xtics "nomirror"
  plot.ytics "nomirror"
  if(opts.fetch(:logx, false)) then
    plot.logscal "x"
    plot.mxtics "10" # Makes logscale look good.
  end
  if(opts.fetch(:logy, false)) then
    plot.logscal "y"
    plot.mytics "10" # Makes logscale look good.
  end
  # Line styles: try to pick pleasing colors, rather
  # than strictly primary colors or hard-to-see colors
  # like gnuplot's default yellow. Make the lines thick
  # so they're easy to see in small plots in papers.
  $pretty_styles.each_index { |x| plot.style "line #{x+1} #{pretty_style(x)}" }
  plot.key "bottom right"
end
# Toggle automatic opening of rendered plot files after a plot task runs.
def auto_open_plots(new_val = true)
  $plot_auto_open = new_val
end
# Append one dataset (row-major `data`, transposed via Array#unzip from
# util.rb) to the plot currently under construction; the block receives
# the new Gnuplot::DataSet for styling.
def row_data(data)
  $current_plot.data << Gnuplot::DataSet.new(data.unzip) { |ds| yield ds }
end
# Declare a Rake task (named per `args`, exactly as Rake's task() takes
# them) that renders a Gnuplot plot.  The block receives the
# Gnuplot::Plot.  Globals: $plot_terminal selects the terminal,
# $plot_suffix the output file suffix, and $plot_auto_open whether to
# open the finished pdf/png in a viewer.
# Fixed: removed a stray no-op `$plot_terminal` expression statement.
def plot(args = {})
  task(args) do
    # Recover the task name so it can double as the output file name.
    task_name = case args
      when Hash then args.keys[0]
      when Symbol,String then args.to_s
    end
    Gnuplot.open do |gp|
      Gnuplot::Plot.new(gp) do |plot|
        $current_plot = plot;
        plot.terminal $plot_terminal
        if $plot_suffix and task_name then
          plot.output "#{task_name}#{$plot_suffix}"
        end
        yield plot;
      end
    end
    if $plot_auto_open and [".pdf", ".png"].include? $plot_suffix
      # NOTE(review): `open` is the macOS viewer launcher — this is a no-op
      # or an error elsewhere; confirm target platforms.
      system("open #{task_name}#{$plot_suffix}")
    end
  end
end
# Declare a plot task (via #plot) that draws one line per data series.
# The block returns either a raw row-major data array, or a Hash with:
#   :data  — rows of [x, y1, y2, ...] (required)
#   :xaxis — explicit x values (default: the first column of :data)
#   :keys  — per-series legend titles (default: none)
#   :with  — gnuplot "with" style; one value for all series or an Array
#            per series (default: "linespoints")
def line_plot(args = {})
  plot(args) do |plot|
    data_elements = yield(plot)
    # Allow a bare array as shorthand for { :data => array }.
    data_elements = { :data => data_elements } unless data_elements.is_a? Hash;
    data = data_elements[:data].unzip;
    # Without an explicit x axis, the first column is consumed as x.
    xaxis = data_elements.fetch(:xaxis) { data.shift };
    keys = data_elements.fetch(:keys) { data.map { nil; } }
    withs = data_elements.fetch(:with, "linespoints");
    withs = data.map { withs } unless withs.is_a? Array;
    raise "Missing data!" if data.nil?;
    raise "Missing X Axis!" if xaxis.nil?;
    # One Gnuplot dataset per remaining column, all sharing the x axis.
    data.zip(keys, withs).each do |line, key, with|
      plot.data << Gnuplot::DataSet.new([xaxis, line]) do |ds|
        ds.title = key unless key.nil?
        ds.with = with unless with.nil?
      end
    end
  end
end
# X coordinate of a single bar in a clustered bar chart, mirroring the
# geometry knobs used by draw_clustered_bar_plot.
#   group        — index of the bar within its cluster
#   element      — index of the cluster
#   num_elements — number of bars per cluster
# Optional args: :base_offset, :interbar_offset, :intergroup_offset,
# :margins (all in x-axis units).
# NOTE(review): the `group`/`element` names look swapped relative to how
# draw_clustered_bar_plot steps its offsets — confirm against callers.
# Fixed: removed an unused :bar_width fetch (width never affects position).
def bar_plot_position(group, element, num_elements, args = {})
  base_offset = args.fetch(:base_offset, 0)
  interbar_offset = args.fetch(:interbar_offset, 18)
  intergroup_offset = args.fetch(:intergroup_offset, interbar_offset)
  margins = args.fetch(:margins, intergroup_offset)
  group_offset = base_offset + margins
  group_size = interbar_offset * num_elements + intergroup_offset
  position_relative_to_element = (interbar_offset * (group))
  position_of_group = (group_size * element)
  group_offset + position_relative_to_element + position_of_group
end
# Draw a clustered (grouped) bar chart on `plot`.
# args:
#   :data           — one row per group; columns are the datasets
#   :dataset_labels — legend title per dataset
#   :group_labels   — x-axis label per group
#   :base_offset, :interbar_offset, :intergroup_offset, :margins,
#   :bar_width      — geometry knobs in x-axis units
#   :tic_commands   — extra text appended to the xtics directive
#   :label_offset   — additional shift applied to the group labels
#   :box_style      — lambda(index) -> gnuplot "with" spec per dataset
def draw_clustered_bar_plot plot, args = {}
  data = args.fetch(:data).unzip;  # transpose: iterate one dataset at a time
  base_offset = args.fetch(:base_offset, 0);
  interbar_offset = args.fetch(:interbar_offset, 18);
  intergroup_offset = args.fetch(:intergroup_offset, interbar_offset);
  margins = args.fetch(:margins, intergroup_offset);
  bar_width = args.fetch(:bar_width, 10);
  tic_commands = args.fetch(:tic_commands, "");
  label_offset = args.fetch(:label_offset, 0);
  box_style = args.fetch(:box_style,
                          lambda { |i| "boxes fill pattern #{i}" });
  plot.grid "noxtics"
  group_offset = base_offset + margins
  group_size = interbar_offset * data.length + intergroup_offset;
  plot.boxwidth bar_width.to_s;
  pattern = 0;
  data.zip(args[:dataset_labels]).each do |dataset, dataset_title|
    # Each dataset starts one interbar step further right; within a
    # dataset, consecutive groups are one group_size apart.
    offset = group_offset - group_size;
    group_offset += interbar_offset;
    indices = dataset.map { |i| offset += group_size; }
    plot.data << Gnuplot::DataSet.new([indices,dataset]) do |ds|
      ds.title = dataset_title
      ds.with = box_style.call(pattern += 1);
    end
  end
  # Center one xtic label under each cluster of bars.
  label_offset += (group_size+intergroup_offset-margins)/2
  group_offset = base_offset - label_offset;
  plot.xtics "(#{args[:group_labels].map do |label|
    "\"#{label}\" #{group_offset += group_size}";
  end.join(", ")}) scale 0 #{tic_commands}";
  plot.xrange "[-10:#{group_offset+label_offset+margins-intergroup_offset}]"
end
# Draw a simple (unclustered) bar chart: one bar per entry of
# args[:data], labelled via args[:labels].  Implemented as a
# single-dataset clustered bar plot with the legend disabled.
def draw_bar_plot plot, args
  plot.key "off"
  opts = args.clone
  opts[:data] = opts[:data].map { |value| [value] }
  opts[:dataset_labels] = [""]
  opts[:group_labels] = opts[:labels]
  draw_clustered_bar_plot plot, opts
end

View file

@ -0,0 +1,471 @@
# A simple token stream built from String#scan, with one-token lookahead
# and parse-error reporting tied to an optional input source (used for
# line numbers in error messages).
class Tokenizer
  def initialize(string, token, input_source = nil)
    @tokens = string.scan(token)
    @last = nil
    @input_source = input_source
    @string = string
  end

  # Repeatedly yield tokens; stop when the block returns falsy or the
  # stream runs out.
  def scan
    until @tokens.empty?
      break unless yield @tokens.shift
    end
  end

  # Look at the next token without consuming it (nil when exhausted).
  def peek
    @tokens.empty? ? nil : @tokens[0]
  end

  # Consume and return the next token (nil when exhausted); the value is
  # also remembered and available via #last.
  def next
    @last = (@tokens.empty? ? nil : @tokens.shift)
  end

  # The most recently consumed token.
  def last
    @last
  end

  # Are any tokens left?
  def more?
    !@tokens.empty?
  end

  # Flatten grouped scan results (when the token pattern has capture groups).
  def flatten
    @tokens = @tokens.flatten
  end

  # Consume the next token and raise a parse error unless it matches the
  # expectation: a literal string, or any member of an array of strings.
  # Returns the consumed token.
  def assert_next(token, errstr = nil)
    case token
    when String
      raise_error(errstr || "Expected '#{token}' but found '#{last}'") unless self.next == token
    when Array
      raise_error(errstr || "Expected '#{token.join("','")}' but found '#{last}'") unless token.include? self.next
    end
    self.last
  end

  # Raise a parse error annotated with the source line number when an
  # input source is known, or with the raw string otherwise.
  def raise_error(errstr)
    errstr = "#{errstr} (line #{@input_source.lineno})" if @input_source
    errstr = "#{errstr} (#{@string})" unless @input_source
    raise "Parse Error: #{errstr}"
  end

  # Consume tokens up to (and including) the delimiter; return the
  # tokens that preceded it.
  def tokens_up_to(token)
    collected = Array.new
    collected.push(last) while more? && (self.next != token)
    collected
  end
end
# Project-local extensions to Array.  Several methods deliberately shadow
# builtins (#count, #reduce, #select, #grep) — callers in this file rely
# on the shadowed semantics, so they are preserved.
class Array
  # Like #map, but yields (index, element).
  def map_index
    (0...length).to_a.map { |i| yield(i, self[i]) }
  end
  # Build a Hash from an array of [key, value] pairs.
  def to_h
    ret = Hash.new;
    each { |k,v| ret[k] = v; }
    return ret;
  end
  # Transpose rows into columns, tolerating ragged rows (missing cells
  # become nil) — unlike #transpose, which raises on ragged input.
  def unzip
    ret = Array.new;
    each_index do |i|
      ret.push Array.new(i) while ret.length < self[i].length
      ret.each_index do |j|
        ret[j][i] = self[i][j]
      end
    end
    return ret;
  end
  # NOTE(review): shadows builtin Array#count (drops the arg/block forms).
  def count
    size
  end
  # Sum of the elements (starting from integer 0).
  def sum
    ret = 0;
    each { |item| ret += item }
    return ret;
  end
  # Arithmetic mean, as a Float.
  def avg
    sum.to_f / length.to_f
  end
  # Product of the elements (starting from integer 1).
  def prod
    ret = 1;
    each { |item| ret *= item }
    return ret;
  end
  # Root-mean-square of the elements.
  def rms_avg
    Math.sqrt(map { |x| x.to_f ** 2 }.avg)
  end
  # Root-mean-square error over an array of [x, y] pairs.
  def rms_err
    Math.sqrt(map { |x,y| (x.to_f - y.to_f) ** 2 }.avg)
  end
  # Population standard deviation.  (The subtraction is written in the
  # reverse of the usual E[x^2] - E[x]^2, but .abs makes it equivalent.)
  def stddev
    Math.sqrt((avg ** 2 - (map{|i| i.to_f ** 2}.avg)).abs)
  end
  # Group an array of [key, value] pairs by key.  Without a block,
  # returns { key => [values] }; with a block, each value list is
  # collapsed by reducer.call(key, values).
  # NOTE(review): shadows Enumerable#reduce for every Array.
  def reduce(&reducer)
    ret = Hash.new;
    each do |k,v|
      ret[k] = Array.new unless ret.has_key? k;
      ret[k].push(v);
    end
    if reducer.nil? then ret
    else
      ret.to_a.collect do |k,vs|
        [ k, reducer.call(k, vs) ]
      end.to_h
    end
  end
  # Round-robin partition into K arrays
  def subdivide(k)
    cnt = 0;
    ret = (0...k).map {|i| Array.new };
    each { |i| ret[cnt % k].push i; cnt += 1; };
    ret;
  end
  # Inorder partition into groups of K elements
  def take_groups(k)
    (0...(size / k.to_f).ceil).map do |i|
      self[k*i...[k*(i+1), size].min]
    end
  end
  # Zip the first member array against all the remaining member arrays.
  def zip_members
    self[0].zip(*(self[1..-1]))
  end
  # Filter by regex.  Without a block, keep matching elements; with a
  # block, yield each MatchData and collect the results.
  # NOTE(review): shadows Enumerable#grep (which matches via ===).
  def grep(pattern, &block)
    ret = [];
    if block.nil?
      then each { |l| ret.push(l) if pattern =~ l; }
      else each { |l| match = pattern.match(l);
                      ret.push(block.call(match)) if match; }
    end
    ret
  end
  # Sliding windows of window_size elements, optionally transformed by
  # the block.  NOTE(review): in the long-array, no-block case each
  # window is wrapped in an extra array ([w.clone]) — preserved as-is;
  # verify against callers.
  def window(window_size = 10, &block)
    if length <= window_size then
      if block.nil? then return [self.clone];
      else return [block.call(self)];
      end
    else
      ret = Array.new;
      w = Array.new;
      each do |item|
        w.push(item);
        w.shift if w.length > window_size;
        if w.length >= window_size then
          ret.push(if block.nil? then [w.clone] else block.call(w) end)
        end
      end
      ret
    end
  end
  # Left fold with an explicit initial accumulator.
  def fold(accum = nil)
    each { |i| accum = yield accum, i }
    accum
  end
  # Thin the array (in place) down to roughly num_samples evenly-spaced
  # elements.  Fixed: referenced the undefined `keep_step` (NameError
  # whenever thinning was needed) and the predicate deleted every k-th
  # element instead of keeping every k-th; now keeps every k-th.
  def pick_samples_evenly(num_samples)
    return self if(self.length <= num_samples);
    keep_steps = (self.length / num_samples).to_i
    step = 0;
    self.delete_if { step += 1; (step % keep_steps) != 0 }
  end
  # Render rows (and optional headers) as an aligned ASCII table string.
  def to_table(headers = nil)
    # Widest cell in each column, header row included.
    row_sizes =
      ((headers.nil? ? [] : [headers]) + self).
        map { |row| row.map { |c| c.to_s.length } }.
        unzip.
        map { |col| col.compact.max }
    ( unless headers.nil? then
        [ " " + headers.zip(row_sizes).map do |col, exp_size|
            col + (if col.size < exp_size then
                    (" " * (exp_size - col.size))
                  else "" end)
          end.join(" | "),
          ("-" * (row_sizes.sum + 2 + (row_sizes.length - 1) * 3))
        ]
      else [] end +
      map do |row|
        " " + row.zip(row_sizes).map do |col, exp_size|
          col = col.to_s
          if col.size < exp_size
            then col.center(exp_size)
            else col
          end
        end.join(" | ")
      end
    ).join("\n")
  end
  # For an array of hashes: collect the union of keys, then project every
  # record onto that key list.  Returns [keys, rows].
  def tabulate_schemaless_records
    keys = map {|r| r.keys}.flatten.unique.sort
    [ keys ,
      map {|r| keys.map {|k| r[k] }}
    ]
  end
  # True iff the block holds for every element.
  def for_all
    each { |v| return false unless yield v }
    true;
  end
  # Yield each prefix self[0..i] in order of increasing length.
  def each_prefix
    each_index do |i|
      yield self[0..i];
    end
  end
  # NOTE(review): shadows builtin #select; also drops nil/false elements
  # even when the block accepts them.
  def select
    map { |x| x if yield x }.compact
  end
  # For an array of hashes: key => [value-from-hash-0, value-from-hash-1,
  # ...], with nil where a hash lacks the key.
  def cogroup
    ret = Hash.new { |h,k| h[k] = [nil] * size }
    each_index do |i|
      self[i].each do |k, v|
        ret[k][i] = v
      end
    end
    ret
  end
  # Return every cnt'th element of the array.
  def every(cnt, start = 0)
    (0..(((size-1-start)/cnt).to_i)).map { |i| self[i*cnt+start] }
  end
  # Create batches of up to size cnt.
  def batch(cnt)
    (0..(((size-1)/cnt).to_i)).map { |i| self[(i*cnt)...((i+1)*cnt)] }
  end
  # Map each element to an array and concatenate the results.
  def flatmap
    ret = []
    each { |i| ret = ret + yield(i) }
    ret
  end
  # Project each member hash onto the given keys (see Hash#project).
  def project(*keys)
    map { |x| x.project(*keys) }
  end
  # Sorted list of distinct elements (elements must be comparable).
  def unique
    last = nil
    sort.
      map { |c| last = c if c != last }.
      # map { |c| p c }.
      compact
  end
  # Histogram over [min, max] with the given bin width: sorted
  # [bin_floor, count] pairs, empty bins included with count 0.
  # Relies on this file's Hash#join and the #reduce override above.
  def histogram(bin_width = 5)
    min_val = (min - min % bin_width).to_i
    max_val = (max - max % bin_width + bin_width).to_i
    (min_val..max_val).to_a.every(bin_width).
      map { |x| [x, 0] }.
      to_h.
      join(map { |x| (x.to_f / bin_width).to_i * bin_width }.
             reduce { |k,v| v.count },
           :left
          ).
      map { |bin, cnt| [bin, cnt.compact.sum] }.
      sort { |a, b| a[0] <=> b[0] }
  end
  # Running total: [x0, x0+x1, x0+x1+x2, ...].
  def cumulative_sum
    tot = 0;
    map { |x| tot += x }
  end
  # Return a copy with val inserted at position idx (clamped to the ends).
  def splice(val, idx)
    return [val] + self if idx <= 0
    return self + [val] if idx >= length
    return self[0...idx] + [val] + self[idx..-1]
  end
  # All permutations of the array.
  def all_sorts
    return [[]] if empty?
    return [self] if length == 1
    hd = self[0]
    self[1..-1].all_sorts.map do |rest|
      (0..rest.length).map { |i| rest.splice(hd, i) }
    end.flatten(1)
  end
  # Relational join of two arrays of rows.  With :eq (one column index,
  # or a [left, right] pair of indices), hash-join rows where
  # self_row[a] == other_row[b]; otherwise nested-loop join on the block
  # predicate.  Joined rows are concatenated (i + j).
  # Fixed: the hash-join probe read the out-of-scope variable `i`
  # (NameError) instead of the probe row `j`.
  def merge(other, args = {})
    if args.has_key?(:eq)
      args[:eq] = [args[:eq], args[:eq]] unless args[:eq].is_a? Array
      a, b = args[:eq]
      idx = Hash.new { |h,k| h[k] = [] }
      self.each {|i| idx[i[a]].push i }
      other.map {|j| idx[j[b]].map { |i| i + j } }.flatten(1)
    else
      self.map {|i|
        other.map {|j|
          i + j if yield i,j
        }.compact
      }.flatten(1)
    end
  end
  # Keep elements for which the block is truthy (alias of #select above).
  def where
    map {|i| i if yield i }.compact
  end
end
# Project-local extensions to Hash.
class Hash
  # Keys present in both hashes.
  def intersect(other)
    keys.find_all { |k| other.has_key?(k) }
  end
  # Lay out grouped bar-chart geometry for { group_label => [values] }:
  # returns [xtics spec, unzipped per-group [x, y] point lists, xrange].
  def bar_graph_dataset(bar = 0.5, set_sep = 1.0, bar_sep = 0.2)
    curr_width = 0;
    tics = collect do |human,data|
      next_delta = data.length * bar + (data.length - 1) * bar_sep;
      curr_width += next_delta + set_sep;
      "\"#{human}\" #{curr_width - next_delta / 2}"
    end
    curr_width = 0;
    points = values.collect do |data|
      curr_width += set_sep - bar_sep
      data.collect do |point|
        curr_width += bar_sep + bar;
        [curr_width - bar / 2, point]
      end
    end.unzip;
    return ["(#{tics.join(', ')})" , points, "[0:#{curr_width+set_sep}]"];
  end
  # Key-sorted [key, value] pairs.
  def to_sorted_a
    keys.sort.map do |k|
      [k, self[k]]
    end
  end
  # Map every leaf of a nested hash, yielding (key_path, leaf_value) and
  # preserving the nesting structure.  Fixed: the leaf branch yielded the
  # undefined variable `v` (NameError) instead of self[k].
  def map_leaves(prefix = [])
    keys.to_a.map do |k|
      [ k,
        if self[k].is_a? Hash
          then self[k].map_leaves(prefix+[k]) { |ik,v| yield(ik, v) }
          else yield(prefix+[k], self[k])
        end
      ]
    end.to_h
  end
  # Values for the given keys, in order (nil where a key is missing).
  def project(*keys)
    keys.map { |k| self[k] }
  end
  # Join two hashes on their keys: key => [self[k], h[k]].  `outer`
  # selects the key set: :full, :left, :right, or inner join (default).
  def join(h, outer = :no)
    case outer
    when :full then
      keys + h.keys.find_all { |k| not has_key? k }
    when :left then
      keys
    when :right then
      h.keys
    else
      intersect(h)
    end.
      map { |k| [k, [self[k], h[k]]] }.to_h
  end
  # Flatten nested hashes into one level, joining key paths with sep;
  # flattened keys become symbols.
  def flatten_tree(sep = nil, prefix = nil)
    map { |k,v|
      unless prefix.nil?
        k = sep + k.to_s unless sep.nil?
        k = prefix.to_s + k.to_s
      end
      case v
      when Hash then v.flatten_tree(sep, k).to_a
      else [ [k.to_sym, v] ]
      end
    }.flatten(1).to_h
  end
end
class Float
  # Round to n significant figures, e.g. 1234.5.sig_figs(2) == 1200.0.
  # Fixed: Math.log10 of a negative receiver raised Math::DomainError;
  # the magnitude is now used, so negatives round symmetrically.
  def sig_figs(n)
    if self == 0.0 then self
    else
      mult = (10.0 ** (Math.log10(self.abs).ceil.to_f - n.to_i.to_f))
      (self / mult).round * mult;
    end
  end
end
# Project-local extensions to IO.
class IO
  # Read every remaining line, yielding each to the block while also
  # collecting it; returns the array of lines read.
  def tee_readlines
    collected = []
    each do |line|
      yield line
      collected.push line
    end
    collected
  end
  # Keep the lines for which the block is truthy.
  # NOTE(review): shadows Enumerable#grep (which matches a pattern via ===).
  def grep
    each_with_object([]) { |line, acc| acc.push(line) if yield line }
  end
end
class File
  # Open inFile for reading and outFile with the given mode, yield both
  # handles to the block, and close them automatically afterwards.
  def self.stream(inFile, outFile, mode = "w+")
    File.open(inFile) do |input|
      File.open(outFile, mode) do |output|
        yield(input, output)
      end
    end
  end
end
class Integer
  # Human-readable byte count (4 significant figures), e.g. "2.048 KB".
  # Fixed: the scale list skipped "TB" (so terabyte-range values were
  # mislabelled), and values 0 and 1 crashed — Math.log(0) yields
  # -Infinity and (-Infinity).to_i raises FloatDomainError.
  def to_bytestring
    return "-#{(-self).to_bytestring}" if self < 0;
    return "#{self.to_f} B" if self < 2  # avoid Math.log(0) below
    depth = (Math.log(self/2) / (10.0 * Math.log(2))).to_i
    scales = ["B", "KB", "MB", "GB", "TB", "PB", "EB"];
    depth = scales.length-1 if depth >= scales.length;
    "#{(self.to_f / (1024.0**(depth))).to_f.sig_figs(4)} #{scales[depth]}"
  end
  # Roll `self` dice with `die` faces each; returns the individual rolls.
  def d(die)
    (0...self).map { rand(die)+1 }
  end
end
class String
  # Naive pluralization: append "s" unless num is exactly 1.
  def pluralize(num)
    num == 1 ? self : self + "s"
  end
end
class Dir
  # Run the block with the working directory set to d, restoring the
  # previous working directory afterwards; returns the block's value.
  # Fixed: the original skipped the restore when the block raised.
  # Dir.chdir's block form restores the prior directory even on raise.
  def Dir.in_dir(d)
    Dir.chdir(d) { yield }
  end
end

File diff suppressed because it is too large Load diff