pdbench/TPCH-Trio/tpch_horizontal_partitioner.py

76 lines
1.9 KiB
Python

#!/usr/bin/env python
import sys
import re
import random
# Plain decimal numbers (optional sign, optional fractional part). Values that
# match are written unquoted; everything else is written as a quoted string.
NUMERIC_RE = re.compile(r"^[+-]?\d*\.?\d+?$")


def parse_record(raw_line):
    """Split one '|'-terminated input line into its attribute values.

    Every ';' is removed from the line, the line is split on '|', and the
    final piece (whatever follows the last '|', normally just the newline)
    is discarded. A blank line therefore yields an empty list.
    """
    fields = raw_line.replace(';', '').split('|')
    fields.pop()
    return fields


def format_xtuple(table, alternatives, attrs):
    """Render a group of alternative records as one insert statement.

    table        -- target table name, used verbatim.
    alternatives -- non-empty list of records (lists of attribute strings).
    attrs        -- maximum number of leading attributes to emit per record;
                    a value <= 0 or >= the record length emits every field.

    Each alternative is written as "(v1,v2,...):c" where c is 1 for a single
    alternative and 1.0 / len(alternatives) otherwise; alternatives are
    separated by '|'.
    """
    count = len(alternatives)
    confidence = 1 if count == 1 else 1.0 / count
    rendered = []
    for record in alternatives:
        # Truncate to the requested attribute count so that nothing is
        # appended after the closing "):c" of an alternative.
        kept = record[:attrs] if 0 < attrs < len(record) else record
        # NOTE(review): quoted values are not escaped; a value containing a
        # single quote would produce a broken statement.
        values = ",".join(
            value if NUMERIC_RE.match(value) else "'" + value + "'"
            for value in kept
        )
        rendered.append("(%s):%s" % (values, confidence))
    return "insert into %s values[%s];" % (table, "|".join(rendered))


def main(argv=None):
    """Convert a '|'-separated data file into insert statements on stdout.

    Expected arguments (after the program name):
        filename #lines #partitions #attributes #maxalts tablename

    Consecutive input records are grouped into x-tuples of a random size
    between 1 and #maxalts. When #partitions > 0 the current partition
    number is appended to the table name. "commit;" is printed each time the
    per-partition line counter reaches a multiple of 1000 and once at the end.

    Exits with status 1 after printing a usage message when the argument
    count is wrong.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 7:
        print('invalid arg')
        print('Usage: ' + argv[0] + ' filename #lines #partitions #attributes #maxalts tablename')
        sys.exit(1)

    filename = argv[1]
    lines = int(argv[2])
    partitions = int(argv[3])
    attrs = int(argv[4])
    maxalts = int(argv[5])
    tablename = argv[6]

    def current_table(part_no):
        # The partition suffix is only used when partitioning is enabled.
        return tablename + (str(part_no) if partitions > 0 else "")

    part_no = 1
    part_lines = 0
    next_alts = random.randint(1, maxalts)
    pending = []

    # The context manager guarantees the input file is closed on every path.
    with open(filename, 'r') as source:
        for raw_line in source:
            record = parse_record(raw_line)
            if not record:
                # Blank line: nothing to insert, and it must not be mistaken
                # for the end of the input.
                continue
            pending.append(record)

            if len(pending) == next_alts:
                print(format_xtuple(current_table(part_no), pending, attrs))
                pending = []
                next_alts = random.randint(1, maxalts)
                # Partitions only advance on x-tuple boundaries. Floor
                # division keeps the threshold an integer on Python 2 and 3.
                # NOTE(review): part_no is not capped at `partitions`;
                # confirm that running past the last partition is intended.
                if partitions > 0 and lines > 0 and part_lines >= lines // partitions:
                    part_no += 1
                    part_lines = 0

            part_lines += 1
            if part_lines % 1000 == 0:
                print("commit;")

    # Flush the alternatives collected before the input ran out.
    if pending:
        print(format_xtuple(current_table(part_no), pending, attrs))
    print("commit;")


if __name__ == "__main__":
    main()