Initial import

master
Oliver Kennedy 2016-10-06 19:10:22 -04:00
commit 7f0c4cfbb7
101 changed files with 29392 additions and 0 deletions

25
MayBMS-tpch/README.txt Normal file
View File

@ -0,0 +1,25 @@
This is the distribution of a modified version of the TPC-H data generator
developed by the MayBMS team (http://maybms.sourceforge.net) which was used
in the experiments of the paper
Fast and Simple Relational Processing of Uncertain Data.
L. Antova, T. Jansen, C. Koch, D. Olteanu. Proc. ICDE 2008.
This directory contains the following files:
* uncertain-tpch/ -- the source code of the data generator.
* query1.sql, query2.sql, query3.sql -- example queries used in our
experiments.
* urel-attr-tuple.sql -- this SQL script joins the U-relations generated
by the data generator together, i.e., it maps an attribute-level
representation achieved by vertical decomposition to a tuple-level
representation.
* urel-ULDB.sql -- this SQL script maps tuple-level U-relations to Trio
ULDBs.

2
MayBMS-tpch/query1.sql Normal file
View File

@ -0,0 +1,2 @@
-- TPC-H Q3 variant over attribute-level U-relations (MayBMS vertical
-- decomposition): each U_<attr> table holds world-set descriptor columns
-- (C1, W1, ...) plus a tuple id and one attribute. Every join predicate of
-- the form (U1.Cx <> U2.Cy or U1.Wx = U2.Wy) keeps only world-consistent
-- combinations of alternatives. The query joins lineitem/customer/orders
-- attributes back together and filters on c_mktsegment, o_orderdate and
-- l_shipdate, materializing the result into table `res`.
-- NOTE(review): date/number literals are compared as strings here; this
-- assumes the U_* columns store ISO-formatted dates -- confirm schema.
create table res as select l_orderkey,o_orderdate,o_shippriority from (select U1.C1 as C1,U1.W1 as W1,U1.C2 as C2, U1.W2 as W2,U1.C3 as C3, U1.W3 as W3,U1.C4 as C4, U1.W4 as W4,U1.C5 as C5, U1.W5 as W5,U1.C6 as C6, U1.W6 as W6,U1.C7 as C7, U1.W7 as W7, U2.C1 as C8,U2.W1 as W8,l_orderkey,o_orderdate,o_shippriority,lineitem_tid, customer_tid, orders_tid from (select U.C1 as C1,U.W1 as W1,U.C2 as C2, U.W2 as W2,U.C3 as C3, U.W3 as W3,U.C4 as C4, U.W4 as W4,U.C5 as C5, U.W5 as W5,U.C6 as C6, U.W6 as W6,U.C7 as C7, U.W7 as W7,lineitem_tid, customer_tid, orders_tid,l_orderkey,o_orderdate from (select U1.C1 as C1,U1.W1 as W1,U1.C2 as C2, U1.W2 as W2, U2.C1 as C3,U2.W1 as W3,U2.C2 as C4, U2.W2 as W4,U2.C3 as C5, U2.W3 as W5,U2.C4 as C6, U2.W4 as W6,U2.C5 as C7, U2.W5 as W7,l_orderkey,o_orderdate,o_orderkey,lineitem_tid, customer_tid, orders_tid from (select U1.C1 as C1,U1.W1 as W1, U2.C1 as C2,U2.W1 as W2,l_orderkey,lineitem_tid from (select C1, W1, tid as lineitem_tid, l_orderkey from U_l_orderkey) as U1 join (select U.C1 as C1,U.W1 as W1,lineitem_tid from (select * from (select C1, W1, tid as lineitem_tid, l_shipdate from U_l_shipdate) as X1 where l_shipdate < '1995-03-17') U) as U2 using (lineitem_tid) where (U1.C1 <> U2.C1 or U1.W1 = U2.W1)) as U1, (select U1.C1 as C1,U1.W1 as W1,U1.C2 as C2, U1.W2 as W2,U1.C3 as C3, U1.W3 as W3,U1.C4 as C4, U1.W4 as W4, U2.C1 as C5,U2.W1 as W5,o_orderdate,o_orderkey,customer_tid, orders_tid from (select U.C1 as C1,U.W1 as W1,U.C2 as C2, U.W2 as W2,U.C3 as C3, U.W3 as W3,U.C4 as C4, U.W4 as W4,customer_tid, orders_tid,o_orderdate from (select U1.C1 as C1,U1.W1 as W1,U1.C2 as C2, U1.W2 as W2, U2.C1 as C3,U2.W1 as W3,U2.C2 as C4, U2.W2 as W4,c_custkey,o_custkey,o_orderdate,customer_tid, orders_tid from (select U1.C1 as C1,U1.W1 as W1, U2.C1 as C2,U2.W1 as W2,c_custkey,customer_tid from (select C1, W1, tid as customer_tid, c_custkey from U_c_custkey) as U1 join (select U.C1 as C1,U.W1 as W1,customer_tid from (select * from (select C1, W1, 
tid as customer_tid, c_mktsegment from U_c_mktsegment) as X2 where c_mktsegment = 'BUILDING') U) as U2 using (customer_tid) where (U1.C1 <> U2.C1 or U1.W1 = U2.W1)) as U1, (select U1.C1 as C1,U1.W1 as W1, U2.C1 as C2,U2.W1 as W2,o_custkey,o_orderdate,orders_tid from (select C1, W1, tid as orders_tid, o_custkey from U_o_custkey) as U1 join (select * from (select C1, W1, tid as orders_tid, o_orderdate from U_o_orderdate) as X3 where o_orderdate > '1995-03-15') as U2 using (orders_tid) where (U1.C1 <> U2.C1 or U1.W1 = U2.W1)) as U2 where U1.c_custkey=U2.o_custkey and (U1.C2 <> U2.C2 or U1.W2 = U2.W2) and (U1.C2 <> U2.C1 or U1.W2 = U2.W1) and (U1.C1 <> U2.C2 or U1.W1 = U2.W2) and (U1.C1 <> U2.C1 or U1.W1 = U2.W1)) U) as U1 join (select C1, W1, tid as orders_tid, o_orderkey from U_o_orderkey) as U2 using (orders_tid) where (U1.C4 <> U2.C1 or U1.W4 = U2.W1) and (U1.C3 <> U2.C1 or U1.W3 = U2.W1) and (U1.C2 <> U2.C1 or U1.W2 = U2.W1) and (U1.C1 <> U2.C1 or U1.W1 = U2.W1)) as U2 where U1.l_orderkey=U2.o_orderkey and (U1.C2 <> U2.C5 or U1.W2 = U2.W5) and (U1.C2 <> U2.C4 or U1.W2 = U2.W4) and (U1.C2 <> U2.C3 or U1.W2 = U2.W3) and (U1.C2 <> U2.C2 or U1.W2 = U2.W2) and (U1.C2 <> U2.C1 or U1.W2 = U2.W1) and (U1.C1 <> U2.C5 or U1.W1 = U2.W5) and (U1.C1 <> U2.C4 or U1.W1 = U2.W4) and (U1.C1 <> U2.C3 or U1.W1 = U2.W3) and (U1.C1 <> U2.C2 or U1.W1 = U2.W2) and (U1.C1 <> U2.C1 or U1.W1 = U2.W1)) U) as U1 join (select C1, W1, tid as orders_tid, o_shippriority from U_o_shippriority) as U2 using (orders_tid) where (U1.C7 <> U2.C1 or U1.W7 = U2.W1) and (U1.C6 <> U2.C1 or U1.W6 = U2.W1) and (U1.C5 <> U2.C1 or U1.W5 = U2.W1) and (U1.C4 <> U2.C1 or U1.W4 = U2.W1) and (U1.C3 <> U2.C1 or U1.W3 = U2.W1) and (U1.C2 <> U2.C1 or U1.W2 = U2.W1) and (U1.C1 <> U2.C1 or U1.W1 = U2.W1))X4;

1
MayBMS-tpch/query2.sql Normal file
View File

@ -0,0 +1 @@
-- TPC-H Q6 variant over attribute-level U-relations: selects extended
-- prices of lineitems whose uncertain quantity, shipdate and discount
-- alternatives satisfy the Q6 range predicates in at least one common
-- world. (Ci, Wi) pairs are world-set descriptor columns; the predicates
-- (U1.Cx <> U2.Cy or U1.Wx = U2.Wy) enforce world consistency.
-- NOTE(review): l_quantity/l_discount are compared against quoted string
-- literals ('24', '0.05') -- correct only if the engine coerces them to
-- numerics or the columns are stored as comparable text; confirm schema.
create table res as select l_extendedprice from (select U1.C1 as C1,U1.W1 as W1,U1.C2 as C2, U1.W2 as W2, U2.C1 as C3,U2.W1 as W3,U2.C2 as C4, U2.W2 as W4,l_extendedprice,l_discount,lineitem_tid from (select U1.C1 as C1,U1.W1 as W1, U2.C1 as C2,U2.W1 as W2,l_extendedprice,lineitem_tid from (select U.C1 as C1,U.W1 as W1,lineitem_tid from (select * from (select C1, W1, tid as lineitem_tid, l_quantity from U_l_quantity) as X50 where l_quantity < '24') U) as U1 join (select C1, W1, tid as lineitem_tid, l_extendedprice from U_l_extendedprice) as U2 using (lineitem_tid) where (U1.C1 <> U2.C1 or U1.W1 = U2.W1)) as U1 join (select U1.C1 as C1,U1.W1 as W1, U2.C1 as C2,U2.W1 as W2,l_discount,lineitem_tid from (select U.C1 as C1,U.W1 as W1,lineitem_tid from (select * from (select C1, W1, tid as lineitem_tid, l_shipdate from U_l_shipdate) as X51 where l_shipdate > '1994-01-01' and l_shipdate < '1996-01-01') U) as U1 join (select * from (select C1, W1, tid as lineitem_tid, l_discount from U_l_discount) as X52 where l_discount > '0.05' and l_discount < '0.08') as U2 using (lineitem_tid) where (U1.C1 <> U2.C1 or U1.W1 = U2.W1)) as U2 using (lineitem_tid) where (U1.C2 <> U2.C2 or U1.W2 = U2.W2) and (U1.C2 <> U2.C1 or U1.W2 = U2.W1) and (U1.C1 <> U2.C2 or U1.W1 = U2.W2) and (U1.C1 <> U2.C1 or U1.W1 = U2.W1))X53;

1
MayBMS-tpch/query3.sql Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,220 @@
# @(#)PORTING.NOTES 2.1.8.1
Table of Contents
==================
1. General Program Structure
2. Naming Conventions and Variable Usage
3. Porting Procedures
4. Compilation Options
5. Customizing QGEN
6. Further Enhancements
7. Known Porting Problems
8. Reporting Problems
1. General Program Structure
The code provided with TPC-H and TPC-R benchmarks includes a database
population generator (DBGEN) and a query template translator(QGEN). It
is written in ANSI-C, and is meant to be easily portable to a broad variety
of platforms. The program is composed of five source files and some
support and header files. The main modules are:
build.c: each table in the database schema is represented by a
routine mk_XXXX, which populates a structure
representing one row in table XXXX.
See Also: dss_types.h, bm_utils.c, rnd.*
print.c: each table in the database schema is represented by a
routine pr_XXXX, which prints the contents of a
structure representing one row in table XXXX.
See Also: dss_types.h, dss.h
driver.c: this module contains the main control functions for
DBGEN, including command line parsing, distribution
management, database scaling and the calls to mk_XXXX
and pr_XXXX for each table generated.
qgen.c: this module contains the main control functions for
QGEN, including query template parsing.
varsub.c: each query template includes one or more parameter
substitution points; this routine handles the
parameter generation for the TPC-H/TPC-R benchmark.
The support utilities provide a generalized set of functions for data
generation and include:
bm_utils.c: data type generators, string management and
portability routines.
rnd.*: a general purpose random number generator used
throughout the code.
dss.h:
shared.h: a set of '#defines' for limits, formats and fixed
values
dsstypes.h: structure definitions for each table definition
2. Naming Conventions and Variable Usage
Since DBGEN will be maintained by a large number of people, it is
particularly important to observe the coding, variable naming and usage
conventions detailed here.
#define
--------
All #define directives are found in header files (*.h). In general,
the header files segregate variables and macros as follows:
rnd.h -- anything exclusively referenced by rnd.c
dss.h -- general defines for the benchmark, including *all*
extern declarations (see below).
shared.h -- defines related to the tuple definitions in
dsstypes.h. Isolated to ease automatic processing needed by many
direct load routines (see below).
dsstypes.h -- structure definitons and typedef directives to
detail the contents of each table's tuples.
config.h -- any porting and configuration related defines should
go here, to localize the changes necessary to move the suite
from one machine to another.
tpcd.h -- defines related to QGEN, rather than DBGEN
extern
------
DBGEN and QGEN make extensive use of extern declarations. This could
probably stand to be changed at some point, but has made the rapid
turnaround of prototypes easier. In order to be sure that each
declaration was matched by exactly one definition per executable,
they are all declared as EXTERN, a macro dependent on DECLARER. In
any module that defines DECLARER, all variables declared EXTERN will
be defined as globals. DECLARER should be declared only in modules
containing a main() routine.
Naming Conventions
------------------
defines
o All defines use upper case
o All defines use a table prefix, if appropriate:
O_* relates to orders table
L_* relates to lineitem table
P_* relates to part table
PS_* relates to partsupplier table
C_* relates to customer table
S_* relates to supplier table
N_* relates to nation table
R_* relates to region table
T_* relates to time table
o All defines have a usage prefix, if appropriate:
*_TAG environment variable name
*_DFLT environment variable default
*_MAX upper bound
*_MIN lower bound
*_LEN average length
*_SD random number seed (see rnd.*)
*_FMT printf format string
*_SCL divisor (for scaled arithmetic)
*_SIZE tuple length
3. Porting Procedures
The code provided should be easily portable to any machine providing an
ANSI C compiler.
-- Copy makefile.suite to makefile
-- Edit the makefile to match the name of your C compiler
and to include appropriate compilation options in the CFLAGS
definition
-- make.
Special care should be taken in modifying any of the monetary calcu-
lations in DBGEN. These have proven to be particularly sensitive to
portability problems. If you decide to create the routines for inline
data load (see below), be sure to compare the resulting data to that
generated by a flat file data generation to be sure that all numeric
conversions have been correct.
If the compile generates errors, refer to "Compilation Options", below.
The problem you are encountering may already have been addressed in the
code.
If the compile is successful, but QGEN is not generating the appropriate
query syntax for your environment, refer to "Customizing QGEN", below.
For other problems, refer to "Reporting Problems" at the end of this
document.
4. Compilation Options
config.h and makefile.suite contain a number of compile time options intended
to make the process of porting the code provided with TPC-H/TPC-R as easy as
possible on a broad range of platforms. Most ports should consist of reviewing
the possible settings described in config.h and modifying the makefile
to employ them appropriately.
5. Customizing QGEN
QGEN relies on a number of vendor-specific conventions to generate
appropriate query syntax. These are controlled by #defines in tpcd.h,
and enabled by a #define in config.h. If you find that the syntax
generated by QGEN is not sufficient for your environment you will need
to modify these two files. It is strongly recommended that you not change
the general organization of the files.
Currently defined options are:
VTAG -- marks a variable substitution point [:]
QDIR_TAG -- environment variable which points to query templates
[DSS_QUERY]
GEN_QUERY_PLAN -- syntax to generate a query plan ["Set Explain On;"]
START_TRAN -- syntax to begin a transaction ["Begin Work;"]
END_TRAN -- syntax to end a transaction ["Commit Work;"]
SET_OUTPUT -- syntax to redirect query output ["Output to"]
SET_ROWCOUNT -- syntax to set the number of rows returned
["{return %d rows}"]
SET_DBASE -- syntax to connect to a database
6. Further Enhancements
load_stub.c provides entry points for two likely enhancements.
The ld_XXXX routines make it possible to load the
database directly from DBGEN without first writing the database
population out to the filesystem. This may prove particularly useful
when loading larger database populations. Be particularly careful about
monetary amounts. To assure portability, all monetary calculations are
done using long integers (which hold money amounts as a number of
pennies). These will need to be scaled to dollars and cents (by dividing
by 100), before the values are presented to the DBMS.
The hd_XXXX routines allow header information to be written before the
creation of the flat files. This should allow systems which require
formatting information in database load files to use DBGEN with only
a small amount of custom code.
qgen.c defines the translation table for query templates in the
routine qsub().
varsub.c defines the parameter substitutions in the routine varsub().
If you are porting DBGEN to a machine that does not support a native word
size larger than 32 bits, you may wish to modify the default values for
BITS_PER_LONG and MAX_LONG. These values are used in the generation of
the sparse primary keys in the order and lineitem tables. The code has
been structured to run on any machine supporting a 32 bit long, but
may be slightly more efficient on machines that are able to make use of
a larger native type.
7. Known Porting Problems
The current codeline will not compile under SunOS 4.1. Solaris 2.4 and later
are supported, and anyone wishing to use DBGEN on a Sun platform is
encouraged to use one of these OS releases.
8. Reporting Problems
The code provided with TPC-H/TPC-R has been written to be easily portable,
and has been tested on a wide variety of platforms. If you have any
trouble porting the code to your platform, please help us to correct
the problem in a later release by sending the following information
to the TPC D subcommittee:
Computer Make and Model
Compiler Type and Revision Number
Brief Description of the problem
Suggested modification to correct the problem

View File

@ -0,0 +1,52 @@
In addition to the standard TPC-H parameters, UTPC-H has three more
parameters:
-x uncertainty ratio.
-z (zipf) correlation ratio.
-m maximum alternatives per uncertain cell.
in addition to the standard dbgen parameters such as scale (-s 1 for
1GB of data).
The (standard dbgen) parameter "s" is used to control the size of each
world (-s 1 means that each world has size 1 GB). The uncertainty
ratio (x) controls the percentage of (uncertain) fields with several
possible values, and the parameter "m" controls how many possible
values can be assigned to a field. The parameter "z" defines a Zipf
distribution for the variables with different dependent field counts
(DFC). The DFC of a variable is the number of tuple fields dependent
on that variable. We use the parameter "z" to control the attribute
correlations: For "n" uncertain fields, there are ceiling(C*z^{i})
variables with DFC "i", where
C = n(z-1)/(z^{k+1} - 1),
i.e., n is sum of (C*z^i) with i from 0 to k. Thus greater z-values
correspond to higher correlations in the data. The number of domain
values of a variable with DFC k>1 is chosen using the formula
p^{k-1}*(Product of m_i with i from 1 to k),
where "m_i" is the number of different values for the "i"-th field
dependent on that variable, and "p" is the probability that a
combination of possible values for the "k" fields is valid. This
assumption fits naturally to data cleaning scenarios (work by the
MayBMS team, ICDE'07, on chasing dependencies on world-set
decompositions)
By default, after correlating two variables with arbitrary DFCs, only
$p*100$ percent of the combinations satisfy the constraints and are
preserved. The value of "p" can be changed in the code (no input
parameter to date).
The uncertain fields are assigned randomly to variables. This can lead
to correlations between fields belonging to different tuples or even
to different relations. This fits to scenarios where constraints are
enforced across tuples or relations.

View File

@ -0,0 +1,262 @@
/*
* $Id: bcd2.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: bcd2.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.2 2005/01/03 20:08:58 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:45 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* recreation after CVS crash
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
* bcd.c: conversion routines for multi-byte arithmetic
*
* defined routines:
* bin_bcd2(long binary, long *low_res, long *high_res)
* bcd2_bin(long *dest, long bcd)
* bcd2_add(long *bcd_low, long *bcd_high, long addend)
* bcd2_sub(long *bcd_low, long *bcd_high, long subend)
* bcd2_mul(long *bcd_low, long *bcd_high, long multiplier)
* bcd2_div(long *bcd_low, long *bcd_high, long divisor)
* long bcd2_mod(long *bcd_low, long *bcd_high, long modulo)
* long bcd2_cmp(long *bcd_low, long *bcd_high, long compare)
*/
#include <stdio.h>
#include "bcd2.h" /* for function prototypes */
#define DIGITS_PER_LONG 7
#define WORD_DIVISOR 10000000
#define GET_DIGIT(num, low, high) \
((num) >= DIGITS_PER_LONG)? \
(high & (0xF << (4 * ((num) - DIGITS_PER_LONG)))) \
>> (((num) - DIGITS_PER_LONG) * 4): \
(low & (0xF << (4 * (num)))) >> ((num) * 4)
#define SET_DIGIT(value, num, low, high) \
if ((num) >= DIGITS_PER_LONG) \
{ \
*high &= \
(0xFFFFFFF ^ (0xF << (4 * ((num) - DIGITS_PER_LONG)))); \
*high |= (value << (4 * ((num) - DIGITS_PER_LONG))); \
} \
else \
{ \
*low = (*low & (0xFFFFFFF ^ (0xF << (4 * (num))))); \
*low |= (value << (4 * (num))); \
}
/*
 * bin_bcd2: convert a binary long into a 14-digit packed-BCD value split
 * across two longs of DIGITS_PER_LONG (7) digits each: *high_res gets the
 * most significant 7 digits, *low_res the least significant 7.
 * Always returns 0.
 * NOTE(review): assumes 0 <= binary <= 99999999999999; a negative value
 * or one wider than 14 digits would overflow number[15] -- confirm callers.
 */
int
bin_bcd2(long binary, long *low_res, long *high_res)
{
char number[15],
*current;
int count;
long *dest;
*low_res = *high_res = 0;
/* render as exactly 14 decimal digits, then pack 4 bits per digit */
sprintf(number, "%014ld", binary);
for (current = number, count=13; *current; current++, count--)
{
/* digits 13..7 accumulate into high_res, digits 6..0 into low_res */
dest = (count < DIGITS_PER_LONG)?low_res:high_res;
*dest = *dest << 4;
*dest |= *current - '0';
}
return(0);
}
/*
 * bcd2_bin: unpack one packed-BCD word (7 decimal digits, 4 bits each)
 * into its binary value, stored in *dest. *dest is zeroed first.
 * Always returns 0.
 */
int
bcd2_bin(long *dest, long bcd)
{
    int shift;

    *dest = 0;
    /* walk digits from most significant (bits 24..27) to least (0..3) */
    for (shift = 24; shift >= 0; shift -= 4)
        *dest = *dest * 10 + ((bcd >> shift) & 0xF);
    return (0);
}
/*
 * bcd2_add: add binary value 'addend' into the 14-digit packed-BCD number
 * held in (*bcd_low, *bcd_high), one decimal digit at a time with carry.
 * Returns the carry out of the most significant digit (non-zero means the
 * result overflowed 14 digits and was truncated).
 */
int
bcd2_add(long *bcd_low, long *bcd_high, long addend)
{
long tmp_lo, tmp_hi, carry, res;
int digit;
/* convert the addend to BCD so both operands use the same encoding */
bin_bcd2(addend, &tmp_lo, &tmp_hi);
carry = 0;
for (digit=0; digit < 14; digit++)
{
res = GET_DIGIT(digit, *bcd_low, *bcd_high);
res += GET_DIGIT(digit, tmp_lo, tmp_hi);
res += carry;
carry = res / 10;
res %= 10;
SET_DIGIT(res, digit, bcd_low, bcd_high);
}
return(carry);
}
/*
 * bcd2_sub: subtract binary value 'subend' from the 14-digit packed-BCD
 * number in (*bcd_low, *bcd_high), digit by digit with decimal borrow
 * ('carry' here is the borrow flag).
 * Returns the final borrow (non-zero when subend exceeded the value, in
 * which case the result has wrapped modulo 10^14).
 */
int
bcd2_sub(long *bcd_low, long *bcd_high, long subend)
{
long tmp_lo, tmp_hi, carry, res;
int digit;
/* convert the subtrahend to BCD so both operands use the same encoding */
bin_bcd2(subend, &tmp_lo, &tmp_hi);
carry = 0;
for (digit=0; digit < 14; digit++)
{
res = GET_DIGIT(digit, *bcd_low, *bcd_high);
res -= GET_DIGIT(digit, tmp_lo, tmp_hi);
res -= carry;
if (res < 0)
{
/* borrow from the next digit up */
res += 10;
carry = 1;
}
SET_DIGIT(res, digit, bcd_low, bcd_high);
}
return(carry);
}
/*
 * bcd2_mul: multiply the 14-digit packed-BCD number in
 * (*bcd_low, *bcd_high) by binary 'multiplier', using schoolbook
 * digit-by-digit multiplication. Partial products beyond digit 13 are
 * discarded, i.e. the result is truncated modulo 10^14.
 * Returns the carry left over from the last inner pass (a truncation
 * indicator, not a full high-order word).
 * NOTE(review): 'res' is declared int while the digit values are long;
 * harmless here since digit products fit an int, but inconsistent.
 */
int
bcd2_mul(long *bcd_low, long *bcd_high, long multiplier)
{
long tmp_lo, tmp_hi, carry, m_lo, m_hi, m1, m2;
int udigit, ldigit, res;
/* save the multiplicand and clear the accumulator in place */
tmp_lo = *bcd_low;
tmp_hi = *bcd_high;
bin_bcd2(multiplier, &m_lo, &m_hi);
*bcd_low = 0;
*bcd_high = 0;
carry = 0;
for (ldigit=0; ldigit < 14; ldigit++)
{
m1 = GET_DIGIT(ldigit, m_lo, m_hi);
carry = 0;
for (udigit=0; udigit < 14; udigit++)
{
m2 = GET_DIGIT(udigit, tmp_lo, tmp_hi);
res = m1 * m2;
res += carry;
/* fold in the digit already accumulated at this position */
if (udigit + ldigit < 14)
{
carry = GET_DIGIT(udigit + ldigit, *bcd_low, *bcd_high);
res += carry;
}
carry = res / 10;
res %= 10;
if (udigit + ldigit < 14)
SET_DIGIT(res, udigit + ldigit, bcd_low, bcd_high);
}
}
return(carry);
}
/*
 * bcd2_div: divide the 14-digit packed-BCD number in
 * (*bcd_low, *bcd_high) by binary 'divisor' using long division from the
 * most significant digit down; the quotient replaces the operand.
 * Returns the final remainder (in binary).
 * NOTE(review): divisor must be non-zero and small enough that
 * 10 * carry + 9 does not overflow a long -- confirm callers.
 */
int
bcd2_div(long *bcd_low, long *bcd_high, long divisor)
{
long tmp_lo, tmp_hi, carry, d1, res, digit;
carry = 0;
/* save the dividend and clear the quotient accumulator in place */
tmp_lo = *bcd_low;
tmp_hi = *bcd_high;
*bcd_low = *bcd_high = 0;
for (digit=13; digit >= 0; digit--)
{
/* bring down the next digit onto the running remainder */
d1 = GET_DIGIT(digit, tmp_lo, tmp_hi);
d1 += 10 * carry;
res = d1 / divisor;
carry = d1 % divisor;
SET_DIGIT(res, digit, bcd_low, bcd_high);
}
return(carry);
}
/*
 * bcd2_mod: reduce the packed-BCD value in (*bcd_low, *bcd_high) modulo
 * binary 'modulo' by repeated subtraction; the operands themselves are
 * not modified. Returns the remainder as a packed-BCD low word.
 * NOTE(review): two suspect spots -- (1) the loop condition compares the
 * BCD-encoded tmp_low directly against the binary 'modulo', which is only
 * an approximation of a numeric comparison; (2) it uses '>' rather than
 * '>=', so a value exactly equal to the modulus returns 'modulo' instead
 * of 0. Confirm against callers before changing either.
 */
long
bcd2_mod(long *bcd_low, long *bcd_high, long modulo)
{
long tmp_low, tmp_high;
tmp_low = *bcd_low;
tmp_high = *bcd_high;
while (tmp_high || tmp_low > modulo)
bcd2_sub(&tmp_low, &tmp_high, modulo);
return(tmp_low);
}
/*
 * bcd2_cmp: compare the 14-digit packed-BCD number in (*low1, *high1)
 * against binary 'comp'. Returns a positive value when the BCD number is
 * larger, 0 when equal, negative when smaller.
 * The magic 214 short-circuit: if the decoded high word exceeds 214, the
 * full value exceeds ~214 * 10^7, i.e. it is already larger than any
 * 31-bit 'comp' (2^31 / 10^7 ~= 214.7), so return 1 without decoding more.
 */
long
bcd2_cmp(long *low1, long *high1, long comp)
{
long temp = 0;
bcd2_bin(&temp, *high1);
if (temp > 214)
return(1);
bcd2_bin(&temp, *low1);
/* high parts comparable; decide on the low 7 digits */
return(temp - comp);
}
#ifdef TEST_BCD
#include <values.h>
/*
 * Stand-alone smoke test for the bcd2 routines, compiled only when
 * TEST_BCD is defined. Round-trips MAXINT through bin_bcd2/bcd2_bin,
 * prints individual digits, and exercises add, mul and div; results are
 * printed for manual inspection rather than asserted.
 */
main()
{
long bin, low_bcd, high_bcd;
int i;
bin = MAXINT;
printf("%ld\n", bin);
bin_bcd2(bin, &low_bcd, &high_bcd);
printf("%ld %ld\n", high_bcd, low_bcd);
bin = 0;
bcd2_bin(&bin, high_bcd);
bcd2_bin(&bin, low_bcd);
printf( "%ld\n", bin);
for (i=9; i >= 0; i--)
printf("%dth digit in %d is %d\n",
i, bin, GET_DIGIT(i, low_bcd, high_bcd));
/* MAXINT + MAXINT: result spans both BCD words */
bcd2_add(&low_bcd, &high_bcd, MAXINT);
bin = 0;
bcd2_bin(&bin, high_bcd);
high_bcd = bin;
bin = 0;
bcd2_bin(&bin, low_bcd);
low_bcd = bin;
printf( "%ld%07ld\n", high_bcd, low_bcd);
/* 14 * 23 = 322, then 322 / 10 = 32 */
bin_bcd2(14, &low_bcd, &high_bcd);
bcd2_mul(&low_bcd, &high_bcd, 23L);
bin = 0;
bcd2_bin(&bin, high_bcd);
bcd2_bin(&bin, low_bcd);
printf( "%ld\n", bin);
bcd2_div(&low_bcd, &high_bcd, 10L);
bin = 0;
bcd2_bin(&bin, high_bcd);
bcd2_bin(&bin, low_bcd);
printf( "%ld\n", bin);
}
#endif /* TEST */

View File

@ -0,0 +1,34 @@
/*
* $Id: bcd2.h,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: bcd2.h,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.2 2005/01/03 20:08:58 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:45 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* recreation after CVS crash
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
 * Packed-BCD arithmetic over 14-digit numbers stored as two longs of
 * 7 digits each (low word = least significant digits). See bcd2.c for
 * the per-routine semantics.
 */
int bin_bcd2(long binary, long *low_res, long *high_res);
int bcd2_bin(long *dest, long bcd);
int bcd2_add(long *bcd_low, long *bcd_high, long addend);
int bcd2_sub(long *bcd_low, long *bcd_high, long subend);
int bcd2_mul(long *bcd_low, long *bcd_high, long multiplier);
int bcd2_div(long *bcd_low, long *bcd_high, long divisor);
long bcd2_mod(long *bcd_low, long *bcd_high, long modulo);
long bcd2_cmp(long *bcd_low, long *bcd_high, long compare);

View File

@ -0,0 +1,595 @@
/*
* $Id: bm_utils.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: bm_utils.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.4 2006/04/12 18:00:55 jms
* add missing parameter to call to gen_seed
*
* Revision 1.3 2005/10/14 23:16:54 jms
* fix for answer set compliance
*
* Revision 1.2 2005/01/03 20:08:58 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:46 jms
* re-establish external server
*
* Revision 1.3 2004/02/18 14:05:53 jms
* porting changes for LINUX and 64 bit RNG
*
* Revision 1.2 2004/01/22 05:49:29 jms
* AIX porting (AIX 5.1)
*
* Revision 1.1.1.1 2003/08/08 21:35:26 jms
* recreation after CVS crash
*
* Revision 1.3 2003/08/08 21:35:26 jms
* first integration of rng64 for o_custkey and l_partkey
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
*
* Various routines that handle distributions, value selections and
* seed value management for the DSS benchmark. Current functions:
* env_config -- set config vars with optional environment override
* yes_no -- ask simple yes/no question and return boolean result
* a_rnd(min, max) -- random alphanumeric within length range
* pick_str(size, set) -- select a string from the set of size
* read_dist(file, name, distribution *) -- read named dist from file
* tbl_open(path, mode) -- std fopen with lifenoise
* julian(date) -- julian date correction
* rowcnt(tbl) -- proper scaling of given table
* e_str(set, min, max) -- build an embedded str
* agg_str() -- build a string from the named set
* dsscasecmp() -- version of strcasecmp()
* dssncasecmp() -- version of strncasecmp()
* getopt()
* set_state() -- initialize the RNG
*/
#include "config.h"
#include "dss.h"
#include <stdio.h>
#include <time.h>
#include <errno.h>
#include <string.h>
#ifdef HP
#include <strings.h>
#endif /* HP */
#include <ctype.h>
#include <math.h>
#ifndef _POSIX_SOURCE
#include <malloc.h>
#endif /* POSIX_SOURCE */
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
/* Lines added by Chuck McDevitt for WIN32 support */
#ifdef WIN32
#ifndef _POSIX_
#include <io.h>
#ifndef S_ISREG
#define S_ISREG(m) ( ((m) & _S_IFMT) == _S_IFREG )
#define S_ISFIFO(m) ( ((m) & _S_IFMT) == _S_IFIFO )
#endif
#endif
#ifndef stat
#define stat _stat
#endif
#ifndef fdopen
#define fdopen _fdopen
#endif
#ifndef open
#define open _open
#endif
#ifndef O_RDONLY
#define O_RDONLY _O_RDONLY
#endif
#ifndef O_WRONLY
#define O_WRONLY _O_WRONLY
#endif
#ifndef O_CREAT
#define O_CREAT _O_CREAT
#endif
#endif
/* End of lines added by Chuck McDevitt for WIN32 support */
#include "dsstypes.h"
static char alpha_num[65] =
"0123456789abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ,";
#if defined(__STDC__) || defined(__cplusplus)
#define PROTO(s) s
#else
#define PROTO(s) ()
#endif
char *getenv PROTO((const char *name));
void usage();
long *permute_dist(distribution *d, long stream);
extern seed_t Seed[];
/*
* env_config: look for a environmental variable setting and return its
* value; otherwise return the default supplied
*/
/*
 * env_config: return the value of environment variable 'var' when it is
 * set, otherwise the caller-supplied default 'dflt'.
 */
char *
env_config(char *var, char *dflt)
{
    char *value;

    value = getenv(var);
    return (value != NULL) ? value : dflt;
}
/*
* return the answer to a yes/no question as a boolean
*/
/*
 * yes_no: print 'prompt' and read replies from stdin until one starts
 * with y/Y (returns 1) or n/N (returns 0). Interactive only; blocks on
 * stdin and loops forever on any other reply.
 * NOTE(review): the return value of fgets() is ignored -- EOF on stdin
 * makes this spin on the last buffer contents; confirm acceptable.
 */
long
yes_no(char *prompt)
{
char reply[128];
#ifdef WIN32
/* Disable warning about conditional expression is constant */
#pragma warning(disable:4127)
#endif
while (1)
{
#ifdef WIN32
#pragma warning(default:4127)
#endif
printf("%s [Y/N]: ", prompt);
fgets(reply, 128, stdin);
/* only the first character of the reply is inspected */
switch (*reply)
{
case 'y':
case 'Y':
return (1);
case 'n':
case 'N':
return (0);
default:
printf("Please answer 'yes' or 'no'.\n");
}
}
}
/*
* generate a random string with length randomly selected in [min, max]
* and using the characters in alphanum (currently includes a space
* and comma)
*/
/*
 * a_rnd: fill 'dest' with a NUL-terminated random alphanumeric string
 * whose length is drawn uniformly from [min, max] using the RNG stream
 * identified by 'column'. Characters come from the 64-entry alpha_num
 * table; each RANDOM() draw supplies 6 bits for each of 5 characters.
 * dest must have room for max characters plus the terminating NUL.
 */
void
a_rnd(int min, int max, int column, char *dest)
{
DSS_HUGE i,
len,
char_int;
RANDOM(len, min, max, column);
for (i = 0; i < len; i++)
{
/* refresh the bit pool every 5 characters */
if (i % 5 == 0)
RANDOM(char_int, 0, MAX_LONG, column);
*(dest + i) = alpha_num[char_int & 077];
char_int >>= 6;
}
*(dest + len) = '\0';
return;
}
/*
* embed a randomly selected member of distribution d in alpha-numeric
* noise of a length rendomly selected between min and max at a random
* position
*/
/*
 * e_str: build an "embedded" string -- random alphanumeric noise of
 * length in [min, max] with one member of distribution 'd' overwritten
 * at a random position inside it.
 * NOTE(review): assumes the noise string is longer than the embedded
 * member (min > longest member of d); otherwise the RANDOM upper bound
 * strlen(dest) - 1 - len goes negative -- confirm callers' distributions.
 */
void
e_str(distribution *d, int min, int max, int stream, char *dest)
{
char strtmp[MAXAGG_LEN + 1];
DSS_HUGE loc;
int len;
a_rnd(min, max, stream, dest);
pick_str(d, stream, strtmp);
len = strlen(strtmp);
/* choose where inside the noise to splice the member (no NUL copied) */
RANDOM(loc, 0, (strlen(dest) - 1 - len), stream);
strncpy(dest + loc, strtmp, len);
return;
}
/*
* return the string associate with the LSB of a uniformly selected
* long in [1, max] where max is determined by the distribution
* being queried
*/
/*
 * pick_str: copy one weighted-random member of distribution 's' into
 * 'target', using RNG stream 'c'. The list stores cumulative weights
 * (see read_dist), so a uniform draw in [1, total] is mapped to the
 * first entry whose cumulative weight reaches it.
 * Returns the index of the chosen member.
 */
int
pick_str(distribution *s, int c, char *target)
{
long i = 0;
DSS_HUGE j;
/* s->list[count-1].weight is the cumulative total of all weights */
RANDOM(j, 1, s->list[s->count - 1].weight, c);
while (s->list[i].weight < j)
i++;
strcpy(target, s->list[i].text);
return(i);
}
/*
* unjulian (long date) -- return(date - STARTDATE)
*/
/*
 * unjulian: convert a "julian"-encoded date (year * 1000 + day-of-year)
 * into a zero-based day offset from STARTDATE.
 */
long
unjulian(long date)
{
    long days = 0;
    int year;

    /* whole years lying between STARTDATE's year and date's year */
    for (year = STARTDATE / 1000; year < date / 1000; year++)
        days += 365 + LEAP(year);
    /* day-of-year is 1-based, hence the -1 */
    days += date % 1000 - 1;
    return (days);
}
/*
 * julian: inverse of unjulian -- convert a day offset relative to
 * STARTDATE back into the year*1000 + day-of-year encoding, rolling the
 * offset forward one year at a time until it fits within the current
 * year (365 + LEAP days).
 */
long
julian(long date)
{
long offset;
long result;
long yr;
long yend;
offset = date - STARTDATE;
result = STARTDATE;
#ifdef WIN32
/* Disable warning about conditional expression is constant */
#pragma warning(disable:4127)
#endif
while (1)
{
#ifdef WIN32
#pragma warning(default:4127)
#endif
yr = result / 1000;
/* last encoded day of the current year */
yend = yr * 1000 + 365 + LEAP(yr);
if (result + offset > yend) /* overflow into next year */
{
offset -= yend - result + 1;
result += 1000;
continue;
}
else
break;
}
return (result + offset);
}
/*
* load a distribution from a flat file into the target structure;
* should be rewritten to allow multiple dists in a file
*/
/*
 * read_dist: load the named distribution from a flat file into 'target'.
 * The file format is line-oriented: '#' starts a comment, a distribution
 * is bracketed by "BEGIN <name>" and "END", and each entry is
 * "text|weight". A special "count|N" entry sizes the list. Weights are
 * stored cumulatively (target->max is the running total) so pick_str can
 * binary/linear-search a uniform draw. Exits with an error if the number
 * of entries read disagrees with the declared count.
 * Reads from d_path when set (global override), else from the configured
 * distributions file under CONFIG_TAG/CONFIG_DFLT.
 */
void
read_dist(char *path, char *name, distribution *target)
{
FILE *fp;
char line[256],
token[256],
*c;
long weight,
count = 0,
name_set = 0;
if (d_path == NULL)
{
sprintf(line, "%s%c%s",
env_config(CONFIG_TAG, CONFIG_DFLT), PATH_SEP, path);
fp = fopen(line, "r");
OPEN_CHECK(fp, line);
}
else
{
fp = fopen(d_path, "r");
OPEN_CHECK(fp, d_path);
}
while (fgets(line, sizeof(line), fp) != NULL)
{
/* strip trailing newline and anything after a '#' comment marker */
if ((c = strchr(line, '\n')) != NULL)
*c = '\0';
if ((c = strchr(line, '#')) != NULL)
*c = '\0';
if (*line == '\0')
continue;
if (!name_set)
{
/* skip lines until "BEGIN <name>" for the requested distribution */
if (dsscasecmp(strtok(line, "\n\t "), "BEGIN"))
continue;
if (dsscasecmp(strtok(NULL, "\n\t "), name))
continue;
name_set = 1;
continue;
}
else
{
if (!dssncasecmp(line, "END", 3))
{
fclose(fp);
return;
}
}
if (sscanf(line, "%[^|]|%ld", token, &weight) != 2)
continue;
if (!dsscasecmp(token, "count"))
{
/* "count|N" allocates the member list before any entries */
target->count = weight;
target->list =
(set_member *)
malloc((size_t)(weight * sizeof(set_member)));
MALLOC_CHECK(target->list);
target->max = 0;
continue;
}
target->list[count].text =
(char *) malloc((size_t)(strlen(token) + 1));
MALLOC_CHECK(target->list[count].text);
strcpy(target->list[count].text, token);
/* cumulative weight, consumed by pick_str */
target->max += weight;
target->list[count].weight = target->max;
count += 1;
} /* while fgets() */
if (count != target->count)
{
fprintf(stderr, "Read error on dist '%s'\n", name);
fclose(fp);
exit(1);
}
target->permute = (long *)NULL;
fclose(fp);
return;
}
/*
* standard file open with life noise
*/
/*
 * tbl_open: open the flat-file output for table 'tbl' in the given fopen
 * mode, resolving the path from tdefs[] (absolute if it starts with
 * PATH_SEP, otherwise relative to the configured output directory).
 * Prompts before overwriting an existing regular file (unless 'force' is
 * set or the mode is read), handles FIFO targets via open()/fdopen(), and
 * emits the optional column header. Exits on stat or open failure.
 *
 * BUG FIX: previously fstats.st_mode was inspected even when stat()
 * failed with ENOENT, i.e. S_ISREG/S_ISFIFO read uninitialized memory
 * for a not-yet-existing file. The checks are now gated on stat()
 * succeeding. Also supplies the mode argument required when open() is
 * called with O_CREAT (it was previously omitted, which is undefined).
 */
FILE *
tbl_open(int tbl, char *mode)
{
    char prompt[256];
    char fullpath[256];
    FILE *f;
    struct stat fstats;
    int retcode;

    /* absolute table names are used as-is; others get the config prefix */
    if (*tdefs[tbl].name == PATH_SEP)
        strcpy(fullpath, tdefs[tbl].name);
    else
        sprintf(fullpath, "%s%c%s",
            env_config(PATH_TAG, PATH_DFLT), PATH_SEP, tdefs[tbl].name);
    retcode = stat(fullpath, &fstats);
    if (retcode && (errno != ENOENT))
    {
        fprintf(stderr, "stat(%s) failed.\n", fullpath);
        exit(-1);
    }
    /* only inspect fstats when the file actually exists */
    if (retcode == 0 && S_ISREG(fstats.st_mode) && !force && *mode != 'r' )
    {
        sprintf(prompt, "Do you want to overwrite %s ?", fullpath);
        if (!yes_no(prompt))
            exit(0);
    }
    if (retcode == 0 && S_ISFIFO(fstats.st_mode))
    {
        retcode =
            open(fullpath, ((*mode == 'r')?O_RDONLY:O_WRONLY)|O_CREAT, 0644);
        f = fdopen(retcode, mode);
    }
    else
        f = fopen(fullpath, mode);
    OPEN_CHECK(f, fullpath);
    /* optional per-table header hook (columnar output only) */
    if (header && columnar && tdefs[tbl].header != NULL)
        tdefs[tbl].header(f);
    return (f);
}
/*
* agg_str(set, count) build an aggregated string from count unique
* selections taken from set
*/
/*
 * agg_str: build a space-separated string in 'dest' from 'count' unique
 * members of distribution 'set', chosen via permute_dist on RNG stream
 * 'col'. Passing d = NULL on subsequent iterations tells permute_dist to
 * continue the same permutation rather than restart it.
 *
 * BUG FIX: when count == 0 the original wrote the terminating NUL at
 * dest[-1] (strlen(dest) is 0), corrupting the byte before the buffer.
 * The trailing-space trim is now applied only when something was written.
 */
void
agg_str(distribution *set, long count, long col, char *dest)
{
    distribution *d;
    int i;

    d = set;
    *dest = '\0';
    for (i=0; i < count; i++)
    {
        strcat(dest, DIST_MEMBER(set,*permute_dist(d, col)));
        strcat(dest, " ");
        d = (distribution *)NULL;   /* keep walking the same permutation */
    }
    /* trim the trailing separator, but only if any member was appended */
    if (*dest != '\0')
        *(dest + strlen(dest) - 1) = '\0';
    return;
}
/*
 * dssncasecmp: portable strncasecmp replacement -- case-insensitively
 * compare at most n characters of s1 and s2. Returns 0 on equality,
 * -1 when s1 sorts lower, 1 when it sorts higher.
 *
 * BUG FIX: tolower() was called on plain char values; on platforms where
 * char is signed, a character above 0x7F becomes a negative argument,
 * which is undefined behavior. Values are now cast through unsigned char
 * first, per the C standard's requirements for <ctype.h>.
 */
long
dssncasecmp(char *s1, char *s2, int n)
{
    int c1, c2;

    for (; n > 0; ++s1, ++s2, --n)
    {
        c1 = tolower((unsigned char) *s1);
        c2 = tolower((unsigned char) *s2);
        if (c1 != c2)
            return ((c1 < c2) ? -1 : 1);
        if (*s1 == '\0')    /* both strings ended together */
            return (0);
    }
    return (0);
}
/*
 * dsscasecmp: portable strcasecmp replacement -- case-insensitive string
 * comparison. Returns 0 on equality, -1 when s1 sorts lower, 1 when it
 * sorts higher.
 *
 * BUG FIX: as with dssncasecmp, characters are cast through unsigned
 * char before being passed to tolower(); feeding a negative char is
 * undefined behavior on signed-char platforms.
 */
long
dsscasecmp(char *s1, char *s2)
{
    int c1, c2;

    for (;; ++s1, ++s2)
    {
        c1 = tolower((unsigned char) *s1);
        c2 = tolower((unsigned char) *s2);
        if (c1 != c2)
            return ((c1 < c2) ? -1 : 1);
        if (*s1 == '\0')    /* equal all the way to the terminator */
            return (0);
    }
}
#ifndef STDLIB_HAS_GETOPT
int optind = 0;		/* index of the argv element being scanned */
int opterr = 0;		/* unused; kept for getopt(3) interface compatibility */
char *optarg = NULL;	/* argument of the last option that took one */

/*
 * getopt(ac, av, opt) -- minimal getopt(3) replacement for platforms
 * whose C library lacks one. `opt` lists valid option letters; a letter
 * followed by ':' takes an argument, either attached ("-xvalue") or as
 * the next argv element. Returns the option character, '?' on an
 * unknown option or missing argument, -1 when scanning is done.
 */
int
getopt(int ac, char **av, char *opt)
{
	static char *nextchar = NULL;	/* scan position within a "-xyz" cluster */
	char *cp;
	char *dst;
	char optchar;

	if (optarg == NULL)
	{
		optarg = (char *) malloc(BUFSIZ);
		MALLOC_CHECK(optarg);
	}
	if (!nextchar || *nextchar == '\0')
	{
		/* advance to the next argv element */
		optind++;
		if (optind == ac)
			return (-1);
		nextchar = av[optind];
		if (*nextchar != '-')
			return (-1);
		nextchar += 1;
	}
	if (nextchar && *nextchar == '-')	/* "--" terminates option scanning */
	{
		optind++;
		return (-1);
	}
	/* found an option character */
	cp = strchr(opt, *nextchar);
	nextchar += 1;
	if (cp == NULL)		/* not defined for this run */
		return ('?');
	optchar = *cp;		/* save: cp no longer needed after this */
	if (*(cp + 1) == ':')	/* option takes an argument */
	{
		if (*nextchar)	/* argument is attached: "-xvalue" */
		{
			dst = optarg;
			while (*nextchar)
				*dst++ = *nextchar++;
			/*
			 * BUG FIX: the original overwrote this terminator with the
			 * option character (to smuggle it back through cp), leaving
			 * optarg without a NUL terminator.
			 */
			*dst = '\0';
		}
		else	/* whitespace separated: consume the next argv element */
		{
			if (++optind == ac)
				return ('?');
			strcpy(optarg, av[optind]);
		}
		nextchar = NULL;
	}
	return (optchar);
}
#endif /* STDLIB_HAS_GETOPT */
/*
 * mk_ascdate() -- allocate and fill a lookup table mapping every day
 * offset in [0, TOTDATE) to its formatted date string, by running each
 * 1-based index through mk_time() and duplicating the alpha field.
 * Returns the newly allocated array; entries are strdup'd copies.
 */
char **
mk_ascdate(void)
{
	char **dates;
	dss_time_t stamp;
	DSS_HUGE day;

	dates = (char **) malloc((size_t) (TOTDATE * sizeof(char *)));
	MALLOC_CHECK(dates);
	for (day = 0; day < TOTDATE; day++)
	{
		/* date keys are 1-based */
		mk_time(day + 1, &stamp);
		dates[day] = strdup(stamp.alpha);
	}
	return (dates);
}
/*
* set_state() -- initialize the RNG so that
* appropriate data sets can be generated.
* For each table that is to be generated, calculate the number of rows/child, and send that to the
* seed generation routine in speed_seed.c. Note: assumes that tables are completely independent.
* Returns the number of rows to be generated by the named step.
*/
DSS_HUGE
set_state(int table, long sf, long procs, long step, DSS_HUGE *extra_rows)
{
	int i;
	DSS_HUGE rowcount, remainder, result;

	/* degenerate run: nothing to generate */
	if (sf == 0 || step == 0)
		return (0);
	/* even share of base rows per child, scaled by sf below */
	rowcount = tdefs[table].base / procs;
	if ((sf / procs) > (int) MAX_32B_SCALE)
		INTERNAL_ERROR("SCALE OVERFLOW. RE-RUN WITH MORE CHILDREN.");
	rowcount *= sf;
	/* distribute the rows that didn't divide evenly across children */
	remainder = (tdefs[table].base % procs) * sf;
	rowcount += remainder / procs;
	result = rowcount;
	/* fast-forward the RNG past the rows generated by earlier steps */
	for (i = 0; i < step - 1; i++)
	{
		if (table == LINE)	/* special case for shared seeds */
			tdefs[table].gen_seed(1, rowcount);
		else
			tdefs[table].gen_seed(0, rowcount);
		/* need to set seeds of child in case there's a dependency */
		/* NOTE: this assumes that the parent and child have the same base row count */
		if (tdefs[table].child != NONE)
			tdefs[tdefs[table].child].gen_seed(0, rowcount);
	}
	/* rows left over after the even per-child split */
	*extra_rows = remainder % procs;
	if (step > procs)	/* moving to the end to generate updates */
		tdefs[table].gen_seed(0, *extra_rows);
	return (result);
}

View File

@ -0,0 +1,447 @@
/*
* $Id: build.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: build.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.4 2005/10/28 02:56:22 jms
* add platform-specific printf formats to allow for DSS_HUGE data type
*
* Revision 1.3 2005/10/14 23:16:54 jms
* fix for answer set compliance
*
* Revision 1.2 2005/01/03 20:08:58 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:46 jms
* re-establish external server
*
* Revision 1.3 2004/04/07 20:17:29 jms
* bug #58 (join fails between order/lineitem)
*
* Revision 1.2 2004/01/22 05:49:29 jms
* AIX porting (AIX 5.1)
*
* Revision 1.1.1.1 2003/08/08 21:35:26 jms
* recreation after CVS crash
*
* Revision 1.3 2003/08/08 21:35:26 jms
* first integration of rng64 for o_custkey and l_partkey
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/* stuff related to the customer table */
#include <stdio.h>
#include <string.h>
#ifndef VMS
#include <sys/types.h>
#endif
#if defined(SUN)
#include <unistd.h>
#endif
#include <math.h>
#include "dss.h"
#include "dsstypes.h"
#ifdef ADHOC
#include "adhoc.h"
extern adhoc_t adhocs[];
#endif /* ADHOC */
#include "rng64.h"
#define LEAP_ADJ(yr, mnth) \
((LEAP(yr) && (mnth) >= 2) ? 1 : 0)
#define JDAY_BASE 8035 /* start from 1/1/70 a la unix */
#define JMNTH_BASE (-70 * 12) /* start from 1/1/70 a la unix */
#define JDAY(date) ((date) - STARTDATE + JDAY_BASE + 1)
#define PART_SUPP_BRIDGE(tgt, p, s) \
{ \
DSS_HUGE tot_scnt = tdefs[SUPP].base * scale; \
tgt = (p + s * (tot_scnt / SUPP_PER_PART + \
(long) ((p - 1) / tot_scnt))) % tot_scnt + 1; \
}
#define V_STR(avg, sd, tgt) a_rnd((int)(avg * V_STR_LOW),(int)(avg * V_STR_HGH), sd, tgt)
#define TEXT(avg, sd, tgt) dbg_text(tgt, (int)(avg * V_STR_LOW),(int)(avg * V_STR_HGH), sd)
//static
void gen_phone PROTO((DSS_HUGE ind, char *target, long seed));
/*
 * rpb_routine(p) -- deterministic retail price (in pennies) for part
 * key p: a $900.00 base, a component bounded by the "limit contribution
 * to $200" modulus, and a hundredfold multiple of the low three digits.
 */
DSS_HUGE
rpb_routine(DSS_HUGE p)
{
	DSS_HUGE price = 90000
		+ (p / 10) % 20001	/* limit contribution to $200 */
		+ (p % 1000) * 100;

	return (price);
}
//static
/*
 * gen_phone(ind, target, seed) -- format a 15-character phone number
 * "CC-AAA-EEE-NNNN" into target. The 2-digit country code is derived
 * from the nation index; the other segments come from the seed stream.
 * NOTE: the three RANDOM() draws must stay in this exact order -- they
 * consume a shared seed stream and reordering changes the data set.
 */
void
gen_phone(DSS_HUGE ind, char *target, long seed)
{
	DSS_HUGE area_code,
		exchange,
		local_num;

	RANDOM(area_code, 100, 999, seed);
	RANDOM(exchange, 100, 999, seed);
	RANDOM(local_num, 1000, 9999, seed);
	/* lay each segment down in place, then punch in the separators */
	sprintf(target, "%02d", (int) (10 + (ind % NATIONS_MAX)));
	sprintf(target + 3, "%03d", (int) area_code);
	sprintf(target + 7, "%03d", (int) exchange);
	sprintf(target + 11, "%04d", (int) local_num);
	target[2] = '-';
	target[6] = '-';
	target[10] = '-';
	return;
}
/*
 * mk_cust(n_cust, c) -- populate *c with row n_cust of the CUSTOMER table.
 * NOTE: the RANDOM()/V_STR()/TEXT() calls consume fixed per-column seed
 * streams; their order must not change or the generated data set changes.
 */
long
mk_cust(DSS_HUGE n_cust, customer_t *c)
{
	DSS_HUGE i;
	static int bInit = 0;		/* one-shot guard for format construction */
	static char szFormat[100];	/* printf format for c_name, built once */

	if (!bInit)
	{
		/* splice the platform's 64-bit conversion (HUGE_FORMAT) into the name format */
		sprintf(szFormat, C_NAME_FMT, 9, HUGE_FORMAT + 1);
		bInit = 1;
	}
	c->custkey = n_cust;
	sprintf(c->name, szFormat, C_NAME_TAG, n_cust);
	V_STR(C_ADDR_LEN, C_ADDR_SD, c->address);	/* variable-length random address */
	c->alen = strlen(c->address);
	RANDOM(i, 0, (nations.count - 1), C_NTRG_SD);
	c->nation_code = i;
	/* phone's country code is derived from the nation just chosen */
	gen_phone(i, c->phone, (long) C_PHNE_SD);
	RANDOM(c->acctbal, C_ABAL_MIN, C_ABAL_MAX, C_ABAL_SD);
	pick_str(&c_mseg_set, C_MSEG_SD, c->mktsegment);
	TEXT(C_CMNT_LEN, C_CMNT_SD, c->comment);	/* pseudo-text comment */
	c->clen = strlen(c->comment);
	return (0);
}
/*
* generate the numbered order and its associated lineitems
*/
/*
 * mk_sparse(i, ok, seq) -- build a sparse order key from dense index i:
 * the bottom SPARSE_KEEP bits of i are preserved, the remaining bits are
 * shifted up SPARSE_BITS positions to make room for the update-sequence
 * number seq, and the preserved bits are re-attached at the bottom.
 */
void
mk_sparse (DSS_HUGE i, DSS_HUGE *ok, long seq)
{
	long kept;	/* bottom SPARSE_KEEP bits, carried through verbatim */
	DSS_HUGE key;

	kept = (long) (i & ((1 << SPARSE_KEEP) - 1));
	key = i >> SPARSE_KEEP;			/* strip the preserved bits */
	key = (key << SPARSE_BITS) + seq;	/* embed the sequence number */
	key = (key << SPARSE_KEEP) + kept;	/* restore the preserved bits */
	*ok = key;
	return;
}
/*
 * mk_order(index, o, upd_num) -- populate *o with order row `index` and
 * its associated lineitems. upd_num != 0 marks refresh(update)-set rows,
 * which get a distinct sparse-key segment.
 * NOTE: every RANDOM()/TEXT()/pick_str() call consumes a fixed seed
 * stream; the call order must not change or the data set changes.
 */
long
mk_order(DSS_HUGE index, order_t *o, long upd_num)
{
	DSS_HUGE lcnt;
	DSS_HUGE rprice;
	long ocnt;		/* lineitems already shipped (status 'F') */
	DSS_HUGE tmp_date;	/* order date, as day offset */
	DSS_HUGE s_date;	/* ship date */
	DSS_HUGE r_date;	/* receipt date */
	DSS_HUGE c_date;	/* commit date */
	DSS_HUGE clk_num;
	DSS_HUGE supp_num;
	static char **asc_date = NULL;	/* day offset -> formatted date, built once */
	char tmp_str[2];
	char **mk_ascdate PROTO((void));
	int delta = 1;
	static int bInit = 0;		/* one-shot guard for format construction */
	static char szFormat[100];	/* printf format for o_clerk */

	if (!bInit)
	{
		sprintf(szFormat, O_CLRK_FMT, 9, HUGE_FORMAT + 1);
		bInit = 1;
	}
	if (asc_date == NULL)
		asc_date = mk_ascdate();
	mk_sparse (index, &o->okey,
		(upd_num == 0) ? 0 : 1 + upd_num / (10000 / refresh));
	/* 64-bit customer keys are needed only at very large scale factors */
	if (scale >= 30000)
		RANDOM64(o->custkey, O_CKEY_MIN, O_CKEY_MAX, O_CKEY_SD);
	else
		RANDOM(o->custkey, O_CKEY_MIN, O_CKEY_MAX, O_CKEY_SD);
	/* skip "dead" customers (every CUST_MORTALITY-th key has no orders),
	 * oscillating above/below the drawn key to stay in range */
	while (o->custkey % CUST_MORTALITY == 0)
	{
		o->custkey += delta;
		o->custkey = MIN(o->custkey, O_CKEY_MAX);
		delta *= -1;
	}
	RANDOM(tmp_date, O_ODATE_MIN, O_ODATE_MAX, O_ODATE_SD);
	strcpy(o->odate, asc_date[tmp_date - STARTDATE]);
	pick_str(&o_priority_set, O_PRIO_SD, o->opriority);
	RANDOM(clk_num, 1, MAX((scale * O_CLRK_SCL), O_CLRK_SCL), O_CLRK_SD);
	sprintf(o->clerk, szFormat, O_CLRK_TAG, clk_num);
	TEXT(O_CMNT_LEN, O_CMNT_SD, o->comment);
	o->clen = strlen(o->comment);
#ifdef DEBUG
	if (o->clen > O_CMNT_MAX) fprintf(stderr, "comment error: O%d\n", index);
#endif /* DEBUG */
	o->spriority = 0;
	o->totalprice = 0;
	o->orderstatus = 'O';
	ocnt = 0;
	RANDOM(o->lines, O_LCNT_MIN, O_LCNT_MAX, O_LCNT_SD);
	for (lcnt = 0; lcnt < o->lines; lcnt++)
	{
		o->l[lcnt].okey = o->okey;;
		o->l[lcnt].lcnt = lcnt + 1;
		RANDOM(o->l[lcnt].quantity, L_QTY_MIN, L_QTY_MAX, L_QTY_SD);
		RANDOM(o->l[lcnt].discount, L_DCNT_MIN, L_DCNT_MAX, L_DCNT_SD);
		RANDOM(o->l[lcnt].tax, L_TAX_MIN, L_TAX_MAX, L_TAX_SD);
		pick_str(&l_instruct_set, L_SHIP_SD, o->l[lcnt].shipinstruct);
		pick_str(&l_smode_set, L_SMODE_SD, o->l[lcnt].shipmode);
		TEXT(L_CMNT_LEN, L_CMNT_SD, o->l[lcnt].comment);
		o->l[lcnt].clen = strlen(o->l[lcnt].comment);
		if (scale >= 30000)
			RANDOM64(o->l[lcnt].partkey, L_PKEY_MIN, L_PKEY_MAX, L_PKEY_SD);
		else
			RANDOM(o->l[lcnt].partkey, L_PKEY_MIN, L_PKEY_MAX, L_PKEY_SD);
		/* retail price is a pure function of the part key */
		rprice = rpb_routine(o->l[lcnt].partkey);
		RANDOM(supp_num, 0, 3, L_SKEY_SD);
		PART_SUPP_BRIDGE( o->l[lcnt].suppkey, o->l[lcnt].partkey, supp_num);
		o->l[lcnt].eprice = rprice * o->l[lcnt].quantity;
		/* integer penny arithmetic: apply discount, then tax */
		o->totalprice +=
			((o->l[lcnt].eprice *
			((long)100 - o->l[lcnt].discount)) / (long)PENNIES ) *
			((long)100 + o->l[lcnt].tax)
			/ (long)PENNIES;
		/* ship/commit dates are offsets from the order date;
		 * receipt is an offset from the ship date */
		RANDOM(s_date, L_SDTE_MIN, L_SDTE_MAX, L_SDTE_SD);
		s_date += tmp_date;
		RANDOM(c_date, L_CDTE_MIN, L_CDTE_MAX, L_CDTE_SD);
		c_date += tmp_date;
		RANDOM(r_date, L_RDTE_MIN, L_RDTE_MAX, L_RDTE_SD);
		r_date += s_date;
		strcpy(o->l[lcnt].sdate, asc_date[s_date - STARTDATE]);
		strcpy(o->l[lcnt].cdate, asc_date[c_date - STARTDATE]);
		strcpy(o->l[lcnt].rdate, asc_date[r_date - STARTDATE]);
		/* return flag is only meaningful once the item was received */
		if (julian(r_date) <= CURRENTDATE)
		{
			pick_str(&l_rflag_set, L_RFLG_SD, tmp_str);
			o->l[lcnt].rflag[0] = *tmp_str;
		}
		else
			o->l[lcnt].rflag[0] = 'N';
		if (julian(s_date) <= CURRENTDATE)
		{
			ocnt++;
			o->l[lcnt].lstatus[0] = 'F';
		}
		else
			o->l[lcnt].lstatus[0] = 'O';
	}
	/* order status: 'O' none shipped, 'P' some shipped, 'F' all shipped */
	if (ocnt > 0)
		o->orderstatus = 'P';
	if (ocnt == o->lines)
		o->orderstatus = 'F';
	return (0);
}
/*
 * mk_part(index, p) -- populate *p with row `index` of the PART table,
 * plus its SUPP_PER_PART associated PARTSUPP rows.
 * NOTE: the RANDOM()/TEXT()/pick_str() call order is seed-stream
 * sensitive and must not change.
 */
long
mk_part(DSS_HUGE index, part_t *p)
{
	DSS_HUGE temp;
	long snum;
	DSS_HUGE brnd;
	static int bInit = 0;		/* one-shot guard for format construction */
	static char szFormat[100];	/* printf format for p_mfgr */
	static char szBrandFormat[100];	/* printf format for p_brand */

	if (!bInit)
	{
		sprintf(szFormat, P_MFG_FMT, 1, HUGE_FORMAT + 1);
		sprintf(szBrandFormat, P_BRND_FMT, 2, HUGE_FORMAT + 1);
		bInit = 1;
	}
	p->partkey = index;
	/* part name is an aggregate of random color tokens */
	agg_str(&colors, (long)P_NAME_SCL, (long)P_NAME_SD, p->name);
	RANDOM(temp, P_MFG_MIN, P_MFG_MAX, P_MFG_SD);
	sprintf(p->mfgr, szFormat, P_MFG_TAG, temp);
	RANDOM(brnd, P_BRND_MIN, P_BRND_MAX, P_BRND_SD);
	/* brand number embeds the manufacturer number in its tens digit */
	sprintf(p->brand, szBrandFormat, P_BRND_TAG, (temp * 10 + brnd));
	/* pick_str returns the chosen index; reuse it to fetch the text length */
	p->tlen = pick_str(&p_types_set, P_TYPE_SD, p->type);
	p->tlen = strlen(p_types_set.list[p->tlen].text);
	RANDOM(p->size, P_SIZE_MIN, P_SIZE_MAX, P_SIZE_SD);
	pick_str(&p_cntr_set, P_CNTR_SD, p->container);
	/* retail price is a pure function of the part key */
	p->retailprice = rpb_routine(index);
	TEXT(P_CMNT_LEN, P_CMNT_SD, p->comment);
	p->clen = strlen(p->comment);
	/* generate the dependent PARTSUPP rows */
	for (snum = 0; snum < SUPP_PER_PART; snum++)
	{
		p->s[snum].partkey = p->partkey;
		PART_SUPP_BRIDGE( p->s[snum].suppkey, index, snum);
		RANDOM(p->s[snum].qty, PS_QTY_MIN, PS_QTY_MAX, PS_QTY_SD);
		RANDOM(p->s[snum].scost, PS_SCST_MIN, PS_SCST_MAX, PS_SCST_SD);
		TEXT(PS_CMNT_LEN, PS_CMNT_SD, p->s[snum].comment);
		p->s[snum].clen = strlen(p->s[snum].comment);
	}
	return (0);
}
/*
 * mk_supp(index, s) -- populate *s with row `index` of the SUPPLIER table.
 * A small fraction of suppliers get a "Better Business Bureau" complaint
 * or commendation spliced into their comment text.
 * NOTE: the RANDOM()/V_STR()/TEXT() call order is seed-stream sensitive
 * and must not change.
 */
long
mk_supp(DSS_HUGE index, supplier_t *s)
{
	DSS_HUGE i,
		bad_press,	/* decides whether this supplier gets BBB text */
		noise,		/* gap inserted between the two BBB fragments */
		offset,		/* where in the comment the BBB text starts */
		type;		/* complaint vs. commendation */
	static int bInit = 0;		/* one-shot guard for format construction */
	static char szFormat[100];	/* printf format for s_name */

	if (!bInit)
	{
		sprintf(szFormat, S_NAME_FMT, 9, HUGE_FORMAT + 1);
		bInit = 1;
	}
	s->suppkey = index;
	sprintf(s->name, szFormat, S_NAME_TAG, index);
	V_STR(S_ADDR_LEN, S_ADDR_SD, s->address);
	s->alen = strlen(s->address);
	RANDOM(i, 0, nations.count - 1, S_NTRG_SD);
	s->nation_code= i;
	/* phone's country code is derived from the nation just chosen */
	gen_phone(i, s->phone, S_PHNE_SD);
	RANDOM(s->acctbal, S_ABAL_MIN, S_ABAL_MAX, S_ABAL_SD);
	TEXT(S_CMNT_LEN, S_CMNT_SD, s->comment);
	s->clen = strlen(s->comment);
	/* these calls should really move inside the if stmt below,
	 * but this will simplify seedless parallel load
	 */
	RANDOM(bad_press, 1, 10000, BBB_CMNT_SD);
	RANDOM(type, 0, 100, BBB_TYPE_SD);
	RANDOM(noise, 0, (s->clen - BBB_CMNT_LEN), BBB_JNK_SD);
	RANDOM(offset, 0, (s->clen - (BBB_CMNT_LEN + noise)),
		BBB_OFFSET_SD);
	if (bad_press <= S_CMNT_BBB)
	{
		/* overwrite part of the comment with the BBB base phrase,
		 * then the complaint/commendation fragment after the noise gap */
		type = (type < BBB_DEADBEATS) ?0:1;
		memcpy(s->comment + offset, BBB_BASE, BBB_BASE_LEN);
		if (type == 0)
			memcpy(s->comment + BBB_BASE_LEN + offset + noise,
				BBB_COMPLAIN, BBB_TYPE_LEN);
		else
			memcpy(s->comment + BBB_BASE_LEN + offset + noise,
				BBB_COMMEND, BBB_TYPE_LEN);
	}
	return (0);
}
/*
 * month lookup table used by mk_time():
 *   mdes -- three-letter month abbreviation
 *   days -- days in the month (non-leap year)
 *   dcnt -- cumulative day count from Jan 1 through this month's end
 *           (non-leap year); leap years are corrected via LEAP_ADJ()
 * Index 0 is a sentinel so months can be addressed 1..12.
 */
struct
{
	char *mdes;
	long days;
	long dcnt;
} months[] =
{
	{NULL, 0, 0},	/* sentinel: months are 1-based */
	{"JAN", 31, 31},
	{"FEB", 28, 59},
	{"MAR", 31, 90},
	{"APR", 30, 120},
	{"MAY", 31, 151},
	{"JUN", 30, 181},
	{"JUL", 31, 212},
	{"AUG", 31, 243},
	{"SEP", 30, 273},
	{"OCT", 31, 304},
	{"NOV", 30, 334},
	{"DEC", 31, 365}
};
/*
 * mk_time(index, t) -- expand the 1-based day offset `index` into the
 * TIME row *t: a julian-style key, the formatted date string, and
 * year/month/week/day components. julian() is assumed to pack its
 * result as yyyy*1000 + ddd -- confirmed by the / 1000 and % 1000
 * decomposition below.
 */
long
mk_time(DSS_HUGE index, dss_time_t *t)
{
	long m = 0;
	long y;
	long d;

	t->timekey = index + JDAY_BASE;
	y = julian(index + STARTDATE - 1) / 1000;
	d = julian(index + STARTDATE - 1) % 1000;
	/* find the first month whose (leap-corrected) cumulative day count
	 * reaches day-of-year d */
	while (d > months[m].dcnt + LEAP_ADJ(y, m))
		m++;
	/*
	 * CONSISTENCY FIX: use LEAP_ADJ(y, m - 1) for the day-of-month
	 * correction, exactly as the t->day computation below does.
	 * The original spelled the same predicate out by hand as
	 * (LEAP(y) && m > 2), which is equivalent ((m-1) >= 2 <=> m > 2)
	 * but obscured that both lines compute the same value.
	 */
	PR_DATE(t->alpha, y, m,
		d - months[m - 1].dcnt - LEAP_ADJ(y, m - 1));
	t->year = 1900 + y;
	t->month = m + 12 * y + JMNTH_BASE;
	t->week = (d + T_START_DAY - 1) / 7 + 1;
	t->day = d - months[m - 1].dcnt - LEAP_ADJ(y, m - 1);
	return (0);
}
/*
 * mk_nation(index, c) -- populate *c with row `index` of the NATION
 * table. Keys are 1-based while the nations distribution list is
 * 0-based; `join` carries the region linkage weight from the list.
 */
int
mk_nation(DSS_HUGE index, code_t *c)
{
	c->code = index - 1;	/* convert 1-based key to 0-based list slot */
	c->text = nations.list[c->code].text;
	c->join = nations.list[c->code].weight;
	/* attach a generated pseudo-text comment */
	TEXT(N_CMNT_LEN, N_CMNT_SD, c->comment);
	c->clen = strlen(c->comment);
	return (0);
}
/*
 * mk_region(index, c) -- populate *c with row `index` of the REGION
 * table. Keys are 1-based while the regions distribution list is
 * 0-based; regions have no parent, so `join` is zeroed.
 */
int
mk_region(DSS_HUGE index, code_t *c)
{
	c->code = index - 1;	/* convert 1-based key to 0-based list slot */
	c->text = regions.list[c->code].text;
	c->join = 0;		/* for completeness */
	/* attach a generated pseudo-text comment */
	TEXT(R_CMNT_LEN, R_CMNT_SD, c->comment);
	c->clen = strlen(c->comment);
	return (0);
}

View File

@ -0,0 +1,239 @@
/*
* $Id: config.h,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: config.h,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.3 2007/03/03 20:13:59 olteanu
* *** empty log message ***
*
* Revision 1.2 2007/03/03 19:05:06 olteanu
* *** empty log message ***
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.8 2007/01/04 21:29:21 jms
* Porting changes uncovered as part of move to VS2005. No impact on data set
*
* Revision 1.7 2006/06/29 20:46:17 jms
* 2.4.0 changes from Meikel
*
* Revision 1.6 2006/05/31 22:25:21 jms
* Rework UnifInt calls in varsub to handle lack of PROTO defn in windows
*
* Revision 1.5 2006/05/25 22:35:36 jms
* qgen porting changes for 32b/64b
*
* Revision 1.4 2006/03/09 18:54:55 jms
* porting bugs
*
* Revision 1.3 2005/03/04 19:48:39 jms
* Changes from Doug Johnson to address very large scale factors
*
* Revision 1.2 2005/01/03 20:08:58 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:46 jms
* re-establish external server
*
* Revision 1.7 2004/04/08 17:36:47 jms
* clarify config.h/makefile linkage
*
* Revision 1.6 2004/04/08 17:35:00 jms
* SUN/SOLARIS ifdef merge between machines
*
* Revision 1.5 2004/04/08 17:27:53 jms
* solaris porting fixes
*
* Revision 1.4 2003/08/12 16:45:26 jms
* linux porting changes
*
* Revision 1.3 2003/08/08 21:35:26 jms
* first integration of rng64 for o_custkey and l_partkey
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
* this file allows the compilation of DBGEN to be tailored to specific
* architectures and operating systems. Some options are grouped
* together to allow easier compilation on a given vendor's hardware.
*
* The following #defines will effect the code:
* KILL(pid) -- how to terminate a process in a parallel load
* SPAWN -- name of system call to clone an existing process
* SET_HANDLER(proc) -- name of routine to handle signals in parallel load
* WAIT(res, pid) -- how to await the termination of a child
* SEPARATOR -- character used to separate fields in flat files
* DBNAME -- default name of database to be loaded
 * STDLIB_HAS_GETOPT -- to prevent conflicts with global getopt()
* MDY_DATE -- generate dates as MM-DD-YY
* WIN32 -- support for WindowsNT
* SUPPORT_64BITS -- compiler defines a 64 bit datatype
* DSS_HUGE -- 64 bit data type
* HUGE_FORMAT -- printf string for 64 bit data type
* EOL_HANDLING -- flat files don't need final column separator
*
* Certain defines must be provided in the makefile:
* MACHINE defines
* ==========
* ATT -- getopt() handling
* DOS -- disable all multi-user functionality/dependency
* HP -- posix source inclusion differences
* IBM -- posix source inclusion differences
* SGI -- getopt() handling
* SUN -- getopt() handling
* LINUX
* WIN32 -- for WINDOWS
*
* DATABASE defines
* ================
* DB2 -- use DB2 dialect in QGEN
* INFORMIX -- use Informix dialect in QGEN
* SQLSERVER -- use SQLSERVER dialect in QGEN
* SYBASE -- use Sybase dialect in QGEN
* TDAT -- use Teradata dialect in QGEN
*
* WORKLOAD defines
* ================
* TPCH -- make will create TPCH (set in makefile)
*/
#ifdef DOS
#define DSS_PROC 1
#define PATH_SEP '\\'
#else
#ifdef ATT
#define STDLIB_HAS_GETOPT
#ifdef SQLSERVER
#define WIN32
#else
/* the 64 bit defines are for the Metaware compiler */
#define SUPPORT_64BITS
#define DSS_HUGE long long
#define RNG_A 6364136223846793005ull
#define RNG_C 1ull
#define HUGE_FORMAT "%LLd"
#define HUGE_DATE_FORMAT "%02LLd"
#endif /* SQLSERVER or MP/RAS */
#endif /* ATT */
#ifdef HP
#define _INCLUDE_POSIX_SOURCE
#define STDLIB_HAS_GETOPT
#endif /* HP */
#ifdef IBM
#define STDLIB_HAS_GETOPT
#define SUPPORT_64BITS
#define DSS_HUGE long long
#define HUGE_FORMAT "%lld"
#define HUGE_DATE_FORMAT "%02lld"
#define RNG_A 6364136223846793005ull
#define RNG_C 1ull
#endif /* IBM */
#ifdef LINUX
#define STDLIB_HAS_GETOPT
#define SUPPORT_64BITS
#define DSS_HUGE long long int
#define HUGE_FORMAT "%lld"
#define HUGE_DATE_FORMAT "%02lld"
#define RNG_A 6364136223846793005ull
#define RNG_C 1ull
#endif /* LINUX */
#ifdef SUN
#define STDLIB_HAS_GETOPT
#define RNG_A 6364136223846793005ull
#define RNG_C 1ull
#define SUPPORT_64BITS
#define DSS_HUGE long long
#define HUGE_FORMAT "%lld"
#define HUGE_DATE_FORMAT "%02lld"
#endif /* SUN */
#ifdef SGI
#define STDLIB_HAS_GETOPT
#define SUPPORT_64BITS
#define DSS_HUGE __int64_t
#endif /* SGI */
#if (defined(WIN32)&&!defined(_POSIX_))
#define pid_t int
#define SET_HANDLER(proc) signal(SIGINT, proc)
#define KILL(pid) \
TerminateProcess(OpenProcess(PROCESS_TERMINATE,FALSE,pid),3)
#if (defined (__WATCOMC__))
#define SPAWN() spawnv(P_NOWAIT, spawn_args[0], spawn_args)
#define WAIT(res, pid) cwait(res, pid, WAIT_CHILD)
#else
#define SPAWN() _spawnv(_P_NOWAIT, spawn_args[0], spawn_args)
#define WAIT(res, pid) _cwait(res, pid, _WAIT_CHILD)
#define getpid _getpid
#endif /* WATCOMC */
#define SIGS_DEFINED
#define PATH_SEP '\\'
#define SUPPORT_64BITS
#define DSS_HUGE __int64
#define RNG_A 6364136223846793005uI64
#define RNG_C 1uI64
#define HUGE_FORMAT "%I64d"
#define HUGE_DATE_FORMAT "%02I64d"
/* need to define process termination codes to match UNIX */
/* these are copied from Linux/GNU and need to be verified as part of a rework of */
/* process handling under NT (29 Apr 98) */
#define WIFEXITED(s) ((s & 0xFF) == 0)
/* BUG FIX: the macro body referenced `status` instead of its parameter
 * `s`, so any expansion either failed to compile or silently captured a
 * caller-scope variable named `status`. */
#define WIFSIGNALED(s) (((unsigned int)((s)-1) & 0xFFFF) < 0xFF)
#define WIFSTOPPED(s) (((s) & 0xff) == 0x7f)
#define WTERMSIG(s) ((s) & 0x7f)
#define WSTOPSIG(s) (((s) & 0xff00) >> 8)
/* required by move to Visual Studio 2005 */
#define strdup(x) _strdup(x)
#endif /* WIN32 */
#ifndef SIGS_DEFINED
#define KILL(pid) kill(SIGUSR1, pid)
#define SET_HANDLER(proc) signal(SIGUSR1, proc)
#define SPAWN fork
#define WAIT(res, pid) wait(res)
#endif /* DEFAULT */
#define DSS_PROC getpid()
#endif /* DOS */
#ifndef DBNAME
#define DBNAME "dss"
#endif /* DBNAME */
#ifndef PATH_SEP
#define PATH_SEP '/'
#endif /* PATH_SEP */
#ifndef DSS_HUGE
#error Support for a 64-bit datatype is required in this release
#endif
#ifndef DOUBLE_CAST
#define DOUBLE_CAST (double)
#endif /* DOUBLE_CAST */
/* added by olteanu */
#ifndef EOL_HANDLING
#define EOL_HANDLING 1
#endif /* EOL_HANDLING */
/* addition finished */

View File

@ -0,0 +1,168 @@
# Microsoft Developer Studio Project File - Name="dbgen" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Console Application" 0x0103
CFG=dbgen - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "dbgen.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "dbgen.mak" CFG="dbgen - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "dbgen - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "dbgen - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "dbgen - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "TPCH" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
!ELSEIF "$(CFG)" == "dbgen - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "TPCH" /FR /YX /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
!ENDIF
# Begin Target
# Name "dbgen - Win32 Release"
# Name "dbgen - Win32 Debug"
# Begin Group "Source Files"
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File
SOURCE=.\bm_utils.c
# End Source File
# Begin Source File
SOURCE=.\build.c
# End Source File
# Begin Source File
SOURCE=.\driver.c
# End Source File
# Begin Source File
SOURCE=.\load_stub.c
# End Source File
# Begin Source File
SOURCE=.\permute.c
# End Source File
# Begin Source File
SOURCE=.\print.c
# End Source File
# Begin Source File
SOURCE=.\rnd.c
# End Source File
# Begin Source File
SOURCE=.\rng64.c
# End Source File
# Begin Source File
SOURCE=.\speed_seed.c
# End Source File
# Begin Source File
SOURCE=.\text.c
# End Source File
# End Group
# Begin Group "Header Files"
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File
SOURCE=.\config.h
# End Source File
# Begin Source File
SOURCE=.\dss.h
# End Source File
# Begin Source File
SOURCE=.\dsstypes.h
# End Source File
# Begin Source File
SOURCE=.\permute.h
# End Source File
# Begin Source File
SOURCE=.\rnd.h
# End Source File
# Begin Source File
SOURCE=.\rng64.h
# End Source File
# Begin Source File
SOURCE=.\shared.h
# End Source File
# Begin Source File
SOURCE=.\tpcd.h
# End Source File
# End Group
# Begin Group "Resource Files"
# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
# End Group
# End Target
# End Project

View File

@ -0,0 +1,842 @@
#
# $Id: dists.dss,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
#
# Revision History
# ===================
# $Log: dists.dss,v $
# Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
#
#
# Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
#
#
# Revision 1.2 2005/01/03 20:08:58 jms
# change line terminations
#
# Revision 1.1.1.1 2004/11/24 23:31:46 jms
# re-establish external server
#
# Revision 1.1.1.1 2003/04/03 18:54:21 jms
# recreation after CVS crash
#
# Revision 1.1.1.1 2003/04/03 18:54:21 jms
# initial checkin
#
#
#
#
# distributions have the following format:
#
# <token> | <weight> # comment
#
# Distributions are used to bias the selection of a token
# based on its associated weight. The list of tokens and values
# between the keywords BEGIN and END define the distribution named after
# the BEGIN. A uniformly random value from [0, sum(weights)]
# will be chosen and the first token whose cumulative weight is greater than
# or equal to the result will be returned. In essence, the weights for each
# token represent its relative weight within a distribution.
#
# one special token is defined: count (number of data points in the
# distribution). It MUST be defined for each named distribution.
#-----------------------------------------------------------------------
# currently defined distributions and their use:
# NAME FIELD/NOTES
# ======== ==============
# category parts.category
# container parts.container
# instruct shipping instructions
# msegmnt market segment
# names parts.name
# nations must be ordered along with regions
# nations2 stand alone nations set for use with qgen
# o_prio order priority
# regions must be ordered along with nations
# rflag lineitems.returnflag
# types parts.type
# colors embedded string creation; CANNOT BE USED FOR pick_str(), agg_str() perturbs order
# articles comment generation
# nouns
# verbs
# adverbs
# auxillaries
# prepositions
# terminators
# grammar sentence formation
# np
# vp
###
# category
###
BEGIN category
COUNT|5
FURNITURE|1
STORAGE EQUIP|1
TOOLS|1
MACHINE TOOLS|1
OTHER|1
END category
###
# container
###
begin p_cntr
count|40
SM CASE|1
SM BOX|1
SM BAG|1
SM JAR|1
SM PACK|1
SM PKG|1
SM CAN|1
SM DRUM|1
LG CASE|1
LG BOX|1
LG BAG|1
LG JAR|1
LG PACK|1
LG PKG|1
LG CAN|1
LG DRUM|1
MED CASE|1
MED BOX|1
MED BAG|1
MED JAR|1
MED PACK|1
MED PKG|1
MED CAN|1
MED DRUM|1
JUMBO CASE|1
JUMBO BOX|1
JUMBO BAG|1
JUMBO JAR|1
JUMBO PACK|1
JUMBO PKG|1
JUMBO CAN|1
JUMBO DRUM|1
WRAP CASE|1
WRAP BOX|1
WRAP BAG|1
WRAP JAR|1
WRAP PACK|1
WRAP PKG|1
WRAP CAN|1
WRAP DRUM|1
end p_cntr
###
# instruct
###
begin instruct
count|4
DELIVER IN PERSON|1
COLLECT COD|1
TAKE BACK RETURN|1
NONE|1
end instruct
###
# msegmnt
###
begin msegmnt
count|5
AUTOMOBILE|1
BUILDING|1
FURNITURE|1
HOUSEHOLD|1
MACHINERY|1
end msegmnt
###
# names
###
begin p_names
COUNT|4
CLEANER|1
SOAP|1
DETERGENT|1
EXTRA|1
end p_names
###
# nations
# NOTE: this is a special case; the weights here are adjustments to
# map correctly into the regions table, and are *NOT* cumulative
# values to mimic a distribution
###
begin nations
count|25
ALGERIA|0
ARGENTINA|1
BRAZIL|0
CANADA|0
EGYPT|3
ETHIOPIA|-4
FRANCE|3
GERMANY|0
INDIA|-1
INDONESIA|0
IRAN|2
IRAQ|0
JAPAN|-2
JORDAN|2
KENYA|-4
MOROCCO|0
MOZAMBIQUE|0
PERU|1
CHINA|1
ROMANIA|1
SAUDI ARABIA|1
VIETNAM|-2
RUSSIA|1
UNITED KINGDOM|0
UNITED STATES|-2
end nations
###
# nations2
###
begin nations2
count|25
ALGERIA|1
ARGENTINA|1
BRAZIL|1
CANADA|1
EGYPT|1
ETHIOPIA|1
FRANCE|1
GERMANY|1
INDIA|1
INDONESIA|1
IRAN|1
IRAQ|1
JAPAN|1
JORDAN|1
KENYA|1
MOROCCO|1
MOZAMBIQUE|1
PERU|1
CHINA|1
ROMANIA|1
SAUDI ARABIA|1
VIETNAM|1
RUSSIA|1
UNITED KINGDOM|1
UNITED STATES|1
end nations2
###
# regions
###
begin regions
count|5
AFRICA|1
AMERICA|1
ASIA|1
EUROPE|1
MIDDLE EAST|1
end regions
###
# o_prio
###
begin o_oprio
count|5
1-URGENT|1
2-HIGH|1
3-MEDIUM|1
4-NOT SPECIFIED|1
5-LOW|1
end o_oprio
###
# rflag
###
begin rflag
count|2
R|1
A|1
end rflag
###
# smode
###
begin smode
count|7
REG AIR|1
AIR|1
RAIL|1
TRUCK|1
MAIL|1
FOB|1
SHIP|1
end smode
###
# types
###
begin p_types
COUNT|150
STANDARD ANODIZED TIN|1
STANDARD ANODIZED NICKEL|1
STANDARD ANODIZED BRASS|1
STANDARD ANODIZED STEEL|1
STANDARD ANODIZED COPPER|1
STANDARD BURNISHED TIN|1
STANDARD BURNISHED NICKEL|1
STANDARD BURNISHED BRASS|1
STANDARD BURNISHED STEEL|1
STANDARD BURNISHED COPPER|1
STANDARD PLATED TIN|1
STANDARD PLATED NICKEL|1
STANDARD PLATED BRASS|1
STANDARD PLATED STEEL|1
STANDARD PLATED COPPER|1
STANDARD POLISHED TIN|1
STANDARD POLISHED NICKEL|1
STANDARD POLISHED BRASS|1
STANDARD POLISHED STEEL|1
STANDARD POLISHED COPPER|1
STANDARD BRUSHED TIN|1
STANDARD BRUSHED NICKEL|1
STANDARD BRUSHED BRASS|1
STANDARD BRUSHED STEEL|1
STANDARD BRUSHED COPPER|1
SMALL ANODIZED TIN|1
SMALL ANODIZED NICKEL|1
SMALL ANODIZED BRASS|1
SMALL ANODIZED STEEL|1
SMALL ANODIZED COPPER|1
SMALL BURNISHED TIN|1
SMALL BURNISHED NICKEL|1
SMALL BURNISHED BRASS|1
SMALL BURNISHED STEEL|1
SMALL BURNISHED COPPER|1
SMALL PLATED TIN|1
SMALL PLATED NICKEL|1
SMALL PLATED BRASS|1
SMALL PLATED STEEL|1
SMALL PLATED COPPER|1
SMALL POLISHED TIN|1
SMALL POLISHED NICKEL|1
SMALL POLISHED BRASS|1
SMALL POLISHED STEEL|1
SMALL POLISHED COPPER|1
SMALL BRUSHED TIN|1
SMALL BRUSHED NICKEL|1
SMALL BRUSHED BRASS|1
SMALL BRUSHED STEEL|1
SMALL BRUSHED COPPER|1
MEDIUM ANODIZED TIN|1
MEDIUM ANODIZED NICKEL|1
MEDIUM ANODIZED BRASS|1
MEDIUM ANODIZED STEEL|1
MEDIUM ANODIZED COPPER|1
MEDIUM BURNISHED TIN|1
MEDIUM BURNISHED NICKEL|1
MEDIUM BURNISHED BRASS|1
MEDIUM BURNISHED STEEL|1
MEDIUM BURNISHED COPPER|1
MEDIUM PLATED TIN|1
MEDIUM PLATED NICKEL|1
MEDIUM PLATED BRASS|1
MEDIUM PLATED STEEL|1
MEDIUM PLATED COPPER|1
MEDIUM POLISHED TIN|1
MEDIUM POLISHED NICKEL|1
MEDIUM POLISHED BRASS|1
MEDIUM POLISHED STEEL|1
MEDIUM POLISHED COPPER|1
MEDIUM BRUSHED TIN|1
MEDIUM BRUSHED NICKEL|1
MEDIUM BRUSHED BRASS|1
MEDIUM BRUSHED STEEL|1
MEDIUM BRUSHED COPPER|1
LARGE ANODIZED TIN|1
LARGE ANODIZED NICKEL|1
LARGE ANODIZED BRASS|1
LARGE ANODIZED STEEL|1
LARGE ANODIZED COPPER|1
LARGE BURNISHED TIN|1
LARGE BURNISHED NICKEL|1
LARGE BURNISHED BRASS|1
LARGE BURNISHED STEEL|1
LARGE BURNISHED COPPER|1
LARGE PLATED TIN|1
LARGE PLATED NICKEL|1
LARGE PLATED BRASS|1
LARGE PLATED STEEL|1
LARGE PLATED COPPER|1
LARGE POLISHED TIN|1
LARGE POLISHED NICKEL|1
LARGE POLISHED BRASS|1
LARGE POLISHED STEEL|1
LARGE POLISHED COPPER|1
LARGE BRUSHED TIN|1
LARGE BRUSHED NICKEL|1
LARGE BRUSHED BRASS|1
LARGE BRUSHED STEEL|1
LARGE BRUSHED COPPER|1
ECONOMY ANODIZED TIN|1
ECONOMY ANODIZED NICKEL|1
ECONOMY ANODIZED BRASS|1
ECONOMY ANODIZED STEEL|1
ECONOMY ANODIZED COPPER|1
ECONOMY BURNISHED TIN|1
ECONOMY BURNISHED NICKEL|1
ECONOMY BURNISHED BRASS|1
ECONOMY BURNISHED STEEL|1
ECONOMY BURNISHED COPPER|1
ECONOMY PLATED TIN|1
ECONOMY PLATED NICKEL|1
ECONOMY PLATED BRASS|1
ECONOMY PLATED STEEL|1
ECONOMY PLATED COPPER|1
ECONOMY POLISHED TIN|1
ECONOMY POLISHED NICKEL|1
ECONOMY POLISHED BRASS|1
ECONOMY POLISHED STEEL|1
ECONOMY POLISHED COPPER|1
ECONOMY BRUSHED TIN|1
ECONOMY BRUSHED NICKEL|1
ECONOMY BRUSHED BRASS|1
ECONOMY BRUSHED STEEL|1
ECONOMY BRUSHED COPPER|1
PROMO ANODIZED TIN|1
PROMO ANODIZED NICKEL|1
PROMO ANODIZED BRASS|1
PROMO ANODIZED STEEL|1
PROMO ANODIZED COPPER|1
PROMO BURNISHED TIN|1
PROMO BURNISHED NICKEL|1
PROMO BURNISHED BRASS|1
PROMO BURNISHED STEEL|1
PROMO BURNISHED COPPER|1
PROMO PLATED TIN|1
PROMO PLATED NICKEL|1
PROMO PLATED BRASS|1
PROMO PLATED STEEL|1
PROMO PLATED COPPER|1
PROMO POLISHED TIN|1
PROMO POLISHED NICKEL|1
PROMO POLISHED BRASS|1
PROMO POLISHED STEEL|1
PROMO POLISHED COPPER|1
PROMO BRUSHED TIN|1
PROMO BRUSHED NICKEL|1
PROMO BRUSHED BRASS|1
PROMO BRUSHED STEEL|1
PROMO BRUSHED COPPER|1
end p_types
###
# colors
# NOTE: This distribution CANNOT be used by pick_str(), since agg_str() perturbs its order
###
begin colors
COUNT|92
almond|1
antique|1
aquamarine|1
azure|1
beige|1
bisque|1
black|1
blanched|1
blue|1
blush|1
brown|1
burlywood|1
burnished|1
chartreuse|1
chiffon|1
chocolate|1
coral|1
cornflower|1
cornsilk|1
cream|1
cyan|1
dark|1
deep|1
dim|1
dodger|1
drab|1
firebrick|1
floral|1
forest|1
frosted|1
gainsboro|1
ghost|1
goldenrod|1
green|1
grey|1
honeydew|1
hot|1
indian|1
ivory|1
khaki|1
lace|1
lavender|1
lawn|1
lemon|1
light|1
lime|1
linen|1
magenta|1
maroon|1
medium|1
metallic|1
midnight|1
mint|1
misty|1
moccasin|1
navajo|1
navy|1
olive|1
orange|1
orchid|1
pale|1
papaya|1
peach|1
peru|1
pink|1
plum|1
powder|1
puff|1
purple|1
red|1
rose|1
rosy|1
royal|1
saddle|1
salmon|1
sandy|1
seashell|1
sienna|1
sky|1
slate|1
smoke|1
snow|1
spring|1
steel|1
tan|1
thistle|1
tomato|1
turquoise|1
violet|1
wheat|1
white|1
yellow|1
end colors
################
################
## pseudo text distributions
################
################
###
# nouns
###
BEGIN nouns
COUNT|45
packages|40
requests|40
accounts|40
deposits|40
foxes|20
ideas|20
theodolites|20
pinto beans|20
instructions|20
dependencies|10
excuses|10
platelets|10
asymptotes|10
courts|5
dolphins|5
multipliers|1
sauternes|1
warthogs|1
frets|1
dinos|1
attainments|1
somas|1
Tiresias|1
patterns|1
forges|1
braids|1
frays|1
warhorses|1
dugouts|1
notornis|1
epitaphs|1
pearls|1
tithes|1
waters|1
orbits|1
gifts|1
sheaves|1
depths|1
sentiments|1
decoys|1
realms|1
pains|1
grouches|1
escapades|1
hockey players|1
END nouns
###
# verbs
###
BEGIN verbs
COUNT|40
sleep|20
wake|20
are|20
cajole|20
haggle|20
nag|10
use|10
boost|10
affix|5
detect|5
integrate|5
maintain|1
nod|1
was|1
lose|1
sublate|1
solve|1
thrash|1
promise|1
engage|1
hinder|1
print|1
x-ray|1
breach|1
eat|1
grow|1
impress|1
mold|1
poach|1
serve|1
run|1
dazzle|1
snooze|1
doze|1
unwind|1
kindle|1
play|1
hang|1
believe|1
doubt|1
END verbs
###
# adverbs
##
BEGIN adverbs
COUNT|28
sometimes|1
always|1
never|1
furiously|50
slyly|50
carefully|50
blithely|40
quickly|30
fluffily|20
slowly|1
quietly|1
ruthlessly|1
thinly|1
closely|1
doggedly|1
daringly|1
bravely|1
stealthily|1
permanently|1
enticingly|1
idly|1
busily|1
regularly|1
finally|1
ironically|1
evenly|1
boldly|1
silently|1
END adverbs
###
# articles
##
BEGIN articles
COUNT|3
the|50
a|20
an|5
END articles
###
# prepositions
##
BEGIN prepositions
COUNT|47
about|50
above|50
according to|50
across|50
after|50
against|40
along|40
alongside of|30
among|30
around|20
at|10
atop|1
before|1
behind|1
beneath|1
beside|1
besides|1
between|1
beyond|1
by|1
despite|1
during|1
except|1
for|1
from|1
in place of|1
inside|1
instead of|1
into|1
near|1
of|1
on|1
outside|1
over|1
past|1
since|1
through|1
throughout|1
to|1
toward|1
under|1
until|1
up|1
upon|1
whithout|1
with|1
within|1
END prepositions
###
# auxillaries
##
BEGIN auxillaries
COUNT|18
do|1
may|1
might|1
shall|1
will|1
would|1
can|1
could|1
should|1
ought to|1
must|1
will have to|1
shall have to|1
could have to|1
should have to|1
must have to|1
need to|1
try to|1
END auxillaries
###
# terminators
##
BEGIN terminators
COUNT|6
.|50
;|1
:|1
?|1
!|1
--|1
END terminators
###
# adjectives
##
BEGIN adjectives
COUNT|29
special|20
pending|20
unusual|20
express|20
furious|1
sly|1
careful|1
blithe|1
quick|1
fluffy|1
slow|1
quiet|1
ruthless|1
thin|1
close|1
dogged|1
daring|1
brave|1
stealthy|1
permanent|1
enticing|1
idle|1
busy|1
regular|50
final|40
ironic|40
even|30
bold|20
silent|10
END adjectives
###
# grammar
# first level grammar. N=noun phrase, V=verb phrase,
# P=prepositional phrase, T=sentence termination
##
BEGIN grammar
COUNT|5
N V T|3
N V P T|3
N V N T|3
N P V N T|1
N P V P T|1
END grammar
###
# NP
# second level grammar. Noun phrases. N=noun, A=article,
# J=adjective, D=adverb
##
BEGIN np
COUNT|4
N|10
J N|20
J, J N|10
D J N|50
END np
###
# VP
# second level grammar. Verb phrases. V=verb, X=auxiliary,
# D=adverb
##
BEGIN vp
COUNT|4
V|30
X V|1
V D|40
X V D|1
END vp
###
# Q13
# Substitution parameters for Q13
##
BEGIN Q13a
COUNT|4
special|20
pending|20
unusual|20
express|20
END Q13a
BEGIN Q13b
COUNT|4
packages|40
requests|40
accounts|40
deposits|40
END Q13b

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,84 @@
-- Sccsid: @(#)dss.ddl 2.1.8.1
-- TPC-H schema as modified by MayBMS: every table carries an explicit
-- TID (tuple identifier) column, and the regular data columns are left
-- nullable.  NOTE(review): presumably nullable so that uncertain
-- attribute values can be represented via the condition table C below --
-- confirm against the uncertain-tpch generator documentation.
-- NATION: nation reference data
CREATE TABLE NATION ( TID INTEGER NOT NULL,
N_NATIONKEY INTEGER,
N_NAME CHAR(25),
N_REGIONKEY INTEGER,
N_COMMENT VARCHAR(152));
-- REGION: region reference data
CREATE TABLE REGION ( TID INTEGER NOT NULL,
R_REGIONKEY INTEGER,
R_NAME CHAR(25),
R_COMMENT VARCHAR(152));
-- PART: catalog of parts
CREATE TABLE PART ( TID INTEGER NOT NULL,
P_PARTKEY INTEGER,
P_NAME VARCHAR(55),
P_MFGR CHAR(25),
P_BRAND CHAR(10),
P_TYPE VARCHAR(25),
P_SIZE INTEGER,
P_CONTAINER CHAR(10),
P_RETAILPRICE DECIMAL(15,2),
P_COMMENT VARCHAR(23) );
-- SUPPLIER: suppliers, keyed to NATION via S_NATIONKEY
CREATE TABLE SUPPLIER ( TID INTEGER NOT NULL,
S_SUPPKEY INTEGER,
S_NAME CHAR(25),
S_ADDRESS VARCHAR(40),
S_NATIONKEY INTEGER,
S_PHONE CHAR(15),
S_ACCTBAL DECIMAL(15,2),
S_COMMENT VARCHAR(101));
-- PARTSUPP: part/supplier association with availability and cost
CREATE TABLE PARTSUPP ( TID INTEGER NOT NULL,
PS_PARTKEY INTEGER,
PS_SUPPKEY INTEGER,
PS_AVAILQTY INTEGER,
PS_SUPPLYCOST DECIMAL(15,2) ,
PS_COMMENT VARCHAR(199) );
-- CUSTOMER: customers, keyed to NATION via C_NATIONKEY
CREATE TABLE CUSTOMER ( TID INTEGER NOT NULL,
C_CUSTKEY INTEGER,
C_NAME VARCHAR(25),
C_ADDRESS VARCHAR(40),
C_NATIONKEY INTEGER,
C_PHONE CHAR(15),
C_ACCTBAL DECIMAL(15,2) ,
C_MKTSEGMENT CHAR(10),
C_COMMENT VARCHAR(117));
-- ORDERS: order headers, keyed to CUSTOMER via O_CUSTKEY
CREATE TABLE ORDERS ( TID INTEGER NOT NULL,
O_ORDERKEY INTEGER,
O_CUSTKEY INTEGER,
O_ORDERSTATUS CHAR(1),
O_TOTALPRICE DECIMAL(15,2),
O_ORDERDATE DATE,
O_ORDERPRIORITY CHAR(15),
O_CLERK CHAR(15),
O_SHIPPRIORITY INTEGER,
O_COMMENT VARCHAR(79));
-- LINEITEM: order line items (largest table)
CREATE TABLE LINEITEM ( TID INTEGER NOT NULL,
L_ORDERKEY INTEGER,
L_PARTKEY INTEGER,
L_SUPPKEY INTEGER,
L_LINENUMBER INTEGER,
L_QUANTITY DECIMAL(15,2),
L_EXTENDEDPRICE DECIMAL(15,2),
L_DISCOUNT DECIMAL(15,2),
L_TAX DECIMAL(15,2),
L_RETURNFLAG CHAR(1),
L_LINESTATUS CHAR(1),
L_SHIPDATE DATE,
L_COMMITDATE DATE,
L_RECEIPTDATE DATE,
L_SHIPINSTRUCT CHAR(25),
L_SHIPMODE CHAR(10),
L_COMMENT VARCHAR(44));
-- C: condition table relating an uncertain cell (REL, TID, COL) to a
-- variable/alternative pair (CID, WID) and the VALUE it takes.
-- NOTE(review): column semantics inferred from names and the MayBMS
-- U-relation model -- confirm against MayBMS documentation.
CREATE TABLE C ( REL VARCHAR(44) NOT NULL,
TID INTEGER NOT NULL,
COL VARCHAR(44) NOT NULL,
CID INTEGER NOT NULL,
WID INTEGER NOT NULL,
VALUE VARCHAR(199) NOT NULL);

View File

@ -0,0 +1,78 @@
-- Sccsid: @(#)dss.ddl 2.1.8.1
-- Standard TPC-H schema extended with a TID (tuple identifier) column
-- per table; all data columns are NOT NULL.  This is the fully-certain
-- counterpart of the nullable schema variant (which also defines the
-- condition table C).
CREATE TABLE NATION ( TID INTEGER NOT NULL,
N_NATIONKEY INTEGER NOT NULL,
N_NAME CHAR(25) NOT NULL,
N_REGIONKEY INTEGER NOT NULL,
N_COMMENT VARCHAR(152));
CREATE TABLE REGION ( TID INTEGER NOT NULL,
R_REGIONKEY INTEGER NOT NULL,
R_NAME CHAR(25) NOT NULL,
R_COMMENT VARCHAR(152));
CREATE TABLE PART ( TID INTEGER NOT NULL,
P_PARTKEY INTEGER NOT NULL,
P_NAME VARCHAR(55) NOT NULL,
P_MFGR CHAR(25) NOT NULL,
P_BRAND CHAR(10) NOT NULL,
P_TYPE VARCHAR(25) NOT NULL,
P_SIZE INTEGER NOT NULL,
P_CONTAINER CHAR(10) NOT NULL,
P_RETAILPRICE DECIMAL(15,2) NOT NULL,
P_COMMENT VARCHAR(23) NOT NULL );
CREATE TABLE SUPPLIER ( TID INTEGER NOT NULL,
S_SUPPKEY INTEGER NOT NULL,
S_NAME CHAR(25) NOT NULL,
S_ADDRESS VARCHAR(40) NOT NULL,
S_NATIONKEY INTEGER NOT NULL,
S_PHONE CHAR(15) NOT NULL,
S_ACCTBAL DECIMAL(15,2) NOT NULL,
S_COMMENT VARCHAR(101) NOT NULL);
CREATE TABLE PARTSUPP ( TID INTEGER NOT NULL,
PS_PARTKEY INTEGER NOT NULL,
PS_SUPPKEY INTEGER NOT NULL,
PS_AVAILQTY INTEGER NOT NULL,
PS_SUPPLYCOST DECIMAL(15,2) NOT NULL,
PS_COMMENT VARCHAR(199) NOT NULL );
CREATE TABLE CUSTOMER ( TID INTEGER NOT NULL,
C_CUSTKEY INTEGER NOT NULL,
C_NAME VARCHAR(25) NOT NULL,
C_ADDRESS VARCHAR(40) NOT NULL,
C_NATIONKEY INTEGER NOT NULL,
C_PHONE CHAR(15) NOT NULL,
C_ACCTBAL DECIMAL(15,2) NOT NULL,
C_MKTSEGMENT CHAR(10) NOT NULL,
C_COMMENT VARCHAR(117) NOT NULL);
CREATE TABLE ORDERS ( TID INTEGER NOT NULL,
O_ORDERKEY INTEGER NOT NULL,
O_CUSTKEY INTEGER NOT NULL,
O_ORDERSTATUS CHAR(1) NOT NULL,
O_TOTALPRICE DECIMAL(15,2) NOT NULL,
O_ORDERDATE DATE NOT NULL,
O_ORDERPRIORITY CHAR(15) NOT NULL,
O_CLERK CHAR(15) NOT NULL,
O_SHIPPRIORITY INTEGER NOT NULL,
O_COMMENT VARCHAR(79) NOT NULL);
CREATE TABLE LINEITEM ( TID INTEGER NOT NULL,
L_ORDERKEY INTEGER NOT NULL,
L_PARTKEY INTEGER NOT NULL,
L_SUPPKEY INTEGER NOT NULL,
L_LINENUMBER INTEGER NOT NULL,
L_QUANTITY DECIMAL(15,2) NOT NULL,
L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
L_DISCOUNT DECIMAL(15,2) NOT NULL,
L_TAX DECIMAL(15,2) NOT NULL,
L_RETURNFLAG CHAR(1) NOT NULL,
L_LINESTATUS CHAR(1) NOT NULL,
L_SHIPDATE DATE NOT NULL,
L_COMMITDATE DATE NOT NULL,
L_RECEIPTDATE DATE NOT NULL,
L_SHIPINSTRUCT CHAR(25) NOT NULL,
L_SHIPMODE CHAR(10) NOT NULL,
L_COMMENT VARCHAR(44) NOT NULL);

View File

@ -0,0 +1,806 @@
/*
* $Id: dss.h,v 1.13 2007/03/21 04:05:15 olteanu Exp $
*
* Revision History
* ===================
* $Log: dss.h,v $
* Revision 1.13 2007/03/21 04:05:15 olteanu
* *** empty log message ***
*
* Revision 1.12 2007/03/21 01:42:57 olteanu
* *** empty log message ***
*
* Revision 1.11 2007/03/19 21:07:58 olteanu
* *** empty log message ***
*
* Revision 1.10 2007/03/18 13:04:49 olteanu
* *** empty log message ***
*
* Revision 1.9 2007/03/16 22:40:17 olteanu
* *** empty log message ***
*
* Revision 1.8 2007/03/16 21:30:50 olteanu
* *** empty log message ***
*
* Revision 1.7 2007/03/15 18:49:36 olteanu
* *** empty log message ***
*
* Revision 1.6 2007/03/15 13:35:25 olteanu
* *** empty log message ***
*
* Revision 1.5 2007/03/15 12:31:50 olteanu
* *** empty log message ***
*
* Revision 1.4 2007/03/14 21:03:01 olteanu
* *** empty log message ***
*
* Revision 1.3 2007/03/14 20:19:06 olteanu
* *** empty log message ***
*
* Revision 1.2 2007/03/14 17:57:11 olteanu
* *** empty log message ***
*
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.2 2007/03/11 16:16:40 jansen
* -m max_placeholders
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.9 2006/07/31 17:23:09 jms
* fix to parallelism problem
*
* Revision 1.8 2006/03/09 18:55:29 jms
* remove vestigial cvs merge marker
*
* Revision 1.7 2005/10/28 03:05:05 jms
* up maximum scale to 100TB
*
* Revision 1.6 2005/10/28 02:55:26 jms
* add release.h changes
*
* Revision 1.5 2005/10/27 18:13:03 jms
* a_rnd() prototype correction
*
* Revision 1.4 2005/10/25 17:58:59 jms
* update version stamp
*
* Revision 1.3 2005/03/04 19:48:39 jms
* Changes from Doug Johnson to address very large scale factors
*
* Revision 1.2 2005/01/03 20:08:58 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:46 jms
* re-establish external server
*
* Revision 1.5 2004/04/08 17:34:15 jms
* cleanup SOLARIS/SUN ifdefs; now all use SUN
*
* Revision 1.4 2004/04/07 20:17:29 jms
* bug #58 (join fails between order/lineitem)
*
* Revision 1.3 2004/03/16 14:37:53 jms
* update version and copyright date; correct comment typo
*
* Revision 1.2 2004/02/18 14:07:20 jms
* change to version 2.1.0
*
* Revision 1.1.1.1 2003/08/08 21:50:33 jms
* recreation after CVS crash
*
* Revision 1.3 2003/08/08 21:35:26 jms
* first integration of rng64 for o_custkey and l_partkey
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
* general definitions and control information for the DSS code
* generator; if it controls the data set, it's here
*/
#ifndef DSS_H
#define DSS_H
#ifdef TPCH
#define NAME "TPC-H"
#endif
#ifdef TPCR
#define NAME "TPC-R"
#endif
#ifndef NAME
#error Benchmark version must be defined in config.h
#endif
#define TPC "Transaction Processing Performance Council"
#define C_DATES "1994 - 2005"
#include "config.h"
#include "shared.h"
#include <stdio.h>
#include <stdlib.h>
#define NONE -1
#define PART 0
#define PSUPP 1
#define SUPP 2
#define CUST 3
#define ORDER 4
#define LINE 5
#define ORDER_LINE 6
#define PART_PSUPP 7
#define NATION 8
#define REGION 9
#define UPDATE 10
#define MAX_TABLE 11
#define ONE_STREAM 1
#define ADD_AT_END 2
#define PART_p_PARTKEY 10
#define PART_p_NAME 11
#define PART_p_MFGR 12
#define PART_p_BRAND 13
#define PART_p_TYPE 14
#define PART_p_SIZE 15
#define PART_p_CONTAINER 16
#define PART_p_RETAILPRICE 17
#define PART_p_COMMENT 18
#define SUPP_s_SUPPKEY 19
#define SUPP_s_NAME 20
#define SUPP_s_ADDRESS 21
#define SUPP_s_NATIONKEY 22
#define SUPP_s_PHONE 23
#define SUPP_s_ACCTBAL 24
#define SUPP_s_COMMENT 25
#define PSUPP_ps_PARTKEY 26
#define PSUPP_ps_SUPPKEY 27
#define PSUPP_ps_AVAILQTY 28
#define PSUPP_ps_SUPPLYCOST 29
#define PSUPP_ps_COMMENT 30
#define CUST_c_CUSTKEY 31
#define CUST_c_NAME 32
#define CUST_c_ADDRESS 33
#define CUST_c_NATIONKEY 34
#define CUST_c_PHONE 35
#define CUST_c_ACCTBAL 36
#define CUST_c_MKTSEGMENT 37
#define CUST_c_COMMENT 38
#define ORDERS_o_ORDERKEY 39
#define ORDERS_o_CUSTKEY 40
#define ORDERS_o_ORDERSTATUS 41
#define ORDERS_o_TOTALPRICE 42
#define ORDERS_o_ORDERDATE 43
#define ORDERS_o_ORDERPRIORITY 44
#define ORDERS_o_CLERK 45
#define ORDERS_o_SHIPPRIORITY 46
#define ORDERS_o_COMMENT 47
#define LINEITEM_l_ORDERKEY 48
#define LINEITEM_l_PARTKEY 49
#define LINEITEM_l_SUPPKEY 50
#define LINEITEM_l_LINENUMBER 51
#define LINEITEM_l_QUANTITY 52
#define LINEITEM_l_EXTENDEDPRICE 53
#define LINEITEM_l_DISCOUNT 54
#define LINEITEM_l_TAX 55
#define LINEITEM_l_RETURNFLAG 56
#define LINEITEM_l_LINESTATUS 57
#define LINEITEM_l_SHIPDATE 58
#define LINEITEM_l_COMMITDATE 59
#define LINEITEM_l_RECEIPTDATE 60
#define LINEITEM_l_SHIPINSTRUCT 61
#define LINEITEM_l_SHIPMODE 62
#define LINEITEM_l_COMMENT 63
#define NATION_n_NATIONKEY 64
#define NATION_n_NAME 65
#define NATION_n_REGIONKEY 66
#define NATION_n_COMMENT 67
#define REGION_r_REGIONKEY 68
#define REGION_r_NAME 69
#define REGION_r_COMMENT 70
/*
 * Redefine MAX/MIN locally; system headers may already provide
 * conflicting versions.
 */
#ifdef MAX
#undef MAX
#endif
#ifdef MIN
#undef MIN
#endif
/*
 * Fully parenthesize the macro arguments so that expressions with
 * low-precedence operators (e.g. MAX(x & 1, y)) expand correctly; the
 * previous MAX used raw tokens and mis-bound such arguments, while MIN
 * was already safe.  Arguments are still evaluated twice -- do not pass
 * expressions with side effects.
 */
#define MAX(A,B) ((A) > (B) ? (A) : (B))
#define MIN(A,B) ((A) < (B) ? (A) : (B))
/* Report an unrecoverable internal failure and abort the process. */
#define INTERNAL_ERROR(p) {fprintf(stderr,"%s", p);abort();}
#define LN_CNT 4
static char lnoise[4] = {'|', '/', '-', '\\' };
#define LIFENOISE(n, var) \
if (verbose > 0) fprintf(stderr, "%c\b", lnoise[(var%LN_CNT)])
#define MALLOC_CHECK(var) \
if ((var) == NULL) \
{ \
fprintf(stderr, "Malloc failed at %s:%d\n", \
__FILE__, __LINE__); \
exit(1);\
}
#define OPEN_CHECK(var, path) \
if ((var) == NULL) \
{ \
fprintf(stderr, "Open failed for %s at %s:%d\n", \
path, __FILE__, __LINE__); \
exit(1);\
}
#ifndef MAX_CHILDREN
#define MAX_CHILDREN 1000
#endif
/*
* macros that control sparse keys
*
* refer to Porting.Notes for a complete explanation
*/
#ifndef BITS_PER_LONG
#define BITS_PER_LONG 32
#define MAX_LONG 0x7FFFFFFF
#endif /* BITS_PER_LONG */
/*
 * Sparse order-key construction (refer to Porting.Notes): SPARSE_BITS
 * bits taken from 'seq' are spliced into 'key' just above its low
 * SPARSE_KEEP bits, leaving deliberate gaps in the O_ORDERKEY space.
 */
#define SPARSE_BITS 2
#define SPARSE_KEEP 3
/* keep low 3 bits of key, insert 2 bits of seq, shift the rest up */
#define MK_SPARSE(key, seq) \
(((((key>>3)<<2)|(seq & 0x0003))<<3)|(key & 0x0007))
#define RANDOM(tgt, lower, upper, stream) dss_random(&tgt, lower, upper, stream)
#define RANDOM64(tgt, lower, upper, stream) dss_random64(&tgt, lower, upper, stream)
struct _Placeholder
{
unsigned table;
long tid;
unsigned column;
};
typedef struct _Placeholder Placeholder;
/* One weighted entry of a value distribution read from dists.dss. */
typedef struct
{
long weight; /* selection weight as read from the distribution file */
char *text; /* the value itself */
} set_member;
/* A weighted-value distribution, populated by read_dist(). */
typedef struct
{
int count; /* number of entries in list */
int max; /* NOTE(review): appears to be the total weight -- confirm in read_dist() */
set_member *list;
long *permute; /* permutation vector; NOTE(review): confirm use in bm_utils.c */
} distribution;
/*
 * some handy access functions
 */
#define DIST_SIZE(d) d->count
/* text of the i-th entry; performs no bounds checking */
#define DIST_MEMBER(d, i) ((set_member *)((d)->list + i))->text
typedef struct
{
char *name;
char *comment;
DSS_HUGE base;
int (*header) ();
int (*loader[2]) ();
long (*gen_seed)();
int (*verify) ();
int child;
DSS_HUGE vtotal;
} tdef;
typedef struct SEED_T {
long table;
DSS_HUGE value;
DSS_HUGE usage;
DSS_HUGE boundary;
#ifdef RNG_TEST
DSS_HUGE nCalls;
#endif
} seed_t;
#if defined(__STDC__)
#define PROTO(s) s
#else
#define PROTO(s) ()
#endif
/* bm_utils.c */
char *env_config PROTO((char *var, char *dflt));
long yes_no PROTO((char *prompt));
void a_rnd PROTO((int min, int max, int column, char *dest));
int tx_rnd PROTO((long min, long max, long column, char *tgt));
long julian PROTO((long date));
long unjulian PROTO((long date));
FILE *tbl_open PROTO((int tbl, char *mode));
long dssncasecmp PROTO((char *s1, char *s2, int n));
long dsscasecmp PROTO((char *s1, char *s2));
int pick_str PROTO((distribution * s, int c, char *target));
void agg_str PROTO((distribution *set, long count, long col, char *dest));
void read_dist PROTO((char *path, char *name, distribution * target));
void embed_str PROTO((distribution *d, int min, int max, int stream, char *dest));
#ifndef STDLIB_HAS_GETOPT
int getopt PROTO((int arg_cnt, char **arg_vect, char *oprions));
#endif /* STDLIB_HAS_GETOPT */
DSS_HUGE set_state PROTO((int t, long scale, long procs, long step, DSS_HUGE *e));
/* rnd.c */
DSS_HUGE NextRand PROTO((DSS_HUGE nSeed));
DSS_HUGE UnifInt PROTO((DSS_HUGE nLow, DSS_HUGE nHigh, long nStream));
void dss_random(DSS_HUGE *tgt, DSS_HUGE min, DSS_HUGE max, long seed);
void row_start(int t);
void row_stop(int t);
void dump_seeds(int t);
/* text.c */
#define MAX_GRAMMAR_LEN 12 /* max length of grammar component */
#define MAX_SENT_LEN 256 /* max length of populated sentence */
#define RNG_PER_SENT 27 /* max number of RNG calls per sentence */
void dbg_text PROTO((char * t, int min, int max, int s));
#ifdef DECLARER
#define EXTERN
#else
#define EXTERN extern
#endif /* DECLARER */
EXTERN distribution nations;
EXTERN distribution nations2;
EXTERN distribution regions;
EXTERN distribution o_priority_set;
EXTERN distribution l_instruct_set;
EXTERN distribution l_smode_set;
EXTERN distribution l_category_set;
EXTERN distribution l_rflag_set;
EXTERN distribution c_mseg_set;
EXTERN distribution colors;
EXTERN distribution p_types_set;
EXTERN distribution p_cntr_set;
/* distributions that control text generation */
EXTERN distribution articles;
EXTERN distribution nouns;
EXTERN distribution adjectives;
EXTERN distribution adverbs;
EXTERN distribution prepositions;
EXTERN distribution verbs;
EXTERN distribution terminators;
EXTERN distribution auxillaries;
EXTERN distribution np;
EXTERN distribution vp;
EXTERN distribution grammar;
EXTERN long scale;
EXTERN int refresh;
EXTERN int resume;
EXTERN long verbose;
EXTERN long force;
EXTERN long header;
EXTERN long columnar;
EXTERN long direct;
EXTERN long updates;
EXTERN long table;
EXTERN long children;
EXTERN long fnames;
EXTERN int gen_sql;
EXTERN int gen_rng;
EXTERN char *db_name;
EXTERN int step;
EXTERN int set_seeds;
EXTERN int validate;
EXTERN char *d_path;
/* added for segmented updates */
EXTERN int insert_segments;
EXTERN int delete_segments;
EXTERN int insert_orders_segment;
EXTERN int insert_lineitem_segment;
EXTERN int delete_segment;
/* added by olteanu for U-relations */
EXTERN double placeholders_ratio; /* in (0,1) */
EXTERN double Zipf_component_ratio; /* in (0,1) */
EXTERN int max_placeholders; /* default: 8 */
EXTERN DSS_HUGE tid_line;
EXTERN DSS_HUGE tid_partsupp;
EXTERN Placeholder** placeholder_set;
EXTERN long placeholders_size;
EXTERN long placeholders_idx;
EXTERN long crt_cid;
EXTERN double stat_max_lworlds;
EXTERN double stat_avg_lworlds;
EXTERN double stat_total_amount_worlds;
EXTERN long upper;
EXTERN long threshold;
EXTERN FILE *part_p_partkey;
EXTERN FILE *part_p_name;
EXTERN FILE *part_p_mfgr;
EXTERN FILE *part_p_brand;
EXTERN FILE *part_p_type;
EXTERN FILE *part_p_size;
EXTERN FILE *part_p_container;
EXTERN FILE *part_p_retailprice;
EXTERN FILE *part_p_comment;
EXTERN FILE *supp_s_suppkey;
EXTERN FILE *supp_s_name;
EXTERN FILE *supp_s_address;
EXTERN FILE *supp_s_nationkey;
EXTERN FILE *supp_s_phone;
EXTERN FILE *supp_s_acctbal;
EXTERN FILE *supp_s_comment;
EXTERN FILE *psupp_ps_partkey;
EXTERN FILE *psupp_ps_suppkey;
EXTERN FILE *psupp_ps_availqty;
EXTERN FILE *psupp_ps_supplycost;
EXTERN FILE *psupp_ps_comment;
EXTERN FILE *cust_c_custkey;
EXTERN FILE *cust_c_name;
EXTERN FILE *cust_c_address;
EXTERN FILE *cust_c_nationkey;
EXTERN FILE *cust_c_phone;
EXTERN FILE *cust_c_acctbal;
EXTERN FILE *cust_c_mktsegment;
EXTERN FILE *cust_c_comment;
EXTERN FILE *orders_o_orderkey;
EXTERN FILE *orders_o_custkey;
EXTERN FILE *orders_o_orderstatus;
EXTERN FILE *orders_o_totalprice;
EXTERN FILE *orders_o_orderdate;
EXTERN FILE *orders_o_orderpriority;
EXTERN FILE *orders_o_clerk;
EXTERN FILE *orders_o_shippriority;
EXTERN FILE *orders_o_comment;
EXTERN FILE *lineitem_l_orderkey;
EXTERN FILE *lineitem_l_partkey;
EXTERN FILE *lineitem_l_suppkey;
EXTERN FILE *lineitem_l_linenumber;
EXTERN FILE *lineitem_l_quantity;
EXTERN FILE *lineitem_l_extendedprice;
EXTERN FILE *lineitem_l_discount;
EXTERN FILE *lineitem_l_tax;
EXTERN FILE *lineitem_l_returnflag;
EXTERN FILE *lineitem_l_linestatus;
EXTERN FILE *lineitem_l_shipdate;
EXTERN FILE *lineitem_l_commitdate;
EXTERN FILE *lineitem_l_receiptdate;
EXTERN FILE *lineitem_l_shipinstruct;
EXTERN FILE *lineitem_l_shipmode;
EXTERN FILE *lineitem_l_comment;
EXTERN FILE *nation_n_nationkey;
EXTERN FILE *nation_n_name;
EXTERN FILE *nation_n_regionkey;
EXTERN FILE *nation_n_comment;
EXTERN FILE *region_r_regionkey;
EXTERN FILE *region_r_name;
EXTERN FILE *region_r_comment;
/* finished addition by olteanu for U-relations */
#ifndef DECLARER
extern tdef tdefs[];
#endif /* DECLARER */
/*
* defines global constants for WSD generation
*/
#define PLACEHOLDERS_INIT 1000000
#define PLACEHOLDERS_LIMIT 10000000
#define SWAP_SIZE 2239
#define SHUFFLE_ITE_RATIO 1
#define LOCAL_WORLDS_RATIO_INIT 0.25
#define PLACEHOLDERS_RATIO_INIT 0.01
#define ZIPF_COMP_RATIO_INIT 0.1
#define MAX_PLACEHOLDERS_INIT 8
#define ONE_WORLD 0
#define TUPLE_LEVEL 0
#define COUNT_ONLY 0
/*****************************************************************
** table level defines use the following naming convention: t_ccc_xxx
** with: t, a table identifier
** ccc, a column identifier
** xxx, a limit type
****************************************************************
*/
/*
* defines which control the parts table
*/
#define P_SIZE 126
#define P_NAME_SCL 5
#define P_MFG_TAG "Manufacturer#"
#define P_MFG_FMT "%%s%%0%d%s"
#define P_MFG_MIN 1
#define P_MFG_MAX 5
#define P_BRND_TAG "Brand#"
#define P_BRND_FMT "%%s%%0%d%s"
#define P_BRND_MIN 1
#define P_BRND_MAX 5
#define P_SIZE_MIN 1
#define P_SIZE_MAX 50
#define P_MCST_MIN 100
#define P_MCST_MAX 99900
#define P_MCST_SCL 100.0
#define P_RCST_MIN 90000
#define P_RCST_MAX 200000
#define P_RCST_SCL 100.0
/*
* defines which control the suppliers table
*/
#define S_SIZE 145
#define S_NAME_TAG "Supplier#"
#define S_NAME_FMT "%%s%%0%d%s"
#define S_ABAL_MIN -99999
#define S_ABAL_MAX 999999
#define S_CMNT_MAX 101
#define S_CMNT_BBB 10 /* number of BBB comments/SF */
#define BBB_DEADBEATS 50 /* % that are complaints */
#define BBB_BASE "Customer "
#define BBB_COMPLAIN "Complaints"
#define BBB_COMMEND "Recommends"
#define BBB_CMNT_LEN 19
#define BBB_BASE_LEN 9
#define BBB_TYPE_LEN 10
/*
* defines which control the partsupp table
*/
#define PS_SIZE 145
#define PS_SKEY_MIN 0
#define PS_SKEY_MAX ((tdefs[SUPP].base - 1) * scale)
#define PS_SCST_MIN 100
#define PS_SCST_MAX 100000
#define PS_QTY_MIN 1
#define PS_QTY_MAX 9999
/*
* defines which control the customers table
*/
#define C_SIZE 165
#define C_NAME_TAG "Customer#"
#define C_NAME_FMT "%%s%%0%d%s"
#define C_MSEG_MAX 5
#define C_ABAL_MIN -99999
#define C_ABAL_MAX 999999
/*
* defines which control the order table
*/
#define O_SIZE 109
#define O_CKEY_MIN 1
#define O_CKEY_MAX (tdefs[CUST].base * scale)
#define O_ODATE_MIN STARTDATE
#define O_ODATE_MAX (STARTDATE + TOTDATE - \
(L_SDTE_MAX + L_RDTE_MAX) - 1)
#define O_CLRK_TAG "Clerk#"
#define O_CLRK_FMT "%%s%%0%d%s"
#define O_CLRK_SCL 1000
#define O_LCNT_MIN 1
#define O_LCNT_MAX 7
/*
* defines which control the lineitem table
*/
#define L_SIZE 144L
#define L_QTY_MIN 1
#define L_QTY_MAX 50
#define L_TAX_MIN 0
#define L_TAX_MAX 8
#define L_DCNT_MIN 0
#define L_DCNT_MAX 10
#define L_PKEY_MIN 1
#define L_PKEY_MAX (tdefs[PART].base * scale)
#define L_SDTE_MIN 1
#define L_SDTE_MAX 121
#define L_CDTE_MIN 30
#define L_CDTE_MAX 90
#define L_RDTE_MIN 1
#define L_RDTE_MAX 30
/*
* defines which control the time table
*/
#define T_SIZE 30
#define T_START_DAY 3 /* wednesday ? */
/*
 * Leap-year test: divisible by 4 and not by 100.  It ignores the
 * 400-year rule (wrong for e.g. 2000), but the generated dates lie in
 * 1992-1998 (see STARTDATE/ENDDATE below), where it is exact.
 */
#define LEAP(y) ((!(y % 4) && (y % 100))?1:0)
/*******************************************************************
*******************************************************************
***
*** general or inter table defines
***
*******************************************************************
*******************************************************************/
#define SUPP_PER_PART 4 /* partsupp rows generated per part */
#define ORDERS_PER_CUST 10 /* sync this with CUST_MORTALITY */
#define CUST_MORTALITY 3 /* portion which have no orders */
#define NATIONS_MAX 90 /* limited by country codes in phone numbers */
#define PHONE_FMT "%02d-%03d-%03d-%04d"
#define STARTDATE 92001
#define CURRENTDATE 95168
#define ENDDATE 98365
#define TOTDATE 2557
#define UPD_PCT 10
#define MAX_STREAM 56 /* increased by olteanu; 47 previously */
#define V_STR_LOW 0.4
#define PENNIES 100 /* for scaled int money arithmetic */
#define Q11_FRACTION (double)0.0001
/*
* max and min SF in GB; Larger SF will require changes to the build routines
*/
#define MIN_SCALE 1.0
#define MAX_SCALE 100000.0
/*
* beyond this point we need to allow for BCD calculations
*/
#define MAX_32B_SCALE 1000.0
/*
 * Helper macros for DSS_HUGE arithmetic.  src/dst/op1 arguments are
 * pointers; op2 is a value.  (DSS_HUGE is presumably supplied via
 * config.h/shared.h -- confirm; typically a 64-bit integer.)
 * All arguments are now parenthesized so expression arguments bind
 * correctly.
 */
#define LONG2HUGE(src, dst) *(dst) = (DSS_HUGE)(src)
#define HUGE2LONG(src, dst) *(dst) = (long)(src)
#define HUGE_SET(src, dst) *(dst) = *(src)
#define HUGE_MUL(op1, op2) *(op1) *= (op2)
#define HUGE_DIV(op1, op2) *(op1) /= (op2)
#define HUGE_ADD(op1, op2, dst) *(dst) = *(op1) + (op2)
#define HUGE_SUB(op1, op2, dst) *(dst) = *(op1) - (op2)
#define HUGE_MOD(op1, op2) ((*(op1)) % (op2))
/*
 * Three-way comparison yielding -1/0/1.  The previous definition was
 * missing the '?' of the second conditional ("...:(*op1 < *op2)-1:1"),
 * which made any expansion a syntax error.
 */
#define HUGE_CMP(op1, op2) ((*(op1) == *(op2)) ? 0 : ((*(op1) < *(op2)) ? -1 : 1))
/******** environmental variables and defaults ***************/
#define DIST_TAG "DSS_DIST" /* environment var to override ... */
#define DIST_DFLT "dists.dss" /* default file to hold distributions */
#define PATH_TAG "DSS_PATH" /* environment var to override ... */
#define PATH_DFLT "." /* default directory to hold tables */
#define CONFIG_TAG "DSS_CONFIG" /* environment var to override ... */
#define CONFIG_DFLT "." /* default directory to config files */
#define ADHOC_TAG "DSS_ADHOC" /* environment var to override ... */
#define ADHOC_DFLT "adhoc.dss" /* default file name for adhoc vars */
/******* output macros ********/
#ifndef SEPARATOR
#define SEPARATOR '|' /* field separator for generated flat files */
#endif
/* Data type flags for a single print routine */
#define DT_STR 0
#ifndef MVS
#define DT_VSTR DT_STR
#else
#define DT_VSTR 1
#endif /* MVS */
#define DT_INT 2
#define DT_HUGE 3
#define DT_KEY 4
#define DT_MONEY 5
#define DT_CHR 6
int dbg_print(int dt, FILE *tgt, void *data, int len, int eol);
#define PR_STR(f, str, len) dbg_print(DT_STR, f, (void *)str, len, 1)
#define PR_VSTR(f, str, len) dbg_print(DT_VSTR, f, (void *)str, len, 1)
#define PR_VSTR_LAST(f, str, len) dbg_print(DT_VSTR, f, (void *)str, len, 0)
#define PR_INT(f, str) dbg_print(DT_INT, f, (void *)str, 0, 1)
#define PR_HUGE(f, str) dbg_print(DT_HUGE, f, (void *)str, 0, 1)
#define PR_KEY(f, str) dbg_print(DT_KEY, f, (void *)str, 0, -1)
#define PR_MONEY(f, str) dbg_print(DT_MONEY, f, (void *)str, 0, 1)
#define PR_CHR(f, str) dbg_print(DT_CHR, f, (void *)str, 0, 1)
#define PR_STRT(fp) /* any line prep for a record goes here */
#define PR_END(fp) fprintf(fp, "\n") /* finish the record here */
#ifdef MDY_DATE
#define PR_DATE(tgt, yr, mn, dy) \
sprintf(tgt, "%02d-%02d-19%02d", mn, dy, yr)
#else
#define PR_DATE(tgt, yr, mn, dy) \
sprintf(tgt, "19%02d-%02d-%02d", yr, mn, dy)
#endif /* DATE_FORMAT */
/*
* verification macros
*/
#define VRF_STR(t, d) {char *xx = d; while (*xx) tdefs[t].vtotal += *xx++;}
#define VRF_INT(t,d) tdefs[t].vtotal += d
#define VRF_HUGE(t,d) tdefs[t].vtotal = *((long *)&d) + *((long *)(&d + 1))
/* assume float is a 64 bit quantity */
#define VRF_MONEY(t,d) tdefs[t].vtotal = *((long *)&d) + *((long *)(&d + 1))
#define VRF_CHR(t,d) tdefs[t].vtotal += d
#define VRF_STRT(t)
#define VRF_END(t)
/*********** distributions currently defined *************/
#define UNIFORM 0
/*
* seed indexes; used to separate the generation of individual columns
*/
#define P_MFG_SD 0
#define P_BRND_SD 1
#define P_TYPE_SD 2
#define P_SIZE_SD 3
#define P_CNTR_SD 4
#define P_RCST_SD 5
#define PS_QTY_SD 7
#define PS_SCST_SD 8
#define O_SUPP_SD 10
#define O_CLRK_SD 11
#define O_ODATE_SD 13
#define L_QTY_SD 14
#define L_DCNT_SD 15
#define L_TAX_SD 16
#define L_SHIP_SD 17
#define L_SMODE_SD 18
#define L_PKEY_SD 19
#define L_SKEY_SD 20
#define L_SDTE_SD 21
#define L_CDTE_SD 22
#define L_RDTE_SD 23
#define L_RFLG_SD 24
#define C_NTRG_SD 27
#define C_PHNE_SD 28
#define C_ABAL_SD 29
#define C_MSEG_SD 30
#define S_NTRG_SD 33
#define S_PHNE_SD 34
#define S_ABAL_SD 35
#define P_NAME_SD 37
#define O_PRIO_SD 38
#define HVAR_SD 39
#define O_CKEY_SD 40
#define N_CMNT_SD 41
#define R_CMNT_SD 42
#define O_LCNT_SD 43
#define BBB_JNK_SD 44
#define BBB_TYPE_SD 45
#define BBB_CMNT_SD 46
#define BBB_OFFSET_SD 47
#define WSD_PLACEHOLDERS 48
#define SUPP_PLACEHOLDERS 49
#define PART_PLACEHOLDERS 50
#define ORDERS_PLACEHOLDERS 51
#define LINE_PLACEHOLDERS 52
#define NATION_PLACEHOLDERS 53
#define REGION_PLACEHOLDERS 54
#define CUST_PLACEHOLDERS 55
#define PSUPP_PLACEHOLDERS 56
#endif /* DSS_H */

View File

@ -0,0 +1,100 @@
-- Sccsid: @(#)dss.ri 2.1.8.1
-- TPCD Benchmark Version 8.0
-- Primary-key and foreign-key constraints for the TPC-H schema.
-- NOTE(review): written in DB2 dialect ("CONNECT TO", "ADD FOREIGN KEY
-- <name> (cols) references <table>" with implicit PK columns,
-- "COMMIT WORK"); not portable ANSI SQL.
CONNECT TO TPCD;
-- DROP statements retained (commented out) for manual reruns.
--ALTER TABLE TPCD.REGION DROP PRIMARY KEY;
--ALTER TABLE TPCD.NATION DROP PRIMARY KEY;
--ALTER TABLE TPCD.PART DROP PRIMARY KEY;
--ALTER TABLE TPCD.SUPPLIER DROP PRIMARY KEY;
--ALTER TABLE TPCD.PARTSUPP DROP PRIMARY KEY;
--ALTER TABLE TPCD.ORDERS DROP PRIMARY KEY;
--ALTER TABLE TPCD.LINEITEM DROP PRIMARY KEY;
--ALTER TABLE TPCD.CUSTOMER DROP PRIMARY KEY;
-- For table REGION
ALTER TABLE TPCD.REGION
ADD PRIMARY KEY (R_REGIONKEY);
-- For table NATION
ALTER TABLE TPCD.NATION
ADD PRIMARY KEY (N_NATIONKEY);
ALTER TABLE TPCD.NATION
ADD FOREIGN KEY NATION_FK1 (N_REGIONKEY) references TPCD.REGION;
COMMIT WORK;
-- For table PART
ALTER TABLE TPCD.PART
ADD PRIMARY KEY (P_PARTKEY);
COMMIT WORK;
-- For table SUPPLIER
ALTER TABLE TPCD.SUPPLIER
ADD PRIMARY KEY (S_SUPPKEY);
ALTER TABLE TPCD.SUPPLIER
ADD FOREIGN KEY SUPPLIER_FK1 (S_NATIONKEY) references TPCD.NATION;
COMMIT WORK;
-- For table PARTSUPP
ALTER TABLE TPCD.PARTSUPP
ADD PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY);
COMMIT WORK;
-- For table CUSTOMER
ALTER TABLE TPCD.CUSTOMER
ADD PRIMARY KEY (C_CUSTKEY);
ALTER TABLE TPCD.CUSTOMER
ADD FOREIGN KEY CUSTOMER_FK1 (C_NATIONKEY) references TPCD.NATION;
COMMIT WORK;
-- For table LINEITEM
ALTER TABLE TPCD.LINEITEM
ADD PRIMARY KEY (L_ORDERKEY,L_LINENUMBER);
COMMIT WORK;
-- For table ORDERS
ALTER TABLE TPCD.ORDERS
ADD PRIMARY KEY (O_ORDERKEY);
COMMIT WORK;
-- For table PARTSUPP
ALTER TABLE TPCD.PARTSUPP
ADD FOREIGN KEY PARTSUPP_FK1 (PS_SUPPKEY) references TPCD.SUPPLIER;
COMMIT WORK;
ALTER TABLE TPCD.PARTSUPP
ADD FOREIGN KEY PARTSUPP_FK2 (PS_PARTKEY) references TPCD.PART;
COMMIT WORK;
-- For table ORDERS
ALTER TABLE TPCD.ORDERS
ADD FOREIGN KEY ORDERS_FK1 (O_CUSTKEY) references TPCD.CUSTOMER;
COMMIT WORK;
-- For table LINEITEM
ALTER TABLE TPCD.LINEITEM
ADD FOREIGN KEY LINEITEM_FK1 (L_ORDERKEY) references TPCD.ORDERS;
COMMIT WORK;
ALTER TABLE TPCD.LINEITEM
ADD FOREIGN KEY LINEITEM_FK2 (L_PARTKEY,L_SUPPKEY) references
TPCD.PARTSUPP;
COMMIT WORK;

View File

@ -0,0 +1,191 @@
/*
* $Id: dsstypes.h,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: dsstypes.h,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.2 2007/03/04 18:41:02 olteanu
* *** empty log message ***
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.3 2005/10/28 02:57:04 jms
* allow for larger names in customer table
*
* Revision 1.2 2005/01/03 20:08:58 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:46 jms
* re-establish external server
*
* Revision 1.3 2004/04/07 20:17:29 jms
* bug #58 (join fails between order/lineitem)
*
* Revision 1.2 2004/01/22 05:49:29 jms
* AIX porting (AIX 5.1)
*
* Revision 1.1.1.1 2003/08/07 17:58:34 jms
* recreation after CVS crash
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
* general definitions and control information for the DSS data types
* and function prototypes
*/
/*
* typedefs
*/
/*
 * In-memory image of one CUSTOMER row.
 */
typedef struct
{
DSS_HUGE custkey;
char name[C_NAME_LEN + 3]; /* +3: extra room for larger names (2005 changelog) */
char address[C_ADDR_MAX + 1];
int alen; /* actual length of address */
DSS_HUGE nation_code;
char phone[PHONE_LEN + 1];
DSS_HUGE acctbal; /* account balance (scaled int money -- see PENNIES in dss.h) */
char mktsegment[MAXAGG_LEN + 1];
char comment[C_CMNT_MAX + 1];
int clen; /* actual length of comment */
} customer_t;
/* customers.c: mk_cust builds row n_cust; pr_cust/ld_cust emit it
 * (NOTE(review): 'mode' presumably selects the output flavor --
 * confirm in customers.c). */
long mk_cust PROTO((DSS_HUGE n_cust, customer_t * c));
int pr_cust PROTO((customer_t * c, int mode));
int ld_cust PROTO((customer_t * c, int mode));
/*
 * One LINEITEM row; embedded in order_t below (a lineitem never
 * exists apart from its order in the generator).
 */
typedef struct
{
DSS_HUGE okey; /* L_ORDERKEY (same as parent order) */
DSS_HUGE partkey; /* L_PARTKEY */
DSS_HUGE suppkey; /* L_SUPPKEY */
DSS_HUGE lcnt; /* L_LINENUMBER */
DSS_HUGE quantity; /* L_QUANTITY */
DSS_HUGE eprice; /* L_EXTENDEDPRICE */
DSS_HUGE discount; /* L_DISCOUNT */
DSS_HUGE tax; /* L_TAX */
char rflag[1]; /* L_RETURNFLAG -- single char, NOT NUL-terminated */
char lstatus[1]; /* L_LINESTATUS -- single char, NOT NUL-terminated */
char cdate[DATE_LEN]; /* L_COMMITDATE */
char sdate[DATE_LEN]; /* L_SHIPDATE */
char rdate[DATE_LEN]; /* L_RECEIPTDATE */
char shipinstruct[MAXAGG_LEN + 1]; /* L_SHIPINSTRUCT */
char shipmode[MAXAGG_LEN + 1]; /* L_SHIPMODE */
char comment[L_CMNT_MAX + 1]; /* L_COMMENT */
int clen; /* length of comment */
} line_t;
/*
 * One ORDERS row together with its generated lineitems (master/detail
 * are produced as a unit).
 */
typedef struct
{
DSS_HUGE okey; /* O_ORDERKEY (sparse -- see mk_sparse) */
DSS_HUGE custkey; /* O_CUSTKEY */
char orderstatus; /* O_ORDERSTATUS */
DSS_HUGE totalprice; /* O_TOTALPRICE */
char odate[DATE_LEN]; /* O_ORDERDATE */
char opriority[MAXAGG_LEN + 1]; /* O_ORDERPRIORITY */
char clerk[O_CLRK_LEN + 1]; /* O_CLERK */
long spriority; /* O_SHIPPRIORITY */
DSS_HUGE lines; /* number of valid entries in l[] */
char comment[O_CMNT_MAX + 1]; /* O_COMMENT */
int clen; /* length of comment */
line_t l[O_LCNT_MAX]; /* the order's lineitems */
} order_t;
/* order.c -- build/print/load an order; mk_sparse maps a dense index
 * to the sparse orderkey space */
long mk_order PROTO((DSS_HUGE index, order_t * o, long upd_num));
int pr_order PROTO((order_t * o, int mode));
int ld_order PROTO((order_t * o, int mode));
void mk_sparse PROTO((DSS_HUGE index, DSS_HUGE *ok, long seq));
/*
 * One PARTSUPP row; embedded in part_t below (generated with its part).
 */
typedef struct
{
DSS_HUGE partkey; /* PS_PARTKEY */
DSS_HUGE suppkey; /* PS_SUPPKEY */
DSS_HUGE qty; /* PS_AVAILQTY */
DSS_HUGE scost; /* PS_SUPPLYCOST */
char comment[PS_CMNT_MAX + 1]; /* PS_COMMENT */
int clen; /* length of comment */
} partsupp_t;
/*
 * One PART row together with its SUPP_PER_PART partsupp rows.
 */
typedef struct
{
DSS_HUGE partkey; /* P_PARTKEY */
char name[P_NAME_LEN + 1]; /* P_NAME */
int nlen; /* length of name */
char mfgr[P_MFG_LEN + 1]; /* P_MFGR */
char brand[P_BRND_LEN + 1]; /* P_BRAND */
char type[P_TYPE_LEN + 1]; /* P_TYPE */
int tlen; /* length of type */
DSS_HUGE size; /* P_SIZE */
char container[P_CNTR_LEN + 1]; /* P_CONTAINER */
DSS_HUGE retailprice; /* P_RETAILPRICE */
char comment[P_CMNT_MAX + 1]; /* P_COMMENT */
int clen; /* length of comment */
partsupp_t s[SUPP_PER_PART]; /* the part's suppliers */
} part_t;
/* parts.c -- make/print/load one part (and its partsupps) */
long mk_part PROTO((DSS_HUGE index, part_t * p));
int pr_part PROTO((part_t * part, int mode));
int ld_part PROTO((part_t * part, int mode));
/*
 * One SUPPLIER row.
 */
typedef struct
{
DSS_HUGE suppkey; /* S_SUPPKEY */
char name[S_NAME_LEN + 1]; /* S_NAME */
char address[S_ADDR_MAX + 1]; /* S_ADDRESS */
int alen; /* length of address */
DSS_HUGE nation_code; /* S_NATIONKEY */
char phone[PHONE_LEN + 1]; /* S_PHONE */
DSS_HUGE acctbal; /* S_ACCTBAL */
char comment[S_CMNT_MAX + 1]; /* S_COMMENT */
int clen; /* length of comment */
} supplier_t;
/* supplier.c -- make/print/load one supplier */
long mk_supp PROTO((DSS_HUGE index, supplier_t * s));
int pr_supp PROTO((supplier_t * supp, int mode));
int ld_supp PROTO((supplier_t * supp, int mode));
/*
 * A calendar-date row (the legacy TIME dimension of TPC-D).
 */
typedef struct
{
DSS_HUGE timekey; /* surrogate key */
char alpha[DATE_LEN]; /* date as text */
long year;
long month;
long week;
long day; /* day of month */
} dss_time_t;
/* time.c -- build the h-th date row */
long mk_time PROTO((DSS_HUGE h, dss_time_t * t));
/*
 * Shared row type for the small NATION and REGION code tables.
 * this assumes that N_CMNT_LEN >= R_CMNT_LEN, since the one comment
 * buffer is sized for nations and reused for regions.
 */
typedef struct
{
DSS_HUGE code; /* N_NATIONKEY / R_REGIONKEY */
char *text; /* N_NAME / R_NAME (points into a distribution, not owned) -- TODO confirm ownership */
long join; /* N_REGIONKEY for nations; unused for regions -- verify in code.c */
char comment[N_CMNT_MAX + 1]; /* N_COMMENT / R_COMMENT */
int clen; /* length of comment */
} code_t;
/* code table -- make/print/load nation and region rows */
int mk_nation PROTO((DSS_HUGE i, code_t * c));
int pr_nation PROTO((code_t * c, int mode));
int ld_nation PROTO((code_t * c, int mode));
int mk_region PROTO((DSS_HUGE i, code_t * c));
int pr_region PROTO((code_t * c, int mode));
int ld_region PROTO((code_t * c, int mode));

View File

@ -0,0 +1,287 @@
/*
* $Id: load_stub.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: load_stub.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.2 2005/01/03 20:08:58 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:46 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* recreation after CVS crash
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*****************************************************************
* Title: load_stub.c
* Description:
* stub routines for:
* inline load of dss benchmark
* header creation for dss benchmark
*
*****************************************************************
*/
#include <stdio.h>
#include "config.h"
#include "dss.h"
#include "dsstypes.h"
/* lifecycle stubs for the (absent) inline-load path: nothing to set up
 * or tear down, so both simply report success */
int
close_direct(void)
{
	/* any post load cleanup goes here */
	return (0);
}

int
prep_direct(void)
{
	/* any preload prep goes here */
	return (0);
}
int
hd_cust (FILE *f)
{
	/* stub: warn once that no CUSTOMER header output is implemented */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("No header has been defined for the customer table\n");
	}
	return (0);
}
int
ld_cust (customer_t *cp, int mode)
{
	/* stub loader: warn once, load nothing */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("%s %s\n",
			"No load routine has been defined",
			"for the customer table");
	}
	return (0);
}
int
hd_part (FILE *f)
{
	/* stub: warn once that no PART header output is implemented */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("No header has been defined for the part table\n");
	}
	return (0);
}
int
ld_part (part_t *pp, int mode)
{
	/* stub loader: warn once, load nothing */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("No load routine has been defined for the part table\n");
	}
	return (0);
}
/*
 * Stub loader for the PARTSUPP rows generated with each part: warns
 * once, loads nothing, always returns 0.
 * Fix: the second printf argument carried its own trailing "\n" on top
 * of the "\n" in the format string, producing a blank line that none
 * of the sibling stubs emit.
 */
int
ld_psupp (part_t *pp, int mode)
{
	static int count = 0;

	if (! count++)
		printf("%s %s\n",
			"No load routine has been defined for the",
			"psupp table");
	return (0);
}
int
hd_supp (FILE *f)
{
	/* stub: warn once that no SUPPLIER header output is implemented */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("No header has been defined for the supplier table\n");
	}
	return (0);
}
/*
 * Stub loader for SUPPLIER rows: warns once, loads nothing, returns 0.
 * Fix: dropped the stray "\n" inside the second message fragment; the
 * format string already supplies the newline (matches the other stubs).
 */
int
ld_supp (supplier_t *sp, int mode)
{
	static int count = 0;

	if (! count++)
		printf("%s %s\n",
			"No load routine has been defined",
			"for the supplier table");
	return (0);
}
int
hd_order (FILE *f)
{
	/* stub: warn once that no ORDER header output is implemented */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("No header has been defined for the order table\n");
	}
	return (0);
}
int
ld_order (order_t *p, int mode)
{
	/* stub loader: warn once, load nothing */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("%s %s\n",
			"No load routine has been defined",
			"for the order table");
	}
	return (0);
}
/*
 * Stub loader for LINEITEM rows (invoked via ld_order_line): warns
 * once, loads nothing, returns 0.
 * Fix: declared the int return type explicitly -- the original relied
 * on implicit int, which is invalid in C99 and later and inconsistent
 * with every other stub in this file.
 */
int
ld_line (order_t *p, int mode)
{
	static int count = 0;

	if (! count++)
		printf("%s %s\n",
			"No load routine has been defined",
			"for the line table");
	return (0);
}
int
hd_psupp (FILE *f)
{
	/* stub: warn once that no PARTSUPP header output is implemented */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("%s %s\n",
			"No header has been defined for the",
			"part supplier table");
	}
	return (0);
}
int
hd_line (FILE *f)
{
	/* stub: warn once that no LINEITEM header output is implemented */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("No header has been defined for the lineitem table\n");
	}
	return (0);
}
int
hd_nation (FILE *f)
{
	/* stub: warn once that no NATION header output is implemented */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("No header has been defined for the nation table\n");
	}
	return (0);
}
int
ld_nation (code_t *cp, int mode)
{
	/* stub loader: warn once, load nothing */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("%s %s\n",
			"No load routine has been defined",
			"for the nation table");
	}
	return (0);
}
int
hd_region (FILE *f)
{
	/* stub: warn once that no REGION header output is implemented */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("No header has been defined for the region table\n");
	}
	return (0);
}
int
ld_region (code_t *cp, int mode)
{
	/* stub loader: warn once, load nothing */
	static int warned = 0;

	if (warned == 0)
	{
		warned = 1;
		printf("%s %s\n",
			"No load routine has been defined",
			"for the region table");
	}
	return (0);
}
/*
 * Composite stubs: an order row carries its lineitems and a part row
 * carries its partsupps, so the master/detail stubs are always invoked
 * as a pair.  Call order is preserved (it determines warning order).
 */
int
ld_order_line (order_t *p, int mode)
{
	ld_order(p, mode);
	ld_line(p, mode);
	return (0);
}

int
hd_order_line (FILE *f)
{
	hd_order(f);
	hd_line(f);
	return (0);
}

int
ld_part_psupp (part_t *p, int mode)
{
	ld_part(p, mode);
	ld_psupp(p, mode);
	return (0);
}

int
hd_part_psupp (FILE *f)
{
	hd_part(f);
	hd_psupp(f);
	return (0);
}

View File

@ -0,0 +1,177 @@
#
# $Id: makefile.suite,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
#
# Revision History
# ===================
# $Log: makefile.suite,v $
# Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
#
#
# Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
#
#
# Revision 1.16 2007/01/05 20:05:41 jms
# update release number
#
# Revision 1.15 2006/09/07 17:25:57 jms
# correct dss.ddl
#
# Revision 1.14 2006/08/01 17:21:22 jms
# fix bad merge
#
# Revision 1.13 2006/08/01 16:55:44 jms
# move to 2.4.1
#
# Revision 1.12 2006/06/29 20:46:17 jms
# 2.4.0 changes from Meikel
#
# Revision 1.10 2006/05/25 22:30:44 jms
# qgen porting for 32b/64b
#
# Revision 1.9 2006/04/26 23:17:09 jms
# checking release.h prior to release build
#
# Revision 1.8 2006/04/26 23:03:00 jms
# release 2.3.4-1
#
# Revision 1.7 2006/04/12 18:13:58 jms
# release 2.3.3
#
# Revision 1.6 2006/03/09 18:59:19 jms
# move to version 2.3.2
#
# Revision 1.5 2006/01/28 23:54:32 jms
# add reference data to release
#
# Revision 1.4 2005/10/28 03:00:32 jms
# fix release target
#
# Revision 1.3 2005/10/28 02:54:14 jms
# increment build count with each release creation
#
# Revision 1.2 2005/01/03 20:08:58 jms
# change line terminations
#
# Revision 1.1.1.1 2004/11/24 23:31:47 jms
# re-establish external server
#
# Revision 1.5 2004/03/26 20:39:23 jms
# add tpch tag to release files
#
# Revision 1.4 2004/03/16 14:45:57 jms
# correct release target in makefile
#
# Revision 1.3 2004/03/02 20:49:01 jms
# simplify distributions, add Windows IDE files
# releases should use make release from now on
#
# Revision 1.2 2004/02/18 14:05:53 jms
# porting changes for LINUX and 64 bit RNG
#
# Revision 1.1.1.1 2003/04/03 18:54:21 jms
# recreation after CVS crash
#
# Revision 1.1.1.1 2003/04/03 18:54:21 jms
# initial checkin
#
#
#
################
## CHANGE NAME OF ANSI COMPILER HERE
################
CC = /usr/bin/gcc
# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata)
# SQLSERVER, SYBASE
# Current values for MACHINE are: ATT, DOS, HP, IBM, ICL, MVS,
# SGI, SUN, U2200, VMS, LINUX, WIN32
# Current values for WORKLOAD are: TPCH
DATABASE= DB2
MACHINE = LINUX
WORKLOAD = TPCH
#
# add -EDTERABYTE if orderkey will exceed 32 bits (SF >= 300)
# and make the appropriate change in gen_schema() of runit.sh
CFLAGS = -O -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD)
LDFLAGS = -g
# The OBJ,EXE and LIB macros will need to be changed for compilation under
# Windows NT
OBJ = .o
EXE =
LIBS = -lm
#
# NO CHANGES SHOULD BE NECESSARY BELOW THIS LINE
###############
VERSION=2
RELEASE=6
PATCH=0
# BUILD is read back out of release.h; update_release.sh rewrites it
BUILD=`grep BUILD release.h | cut -f3 -d' '`
NEW_BUILD=`expr ${BUILD} + 1`
TREE_ROOT=/tmp/tree
#
PROG1 = dbgen$(EXE)
PROG2 = qgen$(EXE)
PROGS = $(PROG1) $(PROG2)
#
HDR1 = dss.h rnd.h config.h dsstypes.h shared.h bcd2.h rng64.h release.h
HDR2 = tpcd.h permute.h
HDR = $(HDR1) $(HDR2)
#
SRC1 = build.c driver.c bm_utils.c rnd.c print.c load_stub.c bcd2.c \
	speed_seed.c text.c permute.c rng64.c
SRC2 = qgen.c varsub.c
SRC = $(SRC1) $(SRC2)
#
OBJ1 = build$(OBJ) driver$(OBJ) bm_utils$(OBJ) rnd$(OBJ) print$(OBJ) \
	load_stub$(OBJ) bcd2$(OBJ) speed_seed$(OBJ) text$(OBJ) permute$(OBJ) \
	rng64$(OBJ)
OBJ2 = build$(OBJ) bm_utils$(OBJ) qgen$(OBJ) rnd$(OBJ) varsub$(OBJ) \
	text$(OBJ) bcd2$(OBJ) permute$(OBJ) speed_seed$(OBJ) rng64$(OBJ)
OBJS = $(OBJ1) $(OBJ2)
#
SETS = dists.dss
DOC=README HISTORY PORTING.NOTES BUGS
DDL = dss.ddl dss.ri
WINDOWS_IDE = tpch.dsw dbgen.dsp
OTHER=makefile.suite $(SETS) $(DDL) $(WINDOWS_IDE)
# case is *important* in TEST_RES
TEST_RES = O.res L.res c.res s.res P.res S.res n.res r.res
#
DBGENSRC=$(SRC1) $(HDR1) $(OTHER) $(DOC) $(SRC2) $(HDR2) $(SRC3)
FQD=queries/1.sql queries/2.sql queries/3.sql queries/4.sql queries/5.sql queries/6.sql queries/7.sql \
	queries/8.sql queries/9.sql queries/10.sql queries/11.sql queries/12.sql queries/13.sql \
	queries/14.sql queries/15.sql queries/16.sql queries/17.sql queries/18.sql queries/19.sql queries/20.sql \
	queries/21.sql queries/22.sql
VARIANTS= variants/8a.sql variants/12a.sql variants/13a.sql variants/14a.sql variants/15a.sql
ANS = answers/1.ans answers/2.ans answers/3.ans answers/4.ans answers/5.ans answers/6.ans answers/7.ans answers/8.ans \
	answers/9.ans answers/10.ans answers/11.ans answers/12.ans answers/13.ans answers/14.ans answers/15.ans \
	answers/16.ans answers/17.ans answers/18.ans answers/19.ans answers/20.ans answers/21.ans answers/22.ans
QSRC = $(FQD) $(VARIANTS) $(ANS)
TREE_DOC=tree.readme tree.changes appendix.readme appendix.version answers.readme queries.readme variants.readme
REFERENCE=reference/*
ALLSRC=$(DBGENSRC) $(QSRC) $(REFERENCE) update_release.sh
JUNK =
#
# default target: bump the build number, then build dbgen and qgen
all: update_release $(PROGS)
$(PROG1): $(OBJ1) $(SETS)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ1) $(LIBS)
$(PROG2): permute.h $(OBJ2)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJ2) $(LIBS)
clean:
	rm -f $(PROGS) $(OBJS) $(JUNK)
lint:
	lint $(CFLAGS) -u -x -wO -Ma -p $(SRC1)
	lint $(CFLAGS) -u -x -wO -Ma -p $(SRC2)
# date-stamped source archives
tar: $(ALLSRC)
	tar cvzhf tpch_`date '+%Y%m%d'`.tar.gz $(ALLSRC)
zip: $(ALLSRC)
	zip tpch_`date '+%Y%m%d'`.zip $(ALLSRC)
release: update_release
	make -f makefile.suite tar
	make -f makefile.suite zip
	( cd tests; sh test_list.sh `date '+%Y%m%d'` )
rnd$(OBJ): rnd.h
$(OBJ1): $(HDR1)
$(OBJ2): dss.h tpcd.h config.h rng64.h release.h
update_release:
	update_release.sh ${VERSION} ${RELEASE} ${PATCH}

View File

@ -0,0 +1,208 @@
/*
* $Id: permute.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: permute.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.3 2007/01/04 21:29:21 jms
* Porting changes uncovered as part of move to VS2005. No impact on data set
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/08/07 17:58:34 jms
* recreation after CVS crash
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
* permute.c -- a permutation generator for the query
* sequences in TPC-H and TPC-R
*/
#ifdef TEST
#define DECLARER
#endif
#include "config.h"
#include "dss.h"
#ifdef TEST
#include <stdlib.h>
#if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */
#include <unistd.h>
#include <sys/wait.h>
#endif /* WIN32 */
#include <stdio.h> /* */
#include <limits.h>
#include <math.h>
#include <ctype.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#ifdef HP
#include <strings.h>
#endif
#if (defined(WIN32)&&!defined(_POSIX_))
#include <process.h>
#pragma warning(disable:4201)
#pragma warning(disable:4214)
#pragma warning(disable:4514)
#define WIN32_LEAN_AND_MEAN
#define NOATOM
#define NOGDICAPMASKS
#define NOMETAFILE
#define NOMINMAX
#define NOMSG
#define NOOPENFILE
#define NORASTEROPS
#define NOSCROLL
#define NOSOUND
#define NOSYSMETRICS
#define NOTEXTMETRIC
#define NOWH
#define NOCOMM
#define NOKANJI
#define NOMCX
#include <windows.h>
#pragma warning(default:4201)
#pragma warning(default:4214)
#endif
#endif
DSS_HUGE NextRand(DSS_HUGE seed); /* Park-Miller step, defined in rnd.c */
long *permute(long *set, int cnt, long stream);
long *permute_dist(distribution *d, long stream);
long seed;
/* separators used when printing the generated C table: first row gets
 * no leading "}," -- see main() below */
char *eol[2] = {" ", "},"};
extern seed_t Seed[];
#ifdef TEST
tdef tdefs = { NULL }; /* dummy; dss.h externs expect one */
#endif
#define MAX_QUERY 22 /* queries per TPC-H stream */
#define ITERATIONS 1000
#define UNSET 0
long *
permute(long *a, int c, long s)
{
/* Return a pointer to the next element of a permutation of {0..c-1}.
 * Initialization call: pass the array a; it is filled with 0..c-1,
 * shuffled in place driven by RNG stream s, and a pointer to its
 * first element is returned.  Subsequent calls with a == NULL advance
 * cyclically through the remembered array.
 * NOTE(review): RANDOM() is a dbgen macro (dss.h); `source` and `set`
 * are static, so only one permutation can be iterated at a time.
 */
int i;
static DSS_HUGE source;
static long *set, temp;
if (a != (long *)NULL)
{
set = a;
for (i=0; i < c; i++)
*(a + i) = i;
/* in-place shuffle: swap slot i with a randomly chosen slot */
for (i=0; i < c; i++)
{
RANDOM(source, 0L, (long)(c - 1), s);
temp = *(a + source);
*(a + source) = *(a + i) ;
*(a + i) = temp;
source = 0; /* reset cursor so the init call returns set[0] */
}
}
else
source += 1; /* step the cursor ... */
if (source >= c)
source -= c; /* ... and wrap around */
return(set + source);
}
long *
permute_dist(distribution *d, long stream)
{
/* Permute the index set of distribution d (lazily allocating and
 * seeding d->permute on first use), then delegate to permute().
 * Passing d == NULL continues iterating the most recent distribution.
 * INTERNAL_ERROR fires if called with NULL before any distribution
 * was supplied.
 */
static distribution *dist = NULL;
int i;
if (d != NULL)
{
if (d->permute == (long *)NULL)
{
/* first use of this distribution: allocate and seed 0..n-1 */
d->permute = (long *)malloc(sizeof(long) * DIST_SIZE(d));
MALLOC_CHECK(d->permute);
for (i=0; i < DIST_SIZE(d); i++)
*(d->permute + i) = i;
}
dist = d;
return(permute(dist->permute, DIST_SIZE(dist), stream));
}
if (dist != NULL)
return(permute(NULL, DIST_SIZE(dist), stream));
else
INTERNAL_ERROR("Bad call to permute_dist");
return(NULL);
}
#ifdef TEST
/*
 * Stand-alone driver (compiled under -DTEST): emits the C source of a
 * permutation table -- <streams> shufflings of the query numbers
 * 1..MAX_QUERY, seeded from argv[1] -- in the format of permute.h.
 * Returns 0 on success, -1 on bad arguments.
 * Fix: declared the int return type explicitly; the original relied on
 * implicit int, which is invalid in C99 and later.
 */
int
main(int ac, char *av[])
{
long *sequence,
i,
j,
streams = UNSET,
*a;
char sep;
int index = 0;
set_seeds = 0;
sequence = (long *)malloc(MAX_QUERY * sizeof(long));
a = sequence;
for (i=0; i < MAX_QUERY; i++)
*(sequence + i) = i;
if (ac < 3)
goto usage;
Seed[0].value = (long)atoi(av[1]);
streams = atoi(av[2]);
if (Seed[0].value == UNSET || streams == UNSET)
goto usage;
index = 0;
printf("long permutation[%d][%d] = {\n", streams, MAX_QUERY);
for (j=0; j < streams; j++)
{
sep = '{';
/* eol[0] before the first row, eol[1] ("},") after each later row */
printf("%s\n", eol[index]);
for (i=0; i < MAX_QUERY; i++)
{
/* +1 shifts the 0-based permutation to query numbers 1..22 */
printf("%c%2d", sep, *permute(a, MAX_QUERY, 0) + 1);
a = (long *)NULL; /* subsequent calls iterate, not re-shuffle */
sep = ',';
}
a = sequence;
index=1;
}
printf("}\n};\n");
return(0);
usage:
printf("Usage: %s <seed> <streams>\n",av[0]);
printf(" uses <seed> to start the generation of <streams> permutations of [1..%d]\n", MAX_QUERY);
return(-1);
}
#endif /* TEST */

View File

@ -0,0 +1,70 @@
/*
* $Id: permute.h,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: permute.h,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* recreation after CVS crash
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
 * Pre-generated query-ordering table: 41 permutations of the query
 * numbers 1..22, one row per TPC-H stream (row = output of the TEST
 * driver in permute.c).  qgen indexes this via SEQUENCE() in tpcd.h
 * -- TODO confirm the accessor.
 */
long permutation[41][22] =
{
{14, 2, 9,20, 6,17,18, 8,21,13, 3,22,16, 4,11,15, 1,10,19, 5, 7,12},
{21, 3,18, 5,11, 7, 6,20,17,12,16,15,13,10, 2, 8,14,19, 9,22, 1, 4},
{ 6,17,14,16,19,10, 9, 2,15, 8, 5,22,12, 7,13,18, 1, 4,20, 3,11,21},
{ 8, 5, 4, 6,17, 7, 1,18,22,14, 9,10,15,11,20, 2,21,19,13,16,12, 3},
{ 5,21,14,19,15,17,12, 6, 4, 9, 8,16,11, 2,10,18, 1,13, 7,22, 3,20},
{21,15, 4, 6, 7,16,19,18,14,22,11,13, 3, 1, 2, 5, 8,20,12,17,10, 9},
{10, 3,15,13, 6, 8, 9, 7, 4,11,22,18,12, 1, 5,16, 2,14,19,20,17,21},
{18, 8,20,21, 2, 4,22,17, 1,11, 9,19, 3,13, 5, 7,10,16, 6,14,15,12},
{19, 1,15,17, 5, 8, 9,12,14, 7, 4, 3,20,16, 6,22,10,13, 2,21,18,11},
{ 8,13, 2,20,17, 3, 6,21,18,11,19,10,15, 4,22, 1, 7,12, 9,14, 5,16},
{ 6,15,18,17,12, 1, 7, 2,22,13,21,10,14, 9, 3,16,20,19,11, 4, 8, 5},
{15,14,18,17,10,20,16,11, 1, 8, 4,22, 5,12, 3, 9,21, 2,13, 6,19, 7},
{ 1, 7,16,17,18,22,12, 6, 8, 9,11, 4, 2, 5,20,21,13,10,19, 3,14,15},
{21,17, 7, 3, 1,10,12,22, 9,16, 6,11, 2, 4, 5,14, 8,20,13,18,15,19},
{ 2, 9, 5, 4,18, 1,20,15,16,17, 7,21,13,14,19, 8,22,11,10, 3,12, 6},
{16, 9,17, 8,14,11,10,12, 6,21, 7, 3,15, 5,22,20, 1,13,19, 2, 4,18},
{ 1, 3, 6, 5, 2,16,14,22,17,20, 4, 9,10,11,15, 8,12,19,18,13, 7,21},
{ 3,16, 5,11,21, 9, 2,15,10,18,17, 7, 8,19,14,13, 1, 4,22,20, 6,12},
{14, 4,13, 5,21,11, 8, 6, 3,17, 2,20, 1,19,10, 9,12,18,15, 7,22,16},
{ 4,12,22,14, 5,15,16, 2, 8,10,17, 9,21, 7, 3, 6,13,18,11,20,19, 1},
{16,15,14,13, 4,22,18,19, 7, 1,12,17, 5,10,20, 3, 9,21,11, 2, 6, 8},
{20,14,21,12,15,17, 4,19,13,10,11, 1,16, 5,18, 7, 8,22, 9, 6, 3, 2},
{16,14,13, 2,21,10,11, 4, 1,22,18,12,19, 5, 7, 8, 6, 3,15,20, 9,17},
{18,15, 9,14,12, 2, 8,11,22,21,16, 1, 6,17, 5,10,19, 4,20,13, 3, 7},
{ 7, 3,10,14,13,21,18, 6,20, 4, 9, 8,22,15, 2, 1, 5,12,19,17,11,16},
{18, 1,13, 7,16,10,14, 2,19, 5,21,11,22,15, 8,17,20, 3, 4,12, 6, 9},
{13, 2,22, 5,11,21,20,14, 7,10, 4, 9,19,18, 6, 3, 1, 8,15,12,17,16},
{14,17,21, 8, 2, 9, 6, 4, 5,13,22, 7,15, 3, 1,18,16,11,10,12,20,19},
{10,22, 1,12,13,18,21,20, 2,14,16, 7,15, 3, 4,17, 5,19, 6, 8, 9,11},
{10, 8, 9,18,12, 6, 1, 5,20,11,17,22,16, 3,13, 2,15,21,14,19, 7, 4},
{ 7,17,22, 5, 3,10,13,18, 9, 1,14,15,21,19,16,12, 8, 6,11,20, 4, 2},
{ 2, 9,21, 3, 4, 7, 1,11,16, 5,20,19,18, 8,17,13,10,12,15, 6,14,22},
{15,12, 8, 4,22,13,16,17,18, 3, 7, 5, 6, 1, 9,11,21,10,14,20,19, 2},
{15,16, 2,11,17, 7, 5,14,20, 4,21, 3,10, 9,12, 8,13, 6,18,19,22, 1},
{ 1,13,11, 3, 4,21, 6,14,15,22,18, 9, 7, 5,10,20,12,16,17, 8,19, 2},
{14,17,22,20, 8,16, 5,10, 1,13, 2,21,12, 9, 4,18, 3, 7, 6,19,15,11},
{ 9,17, 7, 4, 5,13,21,18,11, 3,22, 1, 6,16,20,14,15,10, 8, 2,12,19},
{13,14, 5,22,19,11, 9, 6,18,15, 8,10, 7, 4,17,16, 3, 1,12, 2,21,20},
{20, 5, 4,14,11, 1, 6,16, 8,22, 7, 3, 2,12,21,19,17,13,10,15,18, 9},
{ 3, 7,14,15, 6, 5,21,20,18,10, 4,16,19, 1,13, 9, 8,17,11,12,22, 2},
{13,15,17, 1,22,11, 3, 4, 7,20,14,21, 9, 8, 2,18,16, 6,10,12, 5,19}
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,494 @@
/*
* $Id: qgen.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: qgen.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.3 2005/10/28 02:54:35 jms
* add release.h changes
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* recreation after CVS crash
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
* qgen.c -- routines to convert query templates to executable query
* text for TPC-H and TPC-R
*/
#define DECLARER
#include <stdio.h>
#include <string.h>
#if (defined(_POSIX_)||!defined(WIN32))
#include <unistd.h>
#else
#include "process.h"
#endif /* WIN32 */
#include <ctype.h>
#include <time.h>
#include "config.h"
#include "dss.h"
#include "tpcd.h"
#include "permute.h"
#include "release.h"
#define LINE_SIZE 512
/*
* Function Prototypes
*/
void varsub PROTO((int qnum, int vnum, int flags)); /* defined in varsub.c */
int strip_comments PROTO((char *line));
void usage PROTO((void));
int process_options PROTO((int cnt, char **args));
int setup PROTO((void));
void qsub PROTO((char *qtag, int flags));
extern char *optarg; /* getopt(3) state */
extern int optind;
char **mk_ascdate(void); /* bm_utils.c: table of ascii dates */
extern seed_t Seed[];
char **asc_date;
int snum = -1; /* stream number; -1 = no -p option given */
char *prog; /* argv[0], for messages */
tdef tdefs = { NULL };
long rndm; /* RNG seed from -r or time() */
double flt_scale; /* scale factor from -s */
distribution q13a, q13b; /* extra distributions for query 13 */
int qnum; /* query currently being expanded */
/*
* FUNCTION strip_comments(line)
*
* remove all comments from 'line'; recognizes both {} and -- comments
*/
/*
 * Remove comments from 'line' in place; recognizes both {} and --
 * comments.  The in_comment flag is static so a {} comment may span
 * several successive calls (lines).  Always returns 0.
 *
 * Fixes relative to the original:
 *  - overlapping strcpy() (undefined behavior) replaced by memmove();
 *  - a "--" comment is now found with strstr(), so a lone '-' earlier
 *    on the line no longer hides it;
 *  - when a {} comment opens mid-line, the cursor advances to the
 *    opening brace, so the text preceding the comment is preserved
 *    instead of being overwritten.
 */
int
strip_comments(char *line)
{
	static int in_comment = 0;
	char *cp1, *cp2;

	cp1 = line;
	while (1)	/* traverse the entire string */
	{
		if (in_comment)
		{
			if ((cp2 = strchr(cp1, '}')) != NULL)	/* comment ends */
			{
				/* shift the tail left over the comment; regions overlap */
				memmove(cp1, cp2 + 1, strlen(cp2 + 1) + 1);
				in_comment = 0;
				continue;
			}
			else
			{
				*cp1 = '\0';	/* comment runs past end of line */
				break;
			}
		}
		else	/* not in_comment */
		{
			if ((cp2 = strstr(cp1, "--")) != NULL)	/* '--' comment */
			{
				*cp2 = '\0';
				break;
			}
			if ((cp2 = strchr(cp1, '{')) != NULL)	/* comment starts */
			{
				in_comment = 1;
				*cp2 = ' ';
				cp1 = cp2;	/* keep the text before the comment */
				continue;
			}
			else
				break;
		}
	}
	return (0);
}
/*
* FUNCTION qsub(char *qtag, int flags)
*
* based on the settings of flags, and the template file $QDIR/qtag.sql
* make the following substitutions to turn a query template into EQT
*
* String Converted to Based on
* ====== ============ ===========
* first line database <db_name>; -n from command line
* second line set explain on; -x from command line
* :<number> parameter <number>
* :k set number
* :o output to outpath/qnum.snum
* -o from command line, SET_OUTPUT
* :s stream number
* :b BEGIN WORK; -a from command line, START_TRAN
* :e COMMIT WORK; -a from command line, END_TRAN
* :q query number
* :n<number> sets rowcount to be returned
*/
void
qsub(char *qtag, int flags)
{
/* Expand the query template $QDIR/<qtag>.sql into executable query
 * text on ofp, applying the ':' directives listed in the comment
 * above.  qtag is the query number as a string; flags is the bitmask
 * assembled by process_options().  line/qpath are allocated once and
 * reused across calls.
 */
static char *line = NULL,
*qpath = NULL;
FILE *qfp;
char *cptr,
*mark,
*qroot = NULL;
qnum = atoi(qtag);
if (line == NULL)
{
/* first call: allocate the shared line and path buffers */
line = malloc(BUFSIZ);
qpath = malloc(BUFSIZ);
MALLOC_CHECK(line);
MALLOC_CHECK(qpath);
}
qroot = env_config(QDIR_TAG, QDIR_DFLT);
sprintf(qpath, "%s%c%s.sql",
qroot, PATH_SEP, qtag);
qfp = fopen(qpath, "r");
OPEN_CHECK(qfp, qpath);
rowcnt = rowcnt_dflt[qnum];
varsub(qnum, 0, flags); /* set the variables */
if (flags & DFLT_NUM)
fprintf(ofp, SET_ROWCOUNT, rowcnt);
/* copy the template, expanding each VTAG-introduced directive */
while (fgets(line, BUFSIZ, qfp) != NULL)
{
if (!(flags & COMMENT))
strip_comments(line);
mark = line;
while ((cptr = strchr(mark, VTAG)) != NULL)
{
/* emit text before the tag, then dispatch on the directive char */
*cptr = '\0';
cptr++;
fprintf(ofp,"%s", mark);
switch(*cptr)
{
case 'b':
case 'B':
if (!(flags & ANSI))
fprintf(ofp,"%s\n", START_TRAN);
cptr++;
break;
case 'c':
case 'C':
if (flags & DBASE)
fprintf(ofp, SET_DBASE, db_name);
cptr++;
break;
case 'e':
case 'E':
if (!(flags & ANSI))
fprintf(ofp,"%s\n", END_TRAN);
cptr++;
break;
case 'n':
case 'N':
if (!(flags & DFLT_NUM))
{
rowcnt=atoi(++cptr);
while (isdigit(*cptr) || *cptr == ' ') cptr++;
fprintf(ofp, SET_ROWCOUNT, rowcnt);
}
continue;
case 'o':
case 'O':
if (flags & OUTPUT)
fprintf(ofp,"%s '%s/%s.%d'", SET_OUTPUT, osuff,
qtag, (snum < 0)?0:snum);
cptr++;
break;
case 'q':
case 'Q':
fprintf(ofp,"%s", qtag);
cptr++;
break;
case 's':
case 'S':
fprintf(ofp,"%d", (snum < 0)?0:snum);
cptr++;
break;
case 'X':
case 'x':
if (flags & EXPLAIN)
fprintf(ofp, "%s\n", GEN_QUERY_PLAN);
cptr++;
break;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
/* :<n> -- substitute generated parameter n */
varsub(qnum, atoi(cptr), flags & DFLT);
while (isdigit(*++cptr));
break;
default:
fprintf(stderr, "-- unknown flag '%c%c' ignored\n",
VTAG, *cptr);
cptr++;
break;
}
mark=cptr;
}
fprintf(ofp,"%s", mark); /* remainder of the line after the last tag */
}
fclose(qfp);
fflush(stdout);
return;
}
void
usage(void)
{
/* Print the version banner and the option summary for qgen. */
printf("%s Parameter Substitution (v. %d.%d.%d build %d)\n",
NAME, VERSION,RELEASE,
PATCH,BUILD);
printf("Copyright %s %s\n", TPC, C_DATES);
printf("USAGE: %s <options> [ queries ]\n", prog);
printf("Options:\n");
printf("\t-a\t\t-- use ANSI semantics.\n");
printf("\t-b <str>\t-- load distributions from <str>\n");
printf("\t-c\t\t-- retain comments found in template.\n");
printf("\t-d\t\t-- use default substitution values.\n");
printf("\t-h\t\t-- print this usage summary.\n");
printf("\t-i <str>\t-- use the contents of file <str> to begin a query.\n");
printf("\t-l <str>\t-- log parameters to <str>.\n");
printf("\t-n <str>\t-- connect to database <str>.\n");
printf("\t-N\t\t-- use default rowcounts and ignore :n directive.\n");
printf("\t-o <str>\t-- set the output file base path to <str>.\n");
printf("\t-p <n>\t\t-- use the query permutation for stream <n>\n");
printf("\t-r <n>\t\t-- seed the random number generator with <n>\n");
printf("\t-s <n>\t\t-- base substitutions on an SF of <n>\n");
printf("\t-v\t\t-- verbose.\n");
printf("\t-t <str>\t-- use the contents of file <str> to complete a query\n");
printf("\t-x\t\t-- enable SET EXPLAIN in each query.\n");
}
/*
 * Parse the command line with getopt(3), setting the global `flags`
 * bitmask and the associated option values (d_path, ifile, db_name,
 * osuff, snum, rndm, flt_scale, ...).  Unknown options print usage
 * and exit(1); -h prints usage and exit(0).  Returns 0.
 * Fix: the -s scale warning tested the global `scale`, which this
 * function never assigns; it now tests flt_scale, the variable -s
 * actually sets.
 */
int
process_options(int cnt, char **args)
{
int flag;
while((flag = getopt(cnt, args, "ab:cdhi:n:Nl:o:p:r:s:t:vx")) != -1)
switch(flag)
{
case 'a': /* use ANSI semantics */
flags |= ANSI;
break;
case 'b': /* load distributions from named file */
d_path = (char *)malloc(strlen(optarg) + 1);
MALLOC_CHECK(d_path);
strcpy(d_path, optarg);
break;
case 'c': /* retain comments in EQT */
flags |= COMMENT;
break;
case 'd': /* use default substitution values */
flags |= DFLT;
break;
case 'h': /* just generate the usage summary */
usage();
exit(0);
break;
case 'i': /* set stream initialization file name */
ifile = malloc(strlen(optarg) + 1);
MALLOC_CHECK(ifile);
strcpy(ifile, optarg);
flags |= INIT;
break;
case 'l': /* log parameter usages */
lfile = malloc(strlen(optarg) + 1);
MALLOC_CHECK(lfile);
strcpy(lfile, optarg);
flags |= LOG;
break;
case 'N': /* use default rowcounts */
flags |= DFLT_NUM;
break;
case 'n': /* set database name */
db_name = malloc(strlen(optarg) + 1);
MALLOC_CHECK(db_name);
strcpy(db_name, optarg);
flags |= DBASE;
break;
case 'o': /* set the output path */
osuff = malloc(strlen(optarg) + 1);
MALLOC_CHECK(osuff);
strcpy(osuff, optarg);
flags |=OUTPUT;
break;
case 'p': /* permutation for a given stream */
snum = atoi(optarg);
break;
case 'r': /* set random number seed for parameter gen */
flags |= SEED;
rndm = atol(optarg);
break;
case 's': /* scale of data set to run against */
flt_scale = atof(optarg);
/* was: scale > MAX_SCALE -- `scale` is never set here */
if (flt_scale > MAX_SCALE)
fprintf(stderr, "%s %5.0f %s\n%s\n",
"WARNING: Support for scale factors >",
MAX_SCALE,
"GB is still in development.",
"Data set integrity is not guaranteed.\n");
break;
case 't': /* set termination file name */
tfile = malloc(strlen(optarg) + 1);
MALLOC_CHECK(tfile);
strcpy(tfile, optarg);
flags |= TERMINATE;
break;
case 'v': /* verbose */
flags |= VERBOSE;
break;
case 'x': /* set explain in the queries */
flags |= EXPLAIN;
break;
default:
printf("unknown option '%s' ignored\n", args[optind]);
usage();
exit(1);
break;
}
return(0);
}
int
setup(void)
{
/* Load every distribution qgen's parameter generation needs from the
 * dists.dss file (located via DIST_TAG/DIST_DFLT) and build the ascii
 * date table.  Always returns 0; read_dist aborts on error -- TODO
 * confirm its failure behavior in bm_utils.c.
 */
asc_date = mk_ascdate();
read_dist(env_config(DIST_TAG, DIST_DFLT), "p_cntr", &p_cntr_set);
read_dist(env_config(DIST_TAG, DIST_DFLT), "colors", &colors);
read_dist(env_config(DIST_TAG, DIST_DFLT), "p_types", &p_types_set);
read_dist(env_config(DIST_TAG, DIST_DFLT), "nations", &nations);
read_dist(env_config(DIST_TAG, DIST_DFLT), "nations2", &nations2);
read_dist(env_config(DIST_TAG, DIST_DFLT), "regions", &regions);
read_dist(env_config(DIST_TAG, DIST_DFLT), "o_oprio",
&o_priority_set);
read_dist(env_config(DIST_TAG, DIST_DFLT), "instruct",
&l_instruct_set);
read_dist(env_config(DIST_TAG, DIST_DFLT), "smode", &l_smode_set);
read_dist(env_config(DIST_TAG, DIST_DFLT), "category",
&l_category_set);
read_dist(env_config(DIST_TAG, DIST_DFLT), "rflag", &l_rflag_set);
read_dist(env_config(DIST_TAG, DIST_DFLT), "msegmnt", &c_mseg_set);
/* Q13-specific comment-pattern distributions */
read_dist(env_config(DIST_TAG, DIST_DFLT), "Q13a", &q13a);
read_dist(env_config(DIST_TAG, DIST_DFLT), "Q13b", &q13b);
return(0);
}
/*
 * qgen entry point: parse options, load distributions, seed the RNG
 * (unless -d), optionally emit an init file, expand the requested
 * query templates (permuted by stream when -p was given), and
 * optionally emit a termination file.
 * Fix: declared the int return type explicitly; the original relied
 * on implicit int, which is invalid in C99 and later.
 */
int
main(int ac, char **av)
{
int i;
FILE *ifp;
char line[LINE_SIZE];
prog = av[0];
flt_scale = (double)1.0;
flags = 0;
d_path = NULL;
process_options(ac, av);
if (flags & VERBOSE)
fprintf(ofp,
"-- TPC %s Parameter Substitution (Version %d.%d.%d build %d)\n",
NAME, VERSION, RELEASE, PATCH, BUILD);
setup();
if (!(flags & DFLT)) /* perturb the RNG */
{
if (!(flags & SEED))
rndm = (long)((unsigned)time(NULL) * DSS_PROC);
if (rndm < 0)
rndm += 2147483647;
Seed[0].value = rndm;
/* derive one seed per query from the base seed */
for (i=1; i <= QUERIES_PER_SET; i++)
{
Seed[0].value = NextRand(Seed[0].value);
Seed[i].value = Seed[0].value;
}
printf("-- using %ld as a seed to the RNG\n", rndm);
}
else
printf("-- using default substitutions\n");
if (flags & INIT) /* init stream with ifile */
{
ifp = fopen(ifile, "r");
OPEN_CHECK(ifp, ifile);
while (fgets(line, LINE_SIZE, ifp) != NULL)
fprintf(stdout, "%s", line);
}
/* with -p: map each requested (or all) query number through the
 * stream's permutation; without -p: expand in the order given */
if (snum >= 0)
if (optind < ac)
for (i=optind; i < ac; i++)
{
char qname[10];
sprintf(qname, "%d", SEQUENCE(snum, atoi(av[i])));
qsub(qname, flags);
}
else
for (i=1; i <= QUERIES_PER_SET; i++)
{
char qname[10];
sprintf(qname, "%d", SEQUENCE(snum, i));
qsub(qname, flags);
}
else
if (optind < ac)
for (i=optind; i < ac; i++)
qsub(av[i], flags);
else
for (i=1; i <= QUERIES_PER_SET; i++)
{
char qname[10];
sprintf(qname, "%d", i);
qsub(qname, flags);
}
if (flags & TERMINATE) /* terminate stream with tfile */
{
ifp = fopen(tfile, "r");
if (ifp == NULL)
OPEN_CHECK(ifp, tfile);
while (fgets(line, LINE_SIZE, ifp) != NULL)
fprintf(stdout, "%s", line);
}
return(0);
}

View File

@ -0,0 +1,5 @@
/* ORIGINAL TPC-H distribution */
/* Fix: the banner was written as `#ORIGINAL ...`, which is an invalid
 * preprocessing directive in a C header; wrapped it in a comment.
 * Version identity consumed by makefile.suite and update_release.sh. */
#define VERSION 2
#define RELEASE 6
#define PATCH 0
#define BUILD 1

View File

@ -0,0 +1,238 @@
/*
* $Id: rnd.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: rnd.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.7 2006/07/31 17:23:09 jms
* fix to parallelism problem
*
* Revision 1.6 2005/10/25 17:26:38 jms
* check in integration between microsoft changes and baseline code
*
* Revision 1.5 2005/10/14 23:16:54 jms
* fix for answer set compliance
*
* Revision 1.4 2005/09/23 22:29:35 jms
* fix to assume 64b support in the 32b RNG calls. Should speed generation, and corrects a problem with FK between Customer and Orders
*
* Revision 1.3 2005/03/04 21:43:23 jms
* correct segfult in random()
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.7 2004/04/08 17:34:15 jms
* cleanup SOLARIS/SUN ifdefs; now all use SUN
*
* Revision 1.6 2004/03/26 20:22:56 jms
* correct Solaris header
*
* Revision 1.5 2004/03/02 20:50:50 jms
* MP/RAS porting changes
*
* Revision 1.4 2004/02/18 16:37:33 jms
* add int32_t for solaris
*
* Revision 1.3 2004/02/18 16:26:49 jms
* 32/64 bit changes for overflow handling needed additional changes when ported back to windows
*
* Revision 1.2 2004/02/18 16:17:32 jms
* add 32bit specific changes to UnifInt
*
* Revision 1.1.1.1 2003/08/08 21:50:34 jms
* recreation after CVS crash
*
* Revision 1.3 2003/08/08 21:35:26 jms
* first integration of rng64 for o_custkey and l_partkey
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
* RANDOM.C -- Implements Park & Miller's "Minimum Standard" RNG
*
* (Reference: CACM, Oct 1988, pp 1192-1201)
*
* NextRand: Computes next random integer
* UnifInt: Yields an long uniformly distributed between given bounds
* UnifReal: Yields a real uniformly distributed between given bounds
* Exponential: Yields a real exponentially distributed with given mean
*
*/
#include "config.h"
#include <stdio.h>
#include <math.h>
#ifdef LINUX
#include <stdint.h>
#endif
#ifdef IBM
#include <inttypes.h>
#endif
#ifdef SUN
#include <inttypes.h>
#endif
#ifdef ATT
#include <sys/bitypes.h>
#endif
#ifdef WIN32
#define int32_t __int32
#endif
#include "dss.h"
#include "rnd.h"
char *env_config PROTO((char *tag, char *dflt));
void NthElement(DSS_HUGE, DSS_HUGE *);
/*
 * dss_random -- draw one uniform random integer in [lower, upper] from
 * the given stream into *tgt, and count the draw against the stream's
 * per-row usage.
 */
void
dss_random(DSS_HUGE *tgt, DSS_HUGE lower, DSS_HUGE upper, long stream)
{
	Seed[stream].usage++;
	*tgt = UnifInt(lower, upper, stream);
}
/*
 * row_start -- reset every RNG stream's per-row usage counter at the
 * start of a new row. The table id is unused; all streams are cleared.
 * Fix: removed a stray backslash line-continuation after the parameter
 * list (harmless line splice, but clearly unintended).
 */
void
row_start(int t)
{
	int i;

	(void) t;	/* unused; kept for interface compatibility */
	for (i = 0; i <= MAX_STREAM; i++)
		Seed[i].usage = 0;
}
/*
 * row_stop -- at the end of a row of table t, advance every RNG stream
 * owned by t (or its child table) to its per-row boundary so generation
 * stays reproducible no matter how many values the row actually drew.
 * When set_seeds is on, a stream that exceeded its boundary widens the
 * boundary (and reports the change) instead of skipping ahead.
 * Fix: removed a stray backslash line-continuation after the parameter
 * list.
 */
void
row_stop(int t)
{
	int i;

	/* master and detail tables share streams; fold detail ids onto master */
	if (t == ORDER_LINE)
		t = ORDER;
	if (t == PART_PSUPP)
		t = PART;
	for (i = 0; i <= MAX_STREAM; i++)
	{
		if ((Seed[i].table != t) && (Seed[i].table != tdefs[t].child))
			continue;
		if (set_seeds && (Seed[i].usage > Seed[i].boundary))
		{
			/* NOTE(review): %d assumes usage is int-width -- confirm
			 * against the seed_t declaration in dss.h */
			fprintf(stderr, "\nSEED CHANGE: seed[%d].usage = %d\n",
				i, Seed[i].usage);
			Seed[i].boundary = Seed[i].usage;
		}
		else
		{
			/* skip ahead by the unused portion of this row's allotment */
			NthElement((Seed[i].boundary - Seed[i].usage), &Seed[i].value);
#ifdef RNG_TEST
			Seed[i].nCalls += Seed[i].boundary - Seed[i].usage;
#endif
		}
	}
}
/*
 * dump_seeds -- print the current seed value of every RNG stream that
 * belongs to table tbl (with cumulative call counts when built with
 * RNG_TEST).
 * NOTE(review): %ld assumes the seed fields are long-width -- confirm
 * against seed_t/DSS_HUGE in dss.h.
 */
void
dump_seeds(int tbl)
{
int i;
for (i=0; i <= MAX_STREAM; i++)
if (Seed[i].table == tbl)
#ifdef RNG_TEST
printf("%d(%ld):\t%ld\n", i, Seed[i].nCalls, Seed[i].value);
#else
printf("%d:\t%ld\n", i, Seed[i].value);
#endif
return;
}
/******************************************************************
NextRand: Computes next random integer
*******************************************************************/
/*
* long NextRand( long nSeed )
*/
/*
 * NextRand -- one step of the Park & Miller "minimum standard" linear
 * congruential generator: X' = (16807 * X) mod (2^31 - 1).
 * Generates every value in 1 .. 2147483646; the multiply is done in
 * DSS_HUGE (64-bit) width so no Schrage factorization is needed.
 */
DSS_HUGE
NextRand(DSS_HUGE nSeed)
{
	return (nSeed * 16807) % 2147483647;
}
/******************************************************************
UnifInt: Yields an long uniformly distributed between given bounds
*******************************************************************/
/*
* long UnifInt( long nLow, long nHigh, long nStream )
*/
/*
 * UnifInt -- integer uniformly distributed in [nLow, nHigh], endpoints
 * included, drawn from RNG stream nStream (stream 0 when out of range).
 */
DSS_HUGE
UnifInt(DSS_HUGE nLow, DSS_HUGE nHigh, long nStream)
/*
 * Returns an integer uniformly distributed between nLow and nHigh,
 * including * the endpoints. nStream is the random number stream.
 * Stream 0 is used if nStream is not in the range 0..MAX_STREAM.
 */
{
double dRange;
DSS_HUGE nTemp,
nRange; /* NOTE(review): computed in both branches but never read */
int32_t nLow32 = (int32_t)nLow,
nHigh32 = (int32_t)nHigh;
if (nStream < 0 || nStream > MAX_STREAM)
nStream = 0;
/* special case: a full [0, MAX_LONG] request is ranged via 32-bit values */
if ((nHigh == MAX_LONG) && (nLow == 0))
{
dRange = DOUBLE_CAST (nHigh32 - nLow32 + 1);
nRange = nHigh32 - nLow32 + 1;
}
else
{
dRange = DOUBLE_CAST (nHigh - nLow + 1);
nRange = nHigh - nLow + 1;
}
Seed[nStream].value = NextRand(Seed[nStream].value);
#ifdef RNG_TEST
Seed[nStream].nCalls += 1;
#endif
/* scale the 31-bit seed into the requested range.
 * NOTE(review): the (long) cast can truncate on ILP32 targets where
 * DSS_HUGE is 64-bit -- confirm the intended width */
nTemp = (long) (((double) Seed[nStream].value / dM) * (dRange));
return (nLow + nTemp);
}

View File

@ -0,0 +1,126 @@
/*
* $Id: rnd.h,v 1.2 2007/03/21 04:05:15 olteanu Exp $
*
* Revision History
* ===================
* $Log: rnd.h,v $
* Revision 1.2 2007/03/21 04:05:15 olteanu
* *** empty log message ***
*
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.4 2006/08/01 04:13:17 jms
* fix parallel generation
*
* Revision 1.3 2006/07/31 17:23:09 jms
* fix to parallelism problem
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/08/08 21:50:34 jms
* recreation after CVS crash
*
* Revision 1.3 2003/08/08 21:35:26 jms
* first integration of rng64 for o_custkey and l_partkey
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
 * rnd.h -- header file for use with the portable random number generator
* provided by Frank Stephens of Unisys
*/
/* function prototypes */
DSS_HUGE NextRand PROTO((DSS_HUGE));
DSS_HUGE UnifInt PROTO((DSS_HUGE, DSS_HUGE, long));
/*
 * Park & Miller "minimum standard" LCG constants. nQ and nR are the
 * Schrage-factorization helpers (nM = nA*nQ + nR).
 * NOTE(review): the NextRand in this tree multiplies directly in 64-bit
 * width, so nA, nQ and nR appear unused by it -- confirm before removal.
 */
static long nA = 16807; /* the multiplier */
static long nM = 2147483647;/* the modulus == 2^31 - 1 */
static long nQ = 127773; /* the quotient nM / nA */
static long nR = 2836; /* the remainder nM % nA */
double dM = 2147483647.0;
/*
* macros to control RNG and assure reproducible multi-stream
* runs without the need for seed files. Keep track of invocations of RNG
* and always round-up to a known per-row boundary.
*/
/*
* preferred solution, but not initializing correctly
*/
#define VSTR_MAX(len) (long)(len / 5 + (len % 5 == 0)?0:1 + 1)
/*
 * Seed[] -- one entry per RNG stream: { owning table, initial seed
 * value, per-row usage counter, per-row call boundary }.
 * NOTE(review): field meanings inferred from row_stop()'s use of
 * .table/.value/.usage/.boundary -- confirm against seed_t in dss.h.
 */
seed_t Seed[MAX_STREAM + 1] =
{
{PART, 1, 0, 1}, /* P_MFG_SD 0 */
{PART, 46831694, 0, 1}, /* P_BRND_SD 1 */
{PART, 1841581359, 0, 1}, /* P_TYPE_SD 2 */
{PART, 1193163244, 0, 1}, /* P_SIZE_SD 3 */
{PART, 727633698, 0, 1}, /* P_CNTR_SD 4 */
{NONE, 933588178, 0, 1}, /* text pregeneration 5 */
{PART, 804159733, 0, 2}, /* P_CMNT_SD 6 */
{PSUPP, 1671059989, 0, SUPP_PER_PART}, /* PS_QTY_SD 7 */
{PSUPP, 1051288424, 0, SUPP_PER_PART}, /* PS_SCST_SD 8 */
{PSUPP, 1961692154, 0, SUPP_PER_PART * 2}, /* PS_CMNT_SD 9 */
{ORDER, 1227283347, 0, 1}, /* O_SUPP_SD 10 */
{ORDER, 1171034773, 0, 1}, /* O_CLRK_SD 11 */
{ORDER, 276090261, 0, 2}, /* O_CMNT_SD 12 */
{ORDER, 1066728069, 0, 1}, /* O_ODATE_SD 13 */
{LINE, 209208115, 0, O_LCNT_MAX}, /* L_QTY_SD 14 */
{LINE, 554590007, 0, O_LCNT_MAX}, /* L_DCNT_SD 15 */
{LINE, 721958466, 0, O_LCNT_MAX}, /* L_TAX_SD 16 */
{LINE, 1371272478, 0, O_LCNT_MAX}, /* L_SHIP_SD 17 */
{LINE, 675466456, 0, O_LCNT_MAX}, /* L_SMODE_SD 18 */
{LINE, 1808217256, 0, O_LCNT_MAX}, /* L_PKEY_SD 19 */
{LINE, 2095021727, 0, O_LCNT_MAX}, /* L_SKEY_SD 20 */
{LINE, 1769349045, 0, O_LCNT_MAX}, /* L_SDTE_SD 21 */
{LINE, 904914315, 0, O_LCNT_MAX}, /* L_CDTE_SD 22 */
{LINE, 373135028, 0, O_LCNT_MAX}, /* L_RDTE_SD 23 */
{LINE, 717419739, 0, O_LCNT_MAX}, /* L_RFLG_SD 24 */
{LINE, 1095462486, 0, O_LCNT_MAX * 2}, /* L_CMNT_SD 25 */
{CUST, 881155353, 0, 9}, /* C_ADDR_SD 26 */
{CUST, 1489529863, 0, 1}, /* C_NTRG_SD 27 */
{CUST, 1521138112, 0, 3}, /* C_PHNE_SD 28 */
{CUST, 298370230, 0, 1}, /* C_ABAL_SD 29 */
{CUST, 1140279430, 0, 1}, /* C_MSEG_SD 30 */
{CUST, 1335826707, 0, 2}, /* C_CMNT_SD 31 */
{SUPP, 706178559, 0, 9}, /* S_ADDR_SD 32 */
{SUPP, 110356601, 0, 1}, /* S_NTRG_SD 33 */
{SUPP, 884434366, 0, 3}, /* S_PHNE_SD 34 */
{SUPP, 962338209, 0, 1}, /* S_ABAL_SD 35 */
{SUPP, 1341315363, 0, 2}, /* S_CMNT_SD 36 */
{PART, 709314158, 0, 92}, /* P_NAME_SD 37 */
{ORDER, 591449447, 0, 1}, /* O_PRIO_SD 38 */
{LINE, 431918286, 0, 1}, /* HVAR_SD 39 */
{ORDER, 851767375, 0, 1}, /* O_CKEY_SD 40 */
{NATION, 606179079, 0, 2}, /* N_CMNT_SD 41 */
{REGION, 1500869201, 0, 2}, /* R_CMNT_SD 42 */
{ORDER, 1434868289, 0, 1}, /* O_LCNT_SD 43 */
{SUPP, 263032577, 0, 1}, /* BBB offset 44 */
{SUPP, 753643799, 0, 1}, /* BBB type 45 */
{SUPP, 202794285, 0, 1}, /* BBB comment 46 */
{SUPP, 715851524, 0, 1}, /* BBB junk 47 */
{SUPP, 715851524, 0, 1}, /* WSD_PLACEHOLDERS 48 */
{SUPP, 715851524, 0, 1}, /* SUPP_PLACEHOLDERS 49 */
{SUPP, 715851524, 0, 1}, /* PART_PLACEHOLDERS 50 */
{SUPP, 715851524, 0, 1}, /* ORDERS_PLACEHOLDERS 51 */
{SUPP, 715851524, 0, 1}, /* LINE_PLACEHOLDERS 52 */
{SUPP, 715851524, 0, 1}, /* NATION_PLACEHOLDERS 53 */
{SUPP, 715851524, 0, 1}, /* REGION_PLACEHOLDERS 54 */
{SUPP, 715851524, 0, 1}, /* CUST_PLACEHOLDERS 55 */
{SUPP, 715851524, 0, 1} /* PSUPP_PLACEHOLDERS 56 */
};

View File

@ -0,0 +1,140 @@
/*
* $Id: rng64.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* This software contains proprietary and confidential information of Gradient
* Systems Inc. By accepting transfer of this copy, Recipient agrees
* to retain this software in confidence, to prevent disclosure to others, and
* to make no use of this software other than that for which it was delivered.
* This is an unpublished copyright work Gradient Systems, Inc. Execpt as
* permitted by federal law, 17 USC 117, copying is strictly prohibited.
*
* Gradient Systems Inc. CONFIDENTIAL - (Gradient Systems Inc. Confidential
* when combined with the aggregated modules for this product)
* OBJECT CODE ONLY SOURCE MATERIALS
* (C) COPYRIGHT Gradient Systems Inc. 2003
*
* All Rights Reserved
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF GRADIENT SYSTEMS, INC.
* The copyright notice above does not evidence any
* actual or intended publication of such source code.
*
* Revision History
* ===================
* $Log: rng64.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.6 2006/04/26 23:20:05 jms
* Data type clenaup for qgen
*
* Revision 1.5 2006/03/08 21:25:27 jms
* change to RNG64 to address overflow/underflow issues
*
* Revision 1.4 2005/10/25 17:26:38 jms
* check in integration between microsoft changes and baseline code
*
* Revision 1.3 2005/03/04 19:48:39 jms
* Changes from Doug Johnson to address very large scale factors
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.2 2004/02/18 16:45:30 jms
* remove C++ style comments for AIX compiler
*
* Revision 1.1.1.1 2003/08/08 21:57:34 jms
* recreation after CVS crash
*
* Revision 1.1 2003/08/08 21:57:34 jms
* first integration of rng64 for o_custkey and l_partkey
*
*/
#include "config.h"
#include "dss.h"
#include <stdio.h>
#include <stdlib.h>
#include "rng64.h"
extern double dM;
extern seed_t Seed[];
/*
 * dss_random64 -- draw a 64-bit value in [nLow, nHigh] from the given
 * stream via the 64-bit LCG (reduced by modulo, so with slight bias),
 * advancing the stream's seed and usage count. Reversed bounds are
 * swapped; stream 0 is used when nStream is out of range.
 */
void
dss_random64(DSS_HUGE *tgt, DSS_HUGE nLow, DSS_HUGE nHigh, long nStream)
{
DSS_HUGE nTemp;
if (nStream < 0 || nStream > MAX_STREAM)
nStream = 0;
/* normalize reversed bounds */
if (nLow > nHigh)
{
nTemp = nLow;
nLow = nHigh;
nHigh = nTemp;
}
Seed[nStream].value = NextRand64(Seed[nStream].value);
nTemp = Seed[nStream].value;
/* NOTE(review): -nTemp overflows for the most-negative 64-bit value --
 * confirm this cannot occur / is acceptable */
if (nTemp < 0)
nTemp = -nTemp;
nTemp %= (nHigh - nLow + 1);
*tgt = nLow + nTemp;
Seed[nStream].usage += 1;
return;
}
/*
 * NextRand64 -- one step of the 64-bit linear congruential generator:
 * X' = RNG_A * X + RNG_C, relying on wrap-around at 64 bits.
 */
DSS_HUGE
NextRand64(DSS_HUGE nSeed)
{
	DSS_HUGE nMult = (unsigned DSS_HUGE) RNG_A;
	DSS_HUGE nInc = (unsigned DSS_HUGE) RNG_C;

	/* overflow is the intended implicit truncation to 64 bits */
	return nSeed * nMult + nInc;
}
/*
 * AdvanceRand64 -- jump the 64-bit LCG ahead by nCount steps in
 * O(log nCount) using the closed form
 *     X(n) = Apow(n) * X(0) + Dsum(n) * C
 * with Apow(n) = A^n and Dsum(n) = A^(n-1) + ... + A + 1, built up one
 * bit of nCount at a time (square-and-multiply), all arithmetic mod 2^64.
 */
DSS_HUGE AdvanceRand64( DSS_HUGE nSeed,
	DSS_HUGE nCount) {
	unsigned DSS_HUGE a = RNG_A ;
	unsigned DSS_HUGE c = RNG_C ;
	int nBit;
	unsigned DSS_HUGE Apow=a, Dsum=c;
	/* if nothing to do, do nothing ! */
	if( nCount == 0 ) return nSeed;
	/* find the position of the highest set bit of nCount.
	 * BUG FIX: the loop previously compared against RNG_C instead of 1.
	 * The recurrence below starts from Apow(1)=A, Dsum(1)=1*C, i.e. from
	 * the top set bit, so the scan must stop when (nCount >> nBit) == 1. */
	for( nBit = 0; (nCount >> nBit) != 1 ; nBit ++){}
	/* consume the remaining bits, high to low:
	 *   Apow(2k)   = Apow(k)^2
	 *   Dsum(2k)   = Dsum(k) * (Apow(k) + 1)
	 *   Apow(2k+1) = Apow(2k) * A,  Dsum(2k+1) = Dsum(2k) + Apow(2k) */
	while( --nBit >= 0 ) {
		Dsum *= (Apow + 1);
		Apow = Apow * Apow;
		if( ((nCount >> nBit) % 2) == 1 ) { /* odd value */
			Dsum += Apow;
			Apow *= a;
		}
	}
	nSeed = nSeed * Apow + Dsum * c;
	return nSeed;
}

View File

@ -0,0 +1,29 @@
/*
* $Id: rng64.h,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: rng64.h,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/08/08 21:57:34 jms
* recreation after CVS crash
*
* Revision 1.1 2003/08/08 21:57:34 jms
* first integration of rng64 for o_custkey and l_partkey
*
*
*/
DSS_HUGE AdvanceRand64( DSS_HUGE nSeed, DSS_HUGE nCount);
void dss_random64(DSS_HUGE *tgt, DSS_HUGE nLow, DSS_HUGE nHigh, long stream);
DSS_HUGE NextRand64(DSS_HUGE nSeed);

View File

@ -0,0 +1,75 @@
/*
* $Id: shared.h,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: shared.h,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* recreation after CVS crash
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
#define N_CMNT_LEN 72
#define N_CMNT_MAX 152
#define R_CMNT_LEN 72
#define R_CMNT_MAX 152
#define MONEY_SCL 0.01
#define V_STR_HGH 1.6
#define P_NAME_LEN 55
#define P_MFG_LEN 25
#define P_BRND_LEN 10
#define P_TYPE_LEN 25
#define P_CNTR_LEN 10
#define P_CMNT_LEN 14
#define P_CMNT_MAX 23
#define S_NAME_LEN 25
#define S_ADDR_LEN 25
#define S_ADDR_MAX 40
#define S_CMNT_LEN 63
#define S_CMNT_MAX 101
#define PS_CMNT_LEN 124
#define PS_CMNT_MAX 199
#define C_NAME_LEN 18
#define C_ADDR_LEN 25
#define C_ADDR_MAX 40
#define C_MSEG_LEN 10
#define C_CMNT_LEN 73
#define C_CMNT_MAX 117
#define O_OPRIO_LEN 15
#define O_CLRK_LEN 15
#define O_CMNT_LEN 49
#define O_CMNT_MAX 79
#define L_CMNT_LEN 27
#define L_CMNT_MAX 44
#define L_INST_LEN 25
#define L_SMODE_LEN 10
#define T_ALPHA_LEN 10
#define DATE_LEN 13 /* long enough to hold either date format */
#define NATION_LEN 25
#define REGION_LEN 25
#define PHONE_LEN 15
#define MAXAGG_LEN 20 /* max component length for a agg str */
#define P_CMNT_SD 6
#define PS_CMNT_SD 9
#define O_CMNT_SD 12
#define C_ADDR_SD 26
#define C_CMNT_SD 31
#define S_ADDR_SD 32
#define S_CMNT_SD 36
#define L_CMNT_SD 25

View File

@ -0,0 +1,260 @@
/*
* $Id: speed_seed.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: speed_seed.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.8 2006/08/01 04:13:17 jms
* fix parallel generation
*
* Revision 1.7 2006/07/31 17:23:09 jms
* fix to parallelism problem
*
* Revision 1.6 2006/05/16 16:26:51 jms
* remove calls to FAKE_V_STR
*
* Revision 1.5 2006/04/26 23:14:28 jms
* Declaraion cleanup of fakeVStr()
*
* Revision 1.4 2006/04/26 23:01:10 jms
* address update generation problems
*
* Revision 1.3 2005/10/25 17:26:38 jms
* check in integration between microsoft changes and baseline code
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.2 2004/01/22 03:54:12 jms
* 64 bit support changes for customer address
*
* Revision 1.1.1.1 2003/08/08 22:37:36 jms
* recreation after CVS crash
*
* Revision 1.3 2003/08/08 22:37:36 jms
* first integration of rng64 for o_custkey and l_partkey
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "dss.h"
#include "rng64.h"
#include "dss.h"
/* _tal long RandSeed = "Random^SeedFromTimestamp" (void); */
#define ADVANCE_STREAM(stream_id, num_calls) \
advanceStream(stream_id, num_calls, 0)
#define ADVANCE_STREAM64(stream_id, num_calls) \
advanceStream(stream_id, num_calls, 1)
#define MAX_COLOR 92
long name_bits[MAX_COLOR / BITS_PER_LONG];
extern seed_t Seed[];
void fakeVStr(int nAvg, long nSeed, DSS_HUGE nCount);
void NthElement (DSS_HUGE N, DSS_HUGE *StartSeed);
void
advanceStream(int nStream, int nCalls, int bUse64Bit)
{
if (bUse64Bit)
Seed[nStream].value = AdvanceRand64(Seed[nStream].value, nCalls);
else
NthElement(nCalls, &Seed[nStream].value);
#ifdef RNG_TEST
Seed[nStream].nCalls += nCalls;
#endif
return;
}
/* WARNING! This routine assumes the existence of 64-bit */
/* integers. The notation used here- "HUGE" is *not* ANSI standard. */
/* Hopefully, you have this extension as well. If not, use whatever */
/* nonstandard trick you need to in order to get 64 bit integers. */
/* The book says that this will work if MAXINT for the type you choose */
/* is at least 2**46 - 1, so 64 bits is more than you *really* need */
static DSS_HUGE Multiplier = 16807; /* or whatever nonstandard */
static DSS_HUGE Modulus = 2147483647; /* trick you use to get 64 bit int */
/* Advances value of Seed after N applications of the random number generator
with multiplier Mult and given Modulus.
NthElement(Seed[],count);
Theory: We are using a generator of the form
X_n = [Mult * X_(n-1)] mod Modulus. It turns out that
X_n = [(Mult ** n) X_0] mod Modulus.
This can be computed using a divide-and-conquer technique, see
the code below.
In words, this means that if you want the value of the Seed after n
applications of the generator, you multiply the initial value of the
Seed by the "super multiplier" which is the basic multiplier raised
to the nth power, and then take mod Modulus.
*/
/* Nth Element of sequence starting with StartSeed */
/*
 * NthElement -- advance *StartSeed by N steps of the Park/Miller LCG in
 * O(log N): computes Z * Multiplier^N mod Modulus by binary
 * exponentiation (see the derivation in the comment block above).
 * Side effect: when verbose, emits a spinner character to stderr every
 * 1000th call (static call counter ln).
 */
void NthElement (DSS_HUGE N, DSS_HUGE *StartSeed)
{
DSS_HUGE Z;
DSS_HUGE Mult;
static int ln=-1;
int i;
if ((verbose > 0) && ++ln % 1000 == 0)
{
i = ln % LN_CNT;
fprintf(stderr, "%c\b", lnoise[i]);
}
Mult = Multiplier;
Z = (DSS_HUGE) *StartSeed;
/* square-and-multiply: fold in Mult^(2^k) for each set bit of N */
while (N > 0 )
{
if (N % 2 != 0) /* testing for oddness, this seems portable */
Z = (Mult * Z) % Modulus;
N = N / 2; /* integer division, truncates */
Mult = (Mult * Mult) % Modulus;
}
*StartSeed = Z;
return;
}
/* updates Seed[column] using the a_rnd algorithm */
/*
 * fake_a_rnd -- consume as many RNG calls as generating a random-length
 * string in [min, max] would (one call per 5 characters, rounded up)
 * without producing the text itself.
 * NOTE(review): the skip is applied to Seed[column].usage, not .value --
 * other skip paths (advanceStream) advance .value; confirm this is
 * intentional before changing.
 */
void
fake_a_rnd(int min, int max, int column)
{
DSS_HUGE len;
DSS_HUGE itcount;
RANDOM(len, min, max, column);
/* itcount = ceil(len / 5) */
if (len % 5L == 0)
itcount = len/5;
else
itcount = len/5 + 1L;
NthElement(itcount, &Seed[column].usage);
#ifdef RNG_TEST
Seed[column].nCalls += itcount;
#endif
return;
}
/*
 * sd_part -- advance all PART-table RNG streams past skip_count rows.
 * The comment stream burns two calls per row; the name stream burns one
 * call per color.
 */
long
sd_part(int child, DSS_HUGE skip_count)
{
	int i;

	(void) child;	/* unused; kept for the common sd_* signature */
	for (i = P_MFG_SD; i <= P_CNTR_SD; i++)
		ADVANCE_STREAM(i, skip_count);
	ADVANCE_STREAM(P_CMNT_SD, skip_count * 2);
	/* was a hard-coded 92; MAX_COLOR (defined above) is the same value */
	ADVANCE_STREAM(P_NAME_SD, skip_count * MAX_COLOR);
	return (0L);
}
/*
 * sd_line -- advance all LINEITEM RNG streams past skip_count order rows.
 * Every per-lineitem stream is advanced once per possible lineitem slot
 * (O_LCNT_MAX per order); the comment stream burns two calls per slot.
 */
long
sd_line(int child, DSS_HUGE skip_count)
{
	int nStream, nSlot;

	for (nSlot = 0; nSlot < O_LCNT_MAX; nSlot++)
	{
		for (nStream = L_QTY_SD; nStream <= L_RFLG_SD; nStream++)
		{
			/* partkeys need the 64-bit generator at very large SF */
			if (scale >= 30000 && nStream == L_PKEY_SD)
				ADVANCE_STREAM64(nStream, skip_count);
			else
				ADVANCE_STREAM(nStream, skip_count);
		}
		ADVANCE_STREAM(L_CMNT_SD, skip_count * 2);
	}
	/* need to special case this as the link between master and detail */
	if (child == 1)
	{
		ADVANCE_STREAM(O_ODATE_SD, skip_count);
		ADVANCE_STREAM(O_LCNT_SD, skip_count);
	}
	return (0L);
}
/*
 * sd_order -- advance all ORDERS RNG streams past skip_count rows.
 * The customer-key stream switches to the 64-bit generator at very
 * large scale factors; the comment stream burns two calls per row.
 */
long
sd_order(int child, DSS_HUGE skip_count)
{
	(void) child;	/* unused; kept for the common sd_* signature */
	ADVANCE_STREAM(O_LCNT_SD, skip_count);
	if (scale < 30000)
		ADVANCE_STREAM(O_CKEY_SD, skip_count);
	else
		ADVANCE_STREAM64(O_CKEY_SD, skip_count);
	ADVANCE_STREAM(O_CMNT_SD, skip_count * 2);
	ADVANCE_STREAM(O_SUPP_SD, skip_count);
	ADVANCE_STREAM(O_CLRK_SD, skip_count);
	ADVANCE_STREAM(O_PRIO_SD, skip_count);
	ADVANCE_STREAM(O_ODATE_SD, skip_count);
	return (0L);
}
/*
 * sd_psupp -- advance the PARTSUPP RNG streams past skip_count part
 * rows; each part carries SUPP_PER_PART supplier entries, and the
 * comment stream burns two calls per entry.
 */
long
sd_psupp(int child, DSS_HUGE skip_count)
{
	int nSupp;

	(void) child;	/* unused; kept for the common sd_* signature */
	for (nSupp = SUPP_PER_PART; nSupp > 0; nSupp--)
	{
		ADVANCE_STREAM(PS_QTY_SD, skip_count);
		ADVANCE_STREAM(PS_SCST_SD, skip_count);
		ADVANCE_STREAM(PS_CMNT_SD, skip_count * 2);
	}
	return (0L);
}
/*
 * sd_cust -- advance all CUSTOMER RNG streams past skip_count rows
 * (address burns 9 calls per row, comment 2, phone 3, the rest 1).
 */
long
sd_cust(int child, DSS_HUGE skip_count)
{
	(void) child;	/* unused; kept for the common sd_* signature */
	ADVANCE_STREAM(C_NTRG_SD, skip_count);
	ADVANCE_STREAM(C_PHNE_SD, skip_count * 3L);
	ADVANCE_STREAM(C_ABAL_SD, skip_count);
	ADVANCE_STREAM(C_MSEG_SD, skip_count);
	ADVANCE_STREAM(C_ADDR_SD, skip_count * 9);
	ADVANCE_STREAM(C_CMNT_SD, skip_count * 2);
	return (0L);
}
/*
 * sd_supp -- advance all SUPPLIER RNG streams (including the BBB text
 * streams) past skip_count rows.
 */
long
sd_supp(int child, DSS_HUGE skip_count)
{
	(void) child;	/* unused; kept for the common sd_* signature */
	ADVANCE_STREAM(S_ADDR_SD, skip_count * 9);
	ADVANCE_STREAM(S_CMNT_SD, skip_count * 2);
	ADVANCE_STREAM(S_NTRG_SD, skip_count);
	ADVANCE_STREAM(S_PHNE_SD, skip_count * 3L);
	ADVANCE_STREAM(S_ABAL_SD, skip_count);
	ADVANCE_STREAM(BBB_OFFSET_SD, skip_count);
	ADVANCE_STREAM(BBB_TYPE_SD, skip_count);
	ADVANCE_STREAM(BBB_CMNT_SD, skip_count);
	ADVANCE_STREAM(BBB_JNK_SD, skip_count);
	return (0L);
}

View File

@ -0,0 +1,392 @@
/*
* $Id: text.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: text.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.6 2006/07/31 17:23:09 jms
* fix to parallelism problem
*
* Revision 1.5 2006/05/18 23:50:00 jms
* commit text generation change with larger buffer
*
* Revision 1.4 2006/05/16 16:26:51 jms
* remove calls to FAKE_V_STR
*
* Revision 1.3 2006/05/16 15:55:58 jms
* first cut to Meikel
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/08/07 17:58:34 jms
* recreation after CVS crash
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
 * text.c --- pseudo text generator for use in DBGEN 2.0
*
* Defined Routines:
 * dbg_text() -- select and translate a sentence form
*/
#ifdef TEXT_TEST
#define DECLARER
#endif /* TEST */
#include "config.h"
#include <stdlib.h>
#if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */
#include <unistd.h>
#include <sys/wait.h>
#endif /* WIN32 */
#include <stdio.h> /* */
#include <limits.h>
#include <math.h>
#include <ctype.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#ifdef HP
#include <strings.h>
#endif
#if (defined(WIN32)&&!defined(_POSIX_))
#include <process.h>
#pragma warning(disable:4201)
#pragma warning(disable:4214)
#pragma warning(disable:4514)
#define WIN32_LEAN_AND_MEAN
#define NOATOM
#define NOGDICAPMASKS
#define NOMETAFILE
#define NOMINMAX
#define NOMSG
#define NOOPENFILE
#define NORASTEROPS
#define NOSCROLL
#define NOSOUND
#define NOSYSMETRICS
#define NOTEXTMETRIC
#define NOWH
#define NOCOMM
#define NOKANJI
#define NOMCX
#include <windows.h>
#pragma warning(default:4201)
#pragma warning(default:4214)
#endif
#define TEXT_POOL_SIZE (300 * 1024 * 1024) /* 300MiB */
#include "dss.h"
#include "dsstypes.h"
/*
* txt_vp() --
* generate a verb phrase by
* 1) selecting a verb phrase form
* 2) parsing it to select parts of speech
* 3) selecting appropriate words
* 4) adding punctuation as required
*
* Returns: length of generated phrase
* Called By: txt_sentence()
* Calls: pick_str()
*/
/*
 * txt_vp() --
 * generate a verb phrase by selecting a verb-phrase form, scanning its
 * tokens for parts of speech (D=adverb, V=verb, X=auxiliary), picking a
 * word for each, and appending any punctuation attached to the token.
 *
 * Returns: length of generated phrase
 * Called By: txt_sentence()
 * Calls: pick_str()
 */
static int
txt_vp(char *dest, int sd)
{
	char syntax[MAX_GRAMMAR_LEN + 1],
		*cptr,
		*parse_target;
	distribution *src;
	int i,
		res = 0;

	pick_str(&vp, sd, &syntax[0]);
	parse_target = syntax;
	while ((cptr = strtok(parse_target, " ")) != NULL)
	{
		src = NULL;
		switch (*cptr)
		{
		case 'D':
			src = &adverbs;
			break;
		case 'V':
			src = &verbs;
			break;
		case 'X':
			src = &auxillaries;
			break;
		}	/* end of POS switch statement */
		i = pick_str(src, sd, dest);
		i = strlen(DIST_MEMBER(src, i));
		dest += i;
		res += i;
		if (*(++cptr))	/* miscelaneous fillagree, like punctuation */
		{
			/* BUG FIX: store the punctuation first, then advance.
			 * The old order (advance, then store) skipped a byte and
			 * let the trailing-space store below overwrite the
			 * punctuation; txt_np() already uses this order. */
			*dest = *cptr;
			dest += 1;
			res += 1;
		}
		*dest = ' ';
		dest++;
		res++;
		parse_target = NULL;	/* keep strtok() walking this buffer */
	}	/* end of while loop */
	return (res);
}
/*
* txt_np() --
* generate a noun phrase by
* 1) selecting a noun phrase form
* 2) parsing it to select parts of speech
* 3) selecting appropriate words
* 4) adding punctuation as required
*
* Returns: length of generated phrase
* Called By: txt_sentence()
* Calls: pick_str(),
*/
/*
 * txt_np() --
 * generate a noun phrase by selecting a noun-phrase form, scanning its
 * tokens for parts of speech (A=article, J=adjective, D=adverb, N=noun),
 * picking a word for each, and appending any punctuation attached to
 * the token.
 *
 * Returns: length of generated phrase
 * Called By: txt_sentence()
 * Calls: pick_str()
 */
static int
txt_np(char *dest, int sd)
{
	char syntax[MAX_GRAMMAR_LEN + 1],
		*token,
		*scan;
	distribution *src;
	int nLen,
		nTotal = 0;

	pick_str(&np, sd, &syntax[0]);
	scan = syntax;
	while ((token = strtok(scan, " ")) != NULL)
	{
		/* map the part-of-speech code to its word distribution */
		if (*token == 'A')
			src = &articles;
		else if (*token == 'J')
			src = &adjectives;
		else if (*token == 'D')
			src = &adverbs;
		else if (*token == 'N')
			src = &nouns;
		else
			src = NULL;
		nLen = pick_str(src, sd, dest);
		nLen = strlen(DIST_MEMBER(src, nLen));
		dest += nLen;
		nTotal += nLen;
		if (*(++token))	/* punctuation attached to the token */
		{
			*dest = *token;
			dest += 1;
			nTotal += 1;
		}
		*dest = ' ';
		dest++;
		nTotal++;
		scan = NULL;	/* keep strtok() walking this buffer */
	}
	return (nTotal);
}
/*
* txt_sentence() --
* generate a sentence by
* 1) selecting a sentence form
* 2) parsing it to select parts of speech or phrase types
* 3) selecting appropriate words
* 4) adding punctuation as required
*
* Returns: length of generated sentence
* Called By: dbg_text()
* Calls: pick_str(), txt_np(), txt_vp()
*/
/*
 * txt_sentence -- build one sentence into dest by selecting a sentence
 * form from the grammar distribution and expanding each token:
 * V = verb phrase, N = noun phrase, P = preposition + " the " + noun
 * phrase, T = terminator (abuts the previous word via --dest).
 * Returns the generated length minus one (the trailing space is not
 * counted); a negative result signals a malformed sentence to callers.
 */
static int
txt_sentence(char *dest, int sd)
{
char syntax[MAX_GRAMMAR_LEN + 1],
*cptr;
int i,
res = 0,
len = 0;
pick_str(&grammar, sd, syntax);
cptr = syntax;
next_token: /* I hate goto's, but can't seem to have parent and child use strtok() */
while (*cptr && *cptr == ' ')
cptr++;
if (*cptr == '\0')
goto done;
switch(*cptr)
{
case 'V':
len = txt_vp(dest, sd);
break;
case 'N':
len = txt_np(dest, sd);
break;
case 'P':
i = pick_str(&prepositions, sd, dest);
len = strlen(DIST_MEMBER(&prepositions, i));
strcpy((dest + len), " the ");
len += 5;
len += txt_np(dest + len, sd);
break;
case 'T':
i = pick_str(&terminators, sd, --dest); /*terminators should abut previous word */
len = strlen(DIST_MEMBER(&terminators, i));
break;
} /* end of POS switch statement */
dest += len;
res += len;
cptr++;
if (*cptr && *cptr != ' ') /* miscelaneous fillagree, like punctuation */
{
/* NOTE(review): this advances dest BEFORE storing, so the stored
 * punctuation sits one byte past the phrase and the next phrase
 * starts at the same position -- same suspicious order as txt_vp;
 * confirm whether intentional before changing */
dest += 1;
res += 1;
*dest = *cptr;
}
goto next_token;
done:
*dest = '\0';
return(--res);
}
/*
* dbg_text() --
* produce ELIZA-like text of random, bounded length, truncating the last
* generated sentence as required
*/
/*
 * dbg_text -- copy a random substring of pseudo-text, of random length
 * in [min, max], from a lazily built shared pool into tgt (always
 * NUL-terminated).
 * On first call, fills a static TEXT_POOL_SIZE-byte pool (300MiB; note
 * the large static footprint) with generated sentences, always using
 * stream 5 for the pool so pool content is independent of the caller's
 * stream; subsequent draws use the caller's stream sd for offset and
 * length only.
 */
void
dbg_text(char *tgt, int min, int max, int sd)
{
DSS_HUGE hgLength = 0,
hgOffset,
wordlen = 0,
s_len,
needed;
char sentence[MAX_SENT_LEN + 1],
*cp;
static char szTextPool[TEXT_POOL_SIZE + 1];
static int bInit = 0;
int nLifeNoise = 0;
if (!bInit)
{
cp = &szTextPool[0];
if (verbose)
fprintf(stderr, "\nPreloading text ... ");
while (wordlen < TEXT_POOL_SIZE)
{
/* progress report every ~200KB of generated text */
if (verbose && (wordlen > nLifeNoise))
{
nLifeNoise += 200000;
fprintf(stderr, "%3.0f%%\b\b\b\b", (100.0 * wordlen)/TEXT_POOL_SIZE);
}
s_len = txt_sentence(sentence, 5);
if ( s_len < 0)
INTERNAL_ERROR("Bad sentence formation");
needed = TEXT_POOL_SIZE - wordlen;
if (needed >= (s_len + 1)) /* need the entire sentence */
{
strcpy(cp, sentence);
cp += s_len;
wordlen += s_len + 1;
*(cp++) = ' ';
}
else /* chop the new sentence off to match the length target */
{
sentence[needed] = '\0';
strcpy(cp, sentence);
wordlen += needed;
cp += needed;
}
}
*cp = '\0';
bInit = 1;
if (verbose)
fprintf(stderr, "\n");
}
/* offset capped at POOL - max so any length in [min, max] fits */
RANDOM(hgOffset, 0, TEXT_POOL_SIZE - max, sd);
RANDOM(hgLength, min, max, sd);
strncpy(&tgt[0], &szTextPool[hgOffset], (int)hgLength);
tgt[hgLength] = '\0';
return;
}
#ifdef TEXT_TEST
tdef tdefs[1] = { NULL };
distribution nouns,
verbs,
adjectives,
adverbs,
auxillaries,
terminators,
articles,
prepositions,
grammar,
np,
vp;
/*
 * Stand-alone exerciser (compiled only under -DTEXT_TEST): load every
 * word distribution, then print generated text blocks forever.
 * NOTE(review): K&R implicit-int main(); the while(1) loop never exits,
 * so the trailing return(0) is unreachable.
 */
main()
{
char prattle[401];
verbose = 1;
read_dist (env_config (DIST_TAG, DIST_DFLT), "nouns", &nouns);
read_dist (env_config (DIST_TAG, DIST_DFLT), "verbs", &verbs);
read_dist (env_config (DIST_TAG, DIST_DFLT), "adjectives", &adjectives);
read_dist (env_config (DIST_TAG, DIST_DFLT), "adverbs", &adverbs);
read_dist (env_config (DIST_TAG, DIST_DFLT), "auxillaries", &auxillaries);
read_dist (env_config (DIST_TAG, DIST_DFLT), "terminators", &terminators);
read_dist (env_config (DIST_TAG, DIST_DFLT), "articles", &articles);
read_dist (env_config (DIST_TAG, DIST_DFLT), "prepositions", &prepositions);
read_dist (env_config (DIST_TAG, DIST_DFLT), "grammar", &grammar);
read_dist (env_config (DIST_TAG, DIST_DFLT), "np", &np);
read_dist (env_config (DIST_TAG, DIST_DFLT), "vp", &vp);
while (1)
{
dbg_text(&prattle[0], 300, 400, 0);
printf("<%s>\n", prattle);
}
return(0);
}
#endif /* TEST */

View File

@ -0,0 +1,125 @@
/*
* $Id: tpcd.h,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: tpcd.h,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* recreation after CVS crash
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*****************************************************************
* Title: tpcd.h for TPC D
*****************************************************************
*/
#define DFLT 0x0001
#define OUTPUT 0x0002
#define EXPLAIN 0x0004
#define DBASE 0x0008
#define VERBOSE 0x0010
#define TIMING 0x0020
#define LOG 0x0040
#define QUERY 0x0080
#define REFRESH 0x0100
#define ANSI 0x0200
#define SEED 0x0400
#define COMMENT 0x0800
#define INIT 0x1000
#define TERMINATE 0x2000
#define DFLT_NUM 0x4000
/*
* general defines
*/
#define VTAG ':' /* flags a variable substitution */
#define ofp stdout /* make the routine a filter */
#define QDIR_TAG "DSS_QUERY" /* variable to point to queries */
#define QDIR_DFLT "." /* and its default */
/*
* database portability defines
*/
#ifdef DB2
#define GEN_QUERY_PLAN "SET CURRENT EXPLAIN SNAPSHOT ON;"
#define START_TRAN ""
#define END_TRAN "COMMIT WORK;"
#define SET_OUTPUT ""
#define SET_ROWCOUNT "--#SET ROWS_FETCH %d\n"
#define SET_DBASE "CONNECT TO %s ;\n"
#endif
#ifdef INFORMIX
#define GEN_QUERY_PLAN "SET EXPLAIN ON;"
#define START_TRAN "BEGIN WORK;"
#define END_TRAN "COMMIT WORK;"
#define SET_OUTPUT "OUTPUT TO "
#define SET_ROWCOUNT "FIRST %d"
#define SET_DBASE "database %s ;\n"
#endif
#ifdef SQLSERVER
#define GEN_QUERY_PLAN "set showplan on\nset noexec on\ngo\n"
#define START_TRAN "begin transaction\ngo\n"
#define END_TRAN "commit transaction\ngo\n"
#define SET_OUTPUT ""
#define SET_ROWCOUNT "set rowcount %d\ngo\n\n"
#define SET_DBASE "use %s\ngo\n"
#endif
#ifdef SYBASE
#define GEN_QUERY_PLAN "set showplan on\nset noexec on\ngo\n"
#define START_TRAN "begin transaction\ngo\n"
#define END_TRAN "commit transaction\ngo\n"
#define SET_OUTPUT ""
#define SET_ROWCOUNT "set rowcount %d\ngo\n\n"
#define SET_DBASE "use %s\ngo\n"
#endif
#ifdef TDAT
#define GEN_QUERY_PLAN "EXPLAIN"
#define START_TRAN "BEGIN TRANSACTION"
#define END_TRAN "END TRANSACTION"
#define SET_OUTPUT ".SET FORMAT OFF\n.EXPORT REPORT file="
#define SET_ROWCOUNT ".SET RETCANCEL ON\n.SET RETLIMIT %d\n"
#define SET_DBASE ".LOGON %s\n"
#endif
#define MAX_VARS 8 /* max number of host vars in any query */
#define QLEN_MAX 2048 /* max length of any query */
#define QUERIES_PER_SET 22
#define MAX_PIDS 50
EXTERN int flags;
EXTERN int s_cnt;
EXTERN char *osuff;
EXTERN int stream;
EXTERN char *lfile;
EXTERN char *ifile;
EXTERN char *tfile;
#define MAX_PERMUTE 41
#ifdef DECLARER
int rowcnt_dflt[QUERIES_PER_SET + 1] =
{-1,-1,100,10,-1,-1,-1,-1,-1,-1,20,-1,-1,-1,-1,-1,-1,-1,100,-1,-1,100,-1};
int rowcnt;
#define SEQUENCE(stream, query) permutation[stream % MAX_PERMUTE][query - 1]
#else
extern int rowcnt_dflt[];
extern int rowcnt;
#endif

Binary file not shown.

View File

@ -0,0 +1,23 @@
#!/bin/sh
# update_release.sh VERSION RELEASE PATCH
# Regenerate release.h with an incremented BUILD number and commit it.
# Runs only in a checkout marked by an UPDATE_RELEASE_NUMBER file.
PATH=.:$PATH;export PATH
if [ -f UPDATE_RELEASE_NUMBER ]
then
cvs update
# first build when release.h is missing; otherwise bump the third
# space-separated field of the existing "#define BUILD n" line
if [ ! -f release.h ]
then
BUILD=1
else
BUILD=`grep BUILD release.h |cut -f3 -d' '`
BUILD=`expr $BUILD + 1`
fi
# heredoc body is written verbatim into release.h; the escaped \$Id
# keeps CVS keyword expansion working in the generated header
cat > release.h << __EOF__
/*
 * \$Id: update_release.sh,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
 */
#define VERSION $1
#define RELEASE $2
#define PATCH $3
#define BUILD $BUILD
__EOF__
cvs commit -m "update release number" release.h
fi

View File

@ -0,0 +1,370 @@
/*
* $Id: varsub.c,v 1.1.1.1 2007/03/14 15:01:09 olteanu Exp $
*
* Revision History
* ===================
* $Log: varsub.c,v $
* Revision 1.1.1.1 2007/03/14 15:01:09 olteanu
*
*
* Revision 1.1.1.1 2007/03/01 18:11:56 olteanu
*
*
* Revision 1.7 2006/05/31 22:25:21 jms
* Rework UnifInt calls in varsub to handle lack of PROTO defn in windows
*
* Revision 1.6 2006/05/25 22:30:44 jms
* qgen porting for 32b/64b
*
* Revision 1.5 2006/05/25 16:08:52 jms
* Rework UnifInt call for query 3
*
* Revision 1.4 2006/04/26 23:20:05 jms
* Data type clenaup for qgen
*
* Revision 1.3 2005/11/03 14:50:44 jms
* solaris porting changes
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* recreation after CVS crash
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
#include <stdio.h>
#ifndef _POSIX_SOURCE
#include <malloc.h>
#endif /* POSIX_SOURCE */
#if (defined(_POSIX_)||!defined(WIN32))
#include <unistd.h>
#endif /* WIN32 */
#include <string.h>
#include "config.h"
#include "dss.h"
#include "tpcd.h"
#ifdef ADHOC
#include "adhoc.h"
extern adhoc_t adhocs[];
#endif /* ADHOC */
#define MAX_PARAM 10 /* maximum number of parameter substitutions in a query */
extern long Seed[];
extern char **asc_date;
extern double flt_scale;
extern distribution q13a, q13b;
long *permute(long *set, int cnt, long stream);
/* Brand#MN suffixes: manufacturer digit (1-5) * 10 + brand digit (1-5).
 * NOTE(review): not referenced by the code visible in this file. */
long brands[25] = {11,12,13,14,15,21,22,23,24,25,31,32,33,34,35,
41,42,43,44,45,51,52,53,54,55};
/* candidate part sizes; permuted to pick the 8 size parameters of Q16 */
long sizes[50] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
41,42,43,44,45,46,47,48,49,50};
/* candidate pool permuted to pick the country-code parameters of Q22 */
long ccode[25] = {10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34};
/*
 * Spec-defined validation defaults for the substitution parameters:
 * defaults[qnum - 1][vnum - 1] is the vnum-th parameter of query qnum
 * (queries 1-22 plus the two update functions UF1/UF2, which take none).
 * Each row holds up to 10 values and is NULL-terminated; used when qgen
 * runs with the DFLT flag (-d).
 */
char *defaults[24][11] =
{
{"90", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 1 */
{"15", "BRASS", "EUROPE",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 2 */
{"BUILDING", "1995-03-15", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 3 */
{"1993-07-01", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 4 */
{"ASIA", "1994-01-01", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 5 */
{"1994-01-01", ".06", "24",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 6 */
{"FRANCE", "GERMANY", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 7 */
{"BRAZIL", "AMERICA", "ECONOMY ANODIZED STEEL",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL},/* 8 */
{"green", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 9 */
{"1993-10-01", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 10 */
{"GERMANY", "0.0001", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 11 */
{"MAIL", "SHIP", "1994-01-01",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 12 */
{"special", "requests", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 13 */
{"1995-09-01", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 14 */
{"1996-01-01", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 15 */
{"Brand#45", "MEDIUM POLISHED", "49",
"14","23","45","19","3","36","9", NULL}, /* 16 */
{"Brand#23", "MED BOX", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 17 */
{"300", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 18 */
{"Brand#12", "Brand#23", "Brand#34", "1", "10", "20", NULL, NULL, NULL, NULL, NULL}, /* 19 */
{"forest", "1994-01-01", "CANADA", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 20 */
{"SAUDI ARABIA", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* 21 */
{"13","31","23", "29", "30", "18", "17", NULL, NULL, NULL, NULL}, /* 22 */
{NULL,NULL,NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* UF1 */
{NULL,NULL,NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, /* UF2 */
};
/*
 * varsub -- produce the substitution parameters for TPC-H query qnum.
 *
 * vnum == 0:  build a complete parameter set for the query. With
 *             (flags & DFLT) the spec validation defaults are used (see
 *             defaults[]); otherwise values are drawn from the qgen
 *             random streams. With (flags & LOG) the chosen set is
 *             appended, tab separated, to the parameter log file lfile.
 * vnum  > 0:  emit the vnum-th parameter of the current set on ofp.
 *
 * The generated values persist between calls in the static param[]
 * array (param[i][0] == '\0' terminates a query's list); the printf
 * formats that embed the DSS_HUGE length modifier are built once on
 * first entry.
 *
 * Fix: the parameter-emission path previously tested
 * `param[vnum] && vnum <= MAX_PARAM`. For an array, `param[vnum]` is
 * always true, and evaluating it before the bounds check computes an
 * out-of-range index when vnum > MAX_PARAM + 1. The bounds check alone
 * is the intended (and behaviorally identical) condition.
 */
void
varsub(int qnum, int vnum, int flags)
{
	static char param[11][128];
	static char formats[23][128];
	static FILE *lfp = NULL;
	static int bInit = 0;
	long *lptr;
	char *ptr;
	int i = 0;
	DSS_HUGE tmp_date, tmp1, tmp2;

	if (!bInit)
	{
		/* formats[n] is (mostly) the date/brand format used by query n */
		sprintf(formats[4], "19%s-%s-01", HUGE_DATE_FORMAT, HUGE_DATE_FORMAT);
		sprintf(formats[5], "19%s-01-01", HUGE_DATE_FORMAT);
		sprintf(formats[6], "19%s-01-01", HUGE_DATE_FORMAT);
		sprintf(formats[7], "0.%s", HUGE_DATE_FORMAT); /* used by q6 */
		sprintf(formats[10], "19%s-%s-01", HUGE_DATE_FORMAT, HUGE_DATE_FORMAT);
		sprintf(formats[12], "19%s-01-01", HUGE_DATE_FORMAT);
		sprintf(formats[14], "19%s-01-01", HUGE_DATE_FORMAT);
		sprintf(formats[15], "19%s-01-01", HUGE_DATE_FORMAT);
		sprintf(formats[16], "Brand#%s%s", HUGE_FORMAT, HUGE_FORMAT);
		sprintf(formats[17], "Brand#%s%s", HUGE_FORMAT, HUGE_FORMAT);
		sprintf(formats[19], "Brand#%s%s", HUGE_FORMAT, HUGE_FORMAT);
		sprintf(formats[20], "19%s-01-01", HUGE_DATE_FORMAT);
		bInit = 1;
	}
	if (vnum == 0)
	{
		if ((flags & DFLT) == 0)
		{
			/* draw a fresh random parameter set for this query */
			switch (qnum)
			{
			case 1:
				sprintf(param[1], HUGE_FORMAT, UnifInt((DSS_HUGE)60,(DSS_HUGE)120,qnum));
				param[2][0] = '\0';
				break;
			case 2:
				sprintf(param[1], HUGE_FORMAT,
					UnifInt((DSS_HUGE)P_SIZE_MIN, (DSS_HUGE)P_SIZE_MAX, qnum));
				/* param[2] is the last word of a random part type */
				pick_str(&p_types_set, qnum, param[3]);
				ptr = param[3] + strlen(param[3]);
				while (*(ptr - 1) != ' ')
					ptr--;
				strcpy(param[2], ptr);
				pick_str(&regions, qnum, param[3]);
				param[4][0] = '\0';
				break;
			case 3:
				pick_str(&c_mseg_set, qnum, param[1]);
				/*
				 * pick a random offset within the month of march and add the
				 * appropriate magic numbers to position the output functions
				 * at the start of March '95
				 */
				RANDOM(tmp_date, 0, 30, qnum);
				strcpy(param[2], *(asc_date + tmp_date + 1155));
				param[3][0] = '\0';
				break;
			case 4:
				/* first day of a random month in 1993-04 .. 1997-10 */
				tmp_date = UnifInt((DSS_HUGE)1,(DSS_HUGE)58,qnum);
				sprintf(param[1], formats[4],
					93 + tmp_date/12, tmp_date%12 + 1);
				param[2][0] = '\0';
				break;
			case 5:
				pick_str(&regions, qnum, param[1]);
				tmp_date = UnifInt((DSS_HUGE)93, (DSS_HUGE)97, qnum);
				sprintf(param[2], formats[5], tmp_date);
				param[3][0] = '\0';
				break;
			case 6:
				tmp_date = UnifInt((DSS_HUGE)93,(DSS_HUGE)97,qnum);
				sprintf(param[1], formats[6], tmp_date);
				sprintf(param[2], formats[7],
					UnifInt((DSS_HUGE)2, (DSS_HUGE)9, qnum));
				sprintf(param[3], HUGE_FORMAT, UnifInt((DSS_HUGE)24, (DSS_HUGE)25, qnum));
				param[4][0] = '\0';
				break;
			case 7:
				/* two distinct nations */
				tmp_date = pick_str(&nations2, qnum, param[1]);
				while (pick_str(&nations2, qnum, param[2]) == tmp_date)
					;
				param[3][0] = '\0';
				break;
			case 8:
				/* a nation and the region containing it */
				tmp_date = pick_str(&nations2, qnum, param[1]);
				tmp_date = nations.list[tmp_date].weight;
				strcpy(param[2], regions.list[tmp_date].text);
				pick_str(&p_types_set, qnum, param[3]);
				param[4][0] = '\0';
				break;
			case 9:
				pick_str(&colors, qnum, param[1]);
				param[2][0] = '\0';
				break;
			case 10:
				tmp_date = UnifInt((DSS_HUGE)1,(DSS_HUGE)24,qnum);
				sprintf(param[1], formats[10],
					93 + tmp_date/12, tmp_date%12 + 1);
				param[2][0] = '\0';
				break;
			case 11:
				pick_str(&nations2, qnum, param[1]);
				/* fraction shrinks with the scale factor */
				sprintf(param[2], "%11.10f", Q11_FRACTION / flt_scale );
				param[3][0] = '\0';
				break;
			case 12:
				/* two distinct ship modes */
				tmp_date = pick_str(&l_smode_set, qnum, param[1]);
				while (tmp_date == pick_str(&l_smode_set, qnum, param[2]))
					;
				tmp_date = UnifInt((DSS_HUGE)93,(DSS_HUGE)97,qnum);
				sprintf(param[3], formats[12], tmp_date);
				param[4][0] = '\0';
				break;
			case 13:
				pick_str(&q13a, qnum, param[1]);
				pick_str(&q13b, qnum, param[2]);
				param[3][0] = '\0';
				break;
			case 14:
				tmp_date = UnifInt((DSS_HUGE)1,(DSS_HUGE)60,qnum);
				sprintf(param[1], formats[14],
					93 + tmp_date/12, tmp_date%12 + 1);
				param[2][0] = '\0';
				break;
			case 15:
				tmp_date = UnifInt((DSS_HUGE)1,(DSS_HUGE)58,qnum);
				sprintf(param[1], formats[15],
					93 + tmp_date/12, tmp_date%12 + 1);
				param[2][0] = '\0';
				break;
			case 16:
				tmp1 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				tmp2 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				sprintf(param[1], formats[16], tmp1, tmp2);
				/* strip the last word off the part type */
				pick_str(&p_types_set, qnum, param[2]);
				ptr = param[2] + strlen(param[2]);
				while (*(--ptr) != ' ')
					;
				*ptr = '\0';
				/* 8 distinct sizes from a permutation of sizes[];
				 * permute() keeps iterating when passed NULL */
				lptr = &sizes[0];
				for (i = 3; i <= MAX_PARAM; i++)
				{
					sprintf(param[i], "%ld", *permute(lptr,50,qnum) + 1);
					lptr = (long *)NULL;
				}
				break;
			case 17:
				tmp1 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				tmp2 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				sprintf(param[1], formats[17], tmp1, tmp2);
				pick_str(&p_cntr_set, qnum, param[2]);
				param[3][0] = '\0';
				break;
			case 18:
				sprintf(param[1], HUGE_FORMAT, UnifInt((DSS_HUGE)312, (DSS_HUGE)315, qnum));
				param[2][0] = '\0';
				break;
			case 19:
				/* three random brands and three quantity thresholds */
				tmp1 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				tmp2 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				sprintf(param[1], formats[19], tmp1, tmp2);
				tmp1 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				tmp2 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				sprintf(param[2], formats[19], tmp1, tmp2);
				tmp1 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				tmp2 = UnifInt((DSS_HUGE)1, (DSS_HUGE)5, qnum);
				sprintf(param[3], formats[19], tmp1, tmp2);
				sprintf(param[4], HUGE_FORMAT, UnifInt((DSS_HUGE)1, (DSS_HUGE)10, qnum));
				sprintf(param[5], HUGE_FORMAT, UnifInt((DSS_HUGE)10, (DSS_HUGE)20, qnum));
				sprintf(param[6], HUGE_FORMAT, UnifInt((DSS_HUGE)20, (DSS_HUGE)30, qnum));
				param[7][0] = '\0';
				break;
			case 20:
				pick_str(&colors, qnum, param[1]);
				tmp_date = UnifInt((DSS_HUGE)93,(DSS_HUGE)97,qnum);
				sprintf(param[2], formats[20], tmp_date);
				pick_str(&nations2, qnum, param[3]);
				param[4][0] = '\0';
				break;
			case 21:
				pick_str(&nations2, qnum, param[1]);
				param[2][0] = '\0';
				break;
			case 22:
				/* distinct country codes from a permutation of ccode[];
				 * param[8] is then cleared, leaving 7 parameters */
				lptr = &ccode[0];
				for (i = 0; i <= 7; i++)
				{
					sprintf(param[i+1], "%ld", 10 + *permute(lptr,25, qnum));
					lptr = (long *)NULL;
				}
				param[8][0] = '\0';
				break;
			case 23:
			case 24:
				/* update functions take no parameters */
				break;
			default:
				fprintf(stderr,
					"No variable definitions available for query %d\n",
					qnum);
				return;
			}
		}
		if (flags & LOG)
		{
			/* append the parameter set, tab separated, to the log file */
			if (lfp == NULL)
			{
				lfp = fopen(lfile, "a");
				OPEN_CHECK(lfp, lfile);
			}
			fprintf(lfp, "%d", qnum);
			for (i = 1; i <= 10; i++)
				if (flags & DFLT)
				{
					if (defaults[qnum - 1][i - 1] == NULL)
						break;
					else
						fprintf(lfp, "\t%s", defaults[qnum - 1][i - 1]);
				}
				else
				{
					if (param[i][0] == '\0')
						break;
					else
						fprintf(lfp, "\t%s", param[i]);
				}
			fprintf(lfp, "\n");
		}
	}
	else
	{
		if (flags & DFLT)
		{
			/* to allow -d to work at all scale factors */
			if (qnum == 11 && vnum == 2)
				fprintf(ofp, "%11.10f", Q11_FRACTION/flt_scale);
			else
				if (defaults[qnum - 1][vnum - 1])
					fprintf(ofp, "%s", defaults[qnum - 1][vnum - 1]);
				else
					fprintf(stderr,
						"Bad default request (q: %d, p: %d)\n",
						qnum, vnum);
		}
		else
		{
			/* bounds check BEFORE indexing param[] (see header comment) */
			if (vnum <= MAX_PARAM)
				fprintf(ofp, "%s", param[vnum]);
			else
				fprintf(stderr, "Bad parameter request (q: %d, p: %d)\n",
					qnum, vnum);
		}
	}
	return;
}

576
MayBMS-tpch/urel-ULDB.sql Normal file
View File

@ -0,0 +1,576 @@
--set search_path to public,data_s0_01_x0_001_z0_1_p0_25;
------------------------------------------
-- setup trio metadata tables
------------------------------------------
-- Catalog of Trio tables: type code, whether the table is a confidence
-- base table, and whether it is temporary.
CREATE TABLE trio_tables (
    tablename   name,
    "type"      integer,
    confbase    boolean,
    istemporary boolean
) WITHOUT OIDS;
CREATE INDEX trio_tables_idx
    ON trio_tables USING btree (tablename, "type", confbase);
-- Lineage edges: which source table each Trio table is derived from.
CREATE TABLE trio_tables_lin (
    tablename name,
    srctable  name
) WITHOUT OIDS;
CREATE INDEX trio_tables_lin_idx
    ON trio_tables_lin USING btree (tablename, srctable);
-- Per-table attribute catalog: column position and certainty flag.
CREATE TABLE trio_attribs (
    tablename name,
    attrname  name,
    colnum    integer,
    iscertain boolean
) WITHOUT OIDS;
CREATE INDEX trio_attribs_idx
    ON trio_attribs USING btree (tablename, attrname, colnum);
------------------------------------------------------------------------------------
-- create table wbase holding the pairs cid, lwid as a TRIO base relation
------------------------------------------------------------------------------------
-- Register wbase in the Trio catalog (a confidence base table).
insert into trio_tables(tablename, "type", confbase, istemporary) values('wbase',0,'t','f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('wbase','trio_xid',0,'f');
create table trio_c_wbase(trio_xid integer,trio_alts integer,trio_enc integer) WITHOUT OIDS;
create table trio_u_wbase(trio_xid integer,trio_aid integer) WITHOUT OIDS;
create table trio_lin_wbase(aid integer,disjlin integer,srclin integer,srctable name) WITHOUT OIDS;
-- One x-tuple per world-set descriptor variable (cid); its number of
-- alternatives is the number of w rows sharing that cid.
insert into trio_c_wbase(trio_xid, trio_alts, trio_enc)
select cid as trio_xid, count(*) as trio_alts, count(*) as trio_enc
from w group by cid;
insert into trio_u_wbase(trio_xid, trio_aid)
select cid as trio_xid, aid
from w;
create or replace view wbase as
select c.trio_xid, u.trio_aid, c.trio_alts, c.trio_enc, NULL::numeric AS trio_conf
FROM trio_u_wbase u, trio_c_wbase c
WHERE u.trio_xid = c.trio_xid
ORDER BY c.trio_xid;
CREATE INDEX trio_lin_wbase_idx
ON trio_lin_wbase
USING btree
(aid, srctable, srclin, disjlin);
CREATE INDEX trio_c_wbase_xid_idx
ON trio_c_wbase
USING btree
(trio_xid);
CREATE INDEX trio_u_wbase_xid_idx
ON trio_u_wbase
USING btree
(trio_xid);
------------------------------------------
-- nation
------------------------------------------
-- Catalog entries: tnation is derived (confbase = 'f'), lineage in wbase.
insert into trio_tables(tablename, "type", confbase, istemporary) values('tnation',0,'f','f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tnation','n_nationkey',0,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tnation','n_name',1,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tnation','n_regionkey',2,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tnation','n_comment',3,'f');
insert into trio_tables_lin(tablename, srctable) values('tnation','wbase');
create table trio_c_tnation(trio_xid integer,trio_alts integer,trio_enc integer) WITHOUT OIDS;
create table trio_u_tnation(n_nationkey integer,n_name character(25),n_regionkey integer,n_comment character varying(152),trio_xid integer,trio_aid integer) WITHOUT OIDS;
create table trio_lin_tnation(aid integer,disjlin integer,srclin integer,srctable name) WITHOUT OIDS;
-- populate tnation
insert into trio_c_tnation(trio_xid, trio_alts, trio_enc)
select tid as trio_xid, count(*) as trio_alts, count(*) as trio_enc
from nation group by tid;
insert into trio_u_tnation(n_nationkey, n_name, n_regionkey, n_comment, trio_xid, trio_aid)
select n_nationkey, n_name, n_regionkey, n_comment, tid as trio_xid, oid as trio_aid
from nation;
-- Lineage: an alternative depends on every world-table entry (cid, lwid)
-- referenced by one of its condition-column pairs c1/w1 .. c4/w4.
insert into trio_lin_tnation(aid, disjlin, srclin, srctable)
select oid as aid, oid as disjlin, w.aid as srclin, 'wbase'
from nation, w
where (nation.c1 = w.cid and nation.w1 = w.lwid) or
(nation.c2 = w.cid and nation.w2 = w.lwid) or
(nation.c3 = w.cid and nation.w3 = w.lwid) or
(nation.c4 = w.cid and nation.w4 = w.lwid);
create or replace view tnation as
select u.n_nationkey,u.n_name,u.n_regionkey,u.n_comment, c.trio_xid, u.trio_aid, c.trio_alts, c.trio_enc, NULL::numeric AS trio_conf
FROM trio_u_tnation u, trio_c_tnation c
WHERE u.trio_xid = c.trio_xid
ORDER BY c.trio_xid;
CREATE INDEX trio_lin_tnation_idx
ON trio_lin_tnation
USING btree
(aid, srctable, srclin, disjlin);
CREATE INDEX trio_c_tnation_xid_idx
ON trio_c_tnation
USING btree
(trio_xid);
CREATE INDEX trio_u_tnation_xid_idx
ON trio_u_tnation
USING btree
(trio_xid);
------------------------------------------
-- region
------------------------------------------
-- Catalog entries: tregion is derived (confbase = 'f'), lineage in wbase.
insert into trio_tables(tablename, "type", confbase, istemporary) values('tregion',0,'f','f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tregion','r_regionkey',0,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tregion','r_name',1,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tregion','r_comment',2,'f');
insert into trio_tables_lin(tablename, srctable) values('tregion','wbase');
create table trio_c_tregion(trio_xid integer,trio_alts integer,trio_enc integer) WITHOUT OIDS;
create table trio_u_tregion(r_regionkey integer,r_name character(25),r_comment character varying(152),trio_xid integer,trio_aid integer) WITHOUT OIDS;
create table trio_lin_tregion(aid integer,disjlin integer,srclin integer,srctable name) WITHOUT OIDS;
-- populate tregion
insert into trio_c_tregion(trio_xid, trio_alts, trio_enc)
select tid as trio_xid, count(*) as trio_alts, count(*) as trio_enc
from region group by tid;
insert into trio_u_tregion(r_regionkey, r_name, r_comment, trio_xid, trio_aid)
select r_regionkey, r_name, r_comment, tid as trio_xid, oid as trio_aid
from region;
-- Lineage: one edge per world-table entry matched by c1/w1 .. c3/w3.
insert into trio_lin_tregion(aid, disjlin, srclin, srctable)
select oid as aid, oid as disjlin, w.aid as srclin, 'wbase'
from region, w
where (region.c1 = w.cid and region.w1 = w.lwid) or
(region.c2 = w.cid and region.w2 = w.lwid) or
(region.c3 = w.cid and region.w3 = w.lwid);
create or replace view tregion as
select u.r_regionkey,u.r_name,u.r_comment, c.trio_xid, u.trio_aid, c.trio_alts, c.trio_enc, NULL::numeric AS trio_conf
FROM trio_u_tregion u, trio_c_tregion c
WHERE u.trio_xid = c.trio_xid
ORDER BY c.trio_xid;
CREATE INDEX trio_lin_tregion_idx
ON trio_lin_tregion
USING btree
(aid, srctable, srclin, disjlin);
CREATE INDEX trio_c_tregion_xid_idx
ON trio_c_tregion
USING btree
(trio_xid);
CREATE INDEX trio_u_tregion_xid_idx
ON trio_u_tregion
USING btree
(trio_xid);
------------------------------------------
-- part
------------------------------------------
-- Catalog entries: tpart is derived (confbase = 'f'), lineage in wbase.
insert into trio_tables(tablename, "type", confbase, istemporary) values('tpart',0,'f','f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpart','p_partkey',0,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpart','p_name',1,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpart','p_mfgr',2,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpart','p_brand',3,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpart','p_type',4,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpart','p_size',5,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpart','p_container',6,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpart','p_retailprice',7,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpart','p_comment',8,'f');
insert into trio_tables_lin(tablename, srctable) values('tpart','wbase');
create table trio_c_tpart(trio_xid integer,trio_alts integer,trio_enc integer) WITHOUT OIDS;
create table trio_u_tpart(p_partkey integer,p_name character varying(55),p_mfgr character(25),p_brand character(10),p_type character varying(25),
p_size integer,p_container character(10),p_retailprice numeric(15,2),p_comment character varying(23),trio_xid integer,trio_aid integer) WITHOUT OIDS;
create table trio_lin_tpart(aid integer,disjlin integer,srclin integer,srctable name) WITHOUT OIDS;
-- populate tpart
insert into trio_c_tpart(trio_xid, trio_alts, trio_enc)
select tid as trio_xid, count(*) as trio_alts, count(*) as trio_enc
from part group by tid;
insert into trio_u_tpart(p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, trio_xid, trio_aid)
select p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, tid as trio_xid, oid as trio_aid
from part;
-- Lineage: one edge per world-table entry matched by c1/w1 .. c9/w9.
insert into trio_lin_tpart(aid, disjlin, srclin, srctable)
select oid as aid, oid as disjlin, w.aid as srclin, 'wbase'
from part, w
where (part.c1 = w.cid and part.w1 = w.lwid) or
(part.c2 = w.cid and part.w2 = w.lwid) or
(part.c3 = w.cid and part.w3 = w.lwid) or
(part.c4 = w.cid and part.w4 = w.lwid) or
(part.c5 = w.cid and part.w5 = w.lwid) or
(part.c6 = w.cid and part.w6 = w.lwid) or
(part.c7 = w.cid and part.w7 = w.lwid) or
(part.c8 = w.cid and part.w8 = w.lwid) or
(part.c9 = w.cid and part.w9 = w.lwid);
create or replace view tpart as
select u.p_partkey, u.p_name, u.p_mfgr, u.p_brand, u.p_type, u.p_size, u.p_container, u.p_retailprice, u.p_comment,
c.trio_xid, u.trio_aid, c.trio_alts, c.trio_enc, NULL::numeric AS trio_conf
FROM trio_u_tpart u, trio_c_tpart c
WHERE u.trio_xid = c.trio_xid
ORDER BY c.trio_xid;
CREATE INDEX trio_lin_tpart_idx
ON trio_lin_tpart
USING btree
(aid, srctable, srclin, disjlin);
CREATE INDEX trio_c_tpart_xid_idx
ON trio_c_tpart
USING btree
(trio_xid);
CREATE INDEX trio_u_tpart_xid_idx
ON trio_u_tpart
USING btree
(trio_xid);
------------------------------------------
-- supplier
------------------------------------------
-- Catalog entries: tsupplier is derived (confbase = 'f'), lineage in wbase.
insert into trio_tables(tablename, "type", confbase, istemporary) values('tsupplier',0,'f','f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tsupplier','s_suppkey',0,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tsupplier','s_name',1,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tsupplier','s_address',2,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tsupplier','s_nationkey',3,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tsupplier','s_phone',4,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tsupplier','s_acctbal',5,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tsupplier','s_comment',6,'f');
insert into trio_tables_lin(tablename, srctable) values('tsupplier','wbase');
create table trio_c_tsupplier(trio_xid integer,trio_alts integer,trio_enc integer) WITHOUT OIDS;
create table trio_u_tsupplier(s_suppkey integer,s_name character(25),s_address character varying(40),s_nationkey integer,s_phone character(15),
s_acctbal numeric(15,2),s_comment character varying(101),
trio_xid integer,trio_aid integer) WITHOUT OIDS;
create table trio_lin_tsupplier(aid integer,disjlin integer,srclin integer,srctable name) WITHOUT OIDS;
-- populate tsupplier
insert into trio_c_tsupplier(trio_xid, trio_alts, trio_enc)
select tid as trio_xid, count(*) as trio_alts, count(*) as trio_enc
from supplier group by tid;
insert into trio_u_tsupplier(s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, trio_xid, trio_aid)
select s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, tid as trio_xid, oid as trio_aid
from supplier;
-- Lineage: one edge per world-table entry matched by c1/w1 .. c7/w7.
insert into trio_lin_tsupplier(aid, disjlin, srclin, srctable)
select oid as aid, oid as disjlin, w.aid as srclin, 'wbase'
from supplier, w
where (supplier.c1 = w.cid and supplier.w1 = w.lwid) or
(supplier.c2 = w.cid and supplier.w2 = w.lwid) or
(supplier.c3 = w.cid and supplier.w3 = w.lwid) or
(supplier.c4 = w.cid and supplier.w4 = w.lwid) or
(supplier.c5 = w.cid and supplier.w5 = w.lwid) or
(supplier.c6 = w.cid and supplier.w6 = w.lwid) or
(supplier.c7 = w.cid and supplier.w7 = w.lwid);
create or replace view tsupplier as
select u.s_suppkey, u.s_name, u.s_address, u.s_nationkey, u.s_phone, u.s_acctbal, u.s_comment,
c.trio_xid, u.trio_aid, c.trio_alts, c.trio_enc, NULL::numeric AS trio_conf
FROM trio_u_tsupplier u, trio_c_tsupplier c
WHERE u.trio_xid = c.trio_xid
ORDER BY c.trio_xid;
CREATE INDEX trio_lin_tsupplier_idx
ON trio_lin_tsupplier
USING btree
(aid, srctable, srclin, disjlin);
CREATE INDEX trio_c_tsupplier_xid_idx
ON trio_c_tsupplier
USING btree
(trio_xid);
CREATE INDEX trio_u_tsupplier_xid_idx
ON trio_u_tsupplier
USING btree
(trio_xid);
------------------------------------------
-- partsupp
------------------------------------------
-- Catalog entries: tpartsupp is derived (confbase = 'f'), lineage in wbase.
insert into trio_tables(tablename, "type", confbase, istemporary) values('tpartsupp',0,'f','f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpartsupp','ps_partkey',0,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpartsupp','ps_suppkey',1,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpartsupp','ps_availqty',2,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpartsupp','ps_supplycost',3,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tpartsupp','ps_comment',4,'f');
insert into trio_tables_lin(tablename, srctable) values('tpartsupp','wbase');
create table trio_c_tpartsupp(trio_xid integer,trio_alts integer,trio_enc integer) WITHOUT OIDS;
create table trio_u_tpartsupp(ps_partkey integer, ps_suppkey integer,ps_availqty integer,
ps_supplycost numeric(15,2),ps_comment character varying(199),
trio_xid integer,trio_aid integer) WITHOUT OIDS;
create table trio_lin_tpartsupp(aid integer,disjlin integer,srclin integer,srctable name) WITHOUT OIDS;
-- populate tpartsupp
insert into trio_c_tpartsupp(trio_xid, trio_alts, trio_enc)
select tid as trio_xid, count(*) as trio_alts, count(*) as trio_enc
from partsupp group by tid;
insert into trio_u_tpartsupp(ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, trio_xid, trio_aid)
select ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, tid as trio_xid, oid as trio_aid
from partsupp;
-- Lineage: one edge per world-table entry matched by c1/w1 .. c5/w5.
insert into trio_lin_tpartsupp(aid, disjlin, srclin, srctable)
select oid as aid, oid as disjlin, w.aid as srclin, 'wbase'
from partsupp, w
where (partsupp.c1 = w.cid and partsupp.w1 = w.lwid) or
(partsupp.c2 = w.cid and partsupp.w2 = w.lwid) or
(partsupp.c3 = w.cid and partsupp.w3 = w.lwid) or
(partsupp.c4 = w.cid and partsupp.w4 = w.lwid) or
(partsupp.c5 = w.cid and partsupp.w5 = w.lwid);
create or replace view tpartsupp as
-- attribute columns qualified with u. for consistency with the sibling views
select u.ps_partkey, u.ps_suppkey, u.ps_availqty, u.ps_supplycost, u.ps_comment,
c.trio_xid, u.trio_aid, c.trio_alts, c.trio_enc, NULL::numeric AS trio_conf
FROM trio_u_tpartsupp u, trio_c_tpartsupp c
WHERE u.trio_xid = c.trio_xid
ORDER BY c.trio_xid;
CREATE INDEX trio_lin_tpartsupp_idx
ON trio_lin_tpartsupp
USING btree
(aid, srctable, srclin, disjlin);
CREATE INDEX trio_c_tpartsupp_xid_idx
ON trio_c_tpartsupp
USING btree
(trio_xid);
CREATE INDEX trio_u_tpartsupp_xid_idx
ON trio_u_tpartsupp
USING btree
(trio_xid);
------------------------------------------
-- customer
------------------------------------------
-- Catalog entries: tcustomer is derived (confbase = 'f'), lineage in wbase.
insert into trio_tables(tablename, "type", confbase, istemporary) values('tcustomer',0,'f','f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tcustomer','c_custkey',0,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tcustomer','c_name',1,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tcustomer','c_address',2,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tcustomer','c_nationkey',3,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tcustomer','c_phone',4,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tcustomer','c_acctbal',5,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tcustomer','c_mktsegment',6,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tcustomer','c_comment',7,'f');
insert into trio_tables_lin(tablename, srctable) values('tcustomer','wbase');
create table trio_c_tcustomer(trio_xid integer,trio_alts integer,trio_enc integer) WITHOUT OIDS;
create table trio_u_tcustomer(c_custkey integer,c_name character varying(25),c_address character varying(40),c_nationkey integer,
c_phone character(15),c_acctbal numeric(15,2),c_mktsegment character(10),c_comment character varying(117),
trio_xid integer,trio_aid integer) WITHOUT OIDS;
create table trio_lin_tcustomer(aid integer,disjlin integer,srclin integer,srctable name) WITHOUT OIDS;
-- populate tcustomer
insert into trio_c_tcustomer(trio_xid, trio_alts, trio_enc)
select tid as trio_xid, count(*) as trio_alts, count(*) as trio_enc
from customer group by tid;
insert into trio_u_tcustomer(c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, trio_xid, trio_aid)
select c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, tid as trio_xid, oid as trio_aid
from customer;
-- Lineage: one edge per world-table entry matched by c1/w1 .. c8/w8.
insert into trio_lin_tcustomer(aid, disjlin, srclin, srctable)
select oid as aid, oid as disjlin, w.aid as srclin, 'wbase'
from customer, w
where (customer.c1 = w.cid and customer.w1 = w.lwid) or
(customer.c2 = w.cid and customer.w2 = w.lwid) or
(customer.c3 = w.cid and customer.w3 = w.lwid) or
(customer.c4 = w.cid and customer.w4 = w.lwid) or
(customer.c5 = w.cid and customer.w5 = w.lwid) or
(customer.c6 = w.cid and customer.w6 = w.lwid) or
(customer.c7 = w.cid and customer.w7 = w.lwid) or
(customer.c8 = w.cid and customer.w8 = w.lwid);
create or replace view tcustomer as
select u.c_custkey, u.c_name, u.c_address, u.c_nationkey, u.c_phone, u.c_acctbal, u.c_mktsegment, u.c_comment,
c.trio_xid, u.trio_aid, c.trio_alts, c.trio_enc, NULL::numeric AS trio_conf
FROM trio_u_tcustomer u, trio_c_tcustomer c
WHERE u.trio_xid = c.trio_xid
ORDER BY c.trio_xid;
CREATE INDEX trio_lin_tcustomer_idx
ON trio_lin_tcustomer
USING btree
(aid, srctable, srclin, disjlin);
CREATE INDEX trio_c_tcustomer_xid_idx
ON trio_c_tcustomer
USING btree
(trio_xid);
CREATE INDEX trio_u_tcustomer_xid_idx
ON trio_u_tcustomer
USING btree
(trio_xid);
------------------------------------------
-- orders
------------------------------------------
-- Catalog entries: torders is derived (confbase = 'f'), lineage in wbase.
insert into trio_tables(tablename, "type", confbase, istemporary) values('torders',0,'f','f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('torders','o_orderkey',0,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('torders','o_custkey',1,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('torders','o_orderstatus',2,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('torders','o_totalprice',3,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('torders','o_orderdate',4,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('torders','o_orderpriority',5,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('torders','o_clerk',6,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('torders','o_shippriority',7,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('torders','o_comment',8,'f');
insert into trio_tables_lin(tablename, srctable) values('torders','wbase');
create table trio_c_torders(trio_xid integer,trio_alts integer,trio_enc integer) WITHOUT OIDS;
create table trio_u_torders(o_orderkey integer,o_custkey integer,o_orderstatus character(1),o_totalprice numeric(15,2),
o_orderdate date,o_orderpriority character(15),o_clerk character(15),o_shippriority integer,o_comment character varying(79),
trio_xid integer,trio_aid integer) WITHOUT OIDS;
create table trio_lin_torders(aid integer,disjlin integer,srclin integer,srctable name) WITHOUT OIDS;
-- populate torders
insert into trio_c_torders(trio_xid, trio_alts, trio_enc)
select tid as trio_xid, count(*) as trio_alts, count(*) as trio_enc
from orders group by tid;
insert into trio_u_torders(o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, trio_xid, trio_aid)
select o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, tid as trio_xid, oid as trio_aid
from orders;
-- Lineage: one edge per world-table entry matched by c1/w1 .. c9/w9.
insert into trio_lin_torders(aid, disjlin, srclin, srctable)
select oid as aid, oid as disjlin, w.aid as srclin, 'wbase'
from orders, w
where (orders.c1 = w.cid and orders.w1 = w.lwid) or
(orders.c2 = w.cid and orders.w2 = w.lwid) or
(orders.c3 = w.cid and orders.w3 = w.lwid) or
(orders.c4 = w.cid and orders.w4 = w.lwid) or
(orders.c5 = w.cid and orders.w5 = w.lwid) or
(orders.c6 = w.cid and orders.w6 = w.lwid) or
(orders.c7 = w.cid and orders.w7 = w.lwid) or
(orders.c8 = w.cid and orders.w8 = w.lwid) or
(orders.c9 = w.cid and orders.w9 = w.lwid);
create or replace view torders as
select u.o_orderkey, u.o_custkey, u.o_orderstatus, u.o_totalprice, u.o_orderdate, u.o_orderpriority, u.o_clerk, u.o_shippriority, u.o_comment,
c.trio_xid, u.trio_aid, c.trio_alts, c.trio_enc, NULL::numeric AS trio_conf
FROM trio_u_torders u, trio_c_torders c
WHERE u.trio_xid = c.trio_xid
ORDER BY c.trio_xid;
CREATE INDEX trio_lin_torders_idx
ON trio_lin_torders
USING btree
(aid, srctable, srclin, disjlin);
CREATE INDEX trio_c_torders_xid_idx
ON trio_c_torders
USING btree
(trio_xid);
CREATE INDEX trio_u_torders_xid_idx
ON trio_u_torders
USING btree
(trio_xid);
------------------------------------------
-- lineitem
------------------------------------------
-- Catalog entries: tlineitem is derived (confbase = 'f'), lineage in wbase.
insert into trio_tables(tablename, "type", confbase, istemporary) values('tlineitem',0,'f','f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_orderkey',0,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_partkey',1,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_suppkey',2,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_linenumber',3,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_quantity',4,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_extendedprice',5,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_discount',6,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_tax',7,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_returnflag',8,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_linestatus',9,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_shipdate',10,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_commitdate',11,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_receiptdate',12,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_shipinstruct',13,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_shipmode',14,'f');
insert into trio_attribs(tablename, attrname, colnum, iscertain) values('tlineitem','l_comment',15,'f');
insert into trio_tables_lin(tablename, srctable) values('tlineitem','wbase');
create table trio_c_tlineitem(trio_xid integer,trio_alts integer,trio_enc integer) WITHOUT OIDS;
create table trio_u_tlineitem(l_orderkey integer,l_partkey integer,l_suppkey integer,l_linenumber integer,
l_quantity numeric(15,2),l_extendedprice numeric(15,2),l_discount numeric(15,2),l_tax numeric(15,2),
l_returnflag character(1),l_linestatus character(1),l_shipdate date,l_commitdate date,l_receiptdate date,
l_shipinstruct character(25),l_shipmode character(10),l_comment character varying(44),
trio_xid integer,trio_aid integer) WITHOUT OIDS;
create table trio_lin_tlineitem(aid integer,disjlin integer,srclin integer,srctable name) WITHOUT OIDS;
-- populate tlineitem
insert into trio_c_tlineitem(trio_xid, trio_alts, trio_enc)
select tid as trio_xid, count(*) as trio_alts, count(*) as trio_enc
from lineitem group by tid;
insert into trio_u_tlineitem(l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,
l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, trio_xid, trio_aid)
select l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,
l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment, tid as trio_xid, oid as trio_aid
from lineitem;
-- Lineage: one edge per world-table entry matched by c1/w1 .. c16/w16.
insert into trio_lin_tlineitem(aid, disjlin, srclin, srctable)
select oid as aid, oid as disjlin, w.aid as srclin, 'wbase'
from lineitem, w
where (lineitem.c1 = w.cid and lineitem.w1 = w.lwid) or
(lineitem.c2 = w.cid and lineitem.w2 = w.lwid) or
(lineitem.c3 = w.cid and lineitem.w3 = w.lwid) or
(lineitem.c4 = w.cid and lineitem.w4 = w.lwid) or
(lineitem.c5 = w.cid and lineitem.w5 = w.lwid) or
(lineitem.c6 = w.cid and lineitem.w6 = w.lwid) or
(lineitem.c7 = w.cid and lineitem.w7 = w.lwid) or
(lineitem.c8 = w.cid and lineitem.w8 = w.lwid) or
(lineitem.c9 = w.cid and lineitem.w9 = w.lwid) or
(lineitem.c10 = w.cid and lineitem.w10 = w.lwid) or
(lineitem.c11 = w.cid and lineitem.w11 = w.lwid) or
(lineitem.c12 = w.cid and lineitem.w12 = w.lwid) or
(lineitem.c13 = w.cid and lineitem.w13 = w.lwid) or
(lineitem.c14 = w.cid and lineitem.w14 = w.lwid) or
(lineitem.c15 = w.cid and lineitem.w15 = w.lwid) or
-- BUG FIX: c16 was paired with w8 (copy-paste from the c8 line); it
-- must be paired with w16 like every other ci/wi pair.
(lineitem.c16 = w.cid and lineitem.w16 = w.lwid);
create or replace view tlineitem as
select u.l_orderkey,u.l_partkey,u.l_suppkey,u.l_linenumber,u.l_quantity,u.l_extendedprice,u.l_discount,u.l_tax,u.l_returnflag,
u.l_linestatus,u.l_shipdate,u.l_commitdate,u.l_receiptdate,u.l_shipinstruct,u.l_shipmode,u.l_comment,
c.trio_xid, u.trio_aid, c.trio_alts, c.trio_enc, NULL::numeric AS trio_conf
FROM trio_u_tlineitem u, trio_c_tlineitem c
WHERE u.trio_xid = c.trio_xid
ORDER BY c.trio_xid;
CREATE INDEX trio_lin_tlineitem_idx
ON trio_lin_tlineitem
USING btree
(aid, srctable, srclin, disjlin);
CREATE INDEX trio_c_tlineitem_xid_idx
ON trio_c_tlineitem
USING btree
(trio_xid);
CREATE INDEX trio_u_tlineitem_xid_idx
ON trio_u_tlineitem
USING btree
(trio_xid);

View File

@ -0,0 +1,151 @@
--set search_path to data_s0_1_x0_001_z0_1_m8;
-- Map the per-attribute U-relations of NATION to a single tuple-level
-- U-relation: join on tid (explicit INNER JOINs instead of the comma-join
-- form) and keep only world-compatible combinations of alternatives --
-- (Ci <> Cj or Wi = Wj) for every pair of condition columns.
create table nation with oids as
select U1.c1, U1.w1, U2.c1 as c2, U2.w1 as w2, U3.c1 as c3, U3.w1 as w3, U4.c1 as c4, U4.w1 as w4,
U1.tid, n_nationkey, n_name, n_regionkey, n_comment
from u_n_nationkey U1
inner join u_n_name U2 on U1.tid = U2.tid
inner join u_n_regionkey U3 on U1.tid = U3.tid
inner join u_n_comment U4 on U1.tid = U4.tid
where (U1.C1<>U2.C1 or U1.W1=U2.W1) and (U1.C1<>U3.C1 or U1.W1=U3.W1) and (U1.C1<>U4.C1 or U1.W1=U4.W1) and (U2.C1<>U3.C1 or U2.W1=U3.W1) and (U2.C1<>U4.C1 or U2.W1=U4.W1) and (U3.C1<>U4.C1 or U3.W1=U4.W1);
-- Tuple-level U-relation for REGION: join the per-attribute U-relations
-- on tid and keep only world-compatible alternative combinations.
create table region with oids as
select U1.c1, U1.w1, U2.c1 as c2, U2.w1 as w2, U3.c1 as c3, U3.w1 as w3,
U1.tid, r_regionkey, r_name, r_comment
from u_r_regionkey U1
inner join u_r_name U2 on U1.tid = U2.tid
inner join u_r_comment U3 on U1.tid = U3.tid
where (U1.C1<>U2.C1 or U1.W1=U2.W1) and (U1.C1<>U3.C1 or U1.W1=U3.W1) and (U2.C1<>U3.C1 or U2.W1=U3.W1);
create table part with oids as
select U1.c1, U1.w1, U2.c1 as c2, U2.w1 as w2, U3.c1 as c3, U3.w1 as w3, U4.c1 as c4, U4.w1 as w4, U5.c1 as c5, U5.w1 as w5,
U6.c1 as c6, U6.w1 as w6, U7.c1 as c7, U7.w1 as w7, U8.c1 as c8, U8.w1 as w8, U9.c1 as c9, U9.w1 as w9,
U1.tid, p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment
from u_p_partkey U1, u_p_name U2, u_p_mfgr U3, u_p_brand U4, u_p_type U5, u_p_size U6, u_p_container U7, u_p_retailprice U8, u_p_comment U9
where U1.tid=U2.tid and U1.tid=U3.tid and U1.tid=U4.tid and U1.tid=U5.tid and
U1.tid=U6.tid and U1.tid=U7.tid and U1.tid=U8.tid and U1.tid=U9.tid and
(U1.C1<>U2.C1 or U1.W1=U2.W1) and (U1.C1<>U3.C1 or U1.W1=U3.W1) and (U1.C1<>U4.C1 or U1.W1=U4.W1) and (U1.C1<>U5.C1 or U1.W1=U5.W1) and (U1.C1<>U6.C1 or U1.W1=U6.W1) and (U1.C1<>U7.C1 or U1.W1=U7.W1) and (U1.C1<>U8.C1 or U1.W1=U8.W1) and (U1.C1<>U9.C1 or U1.W1=U9.W1) and (U2.C1<>U3.C1 or U2.W1=U3.W1) and (U2.C1<>U4.C1 or U2.W1=U4.W1) and (U2.C1<>U5.C1 or U2.W1=U5.W1) and (U2.C1<>U6.C1 or U2.W1=U6.W1) and (U2.C1<>U7.C1 or U2.W1=U7.W1) and (U2.C1<>U8.C1 or U2.W1=U8.W1) and (U2.C1<>U9.C1 or U2.W1=U9.W1) and (U3.C1<>U4.C1 or U3.W1=U4.W1) and (U3.C1<>U5.C1 or U3.W1=U5.W1) and (U3.C1<>U6.C1 or U3.W1=U6.W1) and (U3.C1<>U7.C1 or U3.W1=U7.W1) and (U3.C1<>U8.C1 or U3.W1=U8.W1) and (U3.C1<>U9.C1 or U3.W1=U9.W1) and (U4.C1<>U5.C1 or U4.W1=U5.W1) and (U4.C1<>U6.C1 or U4.W1=U6.W1) and (U4.C1<>U7.C1 or U4.W1=U7.W1) and (U4.C1<>U8.C1 or U4.W1=U8.W1) and (U4.C1<>U9.C1 or U4.W1=U9.W1) and (U5.C1<>U6.C1 or U5.W1=U6.W1) and (U5.C1<>U7.C1 or U5.W1=U7.W1) and (U5.C1<>U8.C1 or U5.W1=U8.W1) and (U5.C1<>U9.C1 or U5.W1=U9.W1) and (U6.C1<>U7.C1 or U6.W1=U7.W1) and (U6.C1<>U8.C1 or U6.W1=U8.W1) and (U6.C1<>U9.C1 or U6.W1=U9.W1) and (U7.C1<>U8.C1 or U7.W1=U8.W1) and (U7.C1<>U9.C1 or U7.W1=U9.W1) and (U8.C1<>U9.C1 or U8.W1=U9.W1);
create table supplier with oids as
select U1.c1, U1.w1, U2.c1 as c2, U2.w1 as w2, U3.c1 as c3, U3.w1 as w3, U4.c1 as c4, U4.w1 as w4, U5.c1 as c5, U5.w1 as w5,
U6.c1 as c6, U6.w1 as w6, U7.c1 as c7, U7.w1 as w7,
U1.tid, s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment
from u_s_suppkey U1, u_s_name U2, u_s_address U3, u_s_nationkey U4, u_s_phone U5, u_s_acctbal U6, u_s_comment U7
where U1.tid=U2.tid and U1.tid=U3.tid and U1.tid=U4.tid and U1.tid=U5.tid and U1.tid=U6.tid and U1.tid=U7.tid and
(U1.C1<>U2.C1 or U1.W1=U2.W1) and (U1.C1<>U3.C1 or U1.W1=U3.W1) and (U1.C1<>U4.C1 or U1.W1=U4.W1) and (U1.C1<>U5.C1 or U1.W1=U5.W1) and (U1.C1<>U6.C1 or U1.W1=U6.W1) and (U1.C1<>U7.C1 or U1.W1=U7.W1) and (U2.C1<>U3.C1 or U2.W1=U3.W1) and (U2.C1<>U4.C1 or U2.W1=U4.W1) and (U2.C1<>U5.C1 or U2.W1=U5.W1) and (U2.C1<>U6.C1 or U2.W1=U6.W1) and (U2.C1<>U7.C1 or U2.W1=U7.W1) and (U3.C1<>U4.C1 or U3.W1=U4.W1) and (U3.C1<>U5.C1 or U3.W1=U5.W1) and (U3.C1<>U6.C1 or U3.W1=U6.W1) and (U3.C1<>U7.C1 or U3.W1=U7.W1) and (U4.C1<>U5.C1 or U4.W1=U5.W1) and (U4.C1<>U6.C1 or U4.W1=U6.W1) and (U4.C1<>U7.C1 or U4.W1=U7.W1) and (U5.C1<>U6.C1 or U5.W1=U6.W1) and (U5.C1<>U7.C1 or U5.W1=U7.W1) and (U6.C1<>U7.C1 or U6.W1=U7.W1);
-- partsupp: rebuild the tuple-level partsupp relation from its five
-- per-attribute U-relations.  Joined on tid; the pairwise
-- (Ui.C1<>Uj.C1 or Ui.W1=Uj.W1) conjuncts admit only world-consistent
-- combinations of the per-attribute alternatives.
create table partsupp with oids as
select U1.c1, U1.w1, U2.c1 as c2, U2.w1 as w2, U3.c1 as c3, U3.w1 as w3, U4.c1 as c4, U4.w1 as w4, U5.c1 as c5, U5.w1 as w5,
U1.tid, ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment
from u_ps_partkey U1, u_ps_suppkey U2, u_ps_availqty U3, u_ps_supplycost U4, u_ps_comment U5
where U1.tid=U2.tid and U1.tid=U3.tid and U1.tid=U4.tid and U1.tid=U5.tid and
(U1.C1<>U2.C1 or U1.W1=U2.W1) and (U1.C1<>U3.C1 or U1.W1=U3.W1) and (U1.C1<>U4.C1 or U1.W1=U4.W1) and (U1.C1<>U5.C1 or U1.W1=U5.W1) and (U2.C1<>U3.C1 or U2.W1=U3.W1) and (U2.C1<>U4.C1 or U2.W1=U4.W1) and (U2.C1<>U5.C1 or U2.W1=U5.W1) and (U3.C1<>U4.C1 or U3.W1=U4.W1) and (U3.C1<>U5.C1 or U3.W1=U5.W1) and (U4.C1<>U5.C1 or U4.W1=U5.W1);
create table customer with oids as
select U1.c1, U1.w1, U2.c1 as c2, U2.w1 as w2, U3.c1 as c3, U3.w1 as w3, U4.c1 as c4, U4.w1 as w4, U5.c1 as c5, U5.w1 as w5,
U6.c1 as c6, U6.w1 as w6, U7.c1 as c7, U7.w1 as w7, U8.c1 as c8, U8.w1 as w8,
U1.tid, c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment
from u_c_custkey U1, u_c_name U2, u_c_address U3, u_c_nationkey U4, u_c_phone U5, u_c_acctbal U6, u_c_mktsegment U7, u_c_comment U8
where U1.tid=U2.tid and U1.tid=U3.tid and U1.tid=U4.tid and U1.tid=U5.tid and
U1.tid=U6.tid and U1.tid=U7.tid and U1.tid=U8.tid and
(U1.C1<>U2.C1 or U1.W1=U2.W1) and (U1.C1<>U3.C1 or U1.W1=U3.W1) and (U1.C1<>U4.C1 or U1.W1=U4.W1) and (U1.C1<>U5.C1 or U1.W1=U5.W1) and (U1.C1<>U6.C1 or U1.W1=U6.W1) and (U1.C1<>U7.C1 or U1.W1=U7.W1) and (U1.C1<>U8.C1 or U1.W1=U8.W1) and (U2.C1<>U3.C1 or U2.W1=U3.W1) and (U2.C1<>U4.C1 or U2.W1=U4.W1) and (U2.C1<>U5.C1 or U2.W1=U5.W1) and (U2.C1<>U6.C1 or U2.W1=U6.W1) and (U2.C1<>U7.C1 or U2.W1=U7.W1) and (U2.C1<>U8.C1 or U2.W1=U8.W1) and (U3.C1<>U4.C1 or U3.W1=U4.W1) and (U3.C1<>U5.C1 or U3.W1=U5.W1) and (U3.C1<>U6.C1 or U3.W1=U6.W1) and (U3.C1<>U7.C1 or U3.W1=U7.W1) and (U3.C1<>U8.C1 or U3.W1=U8.W1) and (U4.C1<>U5.C1 or U4.W1=U5.W1) and (U4.C1<>U6.C1 or U4.W1=U6.W1) and (U4.C1<>U7.C1 or U4.W1=U7.W1) and (U4.C1<>U8.C1 or U4.W1=U8.W1) and (U5.C1<>U6.C1 or U5.W1=U6.W1) and (U5.C1<>U7.C1 or U5.W1=U7.W1) and (U5.C1<>U8.C1 or U5.W1=U8.W1) and (U6.C1<>U7.C1 or U6.W1=U7.W1) and (U6.C1<>U8.C1 or U6.W1=U8.W1) and (U7.C1<>U8.C1 or U7.W1=U8.W1);
create table orders with oids as
select U1.c1, U1.w1, U2.c1 as c2, U2.w1 as w2, U3.c1 as c3, U3.w1 as w3, U4.c1 as c4, U4.w1 as w4, U5.c1 as c5, U5.w1 as w5,
U6.c1 as c6, U6.w1 as w6, U7.c1 as c7, U7.w1 as w7, U8.c1 as c8, U8.w1 as w8, U9.c1 as c9, U9.w1 as w9,
U1.tid, o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment
from u_o_orderkey U1, u_o_custkey U2, u_o_orderstatus U3, u_o_totalprice U4, u_o_orderdate U5,
u_o_orderpriority U6, u_o_clerk U7, u_o_shippriority U8, u_o_comment U9
where U1.tid=U2.tid and U1.tid=U3.tid and U1.tid=U4.tid and U1.tid=U5.tid and
U1.tid=U6.tid and U1.tid=U7.tid and U1.tid=U8.tid and U1.tid=U9.tid and
(U1.C1<>U2.C1 or U1.W1=U2.W1) and (U1.C1<>U3.C1 or U1.W1=U3.W1) and (U1.C1<>U4.C1 or U1.W1=U4.W1) and (U1.C1<>U5.C1 or U1.W1=U5.W1) and (U1.C1<>U6.C1 or U1.W1=U6.W1) and (U1.C1<>U7.C1 or U1.W1=U7.W1) and (U1.C1<>U8.C1 or U1.W1=U8.W1) and (U1.C1<>U9.C1 or U1.W1=U9.W1) and (U2.C1<>U3.C1 or U2.W1=U3.W1) and (U2.C1<>U4.C1 or U2.W1=U4.W1) and (U2.C1<>U5.C1 or U2.W1=U5.W1) and (U2.C1<>U6.C1 or U2.W1=U6.W1) and (U2.C1<>U7.C1 or U2.W1=U7.W1) and (U2.C1<>U8.C1 or U2.W1=U8.W1) and (U2.C1<>U9.C1 or U2.W1=U9.W1) and (U3.C1<>U4.C1 or U3.W1=U4.W1) and (U3.C1<>U5.C1 or U3.W1=U5.W1) and (U3.C1<>U6.C1 or U3.W1=U6.W1) and (U3.C1<>U7.C1 or U3.W1=U7.W1) and (U3.C1<>U8.C1 or U3.W1=U8.W1) and (U3.C1<>U9.C1 or U3.W1=U9.W1) and (U4.C1<>U5.C1 or U4.W1=U5.W1) and (U4.C1<>U6.C1 or U4.W1=U6.W1) and (U4.C1<>U7.C1 or U4.W1=U7.W1) and (U4.C1<>U8.C1 or U4.W1=U8.W1) and (U4.C1<>U9.C1 or U4.W1=U9.W1) and (U5.C1<>U6.C1 or U5.W1=U6.W1) and (U5.C1<>U7.C1 or U5.W1=U7.W1) and (U5.C1<>U8.C1 or U5.W1=U8.W1) and (U5.C1<>U9.C1 or U5.W1=U9.W1) and (U6.C1<>U7.C1 or U6.W1=U7.W1) and (U6.C1<>U8.C1 or U6.W1=U8.W1) and (U6.C1<>U9.C1 or U6.W1=U9.W1) and (U7.C1<>U8.C1 or U7.W1=U8.W1) and (U7.C1<>U9.C1 or U7.W1=U9.W1) and (U8.C1<>U9.C1 or U8.W1=U9.W1);
create table lineitem with oids as
select U1.C1, U1.W1, U2.c1 as c2, U2.w1 as w2, U3.c1 as c3, U3.w1 as w3, U4.c1 as c4, U4.w1 as w4, U5.c1 as c5, U5.w1 as w5,
U6.c1 as c6, U6.w1 as w6, U7.c1 as c7, U7.w1 as w7, U8.c1 as c8, U8.w1 as w8, U9.c1 as c9, U9.w1 as w9,
U10.c1 as c10, U10.w1 as w10, U11.c1 as c11, U11.w1 as w11, U12.c1 as c12, U12.w1 as w12, U13.c1 as c13, U13.w1 as w13,
U14.c1 as c14, U14.w1 as w14, U15.c1 as c15, U15.w1 as W15, U16.C1 as c16, U16.w1 as w16,
U1.tid, l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus,
l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
from u_l_orderkey U1, u_l_partkey U2, u_l_suppkey U3, u_l_linenumber U4, u_l_quantity U5, u_l_extendedprice U6,
u_l_discount U7, u_l_tax U8, u_l_returnflag U9, u_l_linestatus U10, u_l_shipdate U11, u_l_commitdate U12, u_l_receiptdate U13,
u_l_shipinstruct U14, u_l_shipmode U15, u_l_comment U16
where U1.tid=U2.tid and U1.tid=U3.tid and U1.tid=U4.tid and U1.tid=U5.tid and U1.tid=U6.tid and U1.tid=U7.tid and
U1.tid=U8.tid and U1.tid=U9.tid and U1.tid=U10.tid and U1.tid=U11.tid and U1.tid=U12.tid and U1.tid=U13.tid and
U1.tid=U14.tid and U1.tid=U15.tid and U1.tid=U16.tid and
(U1.C1<>U2.C1 or U1.W1=U2.W1) and (U1.C1<>U3.C1 or U1.W1=U3.W1) and (U1.C1<>U4.C1 or U1.W1=U4.W1) and (U1.C1<>U5.C1 or U1.W1=U5.W1) and (U1.C1<>U6.C1 or U1.W1=U6.W1) and (U1.C1<>U7.C1 or U1.W1=U7.W1) and (U1.C1<>U8.C1 or U1.W1=U8.W1) and (U1.C1<>U9.C1 or U1.W1=U9.W1) and (U1.C1<>U10.C1 or U1.W1=U10.W1) and (U1.C1<>U11.C1 or U1.W1=U11.W1) and (U1.C1<>U12.C1 or U1.W1=U12.W1) and (U1.C1<>U13.C1 or U1.W1=U13.W1) and (U1.C1<>U14.C1 or U1.W1=U14.W1) and (U1.C1<>U15.C1 or U1.W1=U15.W1) and (U1.C1<>U16.C1 or U1.W1=U16.W1) and (U2.C1<>U3.C1 or U2.W1=U3.W1) and (U2.C1<>U4.C1 or U2.W1=U4.W1) and (U2.C1<>U5.C1 or U2.W1=U5.W1) and (U2.C1<>U6.C1 or U2.W1=U6.W1) and (U2.C1<>U7.C1 or U2.W1=U7.W1) and (U2.C1<>U8.C1 or U2.W1=U8.W1) and (U2.C1<>U9.C1 or U2.W1=U9.W1) and (U2.C1<>U10.C1 or U2.W1=U10.W1) and (U2.C1<>U11.C1 or U2.W1=U11.W1) and (U2.C1<>U12.C1 or U2.W1=U12.W1) and (U2.C1<>U13.C1 or U2.W1=U13.W1) and (U2.C1<>U14.C1 or U2.W1=U14.W1) and (U2.C1<>U15.C1 or U2.W1=U15.W1) and (U2.C1<>U16.C1 or U2.W1=U16.W1) and (U3.C1<>U4.C1 or U3.W1=U4.W1) and (U3.C1<>U5.C1 or U3.W1=U5.W1) and (U3.C1<>U6.C1 or U3.W1=U6.W1) and (U3.C1<>U7.C1 or U3.W1=U7.W1) and (U3.C1<>U8.C1 or U3.W1=U8.W1) and (U3.C1<>U9.C1 or U3.W1=U9.W1) and (U3.C1<>U10.C1 or U3.W1=U10.W1) and (U3.C1<>U11.C1 or U3.W1=U11.W1) and (U3.C1<>U12.C1 or U3.W1=U12.W1) and (U3.C1<>U13.C1 or U3.W1=U13.W1) and (U3.C1<>U14.C1 or U3.W1=U14.W1) and (U3.C1<>U15.C1 or U3.W1=U15.W1) and (U3.C1<>U16.C1 or U3.W1=U16.W1) and (U4.C1<>U5.C1 or U4.W1=U5.W1) and (U4.C1<>U6.C1 or U4.W1=U6.W1) and (U4.C1<>U7.C1 or U4.W1=U7.W1) and (U4.C1<>U8.C1 or U4.W1=U8.W1) and (U4.C1<>U9.C1 or U4.W1=U9.W1) and (U4.C1<>U10.C1 or U4.W1=U10.W1) and (U4.C1<>U11.C1 or U4.W1=U11.W1) and (U4.C1<>U12.C1 or U4.W1=U12.W1) and (U4.C1<>U13.C1 or U4.W1=U13.W1) and (U4.C1<>U14.C1 or U4.W1=U14.W1) and (U4.C1<>U15.C1 or U4.W1=U15.W1) and (U4.C1<>U16.C1 or U4.W1=U16.W1) and (U5.C1<>U6.C1 or U5.W1=U6.W1) and (U5.C1<>U7.C1 or U5.W1=U7.W1) and (U5.C1<>U8.C1 or U5.W1=U8.W1) and 
(U5.C1<>U9.C1 or U5.W1=U9.W1) and (U5.C1<>U10.C1 or U5.W1=U10.W1) and (U5.C1<>U11.C1 or U5.W1=U11.W1) and (U5.C1<>U12.C1 or U5.W1=U12.W1) and (U5.C1<>U13.C1 or U5.W1=U13.W1) and (U5.C1<>U14.C1 or U5.W1=U14.W1) and (U5.C1<>U15.C1 or U5.W1=U15.W1) and (U5.C1<>U16.C1 or U5.W1=U16.W1) and (U6.C1<>U7.C1 or U6.W1=U7.W1) and (U6.C1<>U8.C1 or U6.W1=U8.W1) and (U6.C1<>U9.C1 or U6.W1=U9.W1) and (U6.C1<>U10.C1 or U6.W1=U10.W1) and (U6.C1<>U11.C1 or U6.W1=U11.W1) and (U6.C1<>U12.C1 or U6.W1=U12.W1) and (U6.C1<>U13.C1 or U6.W1=U13.W1) and (U6.C1<>U14.C1 or U6.W1=U14.W1) and (U6.C1<>U15.C1 or U6.W1=U15.W1) and (U6.C1<>U16.C1 or U6.W1=U16.W1) and (U7.C1<>U8.C1 or U7.W1=U8.W1) and (U7.C1<>U9.C1 or U7.W1=U9.W1) and (U7.C1<>U10.C1 or U7.W1=U10.W1) and (U7.C1<>U11.C1 or U7.W1=U11.W1) and (U7.C1<>U12.C1 or U7.W1=U12.W1) and (U7.C1<>U13.C1 or U7.W1=U13.W1) and (U7.C1<>U14.C1 or U7.W1=U14.W1) and (U7.C1<>U15.C1 or U7.W1=U15.W1) and (U7.C1<>U16.C1 or U7.W1=U16.W1) and (U8.C1<>U9.C1 or U8.W1=U9.W1) and (U8.C1<>U10.C1 or U8.W1=U10.W1) and (U8.C1<>U11.C1 or U8.W1=U11.W1) and (U8.C1<>U12.C1 or U8.W1=U12.W1) and (U8.C1<>U13.C1 or U8.W1=U13.W1) and (U8.C1<>U14.C1 or U8.W1=U14.W1) and (U8.C1<>U15.C1 or U8.W1=U15.W1) and (U8.C1<>U16.C1 or U8.W1=U16.W1) and (U9.C1<>U10.C1 or U9.W1=U10.W1) and (U9.C1<>U11.C1 or U9.W1=U11.W1) and (U9.C1<>U12.C1 or U9.W1=U12.W1) and (U9.C1<>U13.C1 or U9.W1=U13.W1) and (U9.C1<>U14.C1 or U9.W1=U14.W1) and (U9.C1<>U15.C1 or U9.W1=U15.W1) and (U9.C1<>U16.C1 or U9.W1=U16.W1) and (U10.C1<>U11.C1 or U10.W1=U11.W1) and (U10.C1<>U12.C1 or U10.W1=U12.W1) and (U10.C1<>U13.C1 or U10.W1=U13.W1) and (U10.C1<>U14.C1 or U10.W1=U14.W1) and (U10.C1<>U15.C1 or U10.W1=U15.W1) and (U10.C1<>U16.C1 or U10.W1=U16.W1) and (U11.C1<>U12.C1 or U11.W1=U12.W1) and (U11.C1<>U13.C1 or U11.W1=U13.W1) and (U11.C1<>U14.C1 or U11.W1=U14.W1) and (U11.C1<>U15.C1 or U11.W1=U15.W1) and (U11.C1<>U16.C1 or U11.W1=U16.W1) and (U12.C1<>U13.C1 or U12.W1=U13.W1) and (U12.C1<>U14.C1 or U12.W1=U14.W1) and 
(U12.C1<>U15.C1 or U12.W1=U15.W1) and (U12.C1<>U16.C1 or U12.W1=U16.W1) and (U13.C1<>U14.C1 or U13.W1=U14.W1) and (U13.C1<>U15.C1 or U13.W1=U15.W1) and (U13.C1<>U16.C1 or U13.W1=U16.W1) and (U14.C1<>U15.C1 or U14.W1=U15.W1) and (U14.C1<>U16.C1 or U14.W1=U16.W1) and (U15.C1<>U16.C1 or U15.W1=U16.W1);
-- create table W
-- w is the global dictionary of world descriptors: one row for each
-- distinct (condition-column id, local world id) pair occurring in ANY of
-- the per-attribute U-relations, tagged with a dense surrogate key (aid).
create table w(cid bigint, lwid bigint, aid serial);
-- Only (cid, lwid) are supplied below; aid is filled from its serial
-- default.  Plain UNION (not UNION ALL) is deliberate: it deduplicates
-- pairs shared across U-relations.
insert into w
select c1,w1 from u_n_nationkey
union select c1,w1 from u_n_name
union select c1,w1 from u_n_regionkey
union select c1,w1 from u_n_comment
-- region attributes
union select c1,w1 from u_r_regionkey
union select c1,w1 from u_r_name
union select c1,w1 from u_r_comment
-- part attributes
union select c1,w1 from u_p_partkey
union select c1,w1 from u_p_name
union select c1,w1 from u_p_mfgr
union select c1,w1 from u_p_brand
union select c1,w1 from u_p_type
union select c1,w1 from u_p_size
union select c1,w1 from u_p_container
union select c1,w1 from u_p_retailprice
union select c1,w1 from u_p_comment
-- supplier attributes
union select c1,w1 from u_s_suppkey
union select c1,w1 from u_s_name
union select c1,w1 from u_s_address
union select c1,w1 from u_s_nationkey
union select c1,w1 from u_s_phone
union select c1,w1 from u_s_acctbal
union select c1,w1 from u_s_comment
-- partsupp attributes
union select c1,w1 from u_ps_partkey
union select c1,w1 from u_ps_suppkey
union select c1,w1 from u_ps_availqty
union select c1,w1 from u_ps_supplycost
union select c1,w1 from u_ps_comment
-- customer attributes
union select c1,w1 from u_c_custkey
union select c1,w1 from u_c_name
union select c1,w1 from u_c_address
union select c1,w1 from u_c_nationkey
union select c1,w1 from u_c_phone
union select c1,w1 from u_c_acctbal
union select c1,w1 from u_c_mktsegment
union select c1,w1 from u_c_comment
-- orders attributes
union select c1,w1 from u_o_orderkey
union select c1,w1 from u_o_custkey
union select c1,w1 from u_o_orderstatus
union select c1,w1 from u_o_totalprice
union select c1,w1 from u_o_orderdate
union select c1,w1 from u_o_orderpriority
union select c1,w1 from u_o_clerk
union select c1,w1 from u_o_shippriority
union select c1,w1 from u_o_comment
-- lineitem attributes
union select c1,w1 from u_l_orderkey
union select c1,w1 from u_l_partkey
union select c1,w1 from u_l_suppkey
union select c1,w1 from u_l_linenumber
union select c1,w1 from u_l_quantity
union select c1,w1 from u_l_extendedprice
union select c1,w1 from u_l_discount
union select c1,w1 from u_l_tax
union select c1,w1 from u_l_returnflag
union select c1,w1 from u_l_linestatus
union select c1,w1 from u_l_shipdate
union select c1,w1 from u_l_commitdate
union select c1,w1 from u_l_receiptdate
union select c1,w1 from u_l_shipinstruct
union select c1,w1 from u_l_shipmode
union select c1,w1 from u_l_comment;

35
README Normal file
View File

@ -0,0 +1,35 @@
WWW graph benchmark
A variation of the random graph example using web graph data.
The zip file contains a set of SQL scripts intended for use with Postgres (small
modifications might be necessary to run them on a different DBMS).
The scripts create U-relational representations of a random
graph, and of the answers to queries on the random graph, where each world in
the database corresponds to one particular instance of the random graph and
has a corresponding probability.
The graph structure is taken from the web graph data set available at
http://www.nd.edu/~networks/resources/www/www.dat.gz
We assign probabilities to the edges relative to the degree of the end nodes, thus the graph has
few edges with high probability, with the majority of edges having low
probability. The dataset contains an example query showing how to construct
the answer to a query looking for the occurrence of a pattern in the random graph,
such as for example a triangle etc.
* import-www-data.sql - imports the data into relational tables
* init-www-nodes.sql - select a subset of the nodes of the whole graph
* init-www-graph.sql - assign weights to the edges of the graph, giving higher weights to edges whose end nodes have high degree.
* triangle-query.sql - execute the triangle query on the graph (without the conf part).
See the comments inside the SQL source files for a more detailed description.
To use the benchmark run the following sequence of steps:
* unzip the WWW graph file and make sure it is called www.dat
* psql -f import-www-data.sql
* psql -f init-www-nodes.sql
* psql -c "select * from init_www_nodes(0.1);"
* psql -f init-www-graph.sql
* psql -f triangle-query.sql
(Note: see the psql options for specifying the database name, etc.)

View File

@ -0,0 +1,105 @@
CREATE TRIO TABLE PARTSUPP (
PARTKEY int,
SUPPKEY int,
AVAILQTY int,
SUPPLYCOST float,
COMMENT varchar(79),
uncertain(partkey, suppkey, availqty, supplycost)
) with confidences;
CREATE TRIO TABLE LINEITEM (
ORDERKEY int,
PARTKEY int,
SUPPKEY int,
LINENUMBER int,
QUANTITY float,
EXTENDEDPRICE float,
DISCOUNT float,
TAX float,
RETURNFLAG varchar(1),
LINESTATUS varchar(1),
SHIPDATE varchar(10),
COMMITDATE varchar(10),
RECEIPTDATE varchar(10),
SHIPINSTRUCT varchar(25),
SHIPMODE varchar(10),
COMMENT varchar(44),
uncertain(ORDERKEY,PARTKEY,SUPPKEY,LINENUMBER,QUANTITY,EXTENDEDPRICE,DISCOUNT,TAX,RETURNFLAG,LINESTATUS,SHIPDATE,COMMITDATE,RECEIPTDATE,SHIPINSTRUCT,SHIPMODE,COMMENT)
) with confidences;
CREATE TRIO TABLE ORDERS (
ORDERKEY int,
CUSTKEY int,
ORDERSTATUS varchar(1),
TOTALPRICE float,
ORDERDATE varchar(10),
ORDERPRIORITY varchar(15),
CLERK varchar(15),
SHIPPRIORITY int,
COMMENT varchar(79),
uncertain(ORDERKEY,CUSTKEY,ORDERSTATUS,TOTALPRICE,ORDERDATE,ORDERPRIORITY,CLERK,SHIPPRIORITY,COMMENT))
with confidences;
CREATE TRIO TABLE PART (
PARTKEY int,
NAME varchar(48),
MFGR varchar(32),
BRAND varchar(32),
TYPE varchar(32),
SIZE int,
CONTAINER varchar(32),
RETAILPRICE float,
COMMENT varchar(79),
uncertain(PARTKEY,NAME,MFGR,BRAND,TYPE,SIZE,CONTAINER,RETAILPRICE,COMMENT))
with confidences;
CREATE TRIO TABLE SUPPLIER (
SUPPKEY int,
NAME varchar(48),
ADDRESS varchar(32),
NATIONKEY int,
PHONE varchar(32),
ACCTBAL float,
COMMENT varchar(79),
uncertain(SUPPKEY,NAME,ADDRESS,NATIONKEY,PHONE,ACCTBAL,COMMENT))
with confidences;
CREATE TRIO TABLE CUSTOMER (
CUSTKEY int,
NAME varchar(48),
ADDRESS varchar(32),
NATIONKEY int,
PHONE varchar(32),
ACCTBAL float,
MKTSEGMENT varchar(32),
COMMENT varchar(79),
uncertain(CUSTKEY,NAME,ADDRESS,NATIONKEY,PHONE,ACCTBAL,MKTSEGMENT,COMMENT))
with confidences;
CREATE TRIO TABLE NATION (
NATIONKEY int,
NAME varchar(48),
REGIONKEY int,
COMMENT varchar(79),
uncertain(NATIONKEY,NAME,REGIONKEY,COMMENT))
with confidences;
CREATE TRIO TABLE REGION (
REGIONKEY int,
NAME varchar(48),
COMMENT varchar(79))
with confidences;
create index lineitem_orderkey_idx on lineitem(orderkey);
create index lineitem_partkey_idx on lineitem(partkey);
create index lineitem_suppkey_idx on lineitem(suppkey);
create index orders_orderkey_idx on orders(orderkey);
create index orders_custkey_idx on orders(custkey);
create index partsupp_partkey_idx on partsupp(partkey);
create index partsupp_suppkey_idx on partsupp(suppkey);
create index part_partkey_idx on part(partkey);
create index supplier_suppkey_idx on supplier(suppkey);
create index customer_custkey_idx on customer(custkey);
create index nation_nationkey_idx on nation(nationkey);
create index region_regionkey_idx on region(regionkey);

View File

@ -0,0 +1,150 @@
CREATE TRIO TABLE PARTSUPP_SF1_V_0 (
ID int,
PARTKEY int
) with confidences;
CREATE TRIO TABLE PARTSUPP_SF1_V_1 (
ID int,
SUPPKEY int
) with confidences;
CREATE TRIO TABLE PARTSUPP_SF1_V_2 (
ID int,
AVAILQTY int
) with confidences;
CREATE TRIO TABLE PARTSUPP_SF1_V_3 (
ID int,
SUPPLYCOST float
) with confidences;
CREATE TRIO TABLE PARTSUPP_SF1_V_4 (
ID int,
COMMENT varchar(200)
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_0 (
ID int,
ORDERKEY int
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_1 (
ID int,
PARTKEY int
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_2 (
ID int,
SUPPKEY int
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_3 (
ID int,
LINENUMBER int
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_4 (
ID int,
QUANTITY float
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_5 (
ID int,
EXTENDEDPRICE float
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_6 (
ID int,
DISCOUNT float
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_7 (
ID int,
TAX float
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_8 (
ID int,
RETURNFLAG varchar(1)
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_9 (
ID int,
STATUS varchar(1)
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_10 (
ID int,
SHIPDATE varchar(10)
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_11 (
ID int,
COMMITDATE varchar(10)
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_12 (
ID int,
RECEIPTDATE varchar(10)
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_13 (
ID int,
SHIPINSTRUCT varchar(25)
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_14 (
ID int,
SHIPMODE varchar(10)
) with confidences;
CREATE TRIO TABLE LINEITEM_SF1_V_15 (
ID int,
COMMENT varchar(44)
) with confidences;
CREATE TRIO TABLE ORDER_SF1_V_0 (
ID int,
ORDERKEY int
) with confidences;
CREATE TRIO TABLE ORDER_SF1_V_1 (
ID int,
CUSTKEY int
) with confidences;
CREATE TRIO TABLE ORDER_SF1_V_2 (
ID int,
ORDERSTATUS varchar(1)
) with confidences;
CREATE TRIO TABLE ORDER_SF1_V_3 (
ID int,
TOTALPRICE float
) with confidences;
CREATE TRIO TABLE ORDER_SF1_V_4 (
ID int,
ORDERDATE varchar(10)
) with confidences;
CREATE TRIO TABLE ORDER_SF1_V_5 (
ID int,
ORDERPRIORITY varchar(15)
) with confidences;
CREATE TRIO TABLE ORDER_SF1_V_6 (
ID int,
CLERK varchar(15)
) with confidences;
CREATE TRIO TABLE ORDER_SF1_V_7 (
ID int,
SHIPPRIORITY int
) with confidences;
CREATE TRIO TABLE ORDER_SF1_V_8 (
ID int,
COMMENT varchar(79)
) with confidences;

View File

@ -0,0 +1,27 @@
# Generate TriQL bulk-load scripts from the TPC-H .tbl files at three
# alternative counts (maxalts = 1, 10, 100).
# tpch_horizontal_partitioner.py arguments:
#   <input.tbl> <#lines> <#partitions> <#attributes> <maxalts> <tablename>
./tpch_horizontal_partitioner.py ../tpch/lineitem.tbl 0 0 16 1 lineitem > /tmp/theobald/tpch/lineitem_1.triql
./tpch_horizontal_partitioner.py ../tpch/orders.tbl 0 0 9 1 orders > /tmp/theobald/tpch/orders_1.triql
./tpch_horizontal_partitioner.py ../tpch/partsupp.tbl 0 0 5 1 partsupp > /tmp/theobald/tpch/partsupp_1.triql
./tpch_horizontal_partitioner.py ../tpch/customer.tbl 0 0 8 1 customer > /tmp/theobald/tpch/customer_1.triql
./tpch_horizontal_partitioner.py ../tpch/part.tbl 0 0 9 1 part > /tmp/theobald/tpch/part_1.triql
./tpch_horizontal_partitioner.py ../tpch/supplier.tbl 0 0 7 1 supplier > /tmp/theobald/tpch/supplier_1.triql
./tpch_horizontal_partitioner.py ../tpch/nation.tbl 0 0 8 1 nation > /tmp/theobald/tpch/nation_1.triql
./tpch_horizontal_partitioner.py ../tpch/region.tbl 0 0 3 1 region > /tmp/theobald/tpch/region_1.triql
./tpch_horizontal_partitioner.py ../tpch/lineitem.tbl 0 0 16 10 lineitem > /tmp/theobald/tpch/lineitem_10.triql
./tpch_horizontal_partitioner.py ../tpch/orders.tbl 0 0 9 10 orders > /tmp/theobald/tpch/orders_10.triql
./tpch_horizontal_partitioner.py ../tpch/partsupp.tbl 0 0 5 10 partsupp > /tmp/theobald/tpch/partsupp_10.triql
./tpch_horizontal_partitioner.py ../tpch/customer.tbl 0 0 8 10 customer > /tmp/theobald/tpch/customer_10.triql
./tpch_horizontal_partitioner.py ../tpch/part.tbl 0 0 9 10 part > /tmp/theobald/tpch/part_10.triql
./tpch_horizontal_partitioner.py ../tpch/supplier.tbl 0 0 7 10 supplier > /tmp/theobald/tpch/supplier_10.triql
./tpch_horizontal_partitioner.py ../tpch/nation.tbl 0 0 8 10 nation > /tmp/theobald/tpch/nation_10.triql
# NOTE(review): region keeps maxalts=1 in every batch (region has only a
# handful of rows) -- presumably intentional; confirm.
./tpch_horizontal_partitioner.py ../tpch/region.tbl 0 0 3 1 region > /tmp/theobald/tpch/region_10.triql
./tpch_horizontal_partitioner.py ../tpch/lineitem.tbl 0 0 16 100 lineitem > /tmp/theobald/tpch/lineitem_100.triql
./tpch_horizontal_partitioner.py ../tpch/orders.tbl 0 0 9 100 orders > /tmp/theobald/tpch/orders_100.triql
./tpch_horizontal_partitioner.py ../tpch/partsupp.tbl 0 0 5 100 partsupp > /tmp/theobald/tpch/partsupp_100.triql
./tpch_horizontal_partitioner.py ../tpch/customer.tbl 0 0 8 100 customer > /tmp/theobald/tpch/customer_100.triql
./tpch_horizontal_partitioner.py ../tpch/part.tbl 0 0 9 100 part > /tmp/theobald/tpch/part_100.triql
./tpch_horizontal_partitioner.py ../tpch/supplier.tbl 0 0 7 100 supplier > /tmp/theobald/tpch/supplier_100.triql
# BUG FIX: nation_100 was generated with maxalts=1; the nation_10 batch
# uses 10 and every other *_100 table uses 100, so use 100 here as well.
./tpch_horizontal_partitioner.py ../tpch/nation.tbl 0 0 8 100 nation > /tmp/theobald/tpch/nation_100.triql
./tpch_horizontal_partitioner.py ../tpch/region.tbl 0 0 3 1 region > /tmp/theobald/tpch/region_100.triql

View File

@ -0,0 +1,75 @@
#!/usr/bin/env python
import sys
import re
import random
if len(sys.argv) != 7:
print 'invalid arg'
print 'Usage: ' + sys.argv[0] + ' filename #lines #partitions #attributes #maxalts tablename'
sys.exit(1)
filename=sys.argv[1]
lines=int(sys.argv[2])
partitions=int(sys.argv[3])
attrs=int(sys.argv[4])
maxalts=int(sys.argv[5])
tablename=sys.argv[6]
f=open(filename, 'r')
lineno = 0
part_no = 1
part_lines = 0
next_alts = random.randint(1, maxalts)
xtuple=[]
while 1:
line = f.readline()
if line:
line = re.sub(';','', line)
line = re.split('\|', line)
line.pop()
xtuple.append(line)
if (not line and len(xtuple) > 0) or len(xtuple) == next_alts:
str = "insert into %s%s values[(" % (tablename, (partitions > 0 and str(part_no) or ""))
i = 0
for alt in xtuple:
j = 0
for attr in alt:
if re.match("^[+-]?\d*\.?\d+?$", attr):
str += attr
else:
str += "\'" + attr + "\'"
if j == attrs - 1 or j == len(alt) - 1:
str += "):%s" % (len(xtuple) == 1 and 1 or (1.0 / len(xtuple)))
else:
str += ','
j += 1
if j == len(alt):
break
if i < len(xtuple) - 1:
str += "|("
else:
str += "];"
i += 1
print str
xtuple=[]
next_alts = random.randint(1, maxalts)
if partitions > 0 and lines > 0 and part_lines >= lines / partitions:
part_no += 1
part_lines = 0
if not line:
break
lineno += 1
part_lines += 1
if part_lines % 1000 == 0:
print "commit;"
print "commit;"

View File

@ -0,0 +1,73 @@
#!/usr/bin/env python
import sys
import re
import random
if len(sys.argv) != 4:
print 'invalid arg'
print 'Usage: ' + sys.argv[0] + ' filename #maxalts tablename'
sys.exit(1)
filename=sys.argv[1]
maxalts=int(sys.argv[2])
tablename=sys.argv[3]
try:
f=open(filename, 'r')
except:
print 'open error'
sys.exit(2)
lineno=0
alts = 0
next_alts = random.randint(1, maxalts)
xtuple=[]
xtups = 0
while 1:
line = f.readline()
if line != "":
line = re.sub(';','', line)
line = re.split('\|', line)
line.pop()
xtuple.append(line)
alts += 1
if alts == next_alts or line == "":
inserts = []
for i in range(len(xtuple)):
line = xtuple[i]
for j in range(len(line)):
if i == 0:
str = 'insert into %s_%s values[ (' % (tablename, j)
inserts.append(str)
else:
str = inserts[j]
if re.match('^[+-]?\d*\.?\d+?$', line[j]):
str += '%s, %s' % (xtups + i, line[j])
else:
str += '%s, \'%s\'' % (xtups + i, line[j])
if i == len(xtuple)-1:
str += ' ):%s ];' % (alts == 1 and 1 or (1.0 / alts))
print str
else:
str += ' ):%s | ( ' % (alts == 1 and 1 or (1.0 / alts))
inserts[j] = str
next_alts = random.randint(1, maxalts)
xtups += len(xtuple)
xtuple=[]
alts = 0
if line == "":
print 'commit;'
#print "%d ALTS." % xtups
sys.exit(2)
lineno += 1
if lineno % 1000 == 0:
print 'commit;'
#print "%d ALTS." % xtups

View File

@ -0,0 +1,248 @@
<settings>
<db>
<dbname>postgres</dbname>
<username>lantova</username>
<password>abcd</password>
</db>
<rules>
<rule>
<type>eq</type>
<if>
<term>
<left>CITIZEN</left>
<op>=</op>
<right>0</right>
</term>
</if>
<then>
<term>
<left>IMMIGR</left>
<op>=</op>
<right>0</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>CITIZEN</left>
<op>&gt;</op>
<right>0</right>
</term>
</if>
<then>
<term>
<left>IMMIGR</left>
<op>&gt;</op>
<right>0</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>FEB55</left>
<op>=</op>
<right>1</right>
</term>
</if>
<then>
<term>
<left>MILITARY</left>
<op>!=</op>
<right>4</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>KOREAN</left>
<op>=</op>
<right>1</right>
</term>
</if>
<then>
<term>
<left>MILITARY</left>
<op>!=</op>
<right>4</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>VIETNAM</left>
<op>=</op>
<right>1</right>
</term>
</if>
<then>
<term>
<left>MILITARY</left>
<op>!=</op>
<right>4</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>WWII</left>
<op>=</op>
<right>1</right>
</term>
</if>
<then>
<term>
<left>MILITARY</left>
<op>!=</op>
<right>4</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>MARITAL</left>
<op>=</op>
<right>0</right>
</term>
</if>
<then>
<term>
<left>RSPOUSE</left>
<op>!=</op>
<right>6</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>MARITAL</left>
<op>=</op>
<right>0</right>
</term>
</if>
<then>
<term>
<left>RSPOUSE</left>
<op>!=</op>
<right>5</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>MARITAL</left>
<op>=</op>
<right>0</right>
</term>
</if>
<then>
<term>
<left>RSPOUSE</left>
<op>!=</op>
<right>4</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>LANG1</left>
<op>=</op>
<right>2</right>
</term>
</if>
<then>
<term>
<left>ENGLISH</left>
<op>!=</op>
<right>4</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>RPOB</left>
<op>=</op>
<right>52</right>
</term>
</if>
<then>
<term>
<left>CITIZEN</left>
<op>!=</op>
<right>0</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>SCHOOL</left>
<op>=</op>
<right>0</right>
</term>
</if>
<then>
<term>
<left>KOREAN</left>
<op>!=</op>
<right>1</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>SCHOOL</left>
<op>=</op>
<right>0</right>
</term>
</if>
<then>
<term>
<left>FEB55</left>
<op>!=</op>
<right>1</right>
</term>
</then>
</rule>
<rule>
<type>eq</type>
<if>
<term>
<left>SCHOOL</left>
<op>=</op>
<right>0</right>
</term>
</if>
<then>
<term>
<left>WWII</left>
<op>!=</op>
<right>1</right>
</term>
</then>
</rule>
</rules>
</settings>

BIN
census/Chase/jdbc3.jar Normal file

Binary file not shown.

61
census/Chase/prsel.sql Normal file
View File

@ -0,0 +1,61 @@
CREATE OR REPLACE FUNCTION project_select(relname text, mapname text, compname text, worldname text, newrelname text, newmapname text, newcompname text, newworldname text, attributes _text, "left" _text, op _text, "right" _text)
RETURNS void AS
$BODY$ DECLARE cols text;
colsQuoted text;
cond1 text; -- the condition used to filter the template relation; form: (A1 op1 c1 OR A1 IS NULL) AND (A2 op2 c2 OR A2 IS NULL) AND ...
cond3 text; -- the condition used for filtering the component relation; form: WHEN 'A1' THEN Value op1 c1 WHEN 'A2' THEN Value op2 c2 ...
cond4 text; -- the condition used to filter the mapping relation; form: Col NOT IN ('A', 'B', ...)
size integer;
BEGIN
cond1 = '';
cond3 = '';
IF array_upper(attributes, 1) = 1 AND attributes[1] = '*' THEN
cols = '*';
cond4 = 'false';
ELSE
cols = 'TID, ' || array_to_string(attributes, ',');
colsQuoted = '''' || array_to_string(attributes, ''',''') || '''';
cond4 = 'Col NOT IN (' || colsQuoted || ') ';
END IF;
size = array_upper(left, 1);
FOR i IN 1..size LOOP
IF i > 1 THEN
cond1 = cond1 || ' AND (' || left[i] || ' ' || op[i] || ' ' || right[i] || ' OR ' || left[i] || ' = -1)';
ELSE
cond1 = '(' || left[i] || ' ' || op[i] || ' ' || right[i] || ' OR ' || left[i] || ' = -1)';
END IF;
cond3 = cond3 || ' WHEN ' || quote_literal(left[i]) || ' THEN Value ' || op[i] || ' ' || right[i];
END LOOP;
EXECUTE '
CREATE TABLE ' || newRelName || ' AS
SELECT ' || cols || ' FROM ' || relName || ' WHERE ' || cond1 || ';';
EXECUTE '
CREATE TABLE ' || newMapName || ' AS SELECT * FROM ' || mapName || ' WHERE Relation = ' || quote_literal(relName)
|| ' AND TID IN (SELECT TID FROM ' || newRelName || ');';
EXECUTE '
CREATE TABLE ' || newCompName || ' AS
SELECT ' || compName || '.* FROM ' || compName || ', ' || newMapName || '
WHERE ' || compName || '.HID = ' || newMapName || '.HID AND
(CASE Col ' || cond3 || ' ELSE true END);';
EXECUTE '
CREATE TABLE DEL WITHOUT OIDS AS
SELECT TID FROM ' || newMapName || '
WHERE HID NOT IN (SELECT HID FROM ' || newCompName || ');';
EXECUTE '
DELETE FROM ' || newRelName || '
WHERE TID IN (SELECT DEL.TID FROM DEL);
DELETE FROM ' || newMapName || '
WHERE ' || cond4 || ' OR TID IN (SELECT DEL.TID FROM DEL);';
EXECUTE '
DELETE FROM ' || newCompName || '
WHERE HID NOT IN (SELECT HID FROM ' || newMapName || ');';
DROP TABLE DEL;
--EXECUTE '
-- CREATE TABLE ' || newWorldName || ' AS SELECT DISTINCT ' || worldName || '.* FROM ' || worldName || ', ' || newCompName ||
-- ' WHERE ' || worldName || '.cid = ' || newCompName || '.cid AND ' || worldName || '.wid = ' || newCompName || '.wid;';
RETURN;
END;
$BODY$
LANGUAGE 'plpgsql' VOLATILE;

View File

@ -0,0 +1,706 @@
import java.sql.*;
import java.util.ArrayList;
import java.util.HashMap;
/**
 * Cleans an uncertain database by "chasing" a set of dependencies: it finds
 * tuples that violate each dependency and repairs the world-set representation
 * by deleting invalid worlds and, where necessary, merging components.
 *
 * The database is assumed to hold four relations (template, mapping,
 * components, world) following the MayBMS component/world-set encoding;
 * holes in the template relation are marked with the value -1.
 * NOTE(review): the exact column layout (tid, Col, cid, hid, wid, Value) is
 * inferred from the SQL built below — confirm against the schema definition.
 *
 * @author Lublena
 *
 */
public class Chaser
{
    // JDBC connection and a reusable statement; opened in init(), closed in closeConnection().
    private Connection dbConnection;
    private Statement sqlStatement;
    /**
     * Name of the template relation
     */
    private String templateRelName;
    /**
     * Name of the mapping relation
     */
    private String mappingRelName;
    /**
     * Name of the components relation
     */
    private String compRelName;
    /**
     * Name of the world relation
     */
    private String worldRelName;
    /**
     * Number of pairs of components that were merged during the chasing phase.
     */
    public int merged;
    /**
     * Constructs a new chaser and initializes database connection.
     * @param aDatabase Name of the database
     * @param aUser Username
     * @param aPassword Password
     * @param aTemplateRelName Name of the template relation
     * @param aMappingRelName Name of the mapping relation
     * @param aCompRelName Name of the components relation
     * @param aWorldRelName Name of the world relation
     */
    public Chaser(String aDatabase, String aUser, String aPassword,
        String aTemplateRelName, String aMappingRelName, String aCompRelName,
        String aWorldRelName)
    {
        init(aDatabase, aUser, aPassword);
        templateRelName = aTemplateRelName;
        mappingRelName = aMappingRelName;
        compRelName = aCompRelName;
        worldRelName = aWorldRelName;
        merged = 0;
    }
    /**
     * Initializes database connection.
     * On failure the connection/statement fields are left unset; later calls
     * will then fail with a NullPointerException rather than a checked error.
     * @param aDatabase Name of the database to connect to
     * @param aUser Username
     * @param aPassword Password
     */
    private void init(String aDatabase, String aUser, String aPassword)
    {
        try
        {
            Class.forName("org.postgresql.Driver"); //load the driver
            dbConnection = DriverManager.getConnection("jdbc:postgresql:" + aDatabase,
                aUser, aPassword);
            sqlStatement = dbConnection.createStatement();
        }
        catch (SQLException e)
        {
            System.out.println("Could not connect to the database!");
            e.printStackTrace();
            return;
        }
        catch (ClassNotFoundException e)
        {
            System.out.println("Could not load database driver!");
            return;
        }
    }
    /**
     * Closes connection to the database.
     *
     */
    public void closeConnection()
    {
        try
        {
            sqlStatement.close();
            dbConnection.close();
        }
        catch (SQLException e)
        {
            // Ignore
            return;
        }
    }
    /**
     * Enforces a functional dependency.
     * Currently a stub: it reports success without touching the database.
     * @param aFD The functional dependency to enforce
     * @return Always true (unimplemented)
     */
    private boolean applyFD(FDependency aFD)
    {
        // TODO: Implement
        return true;
    }
    /**
     * Enforces an equality-generating dependency on the database by deleting invalid
     * worlds.
     * @param aEQD An equality-generating dependency to enforce
     * @return True if the dependency was successfully enforced,
     * false if the database was inconsistent.
     */
    private boolean applyEQD(EQDependency aEQD)
    {
        // Find tuples that do not satisfy the dependency
        //ArrayList<Term> terms = aEQD.getTerms();
        //ArrayList<Term> rterms = aEQD.getRTerms();
        //ArrayList terms = aEQD.getTerms();
        // The "reversed" terms: the if-part plus the negated then-part,
        // i.e. the condition describing VIOLATING tuples.
        ArrayList rterms = aEQD.getRTerms();
        Term t = (Term) rterms.get(0);
        // Build Postgres array literals of the form '{"a","b",...}'.
        StringBuffer leftArrayBuilder = new StringBuffer("'{\"" + t.left + "\"");
        StringBuffer opArrayBuilder = new StringBuffer("'{\"" + t.op + "\"");
        StringBuffer rightArrayBuilder = new StringBuffer("'{\"" + t.right + "\"");
        // Construct the left, op and right arrays needed for the call to project_select
        for (int i = 1; i < rterms.size(); ++i)
        {
            Term term = (Term) rterms.get(i);
            leftArrayBuilder.append(",\"" + term.left + "\"");
            opArrayBuilder.append(",\"" + term.op + "\"");
            rightArrayBuilder.append(",\"" + term.right + "\"");
        }
        leftArrayBuilder.append("}'");
        opArrayBuilder.append("}'");
        rightArrayBuilder.append("}'");
        // Construct command for retrieving the rows not satisfying the given dependency
        StringBuffer commandBuilder = new StringBuffer("select project_select(");
        commandBuilder.append("'" + templateRelName + "','" + mappingRelName + "','" +
            compRelName + "','" + worldRelName + "',");
        // project_select materializes its result into these four tables,
        // which are dropped again at the end of this method.
        commandBuilder.append("'eqRel', 'eqMap', 'eqComp', 'eqWorld', ");
        // The left-hand column array is passed twice: once as the attribute
        // (projection) list and once as the selection columns.
        // NOTE(review): confirm against project_select's parameter list.
        commandBuilder.append(leftArrayBuilder.toString() + "," + leftArrayBuilder.toString() + ",");
        commandBuilder.append(opArrayBuilder.toString() + "," + rightArrayBuilder.toString() + ")");
        String command = commandBuilder.toString();
        //String command = constructStatement(rterms);
        try
        {
            //System.out.println("Executing query " + commandBuilder.toString());
            sqlStatement.execute(command);
            // Prepared lookup: which component/hole does a given (tid, column) map to?
            StringBuffer sql = new StringBuffer("SELECT cid, hid FROM " + mappingRelName);
            sql.append(" WHERE tid = ? AND Col = ?;");
            PreparedStatement holePropertiesStatement = dbConnection.prepareStatement(sql.toString());
            //Statement statement = dbConnection.createStatement();
            ResultSet rs = sqlStatement.executeQuery("SELECT * FROM eqRel");
            //ResultSet rs = sqlStatement.executeQuery(command);
            // Prepare ArrayList that will hold information for the bulk operations
            ArrayList bulkDeleteInfo = new ArrayList();
            ArrayList bulkMergeInfo = new ArrayList();
            int s = 0;
            while (rs.next())
            {
                s++;
                /*ArrayList<String> compIDs = new ArrayList<String>();
                ArrayList<String> holeIDs = new ArrayList<String>();
                ArrayList<Integer> inds = new ArrayList<Integer>(); // indexes of terms that are not true in all worlds
                */
                ArrayList compIDs = new ArrayList();
                ArrayList holeIDs = new ArrayList();
                ArrayList inds = new ArrayList(); // indexes of terms that are not true in all worlds
                ArrayList invalidTerms = new ArrayList();
                ArrayList columns = aEQD.getColumns();
                for (int i = 0; i < columns.size(); ++i)
                {
                    String col = columns.get(i).toString();
                    // The current column contains a hole
                    String val = rs.getString(col);
                    if (val == null)
                    {
                        continue;
                    }
                    // -1 is the marker value for a hole in the template relation.
                    if (val.equals("-1"))
                    {
                        // Retrieve the cid and hid for the hole
                        holePropertiesStatement.setInt(1, rs.getInt("tid"));
                        holePropertiesStatement.setString(2, col);
                        ResultSet cidhid = holePropertiesStatement.executeQuery();
                        if (cidhid.next())
                        {
                            String cid = cidhid.getString("cid");
                            String hid = cidhid.getString("hid");
                            compIDs.add(cid);
                            holeIDs.add(hid);
                            inds.add(new Integer(i));
                            invalidTerms.add(rterms.get(i));
                        }
                        else
                        {
                            // System.out.println(holePropertiesStatement.toString());
                            System.out.println("Warning: No info found for the current hole (" + rs.getInt("tid")
                                + ", " + col + ").");
                            //return false;
                        }
                    } // end if (rs.getObject(col) == null)
                } // end for(int i = 0; i < columns.size; ++i)
                // Database is inconsistent: the violating tuple has no hole at
                // all, so there is no world we could delete to repair it.
                if (compIDs.isEmpty())
                {
                    System.out.println("Incosistent database. Aborting...");
                    return false;
                }
                if (compIDs.size() == 1)
                {
                    // Exactly one hole involved: simply delete the worlds
                    // where that hole takes a violating value.
                    bulkDeleteInfo.add(new InvalidInfo(compIDs.get(0),
                        holeIDs.get(0), invalidTerms.get(0)));
                }
                else // More than one component involved - merge
                {
                    // !!!Dirty fix - there are exactly two components to merge,
                    // which are independent of the components in the rest of the tuples
                    // TODO: Figure out what to do when this is not the case
                    String c1 = compIDs.get(0).toString();
                    String c2 = compIDs.get(1).toString();
                    String h1 = holeIDs.get(0).toString();
                    String h2 = holeIDs.get(1).toString();
                    Term t1 = (Term) invalidTerms.get(0);
                    Term t2 = (Term) invalidTerms.get(1);
                    // Name of the merged component; "x" concatenation is also
                    // relied upon by mergeComponents' already-merged check.
                    String c = c1 + "x" + c2;
                    bulkMergeInfo.add(new InvalidInfoPair(c,c1,c2,h1,h2,t1,t2));
                }
            } // end while(rs.next)
            System.out.println("Size of intermediate results: " + s);
            // Perform bulk operations
            Statement bulkStatement = dbConnection.createStatement();
            if (!bulkDeleteInfo.isEmpty())
            {
                bulkDelete(bulkStatement, bulkDeleteInfo);
            }
            if (!bulkMergeInfo.isEmpty())
            {
                bulkMerge(bulkStatement, bulkMergeInfo);
            }
            // Drop intermediate results
            try
            {
                sqlStatement.executeUpdate("DROP TABLE eqRel; DROP TABLE eqMap; DROP TABLE eqComp;");
                sqlStatement.executeUpdate("DROP TABLE eqWorld;");
            }
            catch (SQLException e)
            {
                // Ignore: best-effort cleanup of scratch tables.
            }
        }
        catch (SQLException e)
        {
            System.out.println("Error occured when chasing dependency " + aEQD.toString());
            e.printStackTrace();
            return false;
        }
        return true;
    }
    /**
     * Updates the entries in the mapping and components tables after the components
     * in the given ArrayList have been merged.
     * @param statement The Statement used to perform the updates.
     * @param oldCompIDs ArrayList of component IDs that have been merged.
     * @param newCompID Name of the new component that replaced the old ones.
     */
    private void updateTables(Statement statement, ArrayList oldCompIDs, String newCompID)
        throws SQLException
    {
        // Build the SQL list literal ('c1','c2',...) of the obsolete component IDs.
        StringBuffer sb = new StringBuffer("('" + oldCompIDs.get(0) + "'");
        for (int i = 1; i < oldCompIDs.size(); ++i)
        {
            sb.append(",'" + oldCompIDs.get(i).toString() + "'");
        }
        sb.append(")");
        String st = "DELETE FROM " + compRelName + " WHERE cid IN " + sb.toString();
        // System.out.println(st);
        statement.executeUpdate(st);
        st = "UPDATE " + mappingRelName + " SET cid = '" + newCompID + "' WHERE cid IN " + sb.toString();
        statement.executeUpdate(st);
    }
    /**
     * Deletes all worlds in a component that do not satisfy the given conjunctive formula.
     * @param statement The Statement used to perform the updates.
     * @param comp The name of the component to filter.
     * @param rterms A conjunction of terms.
     * @param holeIDs IDs of the holes that correspond to the column names in the formula.
     * @param inds Indices of the terms that should be taken into account when filtering.
     * @throws SQLException
     */
    private void deleteInvalid(Statement statement, String comp, ArrayList rterms, ArrayList holeIDs, ArrayList inds)
        throws SQLException
    {
        StringBuffer delSQL = new StringBuffer();
        if (holeIDs.size() == 1)
        {
            // Single-hole case: one self-contained SELECT suffices.
            delSQL.append("CREATE TABLE INVALID AS SELECT cid, wid FROM " + compRelName);
            delSQL.append(" WHERE cid = '" + comp + "'");
            delSQL.append(" AND hid = '" + holeIDs.get(0).toString() + "'");
            int i = (new Integer(inds.get(0).toString())).intValue();
            String op = ((Term) rterms.get(i)).op;
            String c = ((Term) rterms.get(i)).right;
            // System.out.println("Deleting Values " + op + " " + c);
            delSQL.append(" AND Value " + op + " " + c);
            statement.executeUpdate(delSQL.toString());
            delSQL.delete(0, delSQL.length());
            // Delete every row of the component belonging to an invalid world
            // (joined on the system oid of each components-table row).
            delSQL.append("DELETE FROM " + compRelName + " WHERE oid IN (SELECT c.oid FROM INVALID NATURAL JOIN ");
            delSQL.append(compRelName + " AS c)");
            statement.executeUpdate(delSQL.toString());
        }
        else
        {
            // Multi-hole case: self-join the components table once per hole
            // (aliases C0..Cn), tied together on the world id.
            StringBuffer fromClause = new StringBuffer(compRelName + " C0");
            StringBuffer cond1 = new StringBuffer("C0.cid = '" + comp +
                "' AND C0.hid = '" + holeIDs.get(0) + "'");
            int ind = (new Integer(inds.get(0).toString())).intValue();
            Term t = (Term) rterms.get(ind);
            StringBuffer cond2 = new StringBuffer("C0.Value " + t.op + " " +
                t.right);
            StringBuffer cond3 = new StringBuffer();
            for (int i = 1; i < holeIDs.size(); ++i)
            {
                fromClause.append("," + compRelName + " C" + i);
                cond1.append(" AND C" + i + ".cid = '" + comp +
                    "' AND C" + i + ".hid = '" + holeIDs.get(i) + "'");
                ind = (new Integer(inds.get(i).toString())).intValue();
                t = (Term) rterms.get(ind);
                cond2.append(" AND C" + i + ".Value " + t.op + " " +
                    t.right);
                cond3.append(" AND C" + (i-1) + ".wid = C" + i + ".wid");
            }
            String st = "CREATE TABLE INVALID AS SELECT c0.cid,c0.wid FROM " + fromClause.toString() +
                " WHERE " + cond1.toString() + " AND " + cond2.toString() + cond3.toString();
            statement.executeUpdate(st);
            st = "DELETE FROM " + compRelName + " WHERE oid in (SELECT c.oid FROM INVALID NATURAL JOIN " +
                compRelName + " AS c);";
            // System.out.println(st);
            statement.executeUpdate(st);
        } // end else
        // Delete intermediate results
        statement.executeUpdate("DROP TABLE INVALID;");
    }
    /**
     * Performs a bulk delete to remove inconsistencies from the database.
     * Processes the list in batches of 250 so the generated WHERE clause
     * stays a manageable size.
     * @param statement Statement to execute the SQL commands with
     * @param invalid ArrayList of InvalidInfo items, denoting which worlds in which components
     * should be deleted.
     */
    private void bulkDelete(Statement statement, ArrayList invalid) throws SQLException
    {
        StringBuffer sb = new StringBuffer();
        int size = invalid.size();
        int j;
        int n;
        int i = 0;
        while (i < size)
        {
            j = i;
            n = i + 250;
            if (n > size)
            {
                n = size;
            }
            // OR together the per-hole violation conditions of this batch.
            for (; j < n; ++j)
            {
                InvalidInfo info = (InvalidInfo) invalid.get(j);
                if (j > i)
                {
                    sb.append(" OR ");
                }
                sb.append("(cid = '" + info.cid + "' AND hid = '" + info.hid + "'");
                sb.append(" AND Value " + info.t.op + " '" + info.t.right + "')");
            }
            // Create table of invalid worlds
            String createInvalid = "CREATE TABLE Invalid AS SELECT cid,wid FROM " + compRelName
                + " WHERE " + sb.toString();
            statement.executeUpdate(createInvalid);
            String delete = "DELETE FROM " + compRelName
                + " WHERE oid IN (SELECT c.oid FROM Invalid NATURAL JOIN " + compRelName + " AS c);";
            statement.executeUpdate(delete);
            // Drop table of invalid worlds
            statement.executeUpdate("DROP TABLE Invalid;");
            i += (n-i);
            sb.delete(0, sb.length());
        }
    }
    /**
     * Builds the condition selecting the worlds of a merged component in which
     * BOTH holes of the pair take violating values (aliases c1/c2 refer to the
     * self-join set up by bulkMerge).
     * @param info The merged pair to generate the condition for.
     * @return A parenthesized SQL condition fragment.
     */
    private String generateDelete(InvalidInfoPair info)
    {
        StringBuffer sb = new StringBuffer();
        sb.append("(c1.cid = '" + info.c + "'" + " AND c1.hid ='" + info.h1 + "'");
        sb.append(" AND c1.Value " + info.t1.op + " '" + info.t1.right + "'");
        sb.append(" AND c2.hid = '" + info.h2 + "'");
        sb.append(" AND c2.Value " + info.t2.op + " '" + info.t2.right + "')");
        return sb.toString();
    }
    /**
     * Performs a bulk merge of given pairs of components.
     * Each merge takes the cross product of the two components' worlds and
     * then deletes the combined worlds violating the dependency.
     * Processed in batches of 250 pairs, like bulkDelete.
     * @param statement
     * @param mergeInfo
     * @throws SQLException
     */
    private void bulkMerge(Statement statement, ArrayList mergeInfo) throws SQLException
    {
        int size = mergeInfo.size();
        int i = 0;
        int n;
        int j;
        while (i < size)
        {
            StringBuffer sb1 = new StringBuffer();
            sb1.append("CREATE TABLE Invalid AS SELECT c1.cid,c1.wid FROM ");
            sb1.append(compRelName + " AS c1 JOIN " + compRelName
                + " AS c2 ON(c1.cid = c2.cid AND c1.wid = c2.wid)");
            sb1.append(" WHERE ");
            n = i + 250;
            if (n > size)
            {
                n = size;
            }
            j = i;
            for (;j < n; ++j)
            {
                InvalidInfoPair info = (InvalidInfoPair) mergeInfo.get(j);
                // The components were already merged
                if (info.c1.equals(info.c2))
                {
                    info.c = info.c1;
                }
                else
                {
                    System.out.println("Merging " + info.c1 + " and " + info.c2);
                    // Merge components: pair every world of c1 with every world
                    // of c2; the combined world id is the concatenation of the
                    // two world ids.
                    StringBuffer sb2 = new StringBuffer("INSERT INTO " + compRelName + " ");
                    sb2.append("SELECT '" + info.c + "',c1.hid,c1.wid || c2.wid,c1.Value FROM " + compRelName + " c1," + compRelName + " c2 ");
                    sb2.append("WHERE c1.cid = '" + info.c1 + "' AND c2.cid = '" + info.c2 +
                        "' AND c2.hid = '" + info.h2 + "'");
                    statement.addBatch(sb2.toString());
                    sb2.delete(0, sb2.length());
                    sb2.append("INSERT INTO " + compRelName + " ");
                    sb2.append("SELECT '" + info.c + "',c2.hid,c1.wid || c2.wid,c2.Value FROM " + compRelName + " c1," + compRelName + " c2 ");
                    sb2.append("WHERE c1.cid = '" + info.c1 + "' AND c2.cid = '" + info.c2 + "' AND c1.hid = '" + info.h1 + "'");
                    statement.addBatch(sb2.toString());
                    // Delete old component entries
                    statement.addBatch("DELETE FROM " + compRelName + " WHERE cid IN ('" + info.c1 + "','" + info.c2 + "')");
                    // Update mapping relation
                    statement.addBatch("UPDATE " + mappingRelName + " SET cid = '" + info.c
                        + "' WHERE cid IN ('" + info.c1 + "','" + info.c2 + "')");
                    merged ++;
                }
                // Delete worlds not satisfying the dependency
                if (j > i)
                {
                    sb1.append(" OR ");
                }
                sb1.append(generateDelete(info));
            }
            // Perform merging of components
            statement.executeBatch();
            // Create table of invalid worlds
            statement.clearBatch();
            //System.out.println(sb1.toString());
            statement.executeUpdate(sb1.toString());
            String delete = "DELETE FROM " + compRelName
                + " WHERE oid IN (SELECT c.oid FROM Invalid NATURAL JOIN " + compRelName + " AS c);";
            statement.executeUpdate(delete);
            // Drop table of invalid worlds
            statement.executeUpdate("DROP TABLE Invalid;");
            i += (n - i);
        }
    }
    /**
     * Merges two given components into one and returns the name of the new component.
     * @param statement The statement used to perform the updates.
     * @param cid1 Name of the first component to merge.
     * @param cid2 Name of the second component to merge.
     * @param hid1 ID of a hole in the first component.
     * @param hid2 ID of a hole in the second component.
     * @return Name of the new component.
     */
    private String mergeComponents(Statement statement, String cid1, String cid2, String hid1, String hid2)
        throws SQLException
    {
        System.out.println("Merging " + cid1 + " and " + cid2);
        // c1 and c2 were already merged or are the same component
        // (merged component names are "AxB" concatenations, so a substring
        // match detects an earlier merge involving the other component).
        if (cid1.indexOf(cid2) != -1 || cid2.indexOf(cid1) != -1)
        {
            System.out.println("Already merged!");
            return cid1;
        }
        String comp = cid1 + "x" + cid2;
        //System.out.println("done");
        StringBuffer sb = new StringBuffer("INSERT INTO " + compRelName + " ");
        sb.append("SELECT '" + comp + "',c1.hid,c1.wid || c2.wid,c1.Value FROM " + compRelName + " c1," + compRelName + " c2 ");
        sb.append("WHERE c1.cid = '" + cid1 + "' AND c2.cid = '" + cid2 + "' AND c2.hid = '" + hid2 + "'");
        statement.executeUpdate(sb.toString());
        sb.delete(0, sb.length());
        sb.append("INSERT INTO " + compRelName + " ");
        sb.append("SELECT '" + comp + "',c2.hid,c1.wid || c2.wid,c2.Value FROM " + compRelName + " c1," + compRelName + " c2 ");
        sb.append("WHERE c1.cid = '" + cid1 + "' AND c2.cid = '" + cid2 + "' AND c1.hid = '" + hid1 + "'");
        statement.executeUpdate(sb.toString());
        // Update mapping and components tables
        //ArrayList<String> oldCompIDs = new ArrayList<String>();
        ArrayList oldCompIDs = new ArrayList();
        oldCompIDs.add(cid1);
        oldCompIDs.add(cid2);
        updateTables(statement, oldCompIDs, comp);
        merged++;
        return comp;
    }
    /**
     * Builds a SELECT over the template relation returning the TID and the
     * dependency's columns for every tuple that may violate the conjunction,
     * either directly or through a hole whose component contains a violating
     * value. (Currently unused; applyEQD calls project_select instead.)
     * @param terms The conjunction of terms to test against.
     * @return The SQL SELECT statement as a string.
     */
    private String constructStatement(ArrayList terms)
    {
        String result;
        StringBuffer selClause = new StringBuffer("SELECT TID, ");
        StringBuffer cond = new StringBuffer(" WHERE ");
        for (int i = 0; i < terms.size(); ++i)
        {
            Term t = (Term) terms.get(i);
            if (i > 0)
            {
                selClause.append(",");
                cond.append(" AND ");
            }
            selClause.append(t.left);
            cond.append("(" + t.left + " " + t.op + " " + t.right + " OR ");
            cond.append("(" + t.left + " IS NULL AND EXISTS (");
            cond.append(" SELECT '1' FROM " + mappingRelName + " f," + compRelName + " c ");
            cond.append(" WHERE r.tid = f.tid AND f.col = '" + t.left + "' AND f.hid = c.hid");
            cond.append(" AND c.value " + t.op + " " + t.right);
            cond.append(")))");
        }
        result = selClause.toString() + " FROM " + templateRelName + " r " + cond;
        return result;
    }
    /**
     * Chases all given dependencies once, dispatching on the dependency type.
     * @param aDependencies The dependencies (FDependency/EQDependency) to enforce.
     * @return False as soon as one dependency reports inconsistency, true otherwise.
     */
    public boolean chase(ArrayList aDependencies)
    {
        while(true)
        {
            for (int i = 0; i < aDependencies.size(); ++i)
            {
                Dependency d = (Dependency) aDependencies.get(i);
                System.out.println("Chasing " + d.toString());
                // Functional dependency
                if (d.type == 0)
                {
                    if (applyFD((FDependency) d) == false)
                    {
                        return false;
                    }
                }
                // Equality-generating dependency
                else if (d.type == 1)
                {
                    if (applyEQD((EQDependency) d) == false)
                    {
                        return false;
                    }
                }
            }
            // TODO: Only break if nothing was changed in the last iteration
            break;
        }
        return true;
    }
    /**
     * Retrieves the IDs of all component and for each component
     * one of the holes defined in it.
     * NOTE(review): assumes the mapping relation has a 'Rel' column naming the
     * template relation each entry belongs to — confirm against the schema.
     * @param statement Statement used to perform the query.
     * @return HashMap of (component ID, hole ID) pairs.
     * @throws SQLException
     */
    private HashMap getCompIDs(Statement statement) throws SQLException
    {
        HashMap result = new HashMap();
        String sql = "SELECT cid,hid FROM " + mappingRelName + " WHERE Rel = '"
            + templateRelName + "';";
        ResultSet rs = statement.executeQuery(sql);
        while (rs.next())
        {
            result.put(rs.getString("cid"), rs.getString("hid"));
        }
        return result;
    }
    /**
     * Encapsulates the information needed to delete invalid worlds, such as component ID,
     * hole ID and a condition that should be fulfilled in all worlds.
     * @author Lublena
     *
     */
    class InvalidInfo
    {
        public String cid;
        public String hid;
        public Term t;
        public InvalidInfo(String aCid, String aHid, Term aTerm)
        {
            cid = aCid;
            hid = aHid;
            t = aTerm;
        }
        // Convenience overload for raw-ArrayList callers (pre-generics code).
        public InvalidInfo(Object aCid, Object aHid, Object aTerm)
        {
            cid = aCid.toString();
            hid = aHid.toString();
            t = (Term) aTerm;
        }
    }
    /**
     * Encapsulates the information needed to merge two components and
     * delete invalid worlds with respect to a conjunctive formula of two conditions.
     * @author Lublena
     */
    class InvalidInfoPair
    {
        String c;   // name of the merged component
        String c1;  // first source component
        String c2;  // second source component
        String h1;  // hole in c1
        String h2;  // hole in c2
        Term t1;    // violating condition on h1
        Term t2;    // violating condition on h2
        public InvalidInfoPair(String c, String c1, String c2, String h1, String h2, Term t1, Term t2)
        {
            this.c = c;
            this.c1 = c1;
            this.c2 = c2;
            this.h1 = h1;
            this.h2 = h2;
            this.t1 = t1;
            this.t2 = t2;
        }
    }
}

View File

@ -0,0 +1,13 @@
/**
 * Base class for the dependencies enforced during the chase.
 * Concrete subclasses are FDependency (functional dependencies) and
 * EQDependency (equality-generating dependencies).
 *
 * @author Lublena
 *
 */
public abstract class Dependency
{
    // Kind discriminator used by Chaser.chase() to downcast safely.
    public int type; // 0 = fd; 1 = eqd
    // Subclasses must provide a readable rendering of the dependency.
    public abstract String toString();
}

View File

@ -0,0 +1,125 @@
import java.util.ArrayList;
/**
 * An equality-generating dependency: a conjunction of comparison terms on the
 * left-hand (if) side that implies a single comparison term on the right-hand
 * (then) side.
 *
 * @author Lublena
 *
 */
public class EQDependency extends Dependency
{
    // Terms forming the if-part (conjunction) of the dependency.
    ArrayList left;
    // Terms forming the then-part; only the first entry is ever used.
    ArrayList right;

    public EQDependency()
    {
        type = 1; // marks this dependency as equality-generating
        left = new ArrayList();
        right = new ArrayList();
    }

    /**
     * Renders the dependency as "t1 and t2 ... => r".
     * @return the textual form, or null when either side is still empty
     */
    public String toString()
    {
        if (left.isEmpty() || right.isEmpty())
        {
            return null;
        }
        StringBuffer text = new StringBuffer();
        for (int pos = 0; pos < left.size(); ++pos)
        {
            if (pos > 0)
            {
                text.append(" and ");
            }
            text.append(left.get(pos).toString());
        }
        text.append(" => " + right.get(0).toString());
        return text.toString();
    }

    /*
     * Returns an ArrayList of the column names appearing in the dependency:
     * the left-hand column of every if-term, then that of the first then-term.
     */
    public ArrayList getColumns()
    {
        ArrayList names = new ArrayList();
        int count = left.size();
        for (int pos = 0; pos < count; ++pos)
        {
            names.add(((Term) left.get(pos)).left);
        }
        names.add(((Term) right.get(0)).left);
        return names;
    }

    /*
     * Returns an ArrayList of all terms in the dependency, if-part first.
     */
    public ArrayList getTerms()
    {
        ArrayList all = new ArrayList();
        all.addAll(left);
        all.addAll(right);
        return all;
    }

    /*
     * Returns the if-part terms followed by the first then-term with its
     * comparison operator negated (describes tuples VIOLATING the dependency).
     */
    public ArrayList getRTerms()
    {
        ArrayList all = new ArrayList();
        all.addAll(left);
        Term head = (Term) right.get(0);
        all.add(new Term(head.left, reverseOp(head.op), head.right));
        return all;
    }

    /**
     * @param op The comparison operator to reverse.
     * @return The logical complement of the operator, or null when the
     *         operator is not recognized.
     */
    private String reverseOp(String op)
    {
        // Table-driven negation; "!=" and "<>" both negate to "=".
        String[] known   = { "=",  "!=", "<>", "<",  "<=", ">",  ">=" };
        String[] negated = { "!=", "=",  "=",  ">=", ">",  "<=", "<"  };
        for (int pos = 0; pos < known.length; ++pos)
        {
            if (op.equals(known[pos]))
            {
                return negated[pos];
            }
        }
        return null;
    }
}

View File

@ -0,0 +1,51 @@
import java.util.ArrayList;
/**
 * A functional dependency: a list of determining column names on the
 * left-hand side and a single determined column name on the right-hand side.
 *
 * @author Lublena
 *
 */
public class FDependency extends Dependency
{
    // Determining column names (the left-hand side).
    ArrayList left;
    // Determined column name; only the first entry is ever used.
    ArrayList right;

    public FDependency()
    {
        type = 0; // marks this dependency as functional
        left = new ArrayList();
        right = new ArrayList();
    }

    /**
     * Renders the dependency as "a, b, ... -> c".
     * @return the textual form, or null when either side is still empty
     */
    public String toString()
    {
        if (left.isEmpty() || right.isEmpty())
        {
            return null;
        }
        StringBuffer text = new StringBuffer();
        for (int pos = 0; pos < left.size(); ++pos)
        {
            if (pos > 0)
            {
                text.append(", ");
            }
            text.append(left.get(pos).toString());
        }
        text.append(" -> " + right.get(0).toString());
        return text.toString();
    }
}

View File

@ -0,0 +1,58 @@
public class Main {
    /**
     * Command-line entry point of the data-cleaning tool: loads the settings
     * file, then chases the configured dependencies over the given relations.
     *
     * @param args template relation name, mapping relation name,
     *             components relation name, settings file path
     */
    public static void main(String[] args)
    {
        if (args.length != 4)
        {
            System.out.println("Please specify table names and settings file!");
            return;
        }
        long totalStart = System.currentTimeMillis();
        // Positional arguments: the three relation names, then the settings file.
        String templateRelation = args[0];
        String mappingRelation = args[1];
        String componentsRelation = args[2];
        String settingsFile = args[3];
        Settings config = new Settings(settingsFile);
        System.out.print("Loading settings...");
        if (!config.loadSettings())
        {
            System.out.println("The specified file could not be loaded.");
            return;
        }
        System.out.println("done");
        System.out.println("Cleaning data...");
        // No world relation is used here; the Chaser receives an empty name.
        Chaser chaser = new Chaser(config.getDBSetting("dbname"),
                                   config.getDBSetting("username"),
                                   config.getDBSetting("password"),
                                   templateRelation,
                                   mappingRelation,
                                   componentsRelation,
                                   "");
        long chaseStart = System.currentTimeMillis();
        chaser.chase(config.dependencies);
        System.out.println(chaser.merged + " components were merged during the chase.");
        long finished = System.currentTimeMillis();
        // Report both the chase-only and the end-to-end wall-clock time.
        System.out.println("Operation completed in " + ((double)(finished - chaseStart) / 1000.0) + " seconds.");
        System.out.println("(total: " + ((double)(finished - totalStart) / 1000.0) + " seconds)");
    }
}

View File

@ -0,0 +1,283 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Loads the tool's configuration from an XML settings file: database
 * connection parameters (under the &lt;db&gt; element) and the dependencies
 * to chase (one &lt;rule&gt; element each).
 *
 * @author Lublena
 *
 */
public class Settings
{
    // Path of the XML settings file to parse.
    private String fileName;
    //private HashMap<String, String> dbSettings;
    //ArrayList<Dependency> dependencies;
    // Database connection parameters keyed by element name (dbname, username, password).
    private HashMap dbSettings;
    // Loaded FDependency/EQDependency objects, read directly by Main.
    ArrayList dependencies;
    Settings(String aFileName)
    {
        fileName = aFileName;
        //dbSettings = new HashMap<String, String>();
        //dependencies = new ArrayList<Dependency>();
        dbSettings = new HashMap();
        dependencies = new ArrayList();
    }
    /**
     * Parses the settings file and populates dbSettings and dependencies.
     * @return false on any parse/IO error, when there is not exactly one
     * db element, or when no rule elements are present; true otherwise.
     */
    public boolean loadSettings()
    {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db;
        Document doc;
        try
        {
            db = dbf.newDocumentBuilder();
        }
        catch (ParserConfigurationException e)
        {
            return false;
        }
        try
        {
            doc = db.parse(fileName);
        }
        catch (IOException e)
        {
            // System.out.println("File not found!");
            return false;
        }
        catch (SAXException e)
        {
            // System.out.println("Error parsing file!");
            return false;
        }
        // NOTE: this local NodeList shadows the dbSettings field of the same name.
        NodeList dbSettings = doc.getElementsByTagName("db");
        if (dbSettings.getLength() == 1)
        {
            loadDBSettings(dbSettings.item(0));
        }
        else
        {
            return false;
        }
        // NOTE: likewise shadows the dependencies field.
        NodeList dependencies = doc.getElementsByTagName("rule");
        if (dependencies.getLength() > 0)
        {
            loadDependencies(dependencies);
        }
        else
        {
            return false;
        }
        return true;
    }
    /**
     * Stores every child element of the db node as a (name, text) pair in
     * the dbSettings map.
     * @param aRoot The db element.
     * @return Always true.
     */
    private boolean loadDBSettings(Node aRoot)
    {
        NodeList children = aRoot.getChildNodes();
        for (int i = 0; i < children.getLength(); ++i)
        {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE)
            {
                // Skip whitespace/text nodes between elements.
                continue;
            }
            dbSettings.put(child.getNodeName(), child.getFirstChild().getNodeValue());
        }
        return true;
    }
    /**
     * Loads every rule element into the dependencies list.
     * @param aRules The list of rule elements.
     * @return Always true.
     */
    private boolean loadDependencies(NodeList aRules)
    {
        for (int i = 0; i < aRules.getLength(); ++i)
        {
            if (aRules.item(i).getNodeType() == Node.ELEMENT_NODE)
            {
                loadDep(aRules.item(i));
            }
        }
        return true;
    }
    /**
     * Parses a single rule element into an FDependency or EQDependency and
     * appends it to the dependencies list. The child elements are consumed
     * positionally by advancing the index i past interleaved text nodes, so
     * the element order inside the rule matters.
     * NOTE(review): a malformed rule (missing expected child elements) can
     * run the skip loops past the end of the node list.
     * @param aDep The rule element.
     * @return Always true (unknown rule types are silently ignored).
     */
    private boolean loadDep(Node aDep)
    {
        NodeList ruleProperties = aDep.getChildNodes();
        int i = 0;
        // Get the dependency's type
        while (ruleProperties.item(i).getNodeType() != Node.ELEMENT_NODE)
        {
            ++i;
        }
        String type = ruleProperties.item(i).getFirstChild().getNodeValue();
        i++;
        if (type.equals("fd"))
        {
            // System.out.println("Functional dependency!");
            FDependency fd = new FDependency();
            // Load properties of the dependency
            while (ruleProperties.item(i).getNodeType() != Node.ELEMENT_NODE)
            {
                ++i;
            }
            // System.out.println(ruleProperties.item(i).getNodeName());
            // Get the variables on the left-hand side of the dependency
            NodeList leftVars = ruleProperties.item(i).getChildNodes();
            // System.out.println("Leftvars: " + leftVars.getLength());
            for (int j = 0; j < leftVars.getLength(); ++j)
            {
                if (leftVars.item(j).getNodeType() != Node.ELEMENT_NODE)
                {
                    // System.out.println("Skipping");
                    continue;
                }
                // System.out.println(leftVars.item(j).getNodeName());
                String varName = leftVars.item(j).getFirstChild().getNodeValue();
                // System.out.println(varName);
                fd.left.add(varName);
            }
            ++i;
            // Skip text nodes
            while (ruleProperties.item(i).getNodeType() != Node.ELEMENT_NODE)
            {
                ++i;
            }
            Node rightVar = ruleProperties.item(i);
            String rightVarName = rightVar.getFirstChild().getNodeValue();
            fd.right.add(rightVarName);
            dependencies.add(fd);
        }
        else if (type.equals("eq"))
        {
            // System.out.println("Equality-generating dependency!");
            EQDependency eqd = new EQDependency();
            // TODO: load properties of the dependency
            // Skip text nodes
            while (ruleProperties.item(i).getNodeType() != Node.ELEMENT_NODE)
            {
                ++i;
            }
            // Load if-part of dependency
            Node ifNode = ruleProperties.item(i);
            // System.out.println(ifNode.getNodeName());
            ++i;
            NodeList ifTerms = ifNode.getChildNodes();
            for (int j = 0; j < ifTerms.getLength(); ++j)
            {
                if (ifTerms.item(j).getNodeType() != Node.ELEMENT_NODE)
                {
                    continue;
                }
                Term term = loadTerm(ifTerms.item(j));
                // System.out.println(term.toString());
                eqd.left.add(term);
            }
            // Skip text nodes
            while (ruleProperties.item(i).getNodeType() != Node.ELEMENT_NODE)
            {
                ++i;
            }
            // Load then-part of dependency
            Node thenNode = ruleProperties.item(i);
            // System.out.println(thenNode.getNodeName());
            NodeList thenTerms = thenNode.getChildNodes();
            for (int j = 0; j < thenTerms.getLength(); ++j)
            {
                if (thenTerms.item(j).getNodeType() != Node.ELEMENT_NODE)
                {
                    continue;
                }
                Term term = loadTerm(thenTerms.item(j));
                // System.out.println(term.toString());
                eqd.right.add(term);
            }
            dependencies.add(eqd);
        }
        return true;
    }
    /*
     * Parses a term element with left/op/right child elements into a Term.
     * Returns null when any of the three parts is missing. Note that any
     * unrecognized child element is treated as the right operand.
     */
    private Term loadTerm(Node aTermNode)
    {
        String left = null;
        String op = null;
        String right = null;
        NodeList termProperties = aTermNode.getChildNodes();
        for (int i = 0; i < termProperties.getLength(); ++i)
        {
            if (termProperties.item(i).getNodeType() != Node.ELEMENT_NODE)
            {
                continue;
            }
            if (termProperties.item(i).getNodeName().equals("left"))
            {
                left = termProperties.item(i).getFirstChild().getNodeValue();
                // System.out.println(left);
            }
            else if (termProperties.item(i).getNodeName().equals("op"))
            {
                op = termProperties.item(i).getFirstChild().getNodeValue();
                // System.out.println(op);
            }
            else // (termProperties.item(i).getNodeName().equals("right"))
            {
                right = termProperties.item(i).getFirstChild().getNodeValue();
                // System.out.println(right);
            }
        }
        if (left == null || op == null || right == null)
        {
            return null;
        }
        Term result = new Term(left, op, right);
        return result;
    }
    /**
     * Looks up a database connection parameter loaded from the db element.
     * @param aParameter The parameter name, e.g. "dbname".
     * @return The configured value, or null when the parameter is absent.
     */
    public String getDBSetting(String aParameter)
    {
        if (dbSettings.containsKey(aParameter))
        {
            return dbSettings.get(aParameter).toString();
        }
        return null;
    }
}

View File

@ -0,0 +1,26 @@
/**
 * A single comparison term of the form "left op right" as used by the
 * dependency classes; left is typically a column name.
 *
 * @author Lublena
 *
 */
public class Term
{
    public String left;   // left operand (column name)
    public String op;     // comparison operator, e.g. "=", "<", "!="
    public String right;  // right operand (literal or column)

    public Term(String aLeft, String aOp, String aRight)
    {
        this.left = aLeft;
        this.op = aOp;
        this.right = aRight;
    }

    /** @return the term rendered as "left op right", space separated. */
    public String toString()
    {
        StringBuffer text = new StringBuffer();
        text.append(left).append(' ').append(op).append(' ').append(right);
        return text.toString();
    }
}

BIN
census/DataNoise/jdbc3.jar Normal file

Binary file not shown.

View File

@ -0,0 +1,774 @@
<settings>
<db>
<dbname>postgres</dbname>
<username>lantova</username>
<password>abcd</password>
</db>
<noise>
<maxholespertuple>4</maxholespertuple>
<maxholesize>8</maxholesize>
</noise>
<vars>
<var><name>ABIRTHPL</name><value>0</value><value>1</value></var>
<var>
<name>CITIZEN</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
<value>4</value>
</var>
<var>
<name>CLASS</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
<value>4</value>
<value>5</value>
<value>6</value>
<value>7</value>
<value>8</value>
<value>9</value>
</var>
<var>
<name>ENGLISH</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
<value>4</value>
</var>
<var><name>FEB55</name><value>0</value><value>1</value></var>
<var>
<name>FERTIL</name>
<value>00</value>
<value>01</value>
<value>02</value>
<value>03</value>
<value>04</value>
<value>05</value>
<value>06</value>
<value>07</value>
<value>08</value>
<value>09</value>
<value>10</value>
<value>11</value>
<value>12</value>
<value>13</value>
</var>
<var>
<name>HISPANIC</name>
<value>000</value>
<value>001</value>
<value>002</value>
<value>003</value>
<value>004</value>
</var>
<var>
<name>IMMIGR</name>
<value>00</value>
<value>01</value>
<value>02</value>
<value>03</value>
<value>04</value>
<value>05</value>
<value>06</value>
<value>07</value>
<value>08</value>
<value>09</value>
<value>10</value>
</var>
<var><name>KOREAN</name><value>0</value><value>1</value></var>
<var>
<name>LANG1</name>
<value>0</value>
<value>1</value>
<value>2</value>
</var>
<var>
<name>MARITAL</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
<value>4</value>
</var>
<var><name>MAY75880</name><value>0</value><value>1</value></var>
<var>
<name>MEANS</name>
<value>00</value>
<value>01</value>
<value>02</value>
<value>03</value>
<value>04</value>
<value>05</value>
<value>06</value>
<value>07</value>
<value>08</value>
<value>09</value>
<value>10</value>
<value>11</value>
<value>12</value>
</var>
<var>
<name>MIGPUMA</name>
<value>00000</value>
<value>99900</value>
</var>
<var>
<name>MIGSTATE</name>
<value>00</value>
<value>01</value>
<value>02</value>
<value>04</value>
<value>05</value>
<value>06</value>
<value>08</value>
<value>09</value>
<value>10</value>
<value>11</value>
<value>12</value>
<value>13</value>
<value>15</value>
<value>16</value>
<value>17</value>
<value>18</value>
<value>19</value>
<value>20</value>
<value>21</value>
<value>22</value>
<value>23</value>
<value>24</value>
<value>25</value>
<value>26</value>
<value>27</value>
<value>28</value>
<value>29</value>
<value>30</value>
<value>31</value>
<value>32</value>
<value>33</value>
<value>34</value>
<value>35</value>
<value>36</value>
<value>37</value>
<value>38</value>
<value>39</value>
<value>40</value>
<value>41</value>
<value>42</value>
<value>44</value>
<value>45</value>
<value>46</value>
<value>47</value>
<value>48</value>
<value>49</value>
<value>50</value>
<value>51</value>
<value>53</value>
<value>54</value>
<value>55</value>
<value>56</value>
<value>72</value>
<value>98</value>
<value>99</value>
</var>
<var>
<name>MILITARY</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
<value>4</value>
</var>
<var><name>OCCUP</name><value>000</value></var>
<var><name>OTHRSERV</name><value>0</value><value>1</value></var>
<var>
<name>POB</name>
<value>001</value>
<value>002</value>
<value>004</value>
<value>005</value>
<value>006</value>
<value>008</value>
<value>009</value>
<value>010</value>
<value>011</value>
<value>012</value>
<value>013</value>
<value>015</value>
<value>016</value>
<value>017</value>
<value>018</value>
<value>019</value>
<value>020</value>
<value>021</value>
<value>022</value>
<value>023</value>
<value>024</value>
<value>025</value>
<value>026</value>
<value>027</value>
<value>028</value>
<value>029</value>
<value>030</value>
<value>031</value>
<value>032</value>
<value>033</value>
<value>034</value>
<value>035</value>
<value>036</value>
<value>037</value>
<value>038</value>
<value>039</value>
<value>040</value>
<value>041</value>
<value>042</value>
<value>044</value>
<value>045</value>
<value>046</value>
<value>047</value>
<value>048</value>
<value>049</value>
<value>050</value>
<value>051</value>
<value>053</value>
<value>054</value>
<value>055</value>
<value>056</value>
<value>060</value>
<value>066</value>
<value>067</value>
<value>069</value>
<value>071</value>
<value>072</value>
<value>076</value>
<value>078</value>
<value>079</value>
<value>081</value>
<value>084</value>
<value>086</value>
<value>089</value>
<value>095</value>
<value>096</value>
<value>100</value>
<value>101</value>
<value>102</value>
<value>103</value>
<value>104</value>
<value>105</value>
<value>106</value>
<value>107</value>
<value>108</value>
<value>109</value>
<value>110</value>
<value>111</value>
<value>112</value>
<value>113</value>
<value>114</value>
<value>115</value>
<value>116</value>
<value>117</value>
<value>118</value>
<value>119</value>
<value>120</value>
<value>121</value>
<value>122</value>
<value>123</value>
<value>124</value>
<value>125</value>
<value>126</value>
<value>127</value>
<value>128</value>
<value>129</value>
<value>130</value>
<value>131</value>
<value>132</value>
<value>133</value>
<value>134</value>
<value>135</value>
<value>136</value>
<value>137</value>
<value>138</value>
<value>139</value>
<value>140</value>
<value>141</value>
<value>142</value>
<value>143</value>
<value>144</value>
<value>145</value>
<value>146</value>
<value>147</value>
<value>148</value>
<value>149</value>
<value>150</value>
<value>151</value>
<value>152</value>
<value>153</value>
<value>154</value>
<value>180</value>
<value>181</value>
<value>182</value>
<value>183</value>
<value>184</value>
<value>200</value>
<value>201</value>
<value>202</value>
<value>203</value>
<value>204</value>
<value>205</value>
<value>206</value>
<value>207</value>
<value>208</value>
<value>209</value>
<value>210</value>
<value>211</value>
<value>212</value>
<value>213</value>
<value>214</value>
<value>215</value>
<value>216</value>
<value>217</value>
<value>218</value>
<value>219</value>
<value>220</value>
<value>221</value>
<value>222</value>
<value>223</value>
<value>224</value>
<value>225</value>
<value>226</value>
<value>227</value>
<value>228</value>
<value>229</value>
<value>230</value>
<value>231</value>
<value>232</value>
<value>233</value>
<value>234</value>
<value>235</value>
<value>236</value>
<value>237</value>
<value>238</value>
<value>239</value>
<value>240</value>
<value>241</value>
<value>242</value>
<value>243</value>
<value>244</value>
<value>245</value>
<value>246</value>
<value>247</value>
<value>248</value>
<value>249</value>
<value>250</value>
<value>251</value>
<value>252</value>
<value>253</value>
<value>254</value>
<value>255</value>
<value>256</value>
<value>300</value>
<value>301</value>
<value>302</value>
<value>303</value>
<value>304</value>
<value>310</value>
<value>311</value>
<value>312</value>
<value>313</value>
<value>314</value>
<value>315</value>
<value>316</value>
<value>317</value>
<value>318</value>
<value>330</value>
<value>331</value>
<value>332</value>
<value>333</value>
<value>334</value>
<value>335</value>
<value>336</value>
<value>337</value>
<value>338</value>
<value>339</value>
<value>340</value>
<value>341</value>
<value>342</value>
<value>343</value>
<value>344</value>
<value>345</value>
<value>346</value>
<value>347</value>
<value>348</value>
<value>349</value>
<value>350</value>
<value>351</value>
<value>352</value>
<value>353</value>
<value>354</value>
<value>355</value>
<value>356</value>
<value>357</value>
<value>358</value>
<value>359</value>
<value>375</value>
<value>376</value>
<value>377</value>
<value>378</value>
<value>379</value>
<value>380</value>
<value>381</value>
<value>382</value>
<value>383</value>
<value>384</value>
<value>385</value>
<value>386</value>
<value>387</value>
<value>388</value>
<value>389</value>
<value>400</value>
<value>401</value>
<value>402</value>
<value>403</value>
<value>404</value>
<value>405</value>
<value>406</value>
<value>407</value>
<value>408</value>
<value>409</value>
<value>410</value>
<value>411</value>
<value>412</value>
<value>413</value>
<value>414</value>
<value>415</value>
<value>416</value>
<value>417</value>
<value>418</value>
<value>419</value>
<value>420</value>
<value>421</value>
<value>422</value>
<value>423</value>
<value>424</value>
<value>425</value>
<value>426</value>
<value>427</value>
<value>428</value>
<value>429</value>
<value>430</value>
<value>431</value>
<value>432</value>
<value>433</value>
<value>434</value>
<value>435</value>
<value>436</value>
<value>437</value>
<value>438</value>
<value>439</value>
<value>440</value>
<value>441</value>
<value>442</value>
<value>443</value>
<value>444</value>
<value>445</value>
<value>446</value>
<value>447</value>
<value>448</value>
<value>449</value>
<value>450</value>
<value>451</value>
<value>452</value>
<value>453</value>
<value>454</value>
<value>455</value>
<value>456</value>
<value>457</value>
<value>458</value>
<value>459</value>
<value>460</value>
<value>461</value>
<value>462</value>
<value>463</value>
<value>464</value>
<value>465</value>
<value>466</value>
<value>467</value>
<value>468</value>
<value>469</value>
<value>470</value>
<value>500</value>
<value>501</value>
<value>502</value>
<value>503</value>
<value>504</value>
<value>505</value>
<value>506</value>
<value>507</value>
<value>508</value>
<value>509</value>
<value>510</value>
<value>511</value>
<value>512</value>
<value>513</value>
<value>514</value>
<value>515</value>
<value>516</value>
<value>517</value>
<value>518</value>
<value>519</value>
<value>520</value>
<value>521</value>
<value>522</value>
<value>523</value>
<value>524</value>
<value>525</value>
<value>526</value>
<value>527</value>
<value>528</value>
<value>529</value>
<value>550</value>
<value>551</value>
<value>552</value>
<value>553</value>
<value>554</value>
<value>555</value>
</var>
<var>
<name>POWSTATE</name>
<value>00</value>
<value>01</value>
<value>02</value>
<value>04</value>
<value>05</value>
<value>06</value>
<value>08</value>
<value>09</value>
<value>10</value>
<value>11</value>
<value>12</value>
<value>13</value>
<value>15</value>
<value>16</value>
<value>17</value>
<value>18</value>
<value>19</value>
<value>20</value>
<value>21</value>
<value>22</value>
<value>23</value>
<value>24</value>
<value>25</value>
<value>26</value>
<value>27</value>
<value>28</value>
<value>29</value>
<value>30</value>
<value>31</value>
<value>32</value>
<value>33</value>
<value>34</value>
<value>35</value>
<value>36</value>
<value>37</value>
<value>38</value>
<value>39</value>
<value>40</value>
<value>41</value>
<value>42</value>
<value>44</value>
<value>45</value>
<value>46</value>
<value>47</value>
<value>48</value>
<value>49</value>
<value>50</value>
<value>51</value>
<value>53</value>
<value>54</value>
<value>55</value>
<value>56</value>
<value>98</value>
<value>99</value>
</var>
<var>
<name>RACE</name>
<value>001</value>
<value>002</value>
<value>004</value>
<value>005</value>
<value>006</value>
<value>007</value>
<value>008</value>
<value>009</value>
<value>010</value>
<value>011</value>
<value>012</value>
<value>013</value>
<value>014</value>
<value>015</value>
<value>016</value>
<value>017</value>
<value>018</value>
<value>019</value>
<value>020</value>
<value>021</value>
<value>022</value>
<value>023</value>
<value>024</value>
<value>025</value>
<value>026</value>
<value>027</value>
<value>028</value>
<value>029</value>
<value>030</value>
<value>031</value>
<value>032</value>
<value>033</value>
<value>034</value>
<value>035</value>
<value>036</value>
<value>037</value>
<value>301</value>
<value>302</value>
<value>303</value>
<value>304</value>
<value>305</value>
<value>306</value>
<value>307</value>
<value>308</value>
<value>309</value>
<value>310</value>
<value>311</value>
<value>312</value>
<value>313</value>
<value>314</value>
<value>315</value>
<value>316</value>
<value>317</value>
<value>318</value>
<value>319</value>
<value>320</value>
<value>321</value>
<value>322</value>
<value>323</value>
<value>324</value>
<value>325</value>
<value>326</value>
<value>327</value>
</var>
<var>
<name>RAGECHLD</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
<value>4</value>
</var>
<var>
<name>RLABOR</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
<value>4</value>
<value>5</value>
<value>6</value>
</var>
<var><name>ROWNCHLD</name><value>0</value><value>1</value></var>
<var>
<name>RPOB</name>
<value>10</value>
<value>21</value>
<value>22</value>
<value>23</value>
<value>24</value>
<value>31</value>
<value>32</value>
<value>33</value>
<value>34</value>
<value>35</value>
<value>36</value>
<value>40</value>
<value>51</value>
<value>52</value>
</var>
<var>
<name>RSPOUSE</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
<value>4</value>
<value>5</value>
<value>6</value>
</var>
<var>
<name>RVETSERV</name>
<value>00</value>
<value>01</value>
<value>02</value>
<value>03</value>
<value>04</value>
<value>05</value>
<value>06</value>
<value>07</value>
<value>08</value>
<value>09</value>
<value>10</value>
<value>11</value>
</var>
<var>
<name>SCHOOL</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
</var>
<var><name>SEPT80</name><value>0</value><value>1</value></var>
<var><name>SEX</name><value>0</value><value>1</value></var>
<var><name>VIETNAM</name><value>0</value><value>1</value></var>
<var><name>WWII</name><value>0</value><value>1</value></var>
<var>
<name>YEARSCH</name>
<value>00</value>
<value>01</value>
<value>02</value>
<value>03</value>
<value>04</value>
<value>05</value>
<value>06</value>
<value>07</value>
<value>08</value>
<value>09</value>
<value>10</value>
<value>11</value>
<value>12</value>
<value>13</value>
<value>14</value>
<value>15</value>
<value>16</value>
<value>17</value>
</var>
<var>
<name>YEARWRK</name>
<value>0</value>
<value>1</value>
<value>2</value>
<value>3</value>
<value>4</value>
<value>5</value>
<value>6</value>
<value>7</value>
</var>
<var>
<name>YRSSERV</name>
<value>00</value>
<value>01</value>
<value>50</value>
</var>
</vars>
</settings>

View File

@ -0,0 +1,261 @@
import java.sql.*;
import java.util.ArrayList;
/**
 * Manages the PostgreSQL connection for the noise generator and produces the
 * mapping/component/world relations that encode the generated or-sets.
 *
 * NOTE(review): several executeUpdate calls are commented out below; in the
 * current state createTable/introduceNoise mostly PRINT the SQL / COPY data
 * to stdout instead of executing it — confirm whether the output is meant to
 * be piped into psql.
 */
public class DBConnector
{
// Schemas and attribute lists for the auxiliary relations
private String mappingSchema = "Relation text, TID int4, Col text, CID text, HID text";
private String compSchema = "CID text, HID text, WID text, Value int";
private String worldSchema = "CID text, WID text";
private String mappingAttrs = "Relation, TID, Col, CID, HID";
private String compAttrs = "CID, HID, WID, Value";
private String worldAttrs = "CID, WID";
// Global counter used to mint unique cid/hid identifiers across all holes
private static int id = 1;
private Connection dbConnection;
private Statement sqlStatement;
// Names of the four relations this connector works with
private String templateRelName;
private String mappingRelName;
private String compRelName;
private String worldRelName;
public double worldCount;
/**
 * Opens the connection and creates the mapping and component relations.
 */
public DBConnector(String aDatabase, String aUser, String aPassword,
String aTemplateRelName, String aMappingRelName, String aCompRelName,
String aWorldRelName)
{
templateRelName = aTemplateRelName;
mappingRelName = aMappingRelName;
compRelName = aCompRelName;
worldRelName = aWorldRelName;
worldCount = 1;
init(aDatabase, aUser, aPassword);
}
// Loads the JDBC driver, connects, and sets up the auxiliary relations.
// On failure only prints the error; the object stays in a broken state.
private void init(String aDatabase, String aUser, String aPassword)
{
try
{
Class.forName("org.postgresql.Driver"); //load the driver
String connString = "jdbc:postgresql:" + aDatabase;
//System.out.println("Connecting to " + connString);
dbConnection = DriverManager.getConnection(connString,
aUser, aPassword);
sqlStatement = dbConnection.createStatement();
// Create mapping relation
createTable(mappingRelName, mappingSchema);
// Create component relation
createTable(compRelName, compSchema);
// Create world relation
//createTable(worldRelName, worldSchema);
}
catch (SQLException e)
{
System.err.println("Could not connect to the database!");
e.printStackTrace();
return;
}
catch (ClassNotFoundException e)
{
System.err.println("Could not load database driver!");
return;
}
}
/** Closes the statement and connection, swallowing any SQL errors. */
public void closeConnection()
{
try
{
sqlStatement.close();
dbConnection.close();
}
catch (SQLException e)
{
// Ignore
return;
}
}
// Emits a CREATE TABLE statement. NOTE(review): the executeUpdate is
// commented out, so the statement is only printed to stdout.
private void createTable(String aName, String aSchema)
throws SQLException
{
String sql = "CREATE TABLE " + aName + " (" + aSchema + ");";
System.out.println(sql);
//sqlStatement.executeUpdate(sql);
}
/**
 * Reads one column of one tuple from aRelName.
 * Returns "-1" when the stored value is SQL NULL, and null when no tuple
 * with the given tid exists.
 */
private String getValue(String aRelName, int aTid, String aColumnName)
throws SQLException
{
String sql = "SELECT " + aColumnName + " FROM " + aRelName + " WHERE tid = " + aTid;
ResultSet rs = sqlStatement.executeQuery(sql);
if (rs.next())
{
if (rs.getObject(1) == null)
{
return "-1";
}
return rs.getString(1);
}
return null;
}
/**
 * Prepares one hole: appends an UPDATE for the template relation to
 * aRBuffer, a mapping-relation COPY row to aFBuffer, and the or-set values
 * to aCBuffer. Skips the hole when the field is missing or already NULL.
 */
private void setHole(int aTid, String aColumnName, ArrayList aHoleValues,
StringBuffer aRBuffer, StringBuffer aFBuffer, StringBuffer aCBuffer)
throws SQLException
{
// Get current value of the specified column
String value = getValue(templateRelName, aTid, aColumnName);
if (value == null || value.equals("-1"))
{
// //System.err.print("+");
return;
}
// Add the real value of the column if it is not among the generated values
if (!aHoleValues.contains(value))
{
aHoleValues.add(value);
}
// Create a hole in the template relation
String sql = "UPDATE " + templateRelName + " SET " + aColumnName + " = -1 " +
" WHERE tid = " + aTid + ";";
aRBuffer.append(sql);
// Insert an entry for the new hole in the mapping relation
String cid = "c" + id;
String hid = "h" + id;
String mappingEntry = templateRelName + "\t" + aTid + "\t" +
aColumnName + "\t" + cid + "\t" + hid + "\n";
//insertTuple(mappingRelName, mappingAttrs, mappingEntry, aFStatement);
aFBuffer.append(mappingEntry);
id++;
// Insert values for the new hole in the component relation
insertHoleValues(cid, hid, aColumnName, aHoleValues, aCBuffer);
}
// Appends one tab-separated COPY row per or-set value; world ids are
// w1, w2, ... in list order.
private void insertHoleValues(String aCid, String aHid, String aColumnName, ArrayList aValues, StringBuffer aBuffer)
throws SQLException
{
for (int i = 0; i < aValues.size(); ++i)
{
String wid = "w" + (i + 1);
String compEntry = aCid + "\t" + aHid + "\t"
+ wid + "\t" + aValues.get(i).toString() + "\n";
aBuffer.append(compEntry);
//insertTuple(compRelName, compAttrs, compEntry, aBuffer);
}
}
// Batches a plain INSERT on the given statement (currently unused by the
// COPY-based path above).
private void insertTuple(String aRelName, String aSchema, String aValues, Statement aStatement)
throws SQLException
{
String sql = "INSERT INTO " + aRelName + " (" + aSchema +
") VALUES (" + aValues + ")";
aStatement.addBatch(sql);
}
// Returns count(*) of the given relation, or 0 on any SQL error.
private int getRelSize(String aRelName)
{
ResultSet rs;
try
{
rs = sqlStatement.executeQuery("SELECT count(*) from " + aRelName);
}
catch (SQLException e)
{
System.err.println("Could not execute statement!");
return 0;
}
try
{
rs.next();
return (new Integer(rs.getString(1))).intValue();
}
catch (SQLException e)
{
System.err.println("Error getting the result!");
return 0;
}
}
/** @return the number of tuples in the template relation (0 on error). */
public int getTemplateRelSize()
{
return getRelSize(templateRelName);
}
/**
 * Processes the given holes in batches of 1000: for each batch prints the
 * UPDATE statements and the COPY blocks for the mapping and component
 * relations to stdout, then clears the buffers.
 */
public void introduceNoise(ArrayList aHoles)
throws SQLException
{
StringBuffer rBuffer = new StringBuffer();
StringBuffer fBuffer = new StringBuffer();
StringBuffer cBuffer = new StringBuffer();
int size = aHoles.size();
int step = 1000;
//System.err.println(size);
int i = 0;
for (; i < size;)
{
fBuffer.append("COPY " + mappingRelName + "(" + mappingAttrs + ") FROM stdin;\n");
cBuffer.append("COPY " + compRelName + " FROM stdin;\n");
int n = i + step;
if (n > size)
{
n = size;
}
int j = i;
for (; j < n; ++j)
{
Hole holeInfo = (Hole) aHoles.get(j);
setHole(holeInfo.tid, holeInfo.columnName, holeInfo.values, rBuffer, fBuffer, cBuffer);
}
//System.err.print(j + " ");
// "\." terminates a COPY ... FROM stdin block
fBuffer.append("\\.");
cBuffer.append("\\.");
System.out.println(rBuffer.toString());
System.out.flush();
System.out.println(fBuffer.toString());
System.out.flush();
System.out.println(cBuffer.toString());
System.out.flush();
//sqlStatement.executeUpdate(fBuffer.toString());
//sqlStatement.executeUpdate(cBuffer.toString());
rBuffer.delete(0, rBuffer.length());
fBuffer.delete(0, fBuffer.length());
cBuffer.delete(0, cBuffer.length());
i += step;
}
//System.err.println(i);
// rBuffer.close();
// fStatement.close();
// cStatement.close();
}
/**
 * Prints the statement that derives the world relation from the component
 * relation. NOTE(review): it is printed, not executed.
 */
public void createWorldTable() throws SQLException
{
String sql = "CREATE TABLE " + worldRelName + " AS SELECT DISTINCT cid, wid FROM " + compRelName + ";";
System.out.println(sql);
}
}

View File

@ -0,0 +1,8 @@
import java.util.ArrayList;
/**
 * Describes one "hole" (or-set) to introduce into the template relation:
 * the tuple it affects, the column, and the candidate values.
 */
public class Hole
{
// ID of the tuple receiving the hole
public int tid;
// Name of the column replaced by an or-set
public String columnName;
// Candidate values for the or-set (list of String)
public ArrayList values;
}

View File

@ -0,0 +1,106 @@
import java.sql.SQLException;
import java.util.ArrayList;
/**
 * Command-line entry point for the noise generator.
 *
 * Usage: Main templateRel mappingRel compRel relSize arity density settingsFile
 *
 * @author Lublena
 */
public class Main {
/**
 * @param args template/mapping/component relation names, relation size,
 *             arity, noise density, and the settings XML file name
 */
public static void main(String[] args) {
if (args.length != 7)
{
System.err.println("Please specify table names and settings file!");
return;
}
String templateName = args[0];
String mappingName = args[1];
String compName = args[2];
// parseInt/parseDouble replace the deprecated new Integer(...)/new
// Double(...) boxing constructors, and bad input now gets a clear
// message instead of an uncaught NumberFormatException.
int relSize;
int arity;
double density;
try
{
relSize = Integer.parseInt(args[3]);
arity = Integer.parseInt(args[4]);
density = Double.parseDouble(args[5]);
}
catch (NumberFormatException e)
{
System.err.println("Size, arity and density must be numeric!");
return;
}
String fileName = args[6];
Settings settings = new Settings(fileName);
if (settings.loadSettings() == false)
{
System.err.println("The specified file could not be loaded.");
return;
}
// Init database connection. (The old "db == null" check after new was
// dead code: a constructor can never return null.)
DBConnector db = new DBConnector(settings.getDBSetting("dbname"),
settings.getDBSetting("username"), settings.getDBSetting("password"),
templateName, mappingName,
compName,
settings.getDBSetting("worldrel"));
// Choose the number of noisy tuples so that the expected number of
// noisy fields matches the requested density.
int maxHolesPerTuple = Integer.parseInt(settings.getNoiseSetting("maxholespertuple"));
double avgHolesPerTuple = 1 + (maxHolesPerTuple - 1) / 2.0;
int tuplesWithHolesCount = (int) (relSize * density * arity / avgHolesPerTuple);
int maxHoleSize = Integer.parseInt(settings.getNoiseSetting("maxholesize"));
try
{
// generateNoise writes its output through the DBConnector; the
// returned list is not needed here (the old local was unused).
NoiseGenerator.generateNoise(db, settings.getVars(), relSize,
tuplesWithHolesCount, maxHolesPerTuple, maxHoleSize);
}
catch (SQLException e)
{
System.err.println("Error occurred while introducing noise:");
e.printStackTrace();
}
db.closeConnection();
}
}

View File

@ -0,0 +1,251 @@
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
/**
 * Static helpers that decide WHERE noise (or-sets) goes: which tuples,
 * which columns, and which candidate values. The actual database writes
 * happen through the DBConnector passed into generateNoise.
 */
public class NoiseGenerator
{
/**
 * Generates a random number in the range specified between the two parameters
 * (both endpoints inclusive; the arguments may be given in either order).
 * @param a one end of the range (inclusive)
 * @param b other end of the range (inclusive)
 * @return a pseudo-random int in [min(a,b), max(a,b)]
 */
private static int random(int a, int b)
{
// Normalize so that a <= b
if (a > b)
{
int c = a;
a = b;
b = c;
}
double r = Math.random();
int n = (int) (a + r * (b - a + 1));
// Clamp against rounding artifacts at the edges
if (n < a)
{
n = a;
}
else if (n > b)
{
n = b;
}
return n;
}
/**
 * This method introduces noise in the database.
 * @param aVars Names of the attributes to generate noise in and range of allowed
 * values for each attribute.
 * @param aRelSize Size of the relation to generate noise in.
 * @param aTuplesCount Number of tuples that should contain or-sets.
 * @param aMaxHolesPerTuples Maximal number of holes per tuple.
 * @param aMaxHoleSize Maximal number of entries in each or-set
 * @return ArrayList with information about the holes that should be
 * introduced in the relation.
 */
public static ArrayList generateNoise (DBConnector aDB, HashMap aVars, int aRelSize,
int aTuplesCount, int aMaxHolesPerTuple, int aMaxHoleSize)
throws SQLException
{
ArrayList holes = new ArrayList();
Set keys = aVars.keySet();
Object[] varNames = keys.toArray();
// IDs of tuples, to which holes were already introduced
HashSet tids = new HashSet();
int i = 0;
int j = 0;
// Outer loop processes tuples in batches of up to 100000 so the holes
// list can be flushed to the database and cleared between batches.
while (i < aTuplesCount)
{
int n = i + 100000;
if (n > aTuplesCount)
{
n = aTuplesCount;
}
//System.out.println(holes.size());
//System.out.println(n);
try
{
for (j = i; j < n; ++j)
{
int t;
if (j > aRelSize)
{
break;
}
if (aRelSize <= aTuplesCount)
{
// Every tuple gets noise; use the sequential id
t = j;
}
else
{
// Rejection-sample an unused tuple id
do
{
t = random(1, aRelSize);
}
while (tids.contains(new Integer(t)));
}
tids.add(new Integer(t));
// Pick the columns, then the or-set values for each column
ArrayList columns = generateVarNames(varNames, aMaxHolesPerTuple);
for (int k = 0; k < columns.size(); ++k)
{
ArrayList values = generateValues((ArrayList) aVars.get(columns.get(k)),
aMaxHoleSize);
Hole hole = new Hole();
hole.tid = t;
hole.columnName = columns.get(k).toString();
hole.values = values;
holes.add(hole);
}
}
//System.err.println("holes: " + holes.size());
// Flush this batch and release the memory it held
aDB.introduceNoise(holes);
holes.clear();
i += (j - i);
//System.err.println("j: " + j);
//System.err.println("rel size: " + aRelSize);
if (j > aRelSize)
{
break;
}
}
catch(OutOfMemoryError e)
{
// Best-effort recovery: flush whatever was accumulated so far and
// continue from where the batch stopped
aDB.introduceNoise(holes);
holes.clear();
i += (j - i);
}
}
aDB.createWorldTable();
// NOTE(review): holes was cleared after each flush, so the returned
// list is normally empty — callers should not rely on its contents.
return holes;
}
/**
 * This method randomly picks the attributes to generate noise in.
 * @param aVarNames Names of the attributes.
 * @param aMaxHolesPerTuple Maximal number of holes to generate.
 * @return ArrayList with the names of the attributes to generate noise in.
 */
private static ArrayList generateVarNames(Object[] aVarNames, int aMaxHolesPerTuple)
{
ArrayList varNames = new ArrayList();
int varCount = aVarNames.length;
int holesCount;
// Never request more distinct attributes than exist
if (varCount > aMaxHolesPerTuple)
{
holesCount = random(1, aMaxHolesPerTuple);
}
else
{
holesCount = random(1, varCount);
}
for (int j = 0; j < holesCount; ++j)
{
String varName;
// Rejection-sample until an attribute not yet chosen comes up
do
{
varName = (String) aVarNames[random(0, varCount - 1)];
}
while (varNames.contains(varName));
varNames.add(varName);
}
return varNames;
}
/**
 * Generates values for a given attribute.
 * @param aVarValues Values allowed for the given attribute.
 * @param aMaxHoleSize Maximal number of values to generate for the attribute.
 * @return ArrayList with possible values for the given attribute.
 */
private static ArrayList generateValues(ArrayList aVarValues, int aMaxHoleSize)
{
ArrayList values = new ArrayList();
int valuesCount = aVarValues.size();
int holeSize;
// Or-set size is capped by the number of distinct allowed values
if (aMaxHoleSize < valuesCount)
{
holeSize = random(1, aMaxHoleSize);
}
else
{
holeSize = random(1, valuesCount);
}
for (int i = 0; i < holeSize; ++i)
{
String value;
// Rejection-sample distinct values from the allowed list
do
{
int n = random(0, valuesCount - 1);
value = (String) aVarValues.get(n);
}
while (values.contains(value));
values.add(value);
}
return values;
}
/**
 * This method generates noise by uniformly selecting a certain number of fields to
 * generate or-sets to.
 * NOTE(review): unfinished — varNames is never populated, so varCount is 0
 * and random(0, -1) would be called; do not use until the TODOs are done.
 * @param aVars
 * @param aRelSize Size of the relation.
 * @param aHolesCount
 * @param aMaxHoleSize
 * @return ArrayList with information about the generated holes.
 */
public static ArrayList generateNoiseUniform(HashMap aVars, int aRelSize,
int aHolesCount, int aMaxHoleSize)
{
ArrayList result = new ArrayList();
// TODO: Implement
ArrayList varNames = new ArrayList();
int varCount = varNames.size();
if (varCount * aRelSize < aHolesCount)
{
aHolesCount = varCount * aRelSize;
}
for (int i = 0; i < aHolesCount; ++i)
{
// Generate tuple id
int tid = random(1, aRelSize);
// Generate attribute name
int j = random(0, varCount - 1);
String varName = varNames.get(j).toString();
// TODO: Check whether the hole was already generated
// ...
ArrayList values = generateValues((ArrayList) aVars.get(varName),
aMaxHoleSize);
Hole h = new Hole();
h.tid = tid;
h.columnName = varName;
h.values = values;
result.add(h);
}
return result;
}
}

View File

@ -0,0 +1,205 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
/**
 * Loads the XML configuration file (db connection parameters, noise
 * parameters, and the variable/value lists) used by the noise generator.
 */
public class Settings
{
// Default path; overwritten by the constructor argument
private String fileName = "..\\settings.xml";
// Parameter name -> value for the <db> section
private HashMap dbSettings;
// Parameter name -> value for the <noise> section
private HashMap noiseSettings;
// Variable name -> ArrayList of allowed values, from the <vars> section
private HashMap vars;
/** @param aFileName path of the settings XML file to load */
public Settings(String aFileName)
{
fileName = aFileName;
dbSettings = new HashMap();
noiseSettings = new HashMap();
vars = new HashMap();
}
/**
 * Parses the settings file and fills the three maps.
 * Requires exactly one <db>, one <noise> and one <vars> element.
 * @return true on success, false on any parse or structure error
 */
public boolean loadSettings()
{
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db;
Document doc;
try
{
db = dbf.newDocumentBuilder();
}
catch (ParserConfigurationException e)
{
return false;
}
try
{
doc = db.parse(fileName);
}
catch (IOException e)
{
System.out.println("File not found!");
return false;
}
catch (SAXException e)
{
System.out.println("Error parsing file!");
return false;
}
// NOTE(review): these locals shadow the dbSettings/noiseSettings fields
NodeList dbSettings = doc.getElementsByTagName("db");
if (dbSettings.getLength() == 1)
{
loadDBSettings(dbSettings.item(0));
}
else
{
return false;
}
NodeList noiseSettings = doc.getElementsByTagName("noise");
if (noiseSettings.getLength() == 1)
{
loadNoiseSettings(noiseSettings.item(0));
}
else
{
return false;
}
NodeList varSettings = doc.getElementsByTagName("vars");
if (varSettings.getLength() == 1)
{
loadVars(varSettings.item(0));
}
else
{
return false;
}
return true;
}
// Reads each child element of <db> as a name/value setting.
private boolean loadDBSettings(Node aRoot)
{
NodeList children = aRoot.getChildNodes();
for (int i = 0; i < children.getLength(); ++i)
{
Node child = children.item(i);
// Skip whitespace between the elements
if (child.getNodeType() == Node.TEXT_NODE)
{
continue;
}
dbSettings.put(child.getNodeName(), child.getFirstChild().getNodeValue());
}
return true;
}
// Reads each child element of <noise> as a name/value setting.
private boolean loadNoiseSettings(Node aRoot)
{
NodeList children = aRoot.getChildNodes();
for (int i = 0; i < children.getLength(); ++i)
{
Node child = children.item(i);
// Skip whitespace between the elements
if (child.getNodeType() == Node.TEXT_NODE)
{
continue;
}
noiseSettings.put(child.getNodeName(), child.getFirstChild().getNodeValue());
}
return true;
}
// Loads every <var> element found under <vars>.
private boolean loadVars(Node aRoot)
{
NodeList vars = aRoot.getChildNodes();
for (int i = 0; i < vars.getLength(); ++i)
{
if (vars.item(i).getNodeType() == Node.ELEMENT_NODE)
{
loadVar(vars.item(i));
}
}
return true;
}
// Parses one <var>: the first element child must be <name>, the remaining
// element children are <value> entries.
// NOTE(review): the name element is located positionally, not by tag name —
// a <var> whose first element is not <name> would be misread.
private boolean loadVar(Node aVar)
{
aVar.normalize();
NodeList varSettings = aVar.getChildNodes();
int i = 0;
// Get the variable's name
if (varSettings.item(i).getNodeType() == Node.TEXT_NODE)
{
i++;
}
String name = varSettings.item(i).getFirstChild().getNodeValue();
i++;
// Get the values for the variable
ArrayList values = new ArrayList();
for(; i < varSettings.getLength(); ++i)
{
Node valueNode = varSettings.item(i);
if (valueNode.getNodeType() == Node.TEXT_NODE)
{
continue;
}
String value = valueNode.getFirstChild().getNodeValue();
values.add(value);
}
vars.put(name, values);
return true;
}
/**
 * @param aVarName variable name to look up
 * @return the list of allowed values, or null when the variable is unknown
 */
public ArrayList getVarValues(String aVarName)
{
if (!vars.containsKey(aVarName))
{
return null;
}
return (ArrayList) vars.get(aVarName);
}
/** @return the variable-name -> values map (live reference, not a copy) */
public HashMap getVars()
{
return vars;
}
/** @return the names of all configured variables */
public Object[] getVarNames()
{
return vars.keySet().toArray();
}
/** @return value of a <db> setting, or null when absent */
public String getDBSetting(String aParameter)
{
if (dbSettings.containsKey(aParameter))
{
return (String) dbSettings.get(aParameter);
}
return null;
}
/** @return value of a <noise> setting, or null when absent */
public String getNoiseSetting(String aParameter)
{
if (noiseSettings.containsKey(aParameter))
{
return (String) noiseSettings.get(aParameter);
}
return null;
}
}

View File

@ -0,0 +1,4 @@
-- Tear down the level-1 experiment tables.
-- IF EXISTS makes the script idempotent: re-running it (or running it before
-- the tables were created) no longer aborts on a missing table.
DROP TABLE IF EXISTS c1 CASCADE;
DROP TABLE IF EXISTS f1 CASCADE;
DROP TABLE IF EXISTS r1 CASCADE;
DROP TABLE IF EXISTS del1 CASCADE;

View File

@ -0,0 +1,5 @@
-- Clean-up for query Q2: drop its result and scratch tables.
DROP table c2 cascade;
DROP table f2 cascade;
DROP table r2 cascade;
DROP table twins2 cascade;
DROP table del2 cascade;

View File

@ -0,0 +1,6 @@
-- Clean-up for query Q3: drop its result and scratch tables
-- (including the consts3 helper built by q3.sql).
DROP table c3 cascade;
DROP table f3 cascade;
DROP table r3 cascade;
DROP table twins3 cascade;
DROP table del3 cascade;
DROP TABLE consts3 cascade;

View File

@ -0,0 +1,4 @@
-- Clean-up for query Q4: drop its result tables.
DROP table c4 cascade;
DROP table f4 cascade;
DROP table r4 cascade;
DROP table del4 cascade;

View File

@ -0,0 +1,5 @@
-- Clean-up for query Q5: drop its result and scratch tables.
DROP table c5 cascade;
DROP table f5 cascade;
DROP table r5 cascade;
DROP TABLE consts5 cascade;
DROP TABLE del5 cascade;

View File

@ -0,0 +1,5 @@
-- Clean-up for query Q6: drop its result and scratch tables.
DROP table c6 cascade;
DROP table f6 cascade;
DROP table r6 cascade;
DROP table twins6 cascade;
DROP table del6 cascade;

97
census/Queries/fun-q3.sql Normal file
View File

@ -0,0 +1,97 @@
-- filter3(): enforces the join condition (POWSTATE = POB) on the decomposed
-- representation of query Q3 (tables f3 and c3).  Each iteration finds a pair
-- of distinct components (cids) that both constrain POWSTATE/POB for the same
-- tuple, merges them into one component whose world ids are the cross product
-- of the originals (wid || 'x' || wid), and keeps only value combinations
-- that are consistent across both halves of the pair.  Terminates when no
-- such pair remains.
-- NOTE(review): mutates f3/c3 in place and creates/drops the scratch tables
-- k1, k2, ctemp3, toinsert3 on every loop iteration.
CREATE OR REPLACE FUNCTION filter3() RETURNS void AS $$
DECLARE
filler RECORD;
i int4;
BEGIN
--CREATE TABLE xc (id int4, hid text, wid text, value int4) WITHOUT OIDS;
--i:=0;
LOOP
-- Pick any pair of distinct components that both restrict POWSTATE/POB
-- on the same tuple; _f1.cid > _f2.cid avoids picking a pair twice.
SELECT INTO filler
_f1.cid as cleft, _f2.cid as cright, _f1.hid as hleft, _f2.hid as hright
FROM f3 _f1, f3 _f2
WHERE _f1.tid = _f2.tid and _f1.Col IN ('POWSTATE','POB') and
_f2.Col IN ('POWSTATE','POB') and _f1.cid > _f2.cid;
IF NOT FOUND THEN EXIT; END IF;
-- k1/k2: component entries belonging to the left/right component.
EXECUTE '
create table k1 without oids as
select _c.* from c3 _c, f3 _f
where ' || quote_literal(filler.cleft) || ' = _f.cid and _f.hid = _c.hid;
';
EXECUTE '
create table k2 without oids as
select _c.* from c3 _c, f3 _f
where ' || quote_literal(filler.cright) || ' = _f.cid and _f.hid = _c.hid;
';
-- Merge the two components: rename both cids to the combined cid, and
-- build the cross-product component table ctemp3, then drop the old
-- component entries from c3.
EXECUTE '
update f3
set cid = ' || quote_literal(filler.cleft) || '|| \'x\' || ' || quote_literal(filler.cright) || '
where cid =' || quote_literal(filler.cleft) || ' or cid=' || quote_literal(filler.cright) || ';
create table ctemp3 as
select distinct a.hid, a.wid || \'x\' || b.wid as wid, a.value
from k1 a, k2 b
union
select distinct b.hid, a.wid || \'x\' || b.wid as wid, b.value
from k1 a, k2 b;
delete from c3 where
hid in (select hid from k1) or hid in (select hid from k2);
';
-- toinsert3: for each merged world, the values the partner hole takes,
-- used below to keep only world/value pairs consistent across the pair.
EXECUTE '
create table toinsert3 as
select _c1.value as value, _c1.hid as hid, _c1.wid as wid, _f2.hid as fhid
from ctemp3 _c1, f3 _f1, f3 _f2
where _f1.tid = _f2.tid and _f1.hid = _c1.hid and _f1.cid = _f2.cid and
_f1.Col in (\'POWSTATE\',\'POB\')
and _f2.Col in (\'POWSTATE\',\'POB\');
';
EXECUTE '
insert into c3
select * from ctemp3
where value in (
select value from toinsert3
where toinsert3.wid = ctemp3.wid and toinsert3.fhid = ctemp3.hid
and toinsert3.hid <> ctemp3.hid);
';
--EXECUTE '
--insert into c3
--select * from ctemp3
--where value in (
--select _c1.value from ctemp3 _c1, f3 _f1, f3 _f2
--where _c1.hid <> ctemp3.hid and _f1.tid = _f2.tid and
-- _f1.hid = _c1.hid and _f2.hid = ctemp3.hid and
-- _c1.wid = ctemp3.wid and _f1.cid = _f2.cid and
-- _f1.Col in (\'POWSTATE\',\'POB\')
-- and _f2.Col in (\'POWSTATE\',\'POB\'));
--';
--EXECUTE '
--insert into xc
--SELECT '|| quote_literal(i) || ', * FROM c3;';
-- Drop per-iteration scratch tables before the next pass.
EXECUTE '
drop table k1;
drop table k2;
drop table ctemp3;
drop table toinsert3;
';
--i := i+1;
END LOOP;
RETURN;
END;
$$ LANGUAGE plpgsql;

88
census/Queries/fun-q5.sql Normal file
View File

@ -0,0 +1,88 @@
-- filter5(): same component-merging chase as filter3(), but for query Q5
-- (tables f5/c5) and the join condition POWSTATE1 = POWSTATE2.
-- Repeatedly merges pairs of components constraining POWSTATE1/POWSTATE2
-- on the same tuple and keeps only value combinations consistent across
-- the merged pair.  Terminates when no such pair remains.
CREATE OR REPLACE FUNCTION filter5() RETURNS void AS $$
DECLARE
filler RECORD;
i int4;
BEGIN
--CREATE TABLE xc (id int4, hid text, wid text, value int4) WITHOUT OIDS;
--i:=0;
LOOP
-- Pick any pair of distinct components that both restrict
-- POWSTATE1/POWSTATE2 on the same tuple.
SELECT INTO filler
_f1.cid as cleft, _f2.cid as cright, _f1.hid as hleft, _f2.hid as hright
FROM f5 _f1, f5 _f2
WHERE _f1.tid = _f2.tid and _f1.Col IN ('POWSTATE1','POWSTATE2') and
_f2.Col IN ('POWSTATE1','POWSTATE2') and _f1.cid > _f2.cid;
IF NOT FOUND THEN EXIT; END IF;
-- k1/k2: component entries of the left/right component.
EXECUTE '
create table k1 without oids as
select _c.* from c5 _c, f5 _f
where ' || quote_literal(filler.cleft) || ' = _f.cid and _f.hid = _c.hid;
';
EXECUTE '
create table k2 without oids as
select _c.* from c5 _c, f5 _f
where ' || quote_literal(filler.cright) || ' = _f.cid and _f.hid = _c.hid;
';
-- Rename both cids to the combined component id.
EXECUTE '
update f5
set cid = ' || quote_literal(filler.cleft) || '|| \'x\' || ' || quote_literal(filler.cright) || '
where cid =' || quote_literal(filler.cleft) || ' or cid=' || quote_literal(filler.cright) || ';';
-- Build the merged component (cross product of world ids) and remove
-- the old component entries.
-- NOTE(review): uses UNION ALL here where filter3 uses UNION — confirm
-- the duplicate handling difference is intentional.
EXECUTE '
create table ctemp5 as
select distinct a.hid, a.wid || \'x\' || b.wid as wid, a.value
from k1 a, k2 b
union all
select distinct b.hid, a.wid || \'x\' || b.wid as wid, b.value
from k1 a, k2 b;
delete from c5 where
hid in (select hid from k1) or hid in (select hid from k2);
';
-- Keep only world/value pairs consistent across both halves of the pair.
EXECUTE '
create table toinsert5 as
select _c1.value as value, _c1.hid as hid, _c1.wid as wid, _f2.hid as fhid
from ctemp5 _c1, f5 _f1, f5 _f2
where _f1.tid = _f2.tid and _f1.hid = _c1.hid and _f1.cid = _f2.cid and
_f1.Col in (\'POWSTATE1\',\'POWSTATE2\')
and _f2.Col in (\'POWSTATE1\',\'POWSTATE2\');
insert into c5
select * from ctemp5
where value in (
select value from toinsert5
where toinsert5.wid = ctemp5.wid and toinsert5.fhid = ctemp5.hid
and toinsert5.hid <> ctemp5.hid);
';
--EXECUTE '
--insert into xc
--SELECT '|| quote_literal(i) || ', * FROM c5;';
-- Drop per-iteration scratch tables before the next pass.
EXECUTE '
drop table k1;
drop table k2;
drop table ctemp5;
drop table toinsert5;
';
--i := i+1;
END LOOP;
RETURN;
END;
$$ LANGUAGE plpgsql;

View File

@ -0,0 +1 @@
-- Clean-up for the one-world variant of Q1.
DROP table rr1 cascade;

View File

@ -0,0 +1 @@
-- Clean-up for the one-world variant of Q2.
DROP table rr2 cascade;

View File

@ -0,0 +1,2 @@
-- Clean-up for the one-world variant of Q3.
DROP table rr3 cascade;

View File

@ -0,0 +1 @@
-- Clean-up for the one-world variant of Q4.
DROP table rr4 cascade;

View File

@ -0,0 +1 @@
-- Clean-up for the one-world variant of Q5.
DROP table rr5 cascade;

View File

@ -0,0 +1 @@
-- Clean-up for the one-world variant of Q6.
DROP table rr6 cascade;

View File

@ -0,0 +1,4 @@
-- One-world (certain-data) variant of Q1: plain selection on the template
-- relation rt; -1 marks a missing/uncertain value.
CREATE TABLE rr1 WITHOUT OIDS AS
SELECT * FROM rt
WHERE (YEARSCH = 17 OR YEARSCH = -1) AND
      (CITIZEN = 0 OR CITIZEN = -1);

View File

@ -0,0 +1,3 @@
-- One-world variant of Q2: non-citizens who do not speak English well;
-- -1 marks a missing/uncertain value.
CREATE TABLE rr2 WITHOUT OIDS AS
SELECT TID,POWSTATE, CITIZEN, IMMIGR FROM rt
WHERE CITIZEN <> 0 AND (ENGLISH > 3 OR ENGLISH =-1);

View File

@ -0,0 +1,6 @@
-- One-world variant of Q3: selection plus the join condition
-- POWSTATE = POB evaluated directly on rt; -1 marks a missing value.
CREATE TABLE rr3 WITHOUT OIDS AS
SELECT TID, POWSTATE,MARITAL,FERTIL FROM rt
WHERE (FERTIL > 4 OR FERTIL= -1) AND
      (MARITAL = 1 OR MARITAL= -1) AND
      (POWSTATE = POB OR POWSTATE= -1 OR POB= -1);

View File

@ -0,0 +1,4 @@
-- One-world variant of Q4: plain selection on rt; -1 marks a missing value.
CREATE TABLE rr4 WITHOUT OIDS AS
SELECT * FROM rt
WHERE (FERTIL = 1 OR FERTIL = -1) AND
      (RSPOUSE = 1 OR RSPOUSE = -1);

View File

@ -0,0 +1,23 @@
-- One-world variant of Q5: join of rr2 (from one-world Q2) and rr3 (from
-- one-world Q3) on powstate1 = powstate2.  Assumes the driver script has
-- already renamed POWSTATE to POWSTATE1/POWSTATE2 in rr2/rr3.
--CREATE TABLE rr2 WITHOUT OIDS AS
--SELECT TID,POWSTATE,CITIZEN,IMMIGR FROM rt
--WHERE (CITIZEN <> 0 OR CITIZEN = -1) AND
-- (ENGLISH > 3 OR ENGLISH = -1) AND
-- (POWSTATE > 50 OR POWSTATE = -1);
--CREATE TABLE rr3 WITHOUT OIDS AS
--SELECT TID, POWSTATE,MARITAL,FERTIL FROM rt
--WHERE (FERTIL > 4 OR FERTIL = -1) AND
-- (MARITAL = 1 OR MARITAL = -1) AND
-- (POWSTATE = POB OR POWSTATE = -1 OR POB = -1) AND
-- (POWSTATE > 50 OR POWSTATE = -1);
-- Combined tuple id is rr2.tid || 'x' || rr3.tid, mirroring the world-set
-- version's component merging.
CREATE TABLE rr5 WITHOUT OIDS AS
SELECT rr2.tid || 'x' || rr3.tid as tid, rr2.powstate1, rr3.powstate2, rr2.citizen, rr2.immigr, rr3.marital,rr3.fertil
FROM rr2 JOIN rr3 ON rr2.powstate1 = rr3.powstate2
WHERE (rr2.powstate1 > 50 or rr2.powstate1 = -1) AND (rr3.powstate2 > 50 or rr3.powstate2 = -1);
--CREATE TABLE rr5 WITHOUT OIDS AS
--SELECT rr2.tid || 'x' || rr3.tid as tid, rr2.powstate1, rr3.powstate2, rr2.citizen, rr2.immigr, rr3.marital,rr3.fertil
--FROM rr2, rr3
--WHERE rr2.powstate1 = rr3.powstate2 and (rr2.powstate1 > 50 or rr2.powstate1 = -1) AND (rr3.powstate2 > 50 or rr3.powstate2 = -1);

View File

@ -0,0 +1,3 @@
-- One-world variant of Q6: people speaking English well; -1 marks a
-- missing/uncertain value.
CREATE TABLE rr6 AS
SELECT TID,POWSTATE,POB FROM rt
WHERE (ENGLISH = 3 OR ENGLISH = -1);

44
census/Queries/q1.sql Normal file
View File

@ -0,0 +1,44 @@
-- Query 1: 2 sel-const
-- World-set version of Q1 on the UWSDT decomposition rt/ft/ct:
--   r1 = candidate template tuples, f1 = their hole mappings,
--   c1 = component entries restricted to values satisfying the selection.
-- Finally, tuples with a hole that admits no satisfying value are removed.
CREATE TABLE r1 WITHOUT OIDS AS
SELECT * FROM rt
WHERE (YEARSCH = 17 OR YEARSCH = -1) AND (CITIZEN < 1);
CREATE TABLE f1 WITHOUT OIDS AS
SELECT ft.* FROM ft, r1
WHERE ft.tid = r1.tid;
-- Keep only component values consistent with the selection condition
-- on the column the hole belongs to.
CREATE TABLE c1 WITHOUT OIDS AS
SELECT ct.* FROM ct, f1
WHERE ct.hid=f1.hid and
(CASE WHEN f1.Col = 'YEARSCH'
THEN ct.VALUE = 17
ELSE (CASE WHEN f1.Col = 'CITIZEN'
THEN ct.VALUE = 0
ELSE true
END)
END);
--CREATE TABLE DEL1 WITHOUT OIDS AS
--SELECT TID FROM f1
--WHERE HID NOT IN (SELECT HID FROM c1);
-- Anti-join via LEFT JOIN ... IS NULL (equivalent to the commented-out
-- NOT IN formulation above): tuples with an unsatisfiable hole.
CREATE TABLE DEL1 WITHOUT OIDS AS
SELECT f1.TID FROM f1 left join c1 on (f1.hid = c1.hid)
WHERE c1.hid is null;
DELETE FROM r1
WHERE TID IN (SELECT TID FROM DEL1);
DELETE FROM f1
WHERE TID IN (SELECT TID FROM DEL1);
DELETE FROM c1
WHERE HID NOT IN (SELECT HID FROM f1);

69
census/Queries/q2.sql Normal file
View File

@ -0,0 +1,69 @@
-- Query 2: persons born outside USA that can not speak English well
-- World-set version of Q2 on the decomposition rt/ft/ct
-- (r2 = template tuples, f2 = hole mappings, c2 = component values).
CREATE TABLE r2 WITHOUT OIDS AS
SELECT TID,POWSTATE,CITIZEN,IMMIGR FROM rt
WHERE (CITIZEN <> 0) AND (ENGLISH > 3 OR ENGLISH = -1);
CREATE TABLE f2 WITHOUT OIDS AS
SELECT ft.* FROM ft, r2
WHERE ft.tid = r2.tid and ft.Col IN ('IMMIGR','CITIZEN','ENGLISH','POWSTATE');
-- Keep only component values consistent with the selection condition.
CREATE TABLE c2 WITHOUT OIDS AS
SELECT ct.* FROM ct, f2
WHERE ct.hid=f2.hid and
(CASE WHEN f2.Col = 'CITIZEN'
THEN ct.VALUE <> 0
ELSE (CASE WHEN f2.Col = 'ENGLISH'
THEN ct.VALUE > 3
ELSE f2.Col IN ('POWSTATE','IMMIGR')
END)
END);
--DELETE FROM c2
--where
--exists(select 1 from f2 _a, f2 _b
-- where _a.cid = _b.cid and _a.tid = _b.tid and
-- _a.hid <> _b.hid and _a.hid = c2.hid and
-- c2.wid not in (select wid from c2 _c
-- where _b.hid = _c.hid));
-- twins2: pairs of holes of the same tuple lying in the same component;
-- values in worlds eliminated for one twin must be dropped for the other.
CREATE TABLE twins2 WITHOUT OIDS AS
SELECT _a.hid as left, _b.hid as right
FROM f2 _a, f2 _b
WHERE _a.cid = _b.cid and _a.tid = _b.tid and _a.hid <> _b.hid;
DELETE FROM c2
where
exists(select 1 from twins2
where twins2.left = c2.hid and
c2.wid not in (select wid from c2 _c
where twins2.right = _c.hid));
--CREATE TABLE DEL2 WITHOUT OIDS AS
--SELECT TID FROM f2
--WHERE HID NOT IN (SELECT HID FROM c2);
-- Anti-join: tuples with an unsatisfiable hole are deleted everywhere.
CREATE TABLE DEL2 WITHOUT OIDS AS
SELECT f2.TID FROM f2 left join c2 on (f2.hid = c2.hid)
WHERE c2.hid is null;
DELETE FROM r2
WHERE TID IN (SELECT TID FROM DEL2);
-- ENGLISH was only needed for the selection; project it away.
DELETE FROM f2
WHERE TID IN (SELECT TID FROM DEL2) OR Col = 'ENGLISH';
--DELETE FROM f2
--WHERE TID IN (SELECT TID FROM DEL2) OR Col NOT IN ('POWSTATE','IMMIGR','CITIZEN');
DELETE FROM c2
WHERE HID NOT IN (SELECT HID FROM f2);

99
census/Queries/q3.sql Normal file
View File

@ -0,0 +1,99 @@
-- Query 3: prj(sel-const+sel-join)
-- World-set version of Q3: constant selections on FERTIL/MARITAL plus the
-- join condition POWSTATE = POB, evaluated on the decomposition rt/ft/ct.
CREATE TABLE r3 WITHOUT OIDS AS
SELECT TID, POWSTATE,POB,MARITAL,FERTIL FROM rt
WHERE (FERTIL > 4 OR FERTIL = -1) AND
(MARITAL = 1 OR MARITAL = -1) AND
(POWSTATE = POB OR POWSTATE = -1 OR POB = -1);
CREATE TABLE f3 WITHOUT OIDS AS
SELECT ft.* FROM ft, r3
WHERE ft.tid = r3.tid AND ft.Col IN ('MARITAL','FERTIL','POB','POWSTATE');
-- consts3: tuples where exactly one of POWSTATE/POB is certain; records the
-- certain value the uncertain side must match.
CREATE TABLE consts3 WITHOUT OIDS AS
SELECT HID, POWSTATE, POB
FROM r3, f3
WHERE r3.TID = f3.TID AND
(POWSTATE = -1 AND POB <> -1 OR POWSTATE <> -1 AND POB = -1) AND
(CASE WHEN POWSTATE = -1 THEN f3.COL = 'POWSTATE' ELSE f3.COL = 'POB' END);
----------
create unique index consts3idx on consts3(hid);
SET ENABLE_SEQSCAN=OFF;
----------
-- Keep only component values consistent with the selections, and — when one
-- side of POWSTATE/POB is certain — with that certain value.
CREATE TABLE c3 WITHOUT OIDS AS
select ct.* from ct, f3
where ct.hid=f3.hid and
(CASE WHEN f3.Col = 'FERTIL'
THEN ct.VALUE > 4
ELSE (CASE WHEN f3.Col = 'MARITAL'
THEN ct.VALUE = 1
ELSE f3.Col IN ('POWSTATE','POB') AND
(NOT EXISTS (SELECT 1 FROM consts3 where consts3.hid = f3.hid) OR
(CASE WHEN f3.Col = 'POWSTATE' THEN
EXISTS (SELECT 1 from consts3 WHERE consts3.hid = f3.hid AND consts3.POB = ct.VALUE)
ELSE (CASE WHEN f3.Col = 'POB' THEN EXISTS (SELECT 1 from consts3 WHERE consts3.hid = f3.hid AND consts3.POWSTATE = ct.VALUE) END)
END))
END)
END);
----------
SET ENABLE_SEQSCAN=ON;
----------
DELETE FROM f3
WHERE HID NOT IN (SELECT HID FROM c3);
------------------------------------
-- apply the join condition POWSTATE=POB
-- (filter3() is defined in fun-q3.sql and merges components pairwise)
SELECT 1 from filter3();
-------------------------------------
--DELETE FROM c3
--where
--exists(select 1 from f3 _a, f3 _b
-- where _a.cid = _b.cid and _a.tid = _b.tid and
-- _a.hid <> _b.hid and _a.hid = c3.hid and
-- c3.wid not in (select wid from c3 _c
-- where _b.hid = _c.hid));
-- twins3: holes of the same tuple in the same component; drop values of
-- worlds eliminated for the twin hole.
CREATE TABLE twins3 WITHOUT OIDS AS
SELECT _a.hid as left, _b.hid as right
FROM f3 _a, f3 _b
WHERE _a.cid = _b.cid and _a.tid = _b.tid and _a.hid <> _b.hid;
DELETE FROM c3
where
exists(select 1 from twins3
where twins3.left = c3.hid and
c3.wid not in (select wid from c3 _c
where twins3.right = _c.hid));
--CREATE TABLE DEL3 WITHOUT OIDS AS
--SELECT TID FROM f3
--WHERE HID NOT IN (SELECT HID FROM c3);
-- Anti-join: tuples with an unsatisfiable hole are deleted everywhere.
CREATE TABLE DEL3 WITHOUT OIDS AS
SELECT f3.TID FROM f3 left join c3 on (f3.hid = c3.hid)
WHERE c3.hid is null;
DELETE FROM r3
WHERE TID IN (SELECT TID FROM DEL3);
-- POB was only needed for the join; project it away.
DELETE FROM f3
WHERE TID IN (SELECT TID FROM DEL3) OR Col = 'POB';
--DELETE FROM f3
--WHERE TID IN (SELECT TID FROM DEL3) OR Col NOT IN ('POWSTATE','MARITAL','FERTIL');
DELETE FROM c3
WHERE HID NOT IN (SELECT HID FROM f3);

43
census/Queries/q4.sql Normal file
View File

@ -0,0 +1,43 @@
-- Query 4: 3 sel-const
-- World-set version of Q4 on the decomposition rt/ft/ct.
CREATE TABLE r4 WITHOUT OIDS AS
SELECT * FROM rt
WHERE (FERTIL = 1 OR FERTIL = -1) AND
(RSPOUSE = 1 OR RSPOUSE = 2 OR RSPOUSE = -1);
CREATE TABLE f4 WITHOUT OIDS AS
SELECT ft.* FROM ft, r4
WHERE ft.tid = r4.tid;
-- Keep only component values consistent with the selection condition.
CREATE TABLE c4 WITHOUT OIDS AS
SELECT ct.* FROM ct, f4
WHERE ct.hid = f4.hid and
(CASE WHEN f4.Col = 'FERTIL'
THEN ct.VALUE = 1
ELSE (CASE WHEN f4.Col = 'RSPOUSE'
THEN ct.VALUE = 1 OR ct.VALUE = 2
ELSE true
END)
END);
--CREATE TABLE DEL4 WITHOUT OIDS AS
--SELECT TID FROM f4
--WHERE HID NOT IN (SELECT HID FROM c4);
-- Anti-join: tuples with an unsatisfiable hole are deleted everywhere.
CREATE TABLE DEL4 WITHOUT OIDS AS
SELECT f4.TID FROM f4 left join c4 on (f4.hid = c4.hid)
WHERE c4.hid is null;
DELETE FROM r4
WHERE TID IN (SELECT TID FROM DEL4);
DELETE FROM f4
WHERE TID IN (SELECT TID FROM DEL4);
DELETE FROM c4
WHERE HID NOT IN (SELECT HID FROM f4);

127
census/Queries/q5.sql Normal file
View File

@ -0,0 +1,127 @@
-- Query 5: join of r2(tid,powstate1,citizen,immigr) and r3(tid,powstate2,marital,fertil) on powstate1 <> powstate2
-- NOTE(review): the comment above says "powstate1 <> powstate2" but every
-- condition below enforces POWSTATE1 = POWSTATE2 — the comment looks stale.
-- Builds r5/f5/c5 from the Q2 and Q3 results and applies the join on the
-- decomposition; filter5() (fun-q5.sql) then merges the components.
-- the following two should be done outside
update f2
SET Col='POWSTATE1' WHERE Col='POWSTATE';
update f3
SET Col='POWSTATE2' WHERE Col='POWSTATE';
-- clean r2 and r3 with the selection condition
DELETE FROM r2
WHERE not(powstate1 > 50 or powstate1 = -1);
DELETE FROM r3
WHERE not(powstate2 > 50 or powstate2 = -1);
-- create r5 as r2 x r3
-- Combined tuple id is r2.tid || 'x' || r3.tid; tid1/tid2 keep the originals
-- for the clean-up deletes below.
CREATE TABLE r5 WITHOUT OIDS AS
SELECT r2.TID || 'x' || r3.TID as TID, r2.tid as tid1, r3.tid as tid2, r2.POWSTATE1, r2.CITIZEN, r2.IMMIGR, r3.POWSTATE2, r3.MARITAL,r3.FERTIL
FROM r2, r3
WHERE
r2.POWSTATE1 = r3.POWSTATE2 OR r2.POWSTATE1 = -1 OR r3.POWSTATE2 = -1;
DELETE FROM f2
WHERE tid not in (select tid1 from r5);
DELETE FROM f3
WHERE tid not in (select tid2 from r5);
-- Drop component values that violate the POWSTATE > 50 selection.
DELETE FROM c2
WHERE
hid not in (
select hid from f2
where (case when f2.col in ('POWSTATE1','POWSTATE2')
then c2.value > 50
else true end)
);
DELETE FROM c3
WHERE
hid not in (
select hid from f3
where (case when f3.col in ('POWSTATE1','POWSTATE2')
then c3.value > 50
else true end)
);
-- ALTER TABLE r5 DROP tid1;
-- ALTER TABLE r5 DROP tid2;
-- create new holes: f5 = f2 x r3 union all f3 x r2
-- Hole ids are prefixed '1.'/'2.' to keep the two sides disjoint.
CREATE TABLE f5 WITHOUT OIDS AS
SELECT 'r5' as relation, f2.tid || 'x' || r3.tid as tid, f2.col, f2.cid, '1.' || f2.hid || 'x' || r3.tid as hid
FROM f2,r3
union all
SELECT 'r5' as relation, f3.tid || 'x' || r2.tid as tid, f3.col, f3.cid, '2.' || f3.hid || 'x' || r2.tid as hid
FROM f3,r2;
--create unique index f5hididx on f5(hid);
-- consts5: tuples where exactly one of POWSTATE1/POWSTATE2 is certain;
-- the uncertain side must match the certain value.
CREATE TABLE consts5 WITHOUT OIDS AS
SELECT HID, POWSTATE1, POWSTATE2
FROM r5, f5
WHERE r5.TID = f5.TID AND
(POWSTATE1 = -1 AND POWSTATE2 <> -1 OR POWSTATE1 <> -1 AND POWSTATE2 = -1) AND
(CASE WHEN POWSTATE1 = -1 THEN f5.COL = 'POWSTATE1' ELSE f5.COL = 'POWSTATE2' END);
create unique index consts5idx on consts5(hid);
SET ENABLE_SEQSCAN=OFF;
-- create the new component relation
create table c5 without oids as
SELECT c2.cid, '1.' || c2.hid || 'x' || r3.tid as hid, c2.wid, c2.value
FROM c2,r3
union all
SELECT c3.cid, '2.' || c3.hid || 'x' || r2.tid as hid, c3.wid, c3.value
FROM c3,r2;
delete from c5 where
hid in
(select hid from consts5
where consts5.POWSTATE2 <> c5.VALUE and consts5.POWSTATE1 <> c5.VALUE);
SET ENABLE_SEQSCAN=ON;
DELETE FROM f5
WHERE HID NOT IN (SELECT HID FROM c5);
-------------------------------------
-- apply now the join condition POWSTATE1 = POWSTATE2 on the relation c5
SELECT 1 from filter5();
-------------------------------------
-- Anti-join: tuples with an unsatisfiable hole are deleted everywhere.
CREATE TABLE DEL5 WITHOUT OIDS AS
SELECT f5.TID FROM f5 left join c5 on (f5.hid = c5.hid)
WHERE c5.hid is null;
DELETE FROM r5
WHERE TID IN (SELECT TID FROM DEL5);
DELETE FROM f5
WHERE TID IN (SELECT TID FROM DEL5);
DELETE FROM c5
WHERE HID NOT IN (SELECT HID FROM f5);

62
census/Queries/q6.sql Normal file
View File

@ -0,0 +1,62 @@
-- Q6: US citizens or foreigners speaking English well
-- World-set version of Q6 on the decomposition rt/ft/ct.
CREATE TABLE r6 WITHOUT OIDS AS
SELECT TID, POWSTATE,POB FROM rt
WHERE (ENGLISH = 3 OR ENGLISH = -1);
CREATE TABLE f6 WITHOUT OIDS AS
SELECT ft.* FROM ft, r6
WHERE ft.tid = r6.tid AND ft.Col IN ('ENGLISH','POWSTATE','POB');
-- Keep only component values consistent with the selection condition.
CREATE TABLE c6 WITHOUT OIDS AS
SELECT ct.* FROM ct, f6
WHERE ct.hid=f6.hid and
(CASE WHEN f6.Col = 'ENGLISH'
THEN ct.VALUE = 3
ELSE f6.Col IN ('POWSTATE','POB')
END);
--DELETE FROM c6
--where
--exists(select 1 from f6 _a, f6 _b
-- where _a.cid = _b.cid and _a.tid = _b.tid and
-- _a.hid <> _b.hid and _a.hid = c6.hid and
-- c6.wid not in (select wid from c6 _c
-- where _b.hid = _c.hid));
-- twins6: holes of the same tuple in the same component; drop values of
-- worlds eliminated for the twin hole.
CREATE TABLE twins6 WITHOUT OIDS AS
SELECT _a.hid as left, _b.hid as right
FROM f6 _a, f6 _b
WHERE _a.cid = _b.cid and _a.tid = _b.tid and _a.hid <> _b.hid;
DELETE FROM c6
where
exists(select 1 from twins6
where twins6.left = c6.hid and
c6.wid not in (select wid from c6 _c
where twins6.right = _c.hid));
--CREATE TABLE DEL6 WITHOUT OIDS AS
--SELECT TID FROM f6
--WHERE HID NOT IN (SELECT HID FROM c6);
-- Anti-join: tuples with an unsatisfiable hole are deleted everywhere.
CREATE TABLE DEL6 WITHOUT OIDS AS
SELECT f6.TID FROM f6 left join c6 on (f6.hid = c6.hid)
WHERE c6.hid is null;
DELETE FROM r6
WHERE TID IN (SELECT TID FROM DEL6);
--DELETE FROM f6
--WHERE TID IN (SELECT TID FROM DEL6) OR Col NOT IN ('POWSTATE','POB');
-- ENGLISH was only needed for the selection; project it away.
DELETE FROM f6
WHERE TID IN (SELECT TID FROM DEL6) OR Col = 'ENGLISH';
DELETE FROM c6
WHERE HID NOT IN (SELECT HID FROM f6);

View File

@ -0,0 +1,56 @@
-- comp_sizes: histogram of component sizes (number of holes per cid) in ft.
create or replace view comp_sizes as
select size, count(*) as cnt
from (select cid, count(*) as size from ft group by cid) f
group by size;
-- stat(sc): records size/component statistics of the input UWSDT (rt/ft/ct)
-- for scenario sc into the results table.
create or replace function stat(sc varchar) returns void as
$$
DECLARE
BEGIN
insert into results
select distinct
now() as t,
sc as scenario,
(select count(*) from rt) as r_size,
(select count(*) from ft) as f_size,
(select count(*) from ct) as c_size,
(select count(distinct cid) from ft) as num_components,
coalesce((select cnt from comp_sizes where size = 1), 0) as comp_size1,
coalesce((select cnt from comp_sizes where size = 2), 0) as comp_size2,
coalesce((select cnt from comp_sizes where size = 3), 0) as comp_size3,
coalesce((select sum(cnt) from comp_sizes where size >= 4), 0) as comp_size4_and_more;
RETURN;
END;
$$
language plpgsql;
-- result_stat(sc, Q, R, F, C): same statistics, but for a query result
-- given by table names R/F/C (template/mapping/component); recorded into
-- q_results.  Table names are interpolated (not parameterizable as bind
-- values in dynamic DDL), hence the EXECUTE strings.
create or replace function result_stat(sc varchar, Q varchar, R varchar, F varchar, C varchar) returns void as
$$
DECLARE
BEGIN
EXECUTE 'create or replace view q_comp_sizes as
select size, count(*) as cnt
from (select cid, count(*) as size from ' || F || ' group by cid) f
group by size;';
EXECUTE 'insert into q_results
select distinct
now() as t,' ||
quote_literal(sc) || ' as scenario, ' ||
quote_literal(Q) || ' as query,
(select count(*) from ' || R || ') as r_size,
(select count(*) from ' || F || ') as f_size,
(select count(*) from ' || C || ') as c_size,
(select count(distinct cid) from ' || F || ') as num_components,
coalesce((select cnt from q_comp_sizes where size = 1), 0) as comp_size1,
coalesce((select cnt from q_comp_sizes where size = 2), 0) as comp_size2,
coalesce((select cnt from q_comp_sizes where size = 3), 0) as comp_size3,
coalesce((select sum(cnt) from q_comp_sizes where size >= 4), 0) as comp_size4_and_more;';
RETURN;
END;
$$
language plpgsql;

100
census/README Normal file
View File

@ -0,0 +1,100 @@
This is a noise generator for the census data set.
The generator is implemented in Java using JDBC, and uses PostgreSQL as its storage engine.
----------------
Data
----------------
The generator works with the anonymized US census data set, which can be obtained from [2].
----------------
Data Format
----------------
The generator generates data in UWSDT format as used by MayBMS v1 and described in reference [1]. In short, the schema of the database contains the following relations:
rt(<column1>, ..., tid int)
ft(Relation text, tid int, col text, cid text, hid text)
ct(cid text, hid text, wid text, value int)
wt(cid text, wid text)
----------------
Files:
----------------
* DataNoise/
introduces noise in the data set by inserting additional values in randomly selected fields, that is, by replacing certain values by or-sets.
* Chase/
implements the chase procedure to ensure that a given set of equality-generating dependencies holds on the data.
* Queries/
example queries on the census data.
* generate_data.sh
example script to generate uncertain data.
* run_queries.sh
example script to run queries on the census data.
Note: DataNoise and Chase are not restricted to the census data set; the configuration files allow for use with other data sets and schemata. Currently the programs are constrained to work with a single relation only, but the data model allows for multiple relations and there is no conceptual difficulty in extending the code to support that.
Below is a detailed description of the subprojects.
----------------
DataNoise
----------------
Inserts additional values to randomly selected fields of the relation.
To compile:
javac -sourcepath DataNoise/src -d DataNoise/bin/ DataNoise/src/*.java
To run follow the steps:
1. Create index on the template relation <rt> if one does not exist:
create index rtididx on rt(tid);
2. java -classpath DataNoise/bin:DataNoise/jdbc3.jar Main <rt> <ft> <ct> <size> <arity> <noise_ratio> <settings.xml> | psql -d <database>
The following parameters have to be specified:
- rt, ft, ct: names of the template, mapping and component relation, respectively
- size: size of the template relation rt where noise should be introduced. Tuples in rt are expected to have tuple id tid ranging from 1 to size.
- arity: arity of the relation (not counting the tid column)
- noise_ratio: percentage of the fields in the relation that should contain uncertainty.
- settings.xml: file with additional settings in xml format, including the following:
* dbname, username, password: database name, user name and password, respectively
* maxholespertuple: max number of uncertain fields per tuple
* maxholesize: max number of values to generate for an uncertain field
* vars: description of attribute names and possible values
See DataNoise/settings.xml for more information.
In short, the algorithm works as follows. Given the desired <noise_ratio>, it selects an appropriate number of tuples to add noise to, and then chooses for each of them uniformly at random between 1 and <maxholespertuple> fields. Those fields are then replaced by or-sets by inserting additional values from the attribute domain, where the max number of values to insert is controlled by the <maxholesize> argument and is selected uniformly at random between 1 and the min of <maxholesize> and the domain size. The original value of the field is preserved. The output of the program is the sequence of SQL statements needed to execute the updates, which can be streamed to psql or saved in a file for later execution.
----------------
Chase
----------------
Chases a given set of dependencies on a UWSDT, see [1] for more details. Assumes that the UWSDT relations (template rt, mapping ft, component ct) exist. The UWSDT need not be an or-set database, it can be a general UWSDT. Currently, only one (template) relation is supported.
To compile:
javac -sourcepath Chase/src -d Chase/bin/ Chase/src/*.java
To run follow the steps:
1. Load language pl/pgSQL in PostgreSQL (in case not already done):
psql -d <database> -c 'create language plpgsql'
2. Load the prsel pl/pgSQL function needed for the chase procedure:
psql -d <database> -f Chase/prsel.sql
3. Create indices on relations:
psql -d <database> -c 'create index ftididx on ft(tid);'
psql -d <database> -c 'create index fcididx on ft(cid);'
psql -d <database> -c 'create index fhididx on ft(hid);'
psql -d <database> -c 'create index ccididx on ct(cid);'
psql -d <database> -c 'create index chididx on ct(hid);'
java -classpath Chase/bin:Chase/jdbc3.jar Main <rt> <ft> <ct> deps.xml
The following parameters have to be specified:
- rt, ft, ct: names of the template, mapping and component relation, respectively
- dependencies.xml: xml file specifying the following
* dbname, username, password: database name, user name and password, respectively
* rules: description of the dependencies to be chased. The currently supported type of dependencies is 'eq', equality-generating dependencies of the form
IF Attr1 \theta c1 [and ...] THEN Attr0 \theta c0
See Chase/dependencies.xml for more information.
----------------
References:
----------------
[1] "10^10^6 Worlds and Beyond: Efficient Representation and Processing of Incomplete Information", Lyublena Antova, Christoph Koch, Dan Olteanu, Proc. ICDE 2007
[2] "Integrated Public Use Microdata Series: V3.0", Steven Ruggles et al., 2004, http://www.ipums.org

73
census/generate_data.sh Normal file
View File

@ -0,0 +1,73 @@
# GENERATE DATA
# For each (size, noise) scenario: rebuild rt from the pums relation, run the
# DataNoise generator, dump the or-set database, chase the dependencies, and
# dump the chased database.  Dumps land in $DATA_DIR as <scenario>.dump.gz.
# define scenarios
DB=postgres
USER=postgres
DATA_DIR=data
declare sizes_s=( 100K 500K 750K 1M 5M 7.5M 10M 12.5M )
declare sizes=( 100000 500000 750000 1000000 5000000 7500000 10000000 12491667 )
declare noise=( 0.00005 0.0001 0.0005 0.001 0.005 0.01 0.05 0.1 )
for (( i = 0; i < 8; i++ ))
do
SIZE=${sizes[$i]}
SIZE_S=${sizes_s[$i]}
# NOTE(review): only the first 4 of the 8 noise ratios are used (j < 4) —
# confirm this is intentional.
for (( j = 0; j < 4; j++ ))
do
NOISE=${noise[$j]}
SCENARIO=n${SIZE_S}x${NOISE}
echo
echo '---------------------------------------------'
echo $SCENARIO
echo 'Time: ' `date`
echo '---------------------------------------------'
# drop tables
psql -q -d $DB -U $USER -c 'drop table rt cascade;'
psql -q -d $DB -U $USER -c 'drop table ft cascade;'
psql -q -d $DB -U $USER -c 'drop table ct cascade;'
psql -q -d $DB -U $USER -c 'drop table wt cascade;'
# create tables
psql -q -d $DB -U $USER -c 'create table rt AS SELECT * FROM pums WHERE tid <= '${SIZE}';'
psql -q -d $DB -U $USER -c 'create index rtididx on rt(tid);'
# introduce noise
echo "Generating Noise: $SCENARIO ... "
# NOTE(review): the README documents the generator's usage as
# Main <rt> <ft> <ct> <size> <arity> <noise_ratio> <settings.xml>,
# but this call passes no <arity> argument — verify against Main's code.
/usr/bin/time -f "%e sec" java -classpath DataNoise/bin:DataNoise/jdbc3.jar Main rt ft ct $SIZE $NOISE DataNoise/settings.xml | psql -U $USER -d $DB
echo "Dumping or-set relations..."
# dump or-set relations before chase
pg_dump -U $USER -O -t rt -t ft -t ct -t wt $DB | gzip > $DATA_DIR/$SCENARIO.or.dump.gz
# chase
echo
echo "Chasing..."
# make sure temporary results are cleaned
psql -U $USER -d $DB -c 'drop table eqrel;'
psql -U $USER -d $DB -c 'drop table eqmap;'
psql -U $USER -d $DB -c 'drop table eqcomp;'
psql -U $USER -d $DB -c 'drop table eqworld;'
psql -U $USER -d $DB -c 'drop table invalid;'
psql -d $DB -U $USER -f Chase/prsel.sql
psql -U $USER -d $DB -c 'create index ftididx on ft(tid);'
psql -U $USER -d $DB -c 'create index fcididx on ft(cid);'
psql -U $USER -d $DB -c 'create index fhididx on ft(hid);'
psql -U $USER -d $DB -c 'create index ccididx on ct(cid);'
psql -U $USER -d $DB -c 'create index chididx on ct(hid);'
/usr/bin/time -f "%e sec" java -classpath Chase/bin:Chase/jdbc3.jar Main rt ft ct Chase/dependencies.xml
# dump tables
echo
echo "Dumping chased relations..."
pg_dump -U $USER -O -t rt -t ft -t ct -t wt $DB | gzip > $DATA_DIR/$SCENARIO.dump.gz
done
done

125
census/run_queries.sh Normal file
View File

@ -0,0 +1,125 @@
# RUN QUERIES
# For each (size, noise) scenario: restore the chased dump, gather input
# statistics, then run queries q1..q6 (plus their one-world variants) and
# record result statistics.
# define scenarios
DB=postgres
USER=postgres
DATA_DIR=data
QUERIES_DIR=queries
declare sizes_s=( 100K 500K 750K 1M 5M 7.5M 10M 12.5M )
declare sizes=( 100000 500000 750000 1000000 5000000 7500000 10000000 12491667 )
declare noise=( 0.00005 0.0001 0.0005 0.001 )
declare queries=( q1.sql q2.sql q3.sql q4.sql q5.sql q6.sql )
# create tables for storing results statistics
psql -d $DB -U $USER -c 'create table results(t timestamp, scenario varchar, r_size int, f_size int, c_size int,
num_components int, comp_size1 int, comp_size2 int,
comp_size3 int, comp_size4_and_more int);'
psql -d $DB -U $USER -c 'create table q_results(t timestamp, scenario varchar,
query varchar, r_size int, f_size int, c_size int,
num_components int, comp_size1 int, comp_size2 int,
comp_size3 int, comp_size4_and_more int);'
for (( i = 0; i < 8; i++ ))
do
SIZE=${sizes[$i]}
SIZE_S=${sizes_s[$i]}
for (( j = 0; j < 4; j++ ))
do
NOISE=${noise[$j]}
SCENARIO=n${SIZE_S}x${NOISE}
echo
echo '---------------------------------------------'
echo $SCENARIO
echo 'Time: ' `date`
echo '---------------------------------------------'
# drop tables
psql --quiet -d $DB -U $USER -c 'drop table rt cascade;'
psql --quiet -d $DB -U $USER -c 'drop table ft cascade;'
psql --quiet -d $DB -U $USER -c 'drop table ct cascade;'
psql --quiet -d $DB -U $USER -c 'drop table wt cascade;'
# load data
DUMP_FILE=$SCENARIO.dump.gz
echo 'Load data...'
/usr/bin/time -f "%e sec" gunzip -c $DATA_DIR/$DUMP_FILE | psql --quiet -d $DB -U $USER
echo 'done.'
#TODO: vacuum + analyze
echo 'Vacuum + analyze ...'
psql --quiet -d $DB -U $USER -c 'vacuum analyze;'
echo 'done.'
# gather statistics about the data
echo 'Gather statistics...'
psql --quiet -d $DB -U $USER -f $QUERIES_DIR/statistics.sql
psql --quiet -d $DB -U $USER -c "select stat('$SCENARIO') as input_statistics;"
echo 'done.'
# run queries
for (( k = 1; k <= 6; k++ ))
do
echo "Query $k"
echo "---------"
QUERY=${queries[$k-1]}
CLEAN_QUERY="clean_$QUERY"
ONEWORLD_QUERY="oneworld_$QUERY"
ONEWORLD_CLEAN_QUERY="oneworld_$CLEAN_QUERY"
if [ $k -eq 5 ] # rename columns for the join in Q5
then
echo 'renaming columns'
psql -d $DB -U $USER -c 'alter table r2 rename POWSTATE to POWSTATE1;'
psql -d $DB -U $USER -c 'alter table r3 rename POWSTATE to POWSTATE2;'
fi
# NOTE(review): $l (lowercase L) is never assigned anywhere in this
# script; with it unset, '[ -eq 1 ]' is a test error that evaluates
# false, so this one-world branch never runs.  The loop counters are
# i/j/k — confirm which one was intended here (and at the two other
# '$l' tests below).
if [ $l -eq 1 ] # only run the one-world query the first time for the given size
then
# clean old oneworld results
psql -d $DB -U $USER -f $QUERIES_DIR/$ONEWORLD_CLEAN_QUERY
if [ $k -eq 5 ] # rename columns for the join in Q5
then
echo 'renaming columns'
psql -d $DB -U $USER -c 'alter table rr2 rename POWSTATE to POWSTATE1;'
psql -d $DB -U $USER -c 'alter table rr3 rename POWSTATE to POWSTATE2;'
fi
# run oneworld query
echo "Query $k one world"
echo "---------"
/usr/bin/time -f "%e sec" psql -d $DB -U $USER -f $QUERIES_DIR/$ONEWORLD_QUERY
fi
# clean old results
psql --quiet -d $DB -U $USER -f $QUERIES_DIR/$CLEAN_QUERY
# load pg/plSQL scripts for joins
if [ $k -eq 3 ];
then
psql -d $DB -U $USER -f $QUERIES_DIR/fun-q3.sql;
elif [ $k -eq 5 ];
then
psql -d $DB -U $USER -f $QUERIES_DIR/fun-q5.sql;
fi
# run query
echo "Query $k world-set, run $l"
echo "-------------------------"
/usr/bin/time -f "%e sec" psql -d $DB -U $USER -f $QUERIES_DIR/$QUERY
# gather statistics about the results
if [ $l -eq 1 ]
then
R="r$k"
F="f$k"
C="c$k"
psql -d $DB -U $USER -f $QUERIES_DIR/statistics.sql
psql -d $DB -U $USER -c "select result_stat('$SCENARIO', '$k', '$R', '$F', '$C') as result_statistics;"
fi
done
done
done

17
import-www-data.sql Normal file
View File

@ -0,0 +1,17 @@
/**
** Imports data from the web graph dataset.
** Creates two relations n(u) and e(u,v) storing the nodes and edges, respectively.
** NOTE(review): DROP without IF EXISTS errors on a fresh database, and the
** COPY path './www.dat' is relative to the server process, not the client.
*/
drop table n;
drop table e;
create table n(u int);
create table e(u int, v int);
copy e from './www.dat' with delimiter as ' ';
-- Node set = all endpoints appearing in the edge relation (UNION dedups).
insert into n
select u from e
union
select v from e;

91
init-www-graph.sql Normal file
View File

@ -0,0 +1,91 @@
/**
** Initializes a random graph from the subgraph of the WWW graph by
** assigning probabilities to the edges in the following way:
** p(u,v) = degree(u) / max degree(w)
**
** Assumes as input two tables n0(u) and e0(u,v).
** NOTE(review): the 'repair key ... weight by' construct below is MayBMS
** query-language syntax, not standard SQL — this script requires a MayBMS
** backend, plain PostgreSQL will reject it.
*/
drop table to_subset cascade;
drop table in_degree;
drop table out_degree;
drop table e1;
drop table edge0;
drop table no_edge0;
drop table edge;
drop table no_edge;
/*
Create random graph example
*/
-- In-degree of every node; nodes with no incoming edge get degree 0.
create table in_degree as
(
select v, count(*) as d
from e0
group by v
)
union
(
select u, 0 as d
from e0
where u not in (select v from e0)
);
-- Out-degree of every node; nodes with no outgoing edge get degree 0.
create table out_degree as
(
select u, count(*) as d
from e0
group by u
)
union
(
select v, 0 as d
from e0
where v not in (select u from e0)
);
-- e1: each edge with bit=1 and probability out-degree(u)/max out-degree.
create table e1
as select e0.u, e0.v, 1 as bit, o.d::float4 / (select max(d) from out_degree)::float4 as p
from e0, out_degree o
where e0.u = o.u;
-- Complementary bit=0 rows carry the probability of the edge being absent.
insert into e1
select u, v, 0 as bit, 1 - p as p
from e1
where p != 1;
/* This table represents all subsets of the total order over node as possible
worlds.
*/
create table to_subset as
(
repair key u,v in e1
weight by p
);
create table edge0 as (select u,v from to_subset where bit=1);
create table no_edge0 as (select u,v from to_subset where bit=0);
/* add the missing edges to no_edge0 */
insert into no_edge0
select n1.u as u, n2.u as v
from n0 n1, n0 n2
where (n1.u,n2.u) not in (select u,v from e0);
--create table edge as (select * from edge0);
--insert into edge (select v as u, u as v from edge0);
--create table no_edge as (select * from no_edge0);
--insert into no_edge (select v as u, u as v from no_edge0);

39
init-www-nodes.sql Normal file
View File

@ -0,0 +1,39 @@
/**
 ** Initializes relations storing a subgraph of the WWW graph.
 ** Creates tables n0(u) and e0(u,v) for the nodes and edges of the subgraph,
 ** respectively. The function expects the WWW graph to be given by relations
 ** n and e, and accepts as parameter a number r giving the ratio of the
 ** original graph nodes to retain.
 **/
-- IF EXISTS keeps the script rerunnable; plain DROP errors on first run.
drop table if exists n0;
drop table if exists e0;
/*
Create a node relation
*/
create table n0 (u integer);
create table e0 (u integer, v integer);
drop function if exists init_www_nodes(float4);
-- init_www_nodes(r): fills n0 with a random sample of n (each node kept
-- independently with probability r) and e0 with the edges of e whose both
-- endpoints survived the sampling.
create or replace function init_www_nodes(r float4) returns void as
$$
BEGIN
/* create a subset of the node relation by keeping nodes uniformly at random
with the specified probability r */
insert into n0
select * from n
where random() < r;
/* compute the edge relation induced by the subset of the nodes n0 */
insert into e0
select * from e
where u in (select u from n0) and v in (select u from n0);
RETURN;
END;
$$
language plpgsql;

11920
movie_data.sql Normal file

File diff suppressed because it is too large Load Diff

57
movie_query.triql Normal file
View File

@ -0,0 +1,57 @@
-- Create new uncertain Trio tables (without lineage)
-- from the initial relational tables
-- The new tables will be treated as base tables in Trio,
-- with tuple alternatives and confidences attached to them
-- NOTE(review): groupalts(keys) groups tuples sharing the keys into
-- alternatives of one x-tuple; "uniform" gives equal confidences —
-- confirm against the TriQL specification.
create table T_Movies as
select nolineage groupalts(movie_id) *, uniform as conf from Movies;
create table T_Ratings as
select nolineage groupalts(movie_id, cust_id, date)
movie_id, cust_id, date, rating, confidence as conf from Ratings;
-- Select very uncertain movies: wide range of possible years
-- Assign confidence values uniformly to alternatives
-- ([...] appears to aggregate across a tuple's alternatives)
create table U_Movies as
select *, uniform as conf
from T_Movies
where [max(year) - min(year)] > 30;
-- Join in recent high-confidence ratings for these movies
-- (conf(R) reads the confidence attached to the rating alternative)
create table M_Ratings as
select merged title, year, date, rating
from T_Ratings R, U_Movies M
where R.movie_id = M.movie_id
and R.date like '2005%'
and conf(R) >= 0.4;
-- Get directors back using lineage
-- (R ==> M follows lineage from derived tuple R back to source M)
create table Dir_Ratings as
select R.title, R.year, R.rating, M.director
from M_Ratings R, U_Movies M
where R ==> M;
-- Find directors of what are probably series: more than
-- one alternative with same director, different year
create table Series as
select merged movie_id, director, uniform as conf
from T_Movies M1
where exists
[select * from T_Movies M2
where M2.director = M1.director
and M2.year <> M1.year];
-- Find controversial series: high variance in ratings
-- (two ratings for the same movie that differ by at least 3 points)
select merged S.movie_id, S.director,
R1.rating as rating1,
R2.rating as rating2
from Series S, T_Ratings R1, T_Ratings R2
where S.movie_id = R1.movie_id
and S.movie_id = R2.movie_id
and R1.rating - R2.rating >=3;

71
pdbench/Datasets Normal file
View File

@ -0,0 +1,71 @@
Note: This page is under construction! Explanations and further entries will be added soon!
==Data Generator: Uncertain TPC-H for MayBMS (and Trio)==
|| Availability: || [[http://pdbench.sourceforge.net/MayBMS-tpch.tgz|here]] (580kb .tgz file) ||
|| Contributors: || MayBMS team ||
|| Tags: || Discrete distributions; complex conditions aka "external lineage" ||
|| Comments: || This is a modification of the standard TPC-H data generator. Includes a data generator (C code, an extension of the standard TPC-H data generator), queries, a translator from attribute-level to tuple-level U-relations (PLSQL script), and a translator from tuple-level U-relations to Trio ULDBs (PLSQL script). ||
|| References: || Lyublena Antova, Thomas Jansen, Christoph Koch, Dan Olteanu. "Fast and Simple Relational Processing of Uncertain Data". //Proc. ICDE 2008//. ||
==Data Generator: Uncertain TPC-H for Trio==
|| Availability: || [[http://pdbench.sourceforge.net/TPCH-Trio.zip|here]] (3kB .zip file) ||
|| Contributors: || Trio team ||
|| Tags: || ||
|| Comments: || Two Python data generators for generating vertical and horizontal partitionings of the various TPCH tables into Trio tables with x-tuples ("run_horizontal_partitioner.sh" shows how to run the horizontal partitioner, for example). The archive also contains some DDL commands for some example schema definitions in Trio. (1) The horizontal partitioner simply groups the relational tuples from a given TPCH table into Trio alternatives with a uniform distribution of confidences. That is, all alternatives of the same group become mutually exclusive (which basically ignores the original TPCH keys for the resulting possible worlds). The number of alternatives per x-tuple, the number of partitions, etc., can be adjusted via various parameters. (2) The vertical partitioner can furthermore split a TPCH table into its individual attributes. Here, the original TPCH keys may be maintained and can be used to reconstruct the original PW's of the TPCH database. For the TPCH settings the Trio team can also provide readily extracted data dumps (> 1 GB) and the queries that were used for two previous papers. The queries are also in TriQL syntax and probably quite specific to our settings, i.e., they would already be actual benchmarks for measuring confidence computations and update performances in Trio. ||
|| References: || ||
==Dataset: IMDB Movies==
|| Availability: || [[http://pdbench.sourceforge.net/movie_data.zip|IMDB Movies and Netflix ratings dataset]] (100kB zipped SQL file); [[http://pdbench.sourceforge.net/movie_query.triql|TriQL queries]] ||
|| Contributors: || Trio team ||
|| Tags: || ||
|| Comments: || This is the IMDB data and query set which is also used for the Trio online demo. It models an "uncertain" data integration scenario between IMDB movies and Netflix ratings, along with a few queries that have been used for the demo (both the data and query files are in TriQL syntax). IMDB movies have been grouped together into x-tuples by similar titles, and each original Netflix rating has been artificially extended by additional rating alternatives according to some normal confidence distribution around the original (i.e., actual Netflix) rating. ||
|| References: || ||
==Dataset: RFID Sensor Data==
|| Availability: || soon ||
|| Contributors: || Mystiq team ||
|| Tags: || ||
|| Comments: || ||
|| References: || ||
==Dataset: Data integration==
|| Availability: || soon ||
|| Contributors: || Twente team ||
|| Tags: || ||
|| Comments: || Tasks: Turn the Twente probabilistic information integrator into a data generator; produce a dataset in the movie rating domain. **Source 1**: XML-file constructed by extracting data from [[http://www.tvguide.com/|http://www.tvguide.com]] (unfortunately, TVguide.com discontinued its movies recommendation service; changing to top 100 most popular): http://library.cs.utwente.nl/xquery/docs/tvguidemostpopular.xml (100 movies; 700kB) **Source 2**: XML-file constructed from moviedb-3.24 (see for example uiarchive.cso.uiuc.edu in /pub/info/imdb/tools) http://library.cs.utwente.nl/xquery/docs/ImdbRestricted.xml (243856 movies; 241MB; all movies with at least one genre which is not "Documentary" or "Adult"). **[ToDo]** Data generator based on probabilistic XML information integrator for these two sources. ||
|| References: || de Keijzer, A. and van Keulen, M. (2008) [[http://eprints.eemcs.utwente.nl/11232/|//IMPrECISE: Good-is-good-enough data integration.//]] ICDE 2008 (demo). van Keulen, M. and de Keijzer, A. and Alink, W. (2005) [[http://eprints.eemcs.utwente.nl/7273/|//A probabilistic XML approach to data integration.//]] ICDE 2005. ||
==Dataset, Generator: IPUMS US census data==
|| Availability: || Anonymized subset of the US census: http://usa.ipums.org/usa/; uncertainty generator: soon; small example involving vertical decompositioning and data cleaning: [[http://maybms.cvs.sourceforge.net/viewvc/maybms/maybms/examples/census.sql?view=markup|here]] ||
|| Contributors: || MayBMS team ||
|| Tags: || Discrete distributions; or-sets; data cleaning; conditional probability tables ||
|| Comments: || Tasks: Contribute MayBMS data generator which reintroduces uncertainty into the census data, and queries. ||
|| References: || ||
==Nascent Use Case: Skills Management==
|| Availability: || [[http://maybms.cvs.sourceforge.net/viewvc/maybms/maybms/examples/companies.sql?view=markup|here]] ||
|| Contributors: || MayBMS team ||
|| Tags: || Discrete distributions; conditional probability tables ||
|| Comments: || ||
|| References: || ||
==Nascent Use Case: Random Graphs and Social Networks==
|| Availability: || [[http://maybms.cvs.sourceforge.net/viewvc/maybms/maybms/examples/randgraph.sql?view=markup|here]] ||
|| Contributors: || MayBMS team ||
|| Tags: || Discrete distributions ||
|| Comments: || Confidence computation is challenging because the DNFs/the lineage get very large and does not decompose using any of the known techniques. ||
|| References: || ||
==Nascent Use Case: Analyzing Web Graphs==
|| Availability: || WWW data set: [[http://www.nd.edu/~networks/resources/www/www.dat.gz|here]], SQL scripts: [[http://pdbench.sourceforge.net/webgraph.zip|here]] (3kB .zip file) ||
|| Contributors: || MayBMS team ||
|| Tags: || Discrete distributions ||
|| Comments: || A variation of the random graph example using web graph data. The edges are assigned probability relative to the degree of the end nodes, thus the graph has few edges with high probability, and the majority of edges have low probability. The dataset contains example queries finding the probability for occurrence of a pattern in the random graph, such as for example a triangle etc. ||
|| References: || Dataset due to Réka Albert, Hawoong Jeong and Albert-László Barabási: Diameter of the World Wide Web Nature 401, 130 (1999) See [[http://www.nd.edu/~networks/resources.htm]] for more details on the data. ||
==Use Case: Data cleaning==
|| Availability: || Generator for uncertain data: [[http://pdbench.sourceforge.net/census.zip|here]] (424kB .zip file) ||
|| Contributors: || MayBMS team ||
|| Tags: || ||
|| Comments: || This is a noise generator for the census data set. Contains two tools: for inserting noise in the form of or-sets in the census data set (DataNoise), and for data cleaning using dependencies on the data set (Chase). More information available in the README file supplied with the archive. ||
|| References: || Original dataset available for download at [[http://www.ipums.org]]. See "10^10^6 Worlds and Beyond: Efficient Representation and Processing of Incomplete Information", Lyublena Antova, Christoph Koch, Dan Olteanu, Proc. ICDE 2007 for more details on the representation and query rewriting. ||

BIN
pdbench/files/webgraph.zip Normal file

Binary file not shown.

31
pdbench/home Normal file
View File

@ -0,0 +1,31 @@
=The Probabilistic Database Use Cases and Benchmarking Project=
Probabilistic databases are a new type of database systems that have created a substantial amount of excitement among data management researchers recently. Probabilistic databases will allow for important new database applications both in science and industry. The goal of this project is to create and assemble resources for advancing the state of the art in the nascent technology of probabilistic databases. Our long term goal is to establish a credible and representative set of **use cases** and develop a **benchmark** for probabilistic databases.
To date, it is still too early for a serious benchmark. Probabilistic databases are still in their infancy, and we cannot see a convergence of approaches yet. Such a convergence should at this point also not be considered a priority, because an attempt to create standardized functionality and database languages may discourage creativity.
However, to establish the credibility of probabilistic database research and to make further progress in creating scalable probabilistic database technology, it is important to establish realistic use cases. The existence of many exciting applications is postulated in various research papers, but few realistic use cases have been proposed and described in sufficient detail to make them verifiable. Such use cases will be needed to convince the data management research community in the wider sense as well, the IT industry, and users. The success of probabilistic databases will ultimately depend on clearly stated and verified use cases. Moreover, it is important to collect real or at least realistic datasets and query workloads to test prototype systems against, and to make the research advances required to turn probabilistic databases into a useful technology.
==Resources==
* [[Datasets|Usecases, Datasets, data generators, and queries]] for experimenting with probabilistic databases
==Members of the Working Group==
* [[http://www.cs.cornell.edu/%7Elantova/|Lyublena Antova]], Cornell University ([[http://www.cs.cornell.edu/bigreddata/maybms/|MayBMS Project]])
* [[http://i.stanford.edu/%7Eanishds/|Anish Das Sarma]], Stanford University ([[http://infolab.stanford.edu/trio/|Trio Project]])
* [[http://www.cs.umd.edu/%7Eamol/|Amol Deshpande]], University of Maryland (PrDB Project, [[http://www.cs.umd.edu/%7Eamol/MauveDB/|MauveDB Project]])
* [[http://www.almaden.ibm.com/cs/people/peterh/|Peter Haas]], IBM Almaden Research Center (MCDB Project)
* [[http://www.comlab.ox.ac.uk/people/dan.olteanu/|Dan Olteanu]], Oxford University ([[http://www.cs.cornell.edu/bigreddata/maybms/|MayBMS Project]], [[http://web.comlab.ox.ac.uk/projects/SPROUT/|SPROUT Project]])
* [[http://www.cs.washington.edu/homes/chrisre/|Christopher Ré]], University of Washington ([[http://mystiq.cs.washington.edu/|Mystiq Project]])
* [[http://infolab.stanford.edu/%7Etheobald/|Martin Theobald]], Max Planck Institut fuer Informatik ([[http://infolab.stanford.edu/trio/|Trio Project]])
* [[http://wwwhome.cs.utwente.nl/%7Ekeulen/|Maurice van Keulen]], University of Twente
* [[http://www.cs.cornell.edu/%7Ekoch/|Christoph Koch]], Cornell University ([[http://www.cs.cornell.edu/bigreddata/maybms/|MayBMS Project]]), moderator, contact
==Affiliates==
* [[http://www.cs.umd.edu/%7Egetoor/|Lise Getoor]], University of Maryland
* [[http://www.cs.washington.edu/homes/suciu/|Dan Suciu]], University of Washington ([[http://mystiq.cs.washington.edu/|Mystiq Project]])

Some files were not shown because too many files have changed in this diff Show More