pdbench/census/DataNoise/src/NoiseGenerator.java

252 lines
5.8 KiB
Java

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
/**
 * Generates random noise for a relation. A piece of noise is a "hole": a field
 * (tuple id plus attribute name) together with an or-set of alternative values
 * for that field.
 */
public class NoiseGenerator
{
    /**
     * Number of tuples whose holes are collected in memory before they are
     * handed to the database and the buffer is emptied.
     */
    private static final int BATCH_SIZE = 100000;

    /**
     * Generates a random number in the closed range spanned by the two
     * parameters. The bounds may be given in either order.
     * @param a One end of the range (inclusive).
     * @param b The other end of the range (inclusive).
     * @return A random integer n with min(a, b) <= n <= max(a, b).
     */
    private static int random(int a, int b)
    {
        if (a > b)
        {
            int c = a;
            a = b;
            b = c;
        }
        // The span is computed in double arithmetic so that (b - a + 1) cannot
        // overflow int for very wide ranges.
        double span = (double) b - (double) a + 1.0;
        int n = (int) (a + Math.random() * span);
        // Clamp to guard against floating point rounding at the range ends.
        if (n < a)
        {
            n = a;
        }
        else if (n > b)
        {
            n = b;
        }
        return n;
    }

    /**
     * This method introduces noise in the database. Holes are generated in
     * batches of at most BATCH_SIZE tuples; every batch is passed to
     * aDB.introduceNoise and then discarded. After the last batch
     * aDB.createWorldTable is called.
     * @param aDB Database connection that receives the generated holes.
     * @param aVars Names of the attributes to generate noise in and range of allowed
     * values for each attribute.
     * @param aRelSize Size of the relation to generate noise in.
     * @param aTuplesCount Number of tuples that should contain or-sets.
     * @param aMaxHolesPerTuple Maximal number of holes per tuple.
     * @param aMaxHoleSize Maximal number of entries in each or-set
     * @return The list that was used as batch buffer. It is cleared after every
     * flush to the database, so it is always empty when this method returns.
     * @throws SQLException If the database rejects one of the operations.
     */
    public static ArrayList generateNoise (DBConnector aDB, HashMap aVars, int aRelSize,
            int aTuplesCount, int aMaxHolesPerTuple, int aMaxHoleSize)
        throws SQLException
    {
        ArrayList holes = new ArrayList();
        Object[] varNames = aVars.keySet().toArray();
        // IDs of tuples, to which holes were already introduced
        HashSet tids = new HashSet();
        int i = 0;
        int j = 0;
        while (i < aTuplesCount)
        {
            // End (exclusive) of the current batch; written so that the
            // addition cannot overflow.
            int n = (aTuplesCount - i > BATCH_SIZE) ? i + BATCH_SIZE : aTuplesCount;
            try
            {
                for (j = i; j < n; ++j)
                {
                    int t;
                    if (j > aRelSize)
                    {
                        break;
                    }
                    if (aRelSize <= aTuplesCount)
                    {
                        // Every tuple gets noise, so the ids are simply enumerated.
                        // NOTE(review): this branch produces the ids 0..aRelSize while
                        // the random branch below produces 1..aRelSize - confirm which
                        // id range the relation really uses.
                        t = j;
                    }
                    else
                    {
                        // Pick a tuple id that has not been used yet.
                        do
                        {
                            t = random(1, aRelSize);
                        }
                        while (tids.contains(Integer.valueOf(t)));
                    }
                    tids.add(Integer.valueOf(t));
                    ArrayList columns = generateVarNames(varNames, aMaxHolesPerTuple);
                    for (int k = 0; k < columns.size(); ++k)
                    {
                        ArrayList values = generateValues((ArrayList) aVars.get(columns.get(k)),
                                aMaxHoleSize);
                        Hole hole = new Hole();
                        hole.tid = t;
                        hole.columnName = columns.get(k).toString();
                        hole.values = values;
                        holes.add(hole);
                    }
                }
                aDB.introduceNoise(holes);
                holes.clear();
                i = j;
                if (j > aRelSize)
                {
                    break;
                }
            }
            catch(OutOfMemoryError e)
            {
                // The batch did not fit into memory: flush what was collected so
                // far and continue with the tuple that was being processed.
                aDB.introduceNoise(holes);
                holes.clear();
                i = j;
            }
        }
        aDB.createWorldTable();
        return holes;
    }

    /**
     * This method randomly picks the attributes to generate noise in.
     * @param aVarNames Names of the attributes (expected to be distinct Strings).
     * @param aMaxHolesPerTuple Maximal number of holes to generate.
     * @return ArrayList with the names of the attributes to generate noise in;
     * empty if there are no attributes or aMaxHolesPerTuple is smaller than 1.
     */
    private static ArrayList generateVarNames(Object[] aVarNames, int aMaxHolesPerTuple)
    {
        ArrayList varNames = new ArrayList();
        int varCount = aVarNames.length;
        int limit = Math.min(varCount, aMaxHolesPerTuple);
        if (limit < 1)
        {
            // Nothing can be picked. Without this guard random(1, 0) would swap
            // its bounds and sometimes request one name from an empty array.
            return varNames;
        }
        int holesCount = random(1, limit);
        for (int j = 0; j < holesCount; ++j)
        {
            String varName;
            // Draw until a name is found that was not picked before.
            do
            {
                varName = (String) aVarNames[random(0, varCount - 1)];
            }
            while (varNames.contains(varName));
            varNames.add(varName);
        }
        return varNames;
    }

    /**
     * Generates values for a given attribute.
     * @param aVarValues Values allowed for the given attribute. The values are
     * assumed to be distinct Strings; duplicates could keep the selection loop
     * from terminating.
     * @param aMaxHoleSize Maximal number of values to generate for the attribute.
     * @return ArrayList with possible values for the given attribute; empty if
     * there are no allowed values or aMaxHoleSize is smaller than 1.
     */
    private static ArrayList generateValues(ArrayList aVarValues, int aMaxHoleSize)
    {
        ArrayList values = new ArrayList();
        int valuesCount = aVarValues.size();
        int limit = Math.min(valuesCount, aMaxHoleSize);
        if (limit < 1)
        {
            // Nothing can be picked. Without this guard random(1, 0) would swap
            // its bounds and sometimes request one value from an empty list.
            return values;
        }
        int holeSize = random(1, limit);
        for (int i = 0; i < holeSize; ++i)
        {
            String value;
            // Draw until a value is found that was not picked before.
            do
            {
                int n = random(0, valuesCount - 1);
                value = (String) aVarValues.get(n);
            }
            while (values.contains(value));
            values.add(value);
        }
        return values;
    }

    /**
     * This method generates noise by uniformly selecting a certain number of fields to
     * generate or-sets to. Every field (tuple id and attribute) receives at most
     * one hole. Tuple ids are drawn from 1..aRelSize.
     * @param aVars Names of the attributes to generate noise in and range of allowed
     * values for each attribute.
     * @param aRelSize Size of the relation.
     * @param aHolesCount Number of holes to generate; capped at the number of
     * fields (attributes times tuples) of the relation.
     * @param aMaxHoleSize Maximal number of entries in each or-set.
     * @return ArrayList with information about the generated holes; empty if
     * there are no attributes, no tuples or no holes requested.
     */
    public static ArrayList generateNoiseUniform(HashMap aVars, int aRelSize,
            int aHolesCount, int aMaxHoleSize)
    {
        ArrayList result = new ArrayList();
        if (aVars == null || aVars.isEmpty() || aRelSize < 1 || aHolesCount < 1)
        {
            return result;
        }
        Object[] varNames = aVars.keySet().toArray();
        int varCount = varNames.length;
        // Number of fields in the relation, computed as long to avoid int overflow.
        long fieldCount = (long) varCount * (long) aRelSize;
        if (fieldCount < aHolesCount)
        {
            aHolesCount = (int) fieldCount;
        }
        // Fields that already received a hole, encoded as (tid - 1) * varCount + j.
        HashSet usedFields = new HashSet();
        while (result.size() < aHolesCount)
        {
            // Generate tuple id
            int tid = random(1, aRelSize);
            // Generate attribute name
            int j = random(0, varCount - 1);
            Long field = Long.valueOf((long) (tid - 1) * varCount + j);
            // Skip fields for which a hole was already generated. The loop
            // terminates because aHolesCount never exceeds fieldCount.
            if (!usedFields.add(field))
            {
                continue;
            }
            String varName = varNames[j].toString();
            ArrayList values = generateValues((ArrayList) aVars.get(varName),
                    aMaxHoleSize);
            Hole h = new Hole();
            h.tid = tid;
            h.columnName = varName;
            h.values = values;
            result.add(h);
        }
        return result;
    }
}