252 lines
5.8 KiB
Java
252 lines
5.8 KiB
Java
import java.sql.SQLException;
|
|
import java.util.ArrayList;
|
|
import java.util.HashMap;
|
|
import java.util.HashSet;
|
|
import java.util.Set;
|
|
|
|
public class NoiseGenerator
|
|
{
|
|
/**
|
|
* Generates a random number in the range specified between the two parameters
|
|
* @param a
|
|
* @param b
|
|
* @return
|
|
*/
|
|
private static int random(int a, int b)
|
|
{
|
|
if (a > b)
|
|
{
|
|
int c = a;
|
|
a = b;
|
|
b = c;
|
|
}
|
|
double r = Math.random();
|
|
int n = (int) (a + r * (b - a + 1));
|
|
if (n < a)
|
|
{
|
|
n = a;
|
|
}
|
|
else if (n > b)
|
|
{
|
|
n = b;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
/**
|
|
* This method introduces noise in the database.
|
|
* @param aVars Names of the attributes to generate noise in and range of allowed
|
|
* values for each attribute.
|
|
* @param aRelSize Size of the relation to generate noise in.
|
|
* @param aTuplesCount Number of tuples that should contain or-sets.
|
|
* @param aMaxHolesPerTuples Maximal number of holes per tuple.
|
|
* @param aMaxHoleSize Maximal number of entries in each or-set
|
|
* @return ArrayList with information about the holes that should be
|
|
* introduced in the relation.
|
|
*/
|
|
public static ArrayList generateNoise (DBConnector aDB, HashMap aVars, int aRelSize,
|
|
int aTuplesCount, int aMaxHolesPerTuple, int aMaxHoleSize)
|
|
throws SQLException
|
|
{
|
|
ArrayList holes = new ArrayList();
|
|
|
|
Set keys = aVars.keySet();
|
|
Object[] varNames = keys.toArray();
|
|
|
|
// IDs of tuples, to which holes were already introduced
|
|
HashSet tids = new HashSet();
|
|
|
|
int i = 0;
|
|
int j = 0;
|
|
|
|
while (i < aTuplesCount)
|
|
{
|
|
int n = i + 100000;
|
|
if (n > aTuplesCount)
|
|
{
|
|
n = aTuplesCount;
|
|
}
|
|
//System.out.println(holes.size());
|
|
//System.out.println(n);
|
|
|
|
try
|
|
{
|
|
for (j = i; j < n; ++j)
|
|
{
|
|
int t;
|
|
|
|
if (j > aRelSize)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (aRelSize <= aTuplesCount)
|
|
{
|
|
t = j;
|
|
}
|
|
else
|
|
{
|
|
do
|
|
{
|
|
t = random(1, aRelSize);
|
|
}
|
|
while (tids.contains(new Integer(t)));
|
|
}
|
|
|
|
tids.add(new Integer(t));
|
|
|
|
ArrayList columns = generateVarNames(varNames, aMaxHolesPerTuple);
|
|
for (int k = 0; k < columns.size(); ++k)
|
|
{
|
|
ArrayList values = generateValues((ArrayList) aVars.get(columns.get(k)),
|
|
aMaxHoleSize);
|
|
Hole hole = new Hole();
|
|
hole.tid = t;
|
|
hole.columnName = columns.get(k).toString();
|
|
hole.values = values;
|
|
holes.add(hole);
|
|
}
|
|
}
|
|
//System.err.println("holes: " + holes.size());
|
|
aDB.introduceNoise(holes);
|
|
holes.clear();
|
|
i += (j - i);
|
|
|
|
//System.err.println("j: " + j);
|
|
//System.err.println("rel size: " + aRelSize);
|
|
|
|
if (j > aRelSize)
|
|
{
|
|
break;
|
|
}
|
|
|
|
}
|
|
catch(OutOfMemoryError e)
|
|
{
|
|
aDB.introduceNoise(holes);
|
|
holes.clear();
|
|
i += (j - i);
|
|
}
|
|
}
|
|
aDB.createWorldTable();
|
|
return holes;
|
|
}
|
|
|
|
/**
|
|
* This method randomly picks the attributes to generate noise in.
|
|
* @param aVarNames Names of the attributes.
|
|
* @param aMaxHolesPerTuple Maximal number of holes to generate.
|
|
* @return ArrayList with the names of the attributes to generate noise in.
|
|
*/
|
|
private static ArrayList generateVarNames(Object[] aVarNames, int aMaxHolesPerTuple)
|
|
{
|
|
ArrayList varNames = new ArrayList();
|
|
int varCount = aVarNames.length;
|
|
|
|
int holesCount;
|
|
if (varCount > aMaxHolesPerTuple)
|
|
{
|
|
holesCount = random(1, aMaxHolesPerTuple);
|
|
}
|
|
else
|
|
{
|
|
holesCount = random(1, varCount);
|
|
}
|
|
|
|
for (int j = 0; j < holesCount; ++j)
|
|
{
|
|
String varName;
|
|
do
|
|
{
|
|
varName = (String) aVarNames[random(0, varCount - 1)];
|
|
}
|
|
while (varNames.contains(varName));
|
|
|
|
varNames.add(varName);
|
|
}
|
|
|
|
return varNames;
|
|
}
|
|
|
|
/**
|
|
* Generates values for a given attribute.
|
|
* @param aVarValues Values allowed for the given attribute.
|
|
* @param aMaxHoleSize Maximal number of values to generate for the attribute.
|
|
* @return ArrayList with possible values for the given attribute.
|
|
*/
|
|
private static ArrayList generateValues(ArrayList aVarValues, int aMaxHoleSize)
|
|
{
|
|
ArrayList values = new ArrayList();
|
|
|
|
int valuesCount = aVarValues.size();
|
|
int holeSize;
|
|
|
|
if (aMaxHoleSize < valuesCount)
|
|
{
|
|
holeSize = random(1, aMaxHoleSize);
|
|
}
|
|
else
|
|
{
|
|
holeSize = random(1, valuesCount);
|
|
}
|
|
|
|
for (int i = 0; i < holeSize; ++i)
|
|
{
|
|
String value;
|
|
do
|
|
{
|
|
int n = random(0, valuesCount - 1);
|
|
value = (String) aVarValues.get(n);
|
|
}
|
|
while (values.contains(value));
|
|
values.add(value);
|
|
}
|
|
return values;
|
|
}
|
|
|
|
/**
|
|
* This method generates noise by uniformly selecting a certain number of fields to
|
|
* generate or-sets to.
|
|
* @param aVars
|
|
* @param aRelSize Size of the relation.
|
|
* @param aHolesCount
|
|
* @param aMaxHoleSize
|
|
* @return ArrayList with information about the generated holes.
|
|
*/
|
|
public static ArrayList generateNoiseUniform(HashMap aVars, int aRelSize,
|
|
int aHolesCount, int aMaxHoleSize)
|
|
{
|
|
ArrayList result = new ArrayList();
|
|
// TODO: Implement
|
|
|
|
ArrayList varNames = new ArrayList();
|
|
int varCount = varNames.size();
|
|
|
|
if (varCount * aRelSize < aHolesCount)
|
|
{
|
|
aHolesCount = varCount * aRelSize;
|
|
}
|
|
|
|
for (int i = 0; i < aHolesCount; ++i)
|
|
{
|
|
// Generate tuple id
|
|
int tid = random(1, aRelSize);
|
|
// Generate attribute name
|
|
int j = random(0, varCount - 1);
|
|
String varName = varNames.get(j).toString();
|
|
// TODO: Check whether the hole was already generated
|
|
// ...
|
|
|
|
ArrayList values = generateValues((ArrayList) aVars.get(varName),
|
|
aMaxHoleSize);
|
|
|
|
Hole h = new Hole();
|
|
h.tid = tid;
|
|
h.columnName = varName;
|
|
h.values = values;
|
|
result.add(h);
|
|
}
|
|
return result;
|
|
}
|
|
}
|