38 lines
1.0 KiB
Scala
38 lines
1.0 KiB
Scala
package org.mimirdb.pip.distribution.boolean
|
|
|
|
import org.apache.spark.sql.types.{ DataType, BooleanType }
|
|
import org.mimirdb.pip.distribution.DistributionFamily
|
|
|
|
/**
 * A [[DistributionFamily]] whose samples are booleans.
 *
 * Fixes `baseType` to `BooleanType` and adds helpers for obtaining the
 * probability that a sample is `true`, either exactly (when the family also
 * mixes in [[ProbabilitySupported]]) or by repeated sampling.
 */
trait BooleanDistributionFamily extends DistributionFamily
{
  val baseType = BooleanType

  /**
   * Probability that a sample drawn with `params` is `true`.
   *
   * When this family also mixes in [[ProbabilitySupported]], the result is
   * delegated to `probability(params)` and `samples` is ignored. Otherwise
   * `samples` values are drawn with a fresh `scala.util.Random` and the
   * fraction that came back `true` is returned.
   *
   * @param params  distribution parameters, passed through unchanged to
   *                `probability` or `sample`
   * @param samples number of draws used on the sampling path; must be
   *                positive whenever that path is taken
   * @return the exact probability if supported, otherwise the observed
   *         fraction of `true` samples (a value in [0, 1])
   * @throws IllegalArgumentException if sampling is required and `samples`
   *                                  is not positive
   */
  def approximateProbability(params: Any, samples: Int): Double =
    this match {
      case exact: ProbabilitySupported => exact.probability(params)
      case _ =>
        // Without this guard, zero samples would silently produce NaN (0.0 / 0)
        // and a negative count would produce -0.0.
        require(samples > 0, s"samples must be positive to estimate a probability, got $samples")
        val rand = new scala.util.Random()
        val hits = (0 until samples).count { _ =>
          sample(params, rand).asInstanceOf[Boolean]
        }
        hits.toDouble / samples
    }

  /**
   * Whether [[approximateProbability]] can answer without sampling.
   *
   * @param params distribution parameters (not consulted by this implementation)
   * @return `true` iff this family mixes in [[ProbabilitySupported]]
   */
  def approximateProbabilityIsFast(params: Any): Boolean =
    this.isInstanceOf[ProbabilitySupported]
}
|
|
|
|
/**
 * An add-on to BooleanDistributionFamily indicating that the probability can
 * be computed exactly rather than estimated by sampling.
 *
 * NOTE: the type check in this trait's body runs while the trait itself is
 * being initialized, so `baseType` must already hold its value at that point
 * (i.e. the trait that defines `baseType` has to come earlier in the mix-in
 * order); otherwise the assertion sees an uninitialized value and fails.
 */
trait ProbabilitySupported
{
  /** Spark SQL type of the distribution's samples; asserted below to be `BooleanType`. */
  val baseType: DataType

  /**
   * Exact probability for the given parameters.
   *
   * BooleanDistributionFamily.approximateProbability returns this value in
   * place of the sampled fraction of `true` results.
   *
   * @param params distribution parameters; their shape is defined by the
   *               implementing family
   * @return the probability as a `Double`
   */
  def probability(params: Any): Double

  // Initialization-time sanity check: this add-on is only meaningful for
  // boolean-typed distributions.
  assert(baseType == BooleanType, "Non-boolean distributions can not support probabilities")
}