56 lines
1.4 KiB
Scala
56 lines
1.4 KiB
Scala
package org.mimirdb.pip.distribution.numerical
|
|
|
|
import org.apache.spark.sql.types.{ DataType, DoubleType }
|
|
import org.mimirdb.pip.distribution.DistributionFamily
|
|
|
|
/**
 * A [[DistributionFamily]] that specifically samples numbers.
 *
 * `baseType` is fixed to Spark's `DoubleType`, and draws obtained from `sample`
 * are treated as `Double`s by [[approximateCDF]].
 */
trait NumericalDistributionFamily extends DistributionFamily
{
  val baseType = DoubleType

  /**
   * Compute the CDF at `value`, exactly when possible and by sampling otherwise.
   *
   * If this family also mixes in [[CDFSupported]], the result is delegated to
   * its `cdf` and `samples` is ignored. Otherwise `samples` values are drawn
   * via `sample` with a freshly created `scala.util.Random`, and the fraction
   * of draws that are `<= value` is returned.
   *
   * @param value   the point at which the CDF is evaluated
   * @param params  distribution parameters, passed through unchanged to `cdf` / `sample`
   * @param samples number of draws for the sampling fallback; must be positive
   *                whenever the fallback is taken
   * @return the exact CDF when [[CDFSupported]] is mixed in, otherwise the
   *         empirical fraction of draws that are `<= value`
   * @throws java.lang.IllegalArgumentException if the sampling fallback is
   *         needed and `samples` is not positive
   */
  def approximateCDF(value: Double, params: Any, samples: Int): Double =
    this match {
      case c: CDFSupported => c.cdf(value, params)
      case _ =>
        {
          // With zero draws the ratio below is 0.0 / 0 = NaN (and -0.0 for a
          // negative count), neither of which is a meaningful CDF value.
          // Checked only on this path, because the exact path above never
          // reads `samples`.
          require(
            samples > 0,
            s"samples must be positive to estimate a CDF by sampling, got $samples"
          )
          val rand = new scala.util.Random()
          (0 until samples).count { _ =>
            sample(params, rand).asInstanceOf[Double] <= value
          }.toDouble / samples
        }
    }

  /**
   * Whether [[approximateCDF]] will take the exact path rather than sampling.
   *
   * @param params distribution parameters; not consulted by this implementation
   * @return `true` iff this family is an instance of [[CDFSupported]]
   */
  def approximateCDFIsFast(params: Any): Boolean = this.isInstanceOf[CDFSupported]

  // NOTE(review): presumably the lower bound of the values this family can
  // produce for `params`; the contract is not stated in this file -- confirm.
  def min(params: Any): Double

  // NOTE(review): presumably the upper bound of the values this family can
  // produce for `params`; the contract is not stated in this file -- confirm.
  def max(params: Any): Double
}
|
|
|
|
/**
 * Mix-in for a NumericalDistributionFamily signalling that the CDF can be
 * computed exactly (via `cdf`) instead of being estimated.
 */
trait CDFSupported
{
  /** Spark type of the sampled values; provided by the type this is mixed into. */
  val baseType: DataType

  /**
   * Exact CDF at `value`.
   *
   * @param value  the point at which the CDF is evaluated
   * @param params distribution parameters; their shape is defined by the implementor
   * @return the CDF at `value`
   */
  def cdf(value: Double, params: Any): Double

  // Evaluated while this trait is being initialized.
  // NOTE(review): `baseType` is read during initialization, so it must already
  // be set by the time this runs; a mix-in order that initializes it later
  // would be observed as null here -- confirm at use sites.
  assert(baseType == DoubleType, "Non-numerical distributions can not support CDFs")
}
|
|
|
|
/**
 * Mix-in for a NumericalDistributionFamily signalling that the inverse CDF
 * can be computed exactly (via `icdf`).
 */
trait ICDFSupported
{
  /** Spark type of the sampled values; provided by the type this is mixed into. */
  val baseType: DataType

  /**
   * Exact inverse CDF at `value`.
   *
   * @param value  the input to the inverse CDF
   *               (presumably a probability in [0, 1] -- not checked here; confirm)
   * @param params distribution parameters; their shape is defined by the implementor
   * @return the inverse CDF at `value`
   */
  def icdf(value: Double, params: Any): Double

  // Evaluated while this trait is being initialized.
  // NOTE(review): `baseType` is read during initialization, so it must already
  // be set by the time this runs; a mix-in order that initializes it later
  // would be observed as null here -- confirm at use sites.
  assert(baseType == DoubleType, "Non-numerical distributions can not support ICDFs")
}
|