mimir-pip/lib/src/org/mimirdb/pip/distribution/numerical/package.scala

56 lines
1.4 KiB
Scala

package org.mimirdb.pip.distribution.numerical
import org.apache.spark.sql.types.{ DataType, DoubleType }
import org.mimirdb.pip.distribution.DistributionFamily
/**
 * A [[DistributionFamily]] whose samples are numbers; its `baseType` is fixed
 * to Spark's [[DoubleType]].
 *
 * Families that can evaluate their CDF exactly should also mix in
 * [[CDFSupported]]; [[approximateCDF]] then delegates to the exact `cdf`
 * instead of estimating the value by sampling.
 */
trait NumericalDistributionFamily extends DistributionFamily
{
  val baseType = DoubleType

  /**
   * Compute P(X <= value): exactly when this family is [[CDFSupported]],
   * otherwise as a sampling-based estimate.
   *
   * @param value   the point at which the CDF is evaluated
   * @param params  family-specific parameters, passed through unchanged to
   *                `cdf` or `sample`
   * @param samples number of draws used by the sampling fallback; ignored
   *                when this family is [[CDFSupported]], and required to be
   *                positive otherwise
   * @return        the exact CDF for [[CDFSupported]] families, otherwise
   *                the fraction of `samples` draws that were `<= value`
   * @throws IllegalArgumentException if the sampling fallback is needed and
   *                `samples` is not positive
   */
  def approximateCDF(value: Double, params: Any, samples: Int): Double =
    this match {
      case exact: CDFSupported => exact.cdf(value, params)
      case _ =>
        // Without this guard, samples == 0 evaluates 0.0 / 0 and silently
        // returns NaN (and a negative count yields -0.0 from an empty range).
        require(
          samples > 0,
          s"approximateCDF needs a positive number of samples, got $samples"
        )
        // A fresh, unseeded generator per call: estimates are not
        // reproducible from one call to the next.
        val rand = new scala.util.Random()
        // `sample` is not defined in this file (presumably inherited from
        // DistributionFamily); the cast fails with a ClassCastException if a
        // draw is not a Double.
        val hits = (0 until samples).count { _ =>
          sample(params, rand).asInstanceOf[Double] <= value
        }
        hits.toDouble / samples
    }

  /**
   * Whether [[approximateCDF]] can answer without sampling, i.e. whether
   * this family mixes in [[CDFSupported]].
   *
   * @param params accepted for signature symmetry; not consulted by this
   *               default implementation
   */
  def approximateCDFIsFast(params: Any): Boolean = this.isInstanceOf[CDFSupported]

  /**
   * NOTE(review): presumably the smallest value the distribution can produce
   * for `params` -- confirm against the implementing families.
   */
  def min(params: Any): Double

  /**
   * NOTE(review): presumably the largest value the distribution can produce
   * for `params` -- confirm against the implementing families.
   */
  def max(params: Any): Double
}
/**
 * Mix-in for a NumericalDistributionFamily whose cumulative distribution
 * function can be computed exactly. NumericalDistributionFamily.approximateCDF
 * checks for this trait and calls [[cdf]] instead of sampling.
 */
trait CDFSupported {

  /**
   * Declared abstract so the check below can read it; the concrete value
   * comes from whatever this trait is mixed into (NumericalDistributionFamily
   * sets it to DoubleType).
   */
  val baseType: DataType

  // Evaluated while this trait is being initialized.
  // NOTE(review): if the val that supplies baseType is initialized later in
  // the linearization order, it is still null at this point and the assertion
  // fails -- mix this trait in AFTER the one that defines baseType.
  assert(
    baseType == DoubleType,
    "Non-numerical distributions can not support CDFs"
  )

  /**
   * The exact CDF.
   *
   * @param value  the point at which the CDF is evaluated
   * @param params family-specific parameters
   * @return       presumably P(X <= value), matching the sampling fallback
   *               in NumericalDistributionFamily -- confirm per implementation
   */
  def cdf(value: Double, params: Any): Double
}
/**
 * Mix-in for a NumericalDistributionFamily whose inverse cumulative
 * distribution function can be computed exactly.
 */
trait ICDFSupported {

  /**
   * Declared abstract so the check below can read it; the concrete value
   * comes from whatever this trait is mixed into (NumericalDistributionFamily
   * sets it to DoubleType).
   */
  val baseType: DataType

  // Evaluated while this trait is being initialized.
  // NOTE(review): if the val that supplies baseType is initialized later in
  // the linearization order, it is still null at this point and the assertion
  // fails -- mix this trait in AFTER the one that defines baseType.
  assert(
    baseType == DoubleType,
    "Non-numerical distributions can not support ICDFs"
  )

  /**
   * The exact inverse CDF.
   *
   * @param value  NOTE(review): presumably a cumulative probability in
   *               [0, 1] -- confirm against implementations and callers
   * @param params family-specific parameters
   * @return       presumably the point whose CDF equals `value` -- confirm
   *               per implementation
   */
  def icdf(value: Double, params: Any): Double
}