[SPARK-10261][DOCUMENTATION, ML] Fixed @Since annotation to ml.evaluation

Author: Tijo Thomas <tijoparacka@gmail.com>
Author: tijo <tijo@ezzoft.com>

Closes #8554 from tijoparacka/SPARK-10261-2.
This commit is contained in:
Tijo Thomas 2015-10-20 16:13:34 -07:00 committed by Xiangrui Meng
parent 82e9d9c81b
commit 9f49895fef
4 changed files with 43 additions and 7 deletions

View file

@@ -17,7 +17,7 @@
package org.apache.spark.ml.evaluation package org.apache.spark.ml.evaluation
import org.apache.spark.annotation.Experimental import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.param._ import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.param.shared._
import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
@@ -30,10 +30,12 @@ import org.apache.spark.sql.types.DoubleType
* :: Experimental :: * :: Experimental ::
* Evaluator for binary classification, which expects two input columns: rawPrediction and label. * Evaluator for binary classification, which expects two input columns: rawPrediction and label.
*/ */
@Since("1.2.0")
@Experimental @Experimental
class BinaryClassificationEvaluator(override val uid: String) class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Evaluator with HasRawPredictionCol with HasLabelCol { extends Evaluator with HasRawPredictionCol with HasLabelCol {
@Since("1.2.0")
def this() = this(Identifiable.randomUID("binEval")) def this() = this(Identifiable.randomUID("binEval"))
/** /**
@@ -41,6 +43,7 @@ class BinaryClassificationEvaluator(override val uid: String)
* Default: areaUnderROC * Default: areaUnderROC
* @group param * @group param
*/ */
@Since("1.2.0")
val metricName: Param[String] = { val metricName: Param[String] = {
val allowedParams = ParamValidators.inArray(Array("areaUnderROC", "areaUnderPR")) val allowedParams = ParamValidators.inArray(Array("areaUnderROC", "areaUnderPR"))
new Param( new Param(
@@ -48,12 +51,15 @@ class BinaryClassificationEvaluator(override val uid: String)
} }
/** @group getParam */ /** @group getParam */
@Since("1.2.0")
def getMetricName: String = $(metricName) def getMetricName: String = $(metricName)
/** @group setParam */ /** @group setParam */
@Since("1.2.0")
def setMetricName(value: String): this.type = set(metricName, value) def setMetricName(value: String): this.type = set(metricName, value)
/** @group setParam */ /** @group setParam */
@Since("1.5.0")
def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value) def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value)
/** /**
@@ -61,13 +67,16 @@ class BinaryClassificationEvaluator(override val uid: String)
* @deprecated use [[setRawPredictionCol()]] instead * @deprecated use [[setRawPredictionCol()]] instead
*/ */
@deprecated("use setRawPredictionCol instead", "1.5.0") @deprecated("use setRawPredictionCol instead", "1.5.0")
@Since("1.2.0")
def setScoreCol(value: String): this.type = set(rawPredictionCol, value) def setScoreCol(value: String): this.type = set(rawPredictionCol, value)
/** @group setParam */ /** @group setParam */
@Since("1.2.0")
def setLabelCol(value: String): this.type = set(labelCol, value) def setLabelCol(value: String): this.type = set(labelCol, value)
setDefault(metricName -> "areaUnderROC") setDefault(metricName -> "areaUnderROC")
@Since("1.2.0")
override def evaluate(dataset: DataFrame): Double = { override def evaluate(dataset: DataFrame): Double = {
val schema = dataset.schema val schema = dataset.schema
SchemaUtils.checkColumnType(schema, $(rawPredictionCol), new VectorUDT) SchemaUtils.checkColumnType(schema, $(rawPredictionCol), new VectorUDT)
@@ -87,10 +96,12 @@ class BinaryClassificationEvaluator(override val uid: String)
metric metric
} }
@Since("1.5.0")
override def isLargerBetter: Boolean = $(metricName) match { override def isLargerBetter: Boolean = $(metricName) match {
case "areaUnderROC" => true case "areaUnderROC" => true
case "areaUnderPR" => true case "areaUnderPR" => true
} }
@Since("1.4.1")
override def copy(extra: ParamMap): BinaryClassificationEvaluator = defaultCopy(extra) override def copy(extra: ParamMap): BinaryClassificationEvaluator = defaultCopy(extra)
} }

View file

@@ -17,7 +17,7 @@
package org.apache.spark.ml.evaluation package org.apache.spark.ml.evaluation
import org.apache.spark.annotation.DeveloperApi import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.ml.param.{ParamMap, Params} import org.apache.spark.ml.param.{ParamMap, Params}
import org.apache.spark.sql.DataFrame import org.apache.spark.sql.DataFrame
@@ -25,6 +25,7 @@ import org.apache.spark.sql.DataFrame
* :: DeveloperApi :: * :: DeveloperApi ::
* Abstract class for evaluators that compute metrics from predictions. * Abstract class for evaluators that compute metrics from predictions.
*/ */
@Since("1.5.0")
@DeveloperApi @DeveloperApi
abstract class Evaluator extends Params { abstract class Evaluator extends Params {
@@ -35,6 +36,7 @@ abstract class Evaluator extends Params {
* @param paramMap parameter map that specifies the input columns and output metrics * @param paramMap parameter map that specifies the input columns and output metrics
* @return metric * @return metric
*/ */
@Since("1.5.0")
def evaluate(dataset: DataFrame, paramMap: ParamMap): Double = { def evaluate(dataset: DataFrame, paramMap: ParamMap): Double = {
this.copy(paramMap).evaluate(dataset) this.copy(paramMap).evaluate(dataset)
} }
@@ -44,6 +46,7 @@ abstract class Evaluator extends Params {
* @param dataset a dataset that contains labels/observations and predictions. * @param dataset a dataset that contains labels/observations and predictions.
* @return metric * @return metric
*/ */
@Since("1.5.0")
def evaluate(dataset: DataFrame): Double def evaluate(dataset: DataFrame): Double
/** /**
@@ -51,7 +54,9 @@ abstract class Evaluator extends Params {
* or minimized (false). * or minimized (false).
* A given evaluator may support multiple metrics which may be maximized or minimized. * A given evaluator may support multiple metrics which may be maximized or minimized.
*/ */
@Since("1.5.0")
def isLargerBetter: Boolean = true def isLargerBetter: Boolean = true
@Since("1.5.0")
override def copy(extra: ParamMap): Evaluator override def copy(extra: ParamMap): Evaluator
} }

View file

@@ -17,7 +17,7 @@
package org.apache.spark.ml.evaluation package org.apache.spark.ml.evaluation
import org.apache.spark.annotation.Experimental import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.param.{ParamMap, ParamValidators, Param} import org.apache.spark.ml.param.{ParamMap, ParamValidators, Param}
import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol} import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol}
import org.apache.spark.ml.util.{SchemaUtils, Identifiable} import org.apache.spark.ml.util.{SchemaUtils, Identifiable}
@@ -29,10 +29,12 @@ import org.apache.spark.sql.types.DoubleType
* :: Experimental :: * :: Experimental ::
* Evaluator for multiclass classification, which expects two input columns: score and label. * Evaluator for multiclass classification, which expects two input columns: score and label.
*/ */
@Since("1.5.0")
@Experimental @Experimental
class MulticlassClassificationEvaluator (override val uid: String) class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Evaluator with HasPredictionCol with HasLabelCol { extends Evaluator with HasPredictionCol with HasLabelCol {
@Since("1.5.0")
def this() = this(Identifiable.randomUID("mcEval")) def this() = this(Identifiable.randomUID("mcEval"))
/** /**
@@ -40,6 +42,7 @@ class MulticlassClassificationEvaluator (override val uid: String)
* `"weightedPrecision"`, `"weightedRecall"`) * `"weightedPrecision"`, `"weightedRecall"`)
* @group param * @group param
*/ */
@Since("1.5.0")
val metricName: Param[String] = { val metricName: Param[String] = {
val allowedParams = ParamValidators.inArray(Array("f1", "precision", val allowedParams = ParamValidators.inArray(Array("f1", "precision",
"recall", "weightedPrecision", "weightedRecall")) "recall", "weightedPrecision", "weightedRecall"))
@@ -48,19 +51,24 @@ class MulticlassClassificationEvaluator (override val uid: String)
} }
/** @group getParam */ /** @group getParam */
@Since("1.5.0")
def getMetricName: String = $(metricName) def getMetricName: String = $(metricName)
/** @group setParam */ /** @group setParam */
@Since("1.5.0")
def setMetricName(value: String): this.type = set(metricName, value) def setMetricName(value: String): this.type = set(metricName, value)
/** @group setParam */ /** @group setParam */
@Since("1.5.0")
def setPredictionCol(value: String): this.type = set(predictionCol, value) def setPredictionCol(value: String): this.type = set(predictionCol, value)
/** @group setParam */ /** @group setParam */
@Since("1.5.0")
def setLabelCol(value: String): this.type = set(labelCol, value) def setLabelCol(value: String): this.type = set(labelCol, value)
setDefault(metricName -> "f1") setDefault(metricName -> "f1")
@Since("1.5.0")
override def evaluate(dataset: DataFrame): Double = { override def evaluate(dataset: DataFrame): Double = {
val schema = dataset.schema val schema = dataset.schema
SchemaUtils.checkColumnType(schema, $(predictionCol), DoubleType) SchemaUtils.checkColumnType(schema, $(predictionCol), DoubleType)
@@ -81,6 +89,7 @@ class MulticlassClassificationEvaluator (override val uid: String)
metric metric
} }
@Since("1.5.0")
override def isLargerBetter: Boolean = $(metricName) match { override def isLargerBetter: Boolean = $(metricName) match {
case "f1" => true case "f1" => true
case "precision" => true case "precision" => true
@@ -89,5 +98,6 @@ class MulticlassClassificationEvaluator (override val uid: String)
case "weightedRecall" => true case "weightedRecall" => true
} }
@Since("1.5.0")
override def copy(extra: ParamMap): MulticlassClassificationEvaluator = defaultCopy(extra) override def copy(extra: ParamMap): MulticlassClassificationEvaluator = defaultCopy(extra)
} }

View file

@@ -17,7 +17,7 @@
package org.apache.spark.ml.evaluation package org.apache.spark.ml.evaluation
import org.apache.spark.annotation.Experimental import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators} import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators}
import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol} import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol}
import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
@@ -29,10 +29,12 @@ import org.apache.spark.sql.types.DoubleType
* :: Experimental :: * :: Experimental ::
* Evaluator for regression, which expects two input columns: prediction and label. * Evaluator for regression, which expects two input columns: prediction and label.
*/ */
@Since("1.4.0")
@Experimental @Experimental
final class RegressionEvaluator(override val uid: String) final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Evaluator with HasPredictionCol with HasLabelCol { extends Evaluator with HasPredictionCol with HasLabelCol {
@Since("1.4.0")
def this() = this(Identifiable.randomUID("regEval")) def this() = this(Identifiable.randomUID("regEval"))
/** /**
@@ -43,25 +45,31 @@ final class RegressionEvaluator(override val uid: String)
* we take and output the negative of this metric. * we take and output the negative of this metric.
* @group param * @group param
*/ */
@Since("1.4.0")
val metricName: Param[String] = { val metricName: Param[String] = {
val allowedParams = ParamValidators.inArray(Array("mse", "rmse", "r2", "mae")) val allowedParams = ParamValidators.inArray(Array("mse", "rmse", "r2", "mae"))
new Param(this, "metricName", "metric name in evaluation (mse|rmse|r2|mae)", allowedParams) new Param(this, "metricName", "metric name in evaluation (mse|rmse|r2|mae)", allowedParams)
} }
/** @group getParam */ /** @group getParam */
@Since("1.4.0")
def getMetricName: String = $(metricName) def getMetricName: String = $(metricName)
/** @group setParam */ /** @group setParam */
@Since("1.4.0")
def setMetricName(value: String): this.type = set(metricName, value) def setMetricName(value: String): this.type = set(metricName, value)
/** @group setParam */ /** @group setParam */
@Since("1.4.0")
def setPredictionCol(value: String): this.type = set(predictionCol, value) def setPredictionCol(value: String): this.type = set(predictionCol, value)
/** @group setParam */ /** @group setParam */
@Since("1.4.0")
def setLabelCol(value: String): this.type = set(labelCol, value) def setLabelCol(value: String): this.type = set(labelCol, value)
setDefault(metricName -> "rmse") setDefault(metricName -> "rmse")
@Since("1.4.0")
override def evaluate(dataset: DataFrame): Double = { override def evaluate(dataset: DataFrame): Double = {
val schema = dataset.schema val schema = dataset.schema
SchemaUtils.checkColumnType(schema, $(predictionCol), DoubleType) SchemaUtils.checkColumnType(schema, $(predictionCol), DoubleType)
@@ -81,6 +89,7 @@ final class RegressionEvaluator(override val uid: String)
metric metric
} }
@Since("1.4.0")
override def isLargerBetter: Boolean = $(metricName) match { override def isLargerBetter: Boolean = $(metricName) match {
case "rmse" => false case "rmse" => false
case "mse" => false case "mse" => false
@@ -88,5 +97,6 @@ final class RegressionEvaluator(override val uid: String)
case "mae" => false case "mae" => false
} }
@Since("1.5.0")
override def copy(extra: ParamMap): RegressionEvaluator = defaultCopy(extra) override def copy(extra: ParamMap): RegressionEvaluator = defaultCopy(extra)
} }