[SPARK-5730][ML] add doc groups to spark.ml components

This PR adds three ScalaDoc groups: `param`, `setParam`, and `getParam`. Params show up as the top group in the generated Scala API doc; setters and getters appear at the bottom.
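
For context, the mechanics boil down to two pieces: a package object whose doc comment declares the group names and priorities (scaladoc inherits these declarations for classes in the package), and `@group` tags on individual members. Below is a minimal, self-contained sketch of the same pattern; the `mydocs` and `MyEstimator` names are illustrative, not part of this PR:

```scala
// Sketch of the grouping pattern (illustrative names, not from this PR).
// The package object declares the groups; lower @groupprio values render
// higher on the generated page.

/**
 * @groupname param Parameters
 * @groupprio param -5
 * @groupname setParam Parameter setters
 * @groupprio setParam 5
 * @groupname getParam Parameter getters
 * @groupprio getParam 6
 */
package object mydocs

package mydocs {

  /** A toy component whose members opt into the groups via @group. */
  class MyEstimator {

    /**
     * param for regularization strength
     * @group param
     */
    var regParam: Double = 0.0

    /** @group getParam */
    def getRegParam: Double = regParam

    /** @group setParam */
    def setRegParam(value: Double): this.type = { regParam = value; this }
  }
}
```

Note that scaladoc only applies the grouping when invoked with the `-groups` flag; the declarations this PR actually uses are in the package object diff below.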

Preview:

![screen shot 2015-02-13 at 2 47 49 pm](https://cloud.githubusercontent.com/assets/829644/6196657/5740c240-b38f-11e4-94bb-bd8ef5a796c5.png)

Author: Xiangrui Meng <meng@databricks.com>

Closes #4600 from mengxr/SPARK-5730 and squashes the following commits:

febed9a [Xiangrui Meng] add doc groups to spark.ml components
Commit 4f4c6d5a5d (parent d50a91d529)
Xiangrui Meng, 2015-02-13 16:45:59 -08:00
13 changed files with 235 additions and 26 deletions

@@ -62,7 +62,10 @@ abstract class Transformer extends PipelineStage with Params {
private[ml] abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
extends Transformer with HasInputCol with HasOutputCol with Logging {
/** @group setParam */
def setInputCol(value: String): T = set(inputCol, value).asInstanceOf[T]
/** @group setParam */
def setOutputCol(value: String): T = set(outputCol, value).asInstanceOf[T]
/**

@@ -66,6 +66,7 @@ private[spark] abstract class Classifier[
extends Predictor[FeaturesType, E, M]
with ClassifierParams {
/** @group setParam */
def setRawPredictionCol(value: String): E =
set(rawPredictionCol, value).asInstanceOf[E]
@@ -87,6 +88,7 @@ private[spark]
abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[FeaturesType, M]]
extends PredictionModel[FeaturesType, M] with ClassifierParams {
/** @group setParam */
def setRawPredictionCol(value: String): M = set(rawPredictionCol, value).asInstanceOf[M]
/** Number of classes (values which the label can take). */

@@ -49,8 +49,13 @@ class LogisticRegression
setMaxIter(100)
setThreshold(0.5)
/** @group setParam */
def setRegParam(value: Double): this.type = set(regParam, value)
/** @group setParam */
def setMaxIter(value: Int): this.type = set(maxIter, value)
/** @group setParam */
def setThreshold(value: Double): this.type = set(threshold, value)
override protected def train(dataset: DataFrame, paramMap: ParamMap): LogisticRegressionModel = {
@@ -93,6 +98,7 @@ class LogisticRegressionModel private[ml] (
setThreshold(0.5)
/** @group setParam */
def setThreshold(value: Double): this.type = set(threshold, value)
private val margin: Vector => Double = (features) => {

@@ -61,6 +61,7 @@ private[spark] abstract class ProbabilisticClassifier[
M <: ProbabilisticClassificationModel[FeaturesType, M]]
extends Classifier[FeaturesType, E, M] with ProbabilisticClassifierParams {
/** @group setParam */
def setProbabilityCol(value: String): E = set(probabilityCol, value).asInstanceOf[E]
}
@@ -82,6 +83,7 @@ private[spark] abstract class ProbabilisticClassificationModel[
M <: ProbabilisticClassificationModel[FeaturesType, M]]
extends ClassificationModel[FeaturesType, M] with ProbabilisticClassifierParams {
/** @group setParam */
def setProbabilityCol(value: String): M = set(probabilityCol, value).asInstanceOf[M]
/**

@@ -35,13 +35,23 @@ import org.apache.spark.sql.types.DoubleType
class BinaryClassificationEvaluator extends Evaluator with Params
with HasRawPredictionCol with HasLabelCol {
/** param for metric name in evaluation */
/**
* param for metric name in evaluation
* @group param
*/
val metricName: Param[String] = new Param(this, "metricName",
"metric name in evaluation (areaUnderROC|areaUnderPR)", Some("areaUnderROC"))
/** @group getParam */
def getMetricName: String = get(metricName)
/** @group setParam */
def setMetricName(value: String): this.type = set(metricName, value)
/** @group setParam */
def setScoreCol(value: String): this.type = set(rawPredictionCol, value)
/** @group setParam */
def setLabelCol(value: String): this.type = set(labelCol, value)
override def evaluate(dataset: DataFrame, paramMap: ParamMap): Double = {

@@ -31,11 +31,18 @@ import org.apache.spark.sql.types.DataType
@AlphaComponent
class HashingTF extends UnaryTransformer[Iterable[_], Vector, HashingTF] {
/** number of features */
/**
* number of features
* @group param
*/
val numFeatures = new IntParam(this, "numFeatures", "number of features", Some(1 << 18))
def setNumFeatures(value: Int) = set(numFeatures, value)
/** @group getParam */
def getNumFeatures: Int = get(numFeatures)
/** @group setParam */
def setNumFeatures(value: Int) = set(numFeatures, value)
override protected def createTransformFunc(paramMap: ParamMap): Iterable[_] => Vector = {
val hashingTF = new feature.HashingTF(paramMap(numFeatures))
hashingTF.transform

@@ -39,7 +39,10 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
@AlphaComponent
class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerParams {
/** @group setParam */
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
def setOutputCol(value: String): this.type = set(outputCol, value)
override def fit(dataset: DataFrame, paramMap: ParamMap): StandardScalerModel = {
@@ -75,7 +78,10 @@ class StandardScalerModel private[ml] (
scaler: feature.StandardScalerModel)
extends Model[StandardScalerModel] with StandardScalerParams {
/** @group setParam */
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
def setOutputCol(value: String): this.type = set(outputCol, value)
override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = {

@@ -85,8 +85,13 @@ private[spark] abstract class Predictor[
M <: PredictionModel[FeaturesType, M]]
extends Estimator[M] with PredictorParams {
/** @group setParam */
def setLabelCol(value: String): Learner = set(labelCol, value).asInstanceOf[Learner]
/** @group setParam */
def setFeaturesCol(value: String): Learner = set(featuresCol, value).asInstanceOf[Learner]
/** @group setParam */
def setPredictionCol(value: String): Learner = set(predictionCol, value).asInstanceOf[Learner]
override def fit(dataset: DataFrame, paramMap: ParamMap): M = {
@@ -160,8 +165,10 @@ private[spark] abstract class Predictor[
private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]]
extends Model[M] with PredictorParams {
/** @group setParam */
def setFeaturesCol(value: String): M = set(featuresCol, value).asInstanceOf[M]
/** @group setParam */
def setPredictionCol(value: String): M = set(predictionCol, value).asInstanceOf[M]
/**

@@ -20,5 +20,19 @@ package org.apache.spark
/**
* Spark ML is an ALPHA component that adds a new set of machine learning APIs to let users quickly
* assemble and configure practical machine learning pipelines.
*
* @groupname param Parameters
* @groupdesc param A list of (hyper-)parameter keys this algorithm can take. Users can set and get
* the parameter values through setters and getters, respectively.
* @groupprio param -5
*
* @groupname setParam Parameter setters
* @groupprio setParam 5
*
* @groupname getParam Parameter getters
* @groupprio getParam 6
*
* @groupname Ungrouped Members
* @groupprio Ungrouped 0
*/
package object ml
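
Since lower `@groupprio` values sort first, `param` (-5) renders above the ungrouped members (0), while the setters (5) and getters (6) land at the bottom of each page, which produces the layout shown in the preview above.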

@@ -24,67 +24,117 @@ package org.apache.spark.ml.param
*/
private[ml] trait HasRegParam extends Params {
/** param for regularization parameter */
/**
* param for regularization parameter
* @group param
*/
val regParam: DoubleParam = new DoubleParam(this, "regParam", "regularization parameter")
/** @group getParam */
def getRegParam: Double = get(regParam)
}
private[ml] trait HasMaxIter extends Params {
/** param for max number of iterations */
/**
* param for max number of iterations
* @group param
*/
val maxIter: IntParam = new IntParam(this, "maxIter", "max number of iterations")
/** @group getParam */
def getMaxIter: Int = get(maxIter)
}
private[ml] trait HasFeaturesCol extends Params {
/** param for features column name */
/**
* param for features column name
* @group param
*/
val featuresCol: Param[String] =
new Param(this, "featuresCol", "features column name", Some("features"))
/** @group getParam */
def getFeaturesCol: String = get(featuresCol)
}
private[ml] trait HasLabelCol extends Params {
/** param for label column name */
/**
* param for label column name
* @group param
*/
val labelCol: Param[String] = new Param(this, "labelCol", "label column name", Some("label"))
/** @group getParam */
def getLabelCol: String = get(labelCol)
}
private[ml] trait HasPredictionCol extends Params {
/** param for prediction column name */
/**
* param for prediction column name
* @group param
*/
val predictionCol: Param[String] =
new Param(this, "predictionCol", "prediction column name", Some("prediction"))
/** @group getParam */
def getPredictionCol: String = get(predictionCol)
}
private[ml] trait HasRawPredictionCol extends Params {
/** param for raw prediction column name */
/**
* param for raw prediction column name
* @group param
*/
val rawPredictionCol: Param[String] =
new Param(this, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name",
Some("rawPrediction"))
/** @group getParam */
def getRawPredictionCol: String = get(rawPredictionCol)
}
private[ml] trait HasProbabilityCol extends Params {
/** param for predicted class conditional probabilities column name */
/**
* param for predicted class conditional probabilities column name
* @group param
*/
val probabilityCol: Param[String] =
new Param(this, "probabilityCol", "column name for predicted class conditional probabilities",
Some("probability"))
/** @group getParam */
def getProbabilityCol: String = get(probabilityCol)
}
private[ml] trait HasThreshold extends Params {
/** param for threshold in (binary) prediction */
/**
* param for threshold in (binary) prediction
* @group param
*/
val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in prediction")
/** @group getParam */
def getThreshold: Double = get(threshold)
}
private[ml] trait HasInputCol extends Params {
/** param for input column name */
/**
* param for input column name
* @group param
*/
val inputCol: Param[String] = new Param(this, "inputCol", "input column name")
/** @group getParam */
def getInputCol: String = get(inputCol)
}
private[ml] trait HasOutputCol extends Params {
/** param for output column name */
/**
* param for output column name
* @group param
*/
val outputCol: Param[String] = new Param(this, "outputCol", "output column name")
/** @group getParam */
def getOutputCol: String = get(outputCol)
}

@@ -49,43 +49,89 @@ import org.apache.spark.util.random.XORShiftRandom
private[recommendation] trait ALSParams extends Params with HasMaxIter with HasRegParam
with HasPredictionCol {
/** Param for rank of the matrix factorization. */
/**
* Param for rank of the matrix factorization.
* @group param
*/
val rank = new IntParam(this, "rank", "rank of the factorization", Some(10))
/** @group getParam */
def getRank: Int = get(rank)
/** Param for number of user blocks. */
/**
* Param for number of user blocks.
* @group param
*/
val numUserBlocks = new IntParam(this, "numUserBlocks", "number of user blocks", Some(10))
/** @group getParam */
def getNumUserBlocks: Int = get(numUserBlocks)
/** Param for number of item blocks. */
/**
* Param for number of item blocks.
* @group param
*/
val numItemBlocks =
new IntParam(this, "numItemBlocks", "number of item blocks", Some(10))
/** @group getParam */
def getNumItemBlocks: Int = get(numItemBlocks)
/** Param to decide whether to use implicit preference. */
/**
* Param to decide whether to use implicit preference.
* @group param
*/
val implicitPrefs =
new BooleanParam(this, "implicitPrefs", "whether to use implicit preference", Some(false))
/** @group getParam */
def getImplicitPrefs: Boolean = get(implicitPrefs)
/** Param for the alpha parameter in the implicit preference formulation. */
/**
* Param for the alpha parameter in the implicit preference formulation.
* @group param
*/
val alpha = new DoubleParam(this, "alpha", "alpha for implicit preference", Some(1.0))
/** @group getParam */
def getAlpha: Double = get(alpha)
/** Param for the column name for user ids. */
/**
* Param for the column name for user ids.
* @group param
*/
val userCol = new Param[String](this, "userCol", "column name for user ids", Some("user"))
/** @group getParam */
def getUserCol: String = get(userCol)
/** Param for the column name for item ids. */
/**
* Param for the column name for item ids.
* @group param
*/
val itemCol =
new Param[String](this, "itemCol", "column name for item ids", Some("item"))
/** @group getParam */
def getItemCol: String = get(itemCol)
/** Param for the column name for ratings. */
/**
* Param for the column name for ratings.
* @group param
*/
val ratingCol = new Param[String](this, "ratingCol", "column name for ratings", Some("rating"))
/** @group getParam */
def getRatingCol: String = get(ratingCol)
/**
* Param for whether to apply nonnegativity constraints.
* @group param
*/
val nonnegative = new BooleanParam(
this, "nonnegative", "whether to use nonnegative constraint for least squares", Some(false))
/** @group getParam */
def getNonnegative: Boolean = get(nonnegative)
/**
@@ -181,20 +227,46 @@ class ALS extends Estimator[ALSModel] with ALSParams {
import org.apache.spark.ml.recommendation.ALS.Rating
/** @group setParam */
def setRank(value: Int): this.type = set(rank, value)
/** @group setParam */
def setNumUserBlocks(value: Int): this.type = set(numUserBlocks, value)
/** @group setParam */
def setNumItemBlocks(value: Int): this.type = set(numItemBlocks, value)
/** @group setParam */
def setImplicitPrefs(value: Boolean): this.type = set(implicitPrefs, value)
/** @group setParam */
def setAlpha(value: Double): this.type = set(alpha, value)
/** @group setParam */
def setUserCol(value: String): this.type = set(userCol, value)
/** @group setParam */
def setItemCol(value: String): this.type = set(itemCol, value)
/** @group setParam */
def setRatingCol(value: String): this.type = set(ratingCol, value)
/** @group setParam */
def setPredictionCol(value: String): this.type = set(predictionCol, value)
/** @group setParam */
def setMaxIter(value: Int): this.type = set(maxIter, value)
/** @group setParam */
def setRegParam(value: Double): this.type = set(regParam, value)
/** @group setParam */
def setNonnegative(value: Boolean): this.type = set(nonnegative, value)
/** Sets both numUserBlocks and numItemBlocks to the specific value. */
/**
* Sets both numUserBlocks and numItemBlocks to the specific value.
* @group setParam
*/
def setNumBlocks(value: Int): this.type = {
setNumUserBlocks(value)
setNumItemBlocks(value)

@@ -44,7 +44,10 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress
setRegParam(0.1)
setMaxIter(100)
/** @group setParam */
def setRegParam(value: Double): this.type = set(regParam, value)
/** @group setParam */
def setMaxIter(value: Int): this.type = set(maxIter, value)
override protected def train(dataset: DataFrame, paramMap: ParamMap): LinearRegressionModel = {

@@ -31,22 +31,42 @@ import org.apache.spark.sql.types.StructType
* Params for [[CrossValidator]] and [[CrossValidatorModel]].
*/
private[ml] trait CrossValidatorParams extends Params {
/** param for the estimator to be cross-validated */
/**
* param for the estimator to be cross-validated
* @group param
*/
val estimator: Param[Estimator[_]] = new Param(this, "estimator", "estimator for selection")
/** @group getParam */
def getEstimator: Estimator[_] = get(estimator)
/** param for estimator param maps */
/**
* param for estimator param maps
* @group param
*/
val estimatorParamMaps: Param[Array[ParamMap]] =
new Param(this, "estimatorParamMaps", "param maps for the estimator")
/** @group getParam */
def getEstimatorParamMaps: Array[ParamMap] = get(estimatorParamMaps)
/** param for the evaluator for selection */
/**
* param for the evaluator for selection
* @group param
*/
val evaluator: Param[Evaluator] = new Param(this, "evaluator", "evaluator for selection")
/** @group getParam */
def getEvaluator: Evaluator = get(evaluator)
/** param for number of folds for cross validation */
/**
* param for number of folds for cross validation
* @group param
*/
val numFolds: IntParam =
new IntParam(this, "numFolds", "number of folds for cross validation", Some(3))
/** @group getParam */
def getNumFolds: Int = get(numFolds)
}
@@ -59,9 +79,16 @@ class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorP
private val f2jBLAS = new F2jBLAS
/** @group setParam */
def setEstimator(value: Estimator[_]): this.type = set(estimator, value)
/** @group setParam */
def setEstimatorParamMaps(value: Array[ParamMap]): this.type = set(estimatorParamMaps, value)
/** @group setParam */
def setEvaluator(value: Evaluator): this.type = set(evaluator, value)
/** @group setParam */
def setNumFolds(value: Int): this.type = set(numFolds, value)
override def fit(dataset: DataFrame, paramMap: ParamMap): CrossValidatorModel = {