[SPARK-5730][ML] add doc groups to spark.ml components
This PR adds three groups to the ScalaDoc: `param`, `setParam`, and `getParam`. Params will show up in the generated Scala API doc as the top group. Setters/getters will be at the bottom. Preview: ![screen shot 2015-02-13 at 2 47 49 pm](https://cloud.githubusercontent.com/assets/829644/6196657/5740c240-b38f-11e4-94bb-bd8ef5a796c5.png) Author: Xiangrui Meng <meng@databricks.com> Closes #4600 from mengxr/SPARK-5730 and squashes the following commits: febed9a [Xiangrui Meng] add doc groups to spark.ml components
This commit is contained in:
parent
d50a91d529
commit
4f4c6d5a5d
|
@ -62,7 +62,10 @@ abstract class Transformer extends PipelineStage with Params {
|
|||
private[ml] abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
|
||||
extends Transformer with HasInputCol with HasOutputCol with Logging {
|
||||
|
||||
/** @group setParam */
|
||||
def setInputCol(value: String): T = set(inputCol, value).asInstanceOf[T]
|
||||
|
||||
/** @group setParam */
|
||||
def setOutputCol(value: String): T = set(outputCol, value).asInstanceOf[T]
|
||||
|
||||
/**
|
||||
|
|
|
@ -66,6 +66,7 @@ private[spark] abstract class Classifier[
|
|||
extends Predictor[FeaturesType, E, M]
|
||||
with ClassifierParams {
|
||||
|
||||
/** @group setParam */
|
||||
def setRawPredictionCol(value: String): E =
|
||||
set(rawPredictionCol, value).asInstanceOf[E]
|
||||
|
||||
|
@ -87,6 +88,7 @@ private[spark]
|
|||
abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[FeaturesType, M]]
|
||||
extends PredictionModel[FeaturesType, M] with ClassifierParams {
|
||||
|
||||
/** @group setParam */
|
||||
def setRawPredictionCol(value: String): M = set(rawPredictionCol, value).asInstanceOf[M]
|
||||
|
||||
/** Number of classes (values which the label can take). */
|
||||
|
|
|
@ -49,8 +49,13 @@ class LogisticRegression
|
|||
setMaxIter(100)
|
||||
setThreshold(0.5)
|
||||
|
||||
/** @group setParam */
|
||||
def setRegParam(value: Double): this.type = set(regParam, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setMaxIter(value: Int): this.type = set(maxIter, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setThreshold(value: Double): this.type = set(threshold, value)
|
||||
|
||||
override protected def train(dataset: DataFrame, paramMap: ParamMap): LogisticRegressionModel = {
|
||||
|
@ -93,6 +98,7 @@ class LogisticRegressionModel private[ml] (
|
|||
|
||||
setThreshold(0.5)
|
||||
|
||||
/** @group setParam */
|
||||
def setThreshold(value: Double): this.type = set(threshold, value)
|
||||
|
||||
private val margin: Vector => Double = (features) => {
|
||||
|
|
|
@ -61,6 +61,7 @@ private[spark] abstract class ProbabilisticClassifier[
|
|||
M <: ProbabilisticClassificationModel[FeaturesType, M]]
|
||||
extends Classifier[FeaturesType, E, M] with ProbabilisticClassifierParams {
|
||||
|
||||
/** @group setParam */
|
||||
def setProbabilityCol(value: String): E = set(probabilityCol, value).asInstanceOf[E]
|
||||
}
|
||||
|
||||
|
@ -82,6 +83,7 @@ private[spark] abstract class ProbabilisticClassificationModel[
|
|||
M <: ProbabilisticClassificationModel[FeaturesType, M]]
|
||||
extends ClassificationModel[FeaturesType, M] with ProbabilisticClassifierParams {
|
||||
|
||||
/** @group setParam */
|
||||
def setProbabilityCol(value: String): M = set(probabilityCol, value).asInstanceOf[M]
|
||||
|
||||
/**
|
||||
|
|
|
@ -35,13 +35,23 @@ import org.apache.spark.sql.types.DoubleType
|
|||
class BinaryClassificationEvaluator extends Evaluator with Params
|
||||
with HasRawPredictionCol with HasLabelCol {
|
||||
|
||||
/** param for metric name in evaluation */
|
||||
/**
|
||||
* param for metric name in evaluation
|
||||
* @group param
|
||||
*/
|
||||
val metricName: Param[String] = new Param(this, "metricName",
|
||||
"metric name in evaluation (areaUnderROC|areaUnderPR)", Some("areaUnderROC"))
|
||||
|
||||
/** @group getParam */
|
||||
def getMetricName: String = get(metricName)
|
||||
|
||||
/** @group setParam */
|
||||
def setMetricName(value: String): this.type = set(metricName, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setScoreCol(value: String): this.type = set(rawPredictionCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setLabelCol(value: String): this.type = set(labelCol, value)
|
||||
|
||||
override def evaluate(dataset: DataFrame, paramMap: ParamMap): Double = {
|
||||
|
|
|
@ -31,11 +31,18 @@ import org.apache.spark.sql.types.DataType
|
|||
@AlphaComponent
|
||||
class HashingTF extends UnaryTransformer[Iterable[_], Vector, HashingTF] {
|
||||
|
||||
/** number of features */
|
||||
/**
|
||||
* number of features
|
||||
* @group param
|
||||
*/
|
||||
val numFeatures = new IntParam(this, "numFeatures", "number of features", Some(1 << 18))
|
||||
def setNumFeatures(value: Int) = set(numFeatures, value)
|
||||
|
||||
/** @group getParam */
|
||||
def getNumFeatures: Int = get(numFeatures)
|
||||
|
||||
/**
 * Sets the value of the `numFeatures` param.
 * The result type is spelled out as `this.type`, matching the other setters, so the public signature does not rely on inference.
 * @group setParam
 */
def setNumFeatures(value: Int): this.type = set(numFeatures, value)
|
||||
|
||||
override protected def createTransformFunc(paramMap: ParamMap): Iterable[_] => Vector = {
|
||||
val hashingTF = new feature.HashingTF(paramMap(numFeatures))
|
||||
hashingTF.transform
|
||||
|
|
|
@ -39,7 +39,10 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
|
|||
@AlphaComponent
|
||||
class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerParams {
|
||||
|
||||
/** @group setParam */
|
||||
def setInputCol(value: String): this.type = set(inputCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setOutputCol(value: String): this.type = set(outputCol, value)
|
||||
|
||||
override def fit(dataset: DataFrame, paramMap: ParamMap): StandardScalerModel = {
|
||||
|
@ -75,7 +78,10 @@ class StandardScalerModel private[ml] (
|
|||
scaler: feature.StandardScalerModel)
|
||||
extends Model[StandardScalerModel] with StandardScalerParams {
|
||||
|
||||
/** @group setParam */
|
||||
def setInputCol(value: String): this.type = set(inputCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setOutputCol(value: String): this.type = set(outputCol, value)
|
||||
|
||||
override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = {
|
||||
|
|
|
@ -85,8 +85,13 @@ private[spark] abstract class Predictor[
|
|||
M <: PredictionModel[FeaturesType, M]]
|
||||
extends Estimator[M] with PredictorParams {
|
||||
|
||||
/** @group setParam */
|
||||
def setLabelCol(value: String): Learner = set(labelCol, value).asInstanceOf[Learner]
|
||||
|
||||
/** @group setParam */
|
||||
def setFeaturesCol(value: String): Learner = set(featuresCol, value).asInstanceOf[Learner]
|
||||
|
||||
/** @group setParam */
|
||||
def setPredictionCol(value: String): Learner = set(predictionCol, value).asInstanceOf[Learner]
|
||||
|
||||
override def fit(dataset: DataFrame, paramMap: ParamMap): M = {
|
||||
|
@ -160,8 +165,10 @@ private[spark] abstract class Predictor[
|
|||
private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]]
|
||||
extends Model[M] with PredictorParams {
|
||||
|
||||
/** @group setParam */
|
||||
def setFeaturesCol(value: String): M = set(featuresCol, value).asInstanceOf[M]
|
||||
|
||||
/** @group setParam */
|
||||
def setPredictionCol(value: String): M = set(predictionCol, value).asInstanceOf[M]
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,5 +20,19 @@ package org.apache.spark
|
|||
/**
|
||||
* Spark ML is an ALPHA component that adds a new set of machine learning APIs to let users quickly
|
||||
* assemble and configure practical machine learning pipelines.
|
||||
*
|
||||
* @groupname param Parameters
|
||||
* @groupdesc param A list of (hyper-)parameter keys this algorithm can take. Users can set and get
|
||||
* the parameter values through setters and getters, respectively.
|
||||
* @groupprio param -5
|
||||
*
|
||||
* @groupname setParam Parameter setters
|
||||
* @groupprio setParam 5
|
||||
*
|
||||
* @groupname getParam Parameter getters
|
||||
* @groupprio getParam 6
|
||||
*
|
||||
* @groupname Ungrouped Members
|
||||
* @groupprio Ungrouped 0
|
||||
*/
|
||||
package object ml
|
||||
|
|
|
@ -24,67 +24,117 @@ package org.apache.spark.ml.param
|
|||
*/
|
||||
|
||||
private[ml] trait HasRegParam extends Params {
|
||||
/** param for regularization parameter */
|
||||
/**
|
||||
* param for regularization parameter
|
||||
* @group param
|
||||
*/
|
||||
val regParam: DoubleParam = new DoubleParam(this, "regParam", "regularization parameter")
|
||||
|
||||
/** @group getParam */
|
||||
def getRegParam: Double = get(regParam)
|
||||
}
|
||||
|
||||
private[ml] trait HasMaxIter extends Params {
|
||||
/** param for max number of iterations */
|
||||
/**
|
||||
* param for max number of iterations
|
||||
* @group param
|
||||
*/
|
||||
val maxIter: IntParam = new IntParam(this, "maxIter", "max number of iterations")
|
||||
|
||||
/** @group getParam */
|
||||
def getMaxIter: Int = get(maxIter)
|
||||
}
|
||||
|
||||
private[ml] trait HasFeaturesCol extends Params {
|
||||
/** param for features column name */
|
||||
/**
|
||||
* param for features column name
|
||||
* @group param
|
||||
*/
|
||||
val featuresCol: Param[String] =
|
||||
new Param(this, "featuresCol", "features column name", Some("features"))
|
||||
|
||||
/** @group getParam */
|
||||
def getFeaturesCol: String = get(featuresCol)
|
||||
}
|
||||
|
||||
private[ml] trait HasLabelCol extends Params {
|
||||
/** param for label column name */
|
||||
/**
|
||||
* param for label column name
|
||||
* @group param
|
||||
*/
|
||||
val labelCol: Param[String] = new Param(this, "labelCol", "label column name", Some("label"))
|
||||
|
||||
/** @group getParam */
|
||||
def getLabelCol: String = get(labelCol)
|
||||
}
|
||||
|
||||
private[ml] trait HasPredictionCol extends Params {
|
||||
/** param for prediction column name */
|
||||
/**
|
||||
* param for prediction column name
|
||||
* @group param
|
||||
*/
|
||||
val predictionCol: Param[String] =
|
||||
new Param(this, "predictionCol", "prediction column name", Some("prediction"))
|
||||
|
||||
/** @group getParam */
|
||||
def getPredictionCol: String = get(predictionCol)
|
||||
}
|
||||
|
||||
private[ml] trait HasRawPredictionCol extends Params {
|
||||
/** param for raw prediction column name */
|
||||
/**
|
||||
* param for raw prediction column name
|
||||
* @group param
|
||||
*/
|
||||
val rawPredictionCol: Param[String] =
|
||||
new Param(this, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name",
|
||||
Some("rawPrediction"))
|
||||
|
||||
/** @group getParam */
|
||||
def getRawPredictionCol: String = get(rawPredictionCol)
|
||||
}
|
||||
|
||||
private[ml] trait HasProbabilityCol extends Params {
|
||||
/** param for predicted class conditional probabilities column name */
|
||||
/**
|
||||
* param for predicted class conditional probabilities column name
|
||||
* @group param
|
||||
*/
|
||||
val probabilityCol: Param[String] =
|
||||
new Param(this, "probabilityCol", "column name for predicted class conditional probabilities",
|
||||
Some("probability"))
|
||||
|
||||
/** @group getParam */
|
||||
def getProbabilityCol: String = get(probabilityCol)
|
||||
}
|
||||
|
||||
private[ml] trait HasThreshold extends Params {
|
||||
/** param for threshold in (binary) prediction */
|
||||
/**
|
||||
* param for threshold in (binary) prediction
|
||||
* @group param
|
||||
*/
|
||||
val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in prediction")
|
||||
|
||||
/** @group getParam */
|
||||
def getThreshold: Double = get(threshold)
|
||||
}
|
||||
|
||||
private[ml] trait HasInputCol extends Params {
|
||||
/** param for input column name */
|
||||
/**
|
||||
* param for input column name
|
||||
* @group param
|
||||
*/
|
||||
val inputCol: Param[String] = new Param(this, "inputCol", "input column name")
|
||||
|
||||
/** @group getParam */
|
||||
def getInputCol: String = get(inputCol)
|
||||
}
|
||||
|
||||
private[ml] trait HasOutputCol extends Params {
|
||||
/** param for output column name */
|
||||
/**
|
||||
* param for output column name
|
||||
* @group param
|
||||
*/
|
||||
val outputCol: Param[String] = new Param(this, "outputCol", "output column name")
|
||||
|
||||
/** @group getParam */
|
||||
def getOutputCol: String = get(outputCol)
|
||||
}
|
||||
|
|
|
@ -49,43 +49,89 @@ import org.apache.spark.util.random.XORShiftRandom
|
|||
private[recommendation] trait ALSParams extends Params with HasMaxIter with HasRegParam
|
||||
with HasPredictionCol {
|
||||
|
||||
/** Param for rank of the matrix factorization. */
|
||||
/**
|
||||
* Param for rank of the matrix factorization.
|
||||
* @group param
|
||||
*/
|
||||
val rank = new IntParam(this, "rank", "rank of the factorization", Some(10))
|
||||
|
||||
/** @group getParam */
|
||||
def getRank: Int = get(rank)
|
||||
|
||||
/** Param for number of user blocks. */
|
||||
/**
|
||||
* Param for number of user blocks.
|
||||
* @group param
|
||||
*/
|
||||
val numUserBlocks = new IntParam(this, "numUserBlocks", "number of user blocks", Some(10))
|
||||
|
||||
/** @group getParam */
|
||||
def getNumUserBlocks: Int = get(numUserBlocks)
|
||||
|
||||
/** Param for number of item blocks. */
|
||||
/**
|
||||
* Param for number of item blocks.
|
||||
* @group param
|
||||
*/
|
||||
val numItemBlocks =
|
||||
new IntParam(this, "numItemBlocks", "number of item blocks", Some(10))
|
||||
|
||||
/** @group getParam */
|
||||
def getNumItemBlocks: Int = get(numItemBlocks)
|
||||
|
||||
/** Param to decide whether to use implicit preference. */
|
||||
/**
|
||||
* Param to decide whether to use implicit preference.
|
||||
* @group param
|
||||
*/
|
||||
val implicitPrefs =
|
||||
new BooleanParam(this, "implicitPrefs", "whether to use implicit preference", Some(false))
|
||||
|
||||
/** @group getParam */
|
||||
def getImplicitPrefs: Boolean = get(implicitPrefs)
|
||||
|
||||
/** Param for the alpha parameter in the implicit preference formulation. */
|
||||
/**
|
||||
* Param for the alpha parameter in the implicit preference formulation.
|
||||
* @group param
|
||||
*/
|
||||
val alpha = new DoubleParam(this, "alpha", "alpha for implicit preference", Some(1.0))
|
||||
|
||||
/** @group getParam */
|
||||
def getAlpha: Double = get(alpha)
|
||||
|
||||
/** Param for the column name for user ids. */
|
||||
/**
|
||||
* Param for the column name for user ids.
|
||||
* @group param
|
||||
*/
|
||||
val userCol = new Param[String](this, "userCol", "column name for user ids", Some("user"))
|
||||
|
||||
/** @group getParam */
|
||||
def getUserCol: String = get(userCol)
|
||||
|
||||
/** Param for the column name for item ids. */
|
||||
/**
|
||||
* Param for the column name for item ids.
|
||||
* @group param
|
||||
*/
|
||||
val itemCol =
|
||||
new Param[String](this, "itemCol", "column name for item ids", Some("item"))
|
||||
|
||||
/** @group getParam */
|
||||
def getItemCol: String = get(itemCol)
|
||||
|
||||
/** Param for the column name for ratings. */
|
||||
/**
|
||||
* Param for the column name for ratings.
|
||||
* @group param
|
||||
*/
|
||||
val ratingCol = new Param[String](this, "ratingCol", "column name for ratings", Some("rating"))
|
||||
|
||||
/** @group getParam */
|
||||
def getRatingCol: String = get(ratingCol)
|
||||
|
||||
/**
|
||||
* Param for whether to apply nonnegativity constraints.
|
||||
* @group param
|
||||
*/
|
||||
val nonnegative = new BooleanParam(
|
||||
this, "nonnegative", "whether to use nonnegative constraint for least squares", Some(false))
|
||||
|
||||
/**
 * Returns the value of the `nonnegative` param.
 * Declared as a `def`, like the other param getters, so the param is looked up on every call instead of being evaluated once when the trait is initialized.
 * @group getParam
 */
def getNonnegative: Boolean = get(nonnegative)
|
||||
|
||||
/**
|
||||
|
@ -181,20 +227,46 @@ class ALS extends Estimator[ALSModel] with ALSParams {
|
|||
|
||||
import org.apache.spark.ml.recommendation.ALS.Rating
|
||||
|
||||
/** @group setParam */
|
||||
def setRank(value: Int): this.type = set(rank, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setNumUserBlocks(value: Int): this.type = set(numUserBlocks, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setNumItemBlocks(value: Int): this.type = set(numItemBlocks, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setImplicitPrefs(value: Boolean): this.type = set(implicitPrefs, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setAlpha(value: Double): this.type = set(alpha, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setUserCol(value: String): this.type = set(userCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setItemCol(value: String): this.type = set(itemCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setRatingCol(value: String): this.type = set(ratingCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setPredictionCol(value: String): this.type = set(predictionCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setMaxIter(value: Int): this.type = set(maxIter, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setRegParam(value: Double): this.type = set(regParam, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setNonnegative(value: Boolean): this.type = set(nonnegative, value)
|
||||
|
||||
/** Sets both numUserBlocks and numItemBlocks to the specific value. */
|
||||
/**
|
||||
* Sets both numUserBlocks and numItemBlocks to the specific value.
|
||||
* @group setParam
|
||||
*/
|
||||
def setNumBlocks(value: Int): this.type = {
|
||||
setNumUserBlocks(value)
|
||||
setNumItemBlocks(value)
|
||||
|
|
|
@ -44,7 +44,10 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress
|
|||
setRegParam(0.1)
|
||||
setMaxIter(100)
|
||||
|
||||
/** @group setParam */
|
||||
def setRegParam(value: Double): this.type = set(regParam, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setMaxIter(value: Int): this.type = set(maxIter, value)
|
||||
|
||||
override protected def train(dataset: DataFrame, paramMap: ParamMap): LinearRegressionModel = {
|
||||
|
|
|
@ -31,22 +31,42 @@ import org.apache.spark.sql.types.StructType
|
|||
* Params for [[CrossValidator]] and [[CrossValidatorModel]].
|
||||
*/
|
||||
private[ml] trait CrossValidatorParams extends Params {
|
||||
/** param for the estimator to be cross-validated */
|
||||
/**
|
||||
* param for the estimator to be cross-validated
|
||||
* @group param
|
||||
*/
|
||||
val estimator: Param[Estimator[_]] = new Param(this, "estimator", "estimator for selection")
|
||||
|
||||
/** @group getParam */
|
||||
def getEstimator: Estimator[_] = get(estimator)
|
||||
|
||||
/** param for estimator param maps */
|
||||
/**
|
||||
* param for estimator param maps
|
||||
* @group param
|
||||
*/
|
||||
val estimatorParamMaps: Param[Array[ParamMap]] =
|
||||
new Param(this, "estimatorParamMaps", "param maps for the estimator")
|
||||
|
||||
/** @group getParam */
|
||||
def getEstimatorParamMaps: Array[ParamMap] = get(estimatorParamMaps)
|
||||
|
||||
/** param for the evaluator for selection */
|
||||
/**
|
||||
* param for the evaluator for selection
|
||||
* @group param
|
||||
*/
|
||||
val evaluator: Param[Evaluator] = new Param(this, "evaluator", "evaluator for selection")
|
||||
|
||||
/** @group getParam */
|
||||
def getEvaluator: Evaluator = get(evaluator)
|
||||
|
||||
/** param for number of folds for cross validation */
|
||||
/**
|
||||
* param for number of folds for cross validation
|
||||
* @group param
|
||||
*/
|
||||
val numFolds: IntParam =
|
||||
new IntParam(this, "numFolds", "number of folds for cross validation", Some(3))
|
||||
|
||||
/** @group getParam */
|
||||
def getNumFolds: Int = get(numFolds)
|
||||
}
|
||||
|
||||
|
@ -59,9 +79,16 @@ class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorP
|
|||
|
||||
private val f2jBLAS = new F2jBLAS
|
||||
|
||||
/** @group setParam */
|
||||
def setEstimator(value: Estimator[_]): this.type = set(estimator, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setEstimatorParamMaps(value: Array[ParamMap]): this.type = set(estimatorParamMaps, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setEvaluator(value: Evaluator): this.type = set(evaluator, value)
|
||||
|
||||
/** @group setParam */
|
||||
def setNumFolds(value: Int): this.type = set(numFolds, value)
|
||||
|
||||
override def fit(dataset: DataFrame, paramMap: ParamMap): CrossValidatorModel = {
|
||||
|
|
Loading…
Reference in a new issue