[SPARK-10265][DOCUMENTATION, ML] Added @Since annotations to ml.regression
Here is my first commit. Author: Ehsan M. Kermani <ehsanmo1367@gmail.com>. Closes #8728 from ehsanmok/SinceAnn.
This commit is contained in:
parent
6b87acd664
commit
f80f7b69a3
|
@ -17,7 +17,7 @@
|
|||
|
||||
package org.apache.spark.ml.regression
|
||||
|
||||
import org.apache.spark.annotation.Experimental
|
||||
import org.apache.spark.annotation.{Experimental, Since}
|
||||
import org.apache.spark.ml.{PredictionModel, Predictor}
|
||||
import org.apache.spark.ml.param.ParamMap
|
||||
import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeRegressorParams}
|
||||
|
@ -36,30 +36,39 @@ import org.apache.spark.sql.DataFrame
|
|||
* for regression.
|
||||
* It supports both continuous and categorical features.
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
@Experimental
|
||||
final class DecisionTreeRegressor(override val uid: String)
|
||||
final class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
|
||||
extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]
|
||||
with DecisionTreeParams with TreeRegressorParams {
|
||||
|
||||
@Since("1.4.0")
|
||||
def this() = this(Identifiable.randomUID("dtr"))
|
||||
|
||||
// Override parameter setters from parent trait for Java API compatibility.
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMinInstancesPerNode(value: Int): this.type =
|
||||
super.setMinInstancesPerNode(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setImpurity(value: String): this.type = super.setImpurity(value)
|
||||
|
||||
override protected def train(dataset: DataFrame): DecisionTreeRegressionModel = {
|
||||
|
@ -78,9 +87,11 @@ final class DecisionTreeRegressor(override val uid: String)
|
|||
subsamplingRate = 1.0)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def copy(extra: ParamMap): DecisionTreeRegressor = defaultCopy(extra)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
@Experimental
|
||||
object DecisionTreeRegressor {
|
||||
/** Accessor for supported impurities: variance */
|
||||
|
@ -93,6 +104,7 @@ object DecisionTreeRegressor {
|
|||
* It supports both continuous and categorical features.
|
||||
* @param rootNode Root of the decision tree
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
@Experimental
|
||||
final class DecisionTreeRegressionModel private[ml] (
|
||||
override val uid: String,
|
||||
|
@ -115,10 +127,12 @@ final class DecisionTreeRegressionModel private[ml] (
|
|||
rootNode.predictImpl(features).prediction
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def copy(extra: ParamMap): DecisionTreeRegressionModel = {
|
||||
copyValues(new DecisionTreeRegressionModel(uid, rootNode, numFeatures), extra).setParent(parent)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def toString: String = {
|
||||
s"DecisionTreeRegressionModel (uid=$uid) of depth $depth with $numNodes nodes"
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.spark.ml.regression
|
|||
import com.github.fommil.netlib.BLAS.{getInstance => blas}
|
||||
|
||||
import org.apache.spark.Logging
|
||||
import org.apache.spark.annotation.Experimental
|
||||
import org.apache.spark.annotation.{Experimental, Since}
|
||||
import org.apache.spark.ml.{PredictionModel, Predictor}
|
||||
import org.apache.spark.ml.param.{Param, ParamMap}
|
||||
import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeEnsembleModel, TreeRegressorParams}
|
||||
|
@ -42,54 +42,65 @@ import org.apache.spark.sql.types.DoubleType
|
|||
* learning algorithm for regression.
|
||||
* It supports both continuous and categorical features.
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
@Experimental
|
||||
final class GBTRegressor(override val uid: String)
|
||||
final class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
|
||||
extends Predictor[Vector, GBTRegressor, GBTRegressionModel]
|
||||
with GBTParams with TreeRegressorParams with Logging {
|
||||
|
||||
@Since("1.4.0")
|
||||
def this() = this(Identifiable.randomUID("gbtr"))
|
||||
|
||||
// Override parameter setters from parent trait for Java API compatibility.
|
||||
|
||||
// Parameters from TreeRegressorParams:
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMinInstancesPerNode(value: Int): this.type =
|
||||
super.setMinInstancesPerNode(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
|
||||
|
||||
/**
|
||||
* The impurity setting is ignored for GBT models.
|
||||
* Individual trees are built using impurity "Variance."
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
override def setImpurity(value: String): this.type = {
|
||||
logWarning("GBTRegressor.setImpurity should NOT be used")
|
||||
this
|
||||
}
|
||||
|
||||
// Parameters from TreeEnsembleParams:
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setSeed(value: Long): this.type = {
|
||||
logWarning("The 'seed' parameter is currently ignored by Gradient Boosting.")
|
||||
super.setSeed(value)
|
||||
}
|
||||
|
||||
// Parameters from GBTParams:
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxIter(value: Int): this.type = super.setMaxIter(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setStepSize(value: Double): this.type = super.setStepSize(value)
|
||||
|
||||
// Parameters for GBTRegressor:
|
||||
|
@ -100,6 +111,7 @@ final class GBTRegressor(override val uid: String)
|
|||
* (default = squared)
|
||||
* @group param
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
val lossType: Param[String] = new Param[String](this, "lossType", "Loss function which GBT" +
|
||||
" tries to minimize (case-insensitive). Supported options:" +
|
||||
s" ${GBTRegressor.supportedLossTypes.mkString(", ")}",
|
||||
|
@ -108,9 +120,11 @@ final class GBTRegressor(override val uid: String)
|
|||
setDefault(lossType -> "squared")
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.4.0")
|
||||
def setLossType(value: String): this.type = set(lossType, value)
|
||||
|
||||
/** @group getParam */
|
||||
@Since("1.4.0")
|
||||
// Lower-case with Locale.ROOT so the normalized loss name does not depend on the JVM's default locale.
def getLossType: String = $(lossType).toLowerCase(java.util.Locale.ROOT)
|
||||
|
||||
/** (private[ml]) Convert new loss to old loss. */
|
||||
|
@ -135,13 +149,16 @@ final class GBTRegressor(override val uid: String)
|
|||
GBTRegressionModel.fromOld(oldModel, this, categoricalFeatures, numFeatures)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def copy(extra: ParamMap): GBTRegressor = defaultCopy(extra)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
@Experimental
|
||||
object GBTRegressor {
|
||||
// The losses below should be lowercase.
|
||||
/** Accessor for supported loss settings: squared (L2), absolute (L1) */
|
||||
@Since("1.4.0")
|
||||
final val supportedLossTypes: Array[String] = Array("squared", "absolute").map(_.toLowerCase)
|
||||
}
|
||||
|
||||
|
@ -154,6 +171,7 @@ object GBTRegressor {
|
|||
* @param _trees Decision trees in the ensemble.
|
||||
* @param _treeWeights Weights for the decision trees in the ensemble.
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
@Experimental
|
||||
final class GBTRegressionModel private[ml](
|
||||
override val uid: String,
|
||||
|
@ -172,11 +190,14 @@ final class GBTRegressionModel private[ml](
|
|||
* @param _trees Decision trees in the ensemble.
|
||||
* @param _treeWeights Weights for the decision trees in the ensemble.
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
def this(uid: String, _trees: Array[DecisionTreeRegressionModel], _treeWeights: Array[Double]) =
|
||||
this(uid, _trees, _treeWeights, -1)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]
|
||||
|
||||
@Since("1.4.0")
|
||||
override def treeWeights: Array[Double] = _treeWeights
|
||||
|
||||
override protected def transformImpl(dataset: DataFrame): DataFrame = {
|
||||
|
@ -194,11 +215,13 @@ final class GBTRegressionModel private[ml](
|
|||
blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def copy(extra: ParamMap): GBTRegressionModel = {
|
||||
copyValues(new GBTRegressionModel(uid, _trees, _treeWeights, numFeatures),
|
||||
extra).setParent(parent)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def toString: String = {
|
||||
s"GBTRegressionModel (uid=$uid) with $numTrees trees"
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
package org.apache.spark.ml.regression
|
||||
|
||||
import org.apache.spark.Logging
|
||||
import org.apache.spark.annotation.Experimental
|
||||
import org.apache.spark.annotation.{Experimental, Since}
|
||||
import org.apache.spark.ml.{Estimator, Model}
|
||||
import org.apache.spark.ml.param._
|
||||
import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol}
|
||||
|
@ -124,32 +124,42 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
|
|||
*
|
||||
* Uses [[org.apache.spark.mllib.regression.IsotonicRegression]].
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
@Experimental
|
||||
class IsotonicRegression(override val uid: String) extends Estimator[IsotonicRegressionModel]
|
||||
with IsotonicRegressionBase {
|
||||
class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String)
|
||||
extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase {
|
||||
|
||||
@Since("1.5.0")
|
||||
def this() = this(Identifiable.randomUID("isoReg"))
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.5.0")
|
||||
def setLabelCol(value: String): this.type = set(labelCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.5.0")
|
||||
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.5.0")
|
||||
def setPredictionCol(value: String): this.type = set(predictionCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.5.0")
|
||||
def setIsotonic(value: Boolean): this.type = set(isotonic, value)
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.5.0")
|
||||
def setWeightCol(value: String): this.type = set(weightCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.5.0")
|
||||
def setFeatureIndex(value: Int): this.type = set(featureIndex, value)
|
||||
|
||||
@Since("1.5.0")
|
||||
override def copy(extra: ParamMap): IsotonicRegression = defaultCopy(extra)
|
||||
|
||||
@Since("1.5.0")
|
||||
override def fit(dataset: DataFrame): IsotonicRegressionModel = {
|
||||
validateAndTransformSchema(dataset.schema, fitting = true)
|
||||
// Extract columns from data. If dataset is persisted, do not persist oldDataset.
|
||||
|
@ -163,6 +173,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg
|
|||
copyValues(new IsotonicRegressionModel(uid, oldModel).setParent(this))
|
||||
}
|
||||
|
||||
@Since("1.5.0")
|
||||
override def transformSchema(schema: StructType): StructType = {
|
||||
validateAndTransformSchema(schema, fitting = true)
|
||||
}
|
||||
|
@ -178,6 +189,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg
|
|||
* @param oldModel A [[org.apache.spark.mllib.regression.IsotonicRegressionModel]]
|
||||
* model trained by [[org.apache.spark.mllib.regression.IsotonicRegression]].
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
@Experimental
|
||||
class IsotonicRegressionModel private[ml] (
|
||||
override val uid: String,
|
||||
|
@ -185,27 +197,34 @@ class IsotonicRegressionModel private[ml] (
|
|||
extends Model[IsotonicRegressionModel] with IsotonicRegressionBase {
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.5.0")
|
||||
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.5.0")
|
||||
def setPredictionCol(value: String): this.type = set(predictionCol, value)
|
||||
|
||||
/** @group setParam */
|
||||
@Since("1.5.0")
|
||||
def setFeatureIndex(value: Int): this.type = set(featureIndex, value)
|
||||
|
||||
/** Boundaries in increasing order for which predictions are known. */
|
||||
@Since("1.5.0")
|
||||
def boundaries: Vector = Vectors.dense(oldModel.boundaries)
|
||||
|
||||
/**
|
||||
* Predictions associated with the boundaries at the same index, monotone because of isotonic
|
||||
* regression.
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
def predictions: Vector = Vectors.dense(oldModel.predictions)
|
||||
|
||||
@Since("1.5.0")
|
||||
override def copy(extra: ParamMap): IsotonicRegressionModel = {
|
||||
copyValues(new IsotonicRegressionModel(uid, oldModel), extra).setParent(parent)
|
||||
}
|
||||
|
||||
@Since("1.5.0")
|
||||
override def transform(dataset: DataFrame): DataFrame = {
|
||||
val predict = dataset.schema($(featuresCol)).dataType match {
|
||||
case DoubleType =>
|
||||
|
@ -217,6 +236,7 @@ class IsotonicRegressionModel private[ml] (
|
|||
dataset.withColumn($(predictionCol), predict(col($(featuresCol))))
|
||||
}
|
||||
|
||||
@Since("1.5.0")
|
||||
override def transformSchema(schema: StructType): StructType = {
|
||||
validateAndTransformSchema(schema, fitting = false)
|
||||
}
|
||||
|
|
|
@ -24,9 +24,9 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS,
|
|||
import breeze.stats.distributions.StudentsT
|
||||
|
||||
import org.apache.spark.{Logging, SparkException}
|
||||
import org.apache.spark.annotation.Experimental
|
||||
import org.apache.spark.ml.feature.Instance
|
||||
import org.apache.spark.ml.optim.WeightedLeastSquares
|
||||
import org.apache.spark.annotation.{Experimental, Since}
|
||||
import org.apache.spark.ml.PredictorParams
|
||||
import org.apache.spark.ml.param.ParamMap
|
||||
import org.apache.spark.ml.param.shared._
|
||||
|
@ -61,11 +61,13 @@ private[regression] trait LinearRegressionParams extends PredictorParams
|
|||
* - L1 (Lasso)
|
||||
* - L2 + L1 (elastic net)
|
||||
*/
|
||||
@Since("1.3.0")
|
||||
@Experimental
|
||||
class LinearRegression(override val uid: String)
|
||||
class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String)
|
||||
extends Regressor[Vector, LinearRegression, LinearRegressionModel]
|
||||
with LinearRegressionParams with Logging {
|
||||
|
||||
@Since("1.4.0")
|
||||
def this() = this(Identifiable.randomUID("linReg"))
|
||||
|
||||
/**
|
||||
|
@ -73,6 +75,7 @@ class LinearRegression(override val uid: String)
|
|||
* Default is 0.0.
|
||||
* @group setParam
|
||||
*/
|
||||
@Since("1.3.0")
|
||||
def setRegParam(value: Double): this.type = set(regParam, value)
|
||||
setDefault(regParam -> 0.0)
|
||||
|
||||
|
@ -81,6 +84,7 @@ class LinearRegression(override val uid: String)
|
|||
* Default is true.
|
||||
* @group setParam
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)
|
||||
setDefault(fitIntercept -> true)
|
||||
|
||||
|
@ -93,6 +97,7 @@ class LinearRegression(override val uid: String)
|
|||
* Default is true.
|
||||
* @group setParam
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
def setStandardization(value: Boolean): this.type = set(standardization, value)
|
||||
setDefault(standardization -> true)
|
||||
|
||||
|
@ -103,6 +108,7 @@ class LinearRegression(override val uid: String)
|
|||
* Default is 0.0 which is an L2 penalty.
|
||||
* @group setParam
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value)
|
||||
setDefault(elasticNetParam -> 0.0)
|
||||
|
||||
|
@ -111,6 +117,7 @@ class LinearRegression(override val uid: String)
|
|||
* Default is 100.
|
||||
* @group setParam
|
||||
*/
|
||||
@Since("1.3.0")
|
||||
def setMaxIter(value: Int): this.type = set(maxIter, value)
|
||||
setDefault(maxIter -> 100)
|
||||
|
||||
|
@ -120,6 +127,7 @@ class LinearRegression(override val uid: String)
|
|||
* Default is 1E-6.
|
||||
* @group setParam
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
def setTol(value: Double): this.type = set(tol, value)
|
||||
setDefault(tol -> 1E-6)
|
||||
|
||||
|
@ -129,6 +137,7 @@ class LinearRegression(override val uid: String)
|
|||
* Default is empty, so all instances have weight one.
|
||||
* @group setParam
|
||||
*/
|
||||
@Since("1.6.0")
|
||||
def setWeightCol(value: String): this.type = set(weightCol, value)
|
||||
setDefault(weightCol -> "")
|
||||
|
||||
|
@ -139,6 +148,7 @@ class LinearRegression(override val uid: String)
|
|||
* selected automatically.
|
||||
* @group setParam
|
||||
*/
|
||||
@Since("1.6.0")
|
||||
def setSolver(value: String): this.type = set(solver, value)
|
||||
setDefault(solver -> "auto")
|
||||
|
||||
|
@ -329,6 +339,7 @@ class LinearRegression(override val uid: String)
|
|||
model.setSummary(trainingSummary)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra)
|
||||
}
|
||||
|
||||
|
@ -336,6 +347,7 @@ class LinearRegression(override val uid: String)
|
|||
* :: Experimental ::
|
||||
* Model produced by [[LinearRegression]].
|
||||
*/
|
||||
@Since("1.3.0")
|
||||
@Experimental
|
||||
class LinearRegressionModel private[ml] (
|
||||
override val uid: String,
|
||||
|
@ -355,6 +367,7 @@ class LinearRegressionModel private[ml] (
|
|||
* Gets summary (e.g. residuals, mse, r-squared) of model on training set. An exception is
|
||||
* thrown if `trainingSummary == None`.
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
def summary: LinearRegressionTrainingSummary = trainingSummary match {
|
||||
case Some(summ) => summ
|
||||
case None =>
|
||||
|
@ -369,6 +382,7 @@ class LinearRegressionModel private[ml] (
|
|||
}
|
||||
|
||||
/** Indicates whether a training summary exists for this model instance. */
|
||||
@Since("1.5.0")
|
||||
def hasSummary: Boolean = trainingSummary.isDefined
|
||||
|
||||
/**
|
||||
|
@ -402,6 +416,7 @@ class LinearRegressionModel private[ml] (
|
|||
dot(features, coefficients) + intercept
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def copy(extra: ParamMap): LinearRegressionModel = {
|
||||
val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra)
|
||||
if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
|
||||
|
@ -416,6 +431,7 @@ class LinearRegressionModel private[ml] (
|
|||
* @param predictions predictions outputted by the model's `transform` method.
|
||||
* @param objectiveHistory objective function (scaled loss + regularization) at each iteration.
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
@Experimental
|
||||
class LinearRegressionTrainingSummary private[regression] (
|
||||
predictions: DataFrame,
|
||||
|
@ -428,6 +444,7 @@ class LinearRegressionTrainingSummary private[regression] (
|
|||
extends LinearRegressionSummary(predictions, predictionCol, labelCol, model, diagInvAtWA) {
|
||||
|
||||
/** Number of training iterations until termination */
|
||||
@Since("1.5.0")
|
||||
val totalIterations = objectiveHistory.length
|
||||
|
||||
}
|
||||
|
@ -437,6 +454,7 @@ class LinearRegressionTrainingSummary private[regression] (
|
|||
* Linear regression results evaluated on a dataset.
|
||||
* @param predictions predictions outputted by the model's `transform` method.
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
@Experimental
|
||||
class LinearRegressionSummary private[regression] (
|
||||
@transient val predictions: DataFrame,
|
||||
|
@ -455,33 +473,39 @@ class LinearRegressionSummary private[regression] (
|
|||
* explainedVariance = 1 - variance(y - \hat{y}) / variance(y)
|
||||
* Reference: [[http://en.wikipedia.org/wiki/Explained_variation]]
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
val explainedVariance: Double = metrics.explainedVariance
|
||||
|
||||
/**
|
||||
* Returns the mean absolute error, which is a risk function corresponding to the
|
||||
* expected value of the absolute error loss or l1-norm loss.
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
val meanAbsoluteError: Double = metrics.meanAbsoluteError
|
||||
|
||||
/**
|
||||
* Returns the mean squared error, which is a risk function corresponding to the
|
||||
* expected value of the squared error loss or quadratic loss.
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
val meanSquaredError: Double = metrics.meanSquaredError
|
||||
|
||||
/**
|
||||
* Returns the root mean squared error, which is defined as the square root of
|
||||
* the mean squared error.
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
val rootMeanSquaredError: Double = metrics.rootMeanSquaredError
|
||||
|
||||
/**
|
||||
* Returns R^2^, the coefficient of determination.
|
||||
* Reference: [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
|
||||
*/
|
||||
@Since("1.5.0")
|
||||
val r2: Double = metrics.r2
|
||||
|
||||
/** Residuals (label - predicted value) */
|
||||
@Since("1.5.0")
|
||||
@transient lazy val residuals: DataFrame = {
|
||||
val t = udf { (pred: Double, label: Double) => label - pred }
|
||||
predictions.select(t(col(predictionCol), col(labelCol)).as("residuals"))
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
package org.apache.spark.ml.regression
|
||||
|
||||
import org.apache.spark.annotation.Experimental
|
||||
import org.apache.spark.annotation.{Experimental, Since}
|
||||
import org.apache.spark.ml.{PredictionModel, Predictor}
|
||||
import org.apache.spark.ml.param.ParamMap
|
||||
import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeEnsembleModel, TreeRegressorParams}
|
||||
|
@ -37,44 +37,55 @@ import org.apache.spark.sql.functions._
|
|||
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression.
|
||||
* It supports both continuous and categorical features.
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
@Experimental
|
||||
final class RandomForestRegressor(override val uid: String)
|
||||
final class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
|
||||
extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel]
|
||||
with RandomForestParams with TreeRegressorParams {
|
||||
|
||||
@Since("1.4.0")
|
||||
def this() = this(Identifiable.randomUID("rfr"))
|
||||
|
||||
// Override parameter setters from parent trait for Java API compatibility.
|
||||
|
||||
// Parameters from TreeRegressorParams:
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMinInstancesPerNode(value: Int): this.type =
|
||||
super.setMinInstancesPerNode(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setImpurity(value: String): this.type = super.setImpurity(value)
|
||||
|
||||
// Parameters from TreeEnsembleParams:
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setSeed(value: Long): this.type = super.setSeed(value)
|
||||
|
||||
// Parameters from RandomForestParams:
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setNumTrees(value: Int): this.type = super.setNumTrees(value)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def setFeatureSubsetStrategy(value: String): this.type =
|
||||
super.setFeatureSubsetStrategy(value)
|
||||
|
||||
|
@ -91,15 +102,19 @@ final class RandomForestRegressor(override val uid: String)
|
|||
new RandomForestRegressionModel(trees, numFeatures)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def copy(extra: ParamMap): RandomForestRegressor = defaultCopy(extra)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
@Experimental
|
||||
object RandomForestRegressor {
|
||||
/** Accessor for supported impurity settings: variance */
|
||||
@Since("1.4.0")
|
||||
final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities
|
||||
|
||||
/** Accessor for supported featureSubsetStrategy settings: auto, all, onethird, sqrt, log2 */
|
||||
@Since("1.4.0")
|
||||
final val supportedFeatureSubsetStrategies: Array[String] =
|
||||
RandomForestParams.supportedFeatureSubsetStrategies
|
||||
}
|
||||
|
@ -111,6 +126,7 @@ object RandomForestRegressor {
|
|||
* @param _trees Decision trees in the ensemble.
|
||||
* @param numFeatures Number of features used by this model
|
||||
*/
|
||||
@Since("1.4.0")
|
||||
@Experimental
|
||||
final class RandomForestRegressionModel private[ml] (
|
||||
override val uid: String,
|
||||
|
@ -128,11 +144,13 @@ final class RandomForestRegressionModel private[ml] (
|
|||
private[ml] def this(trees: Array[DecisionTreeRegressionModel], numFeatures: Int) =
|
||||
this(Identifiable.randomUID("rfr"), trees, numFeatures)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]
|
||||
|
||||
// Note: We may add support for weights (based on tree performance) later on.
|
||||
private lazy val _treeWeights: Array[Double] = Array.fill[Double](numTrees)(1.0)
|
||||
|
||||
@Since("1.4.0")
|
||||
override def treeWeights: Array[Double] = _treeWeights
|
||||
|
||||
override protected def transformImpl(dataset: DataFrame): DataFrame = {
|
||||
|
@ -150,10 +168,12 @@ final class RandomForestRegressionModel private[ml] (
|
|||
_trees.map(_.rootNode.predictImpl(features).prediction).sum / numTrees
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def copy(extra: ParamMap): RandomForestRegressionModel = {
|
||||
copyValues(new RandomForestRegressionModel(uid, _trees, numFeatures), extra).setParent(parent)
|
||||
}
|
||||
|
||||
@Since("1.4.0")
|
||||
override def toString: String = {
|
||||
s"RandomForestRegressionModel (uid=$uid) with $numTrees trees"
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue