[SPARK-15281][PYSPARK][ML][TRIVIAL] Add impurity param to GBTRegressor & add experimental inside of regression.py

## What changes were proposed in this pull request?

Add an impurity param to GBTRegressor and mark the models & regressors in regression.py as experimental to match the Scaladoc.
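
For illustration, a minimal PySpark sketch (not part of the patch) showing the new param in use. It assumes an active `SparkSession` named `spark`; `Vectors` is imported from `pyspark.ml.linalg`, which is where it lives in Spark 2.0+:

```python
from pyspark.ml.linalg import Vectors  # pyspark.mllib.linalg in pre-2.0 Spark
from pyspark.ml.regression import GBTRegressor

# Toy DataFrame; `spark` is assumed to be an existing SparkSession.
df = spark.createDataFrame([
    (1.0, Vectors.dense(1.0)),
    (0.0, Vectors.sparse(1, [], []))], ["label", "features"])

# `impurity` can now be passed to GBTRegressor; "variance" is the default
# (and the only impurity criterion supported for regression trees).
gbt = GBTRegressor(maxIter=5, maxDepth=2, seed=42, impurity="variance")
print(gbt.getImpurity())  # variance
model = gbt.fit(df)
```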

## How was this patch tested?

Added a default value in `__init__`; tested with unit tests and doc tests.
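
Concretely, the doc test added in the diff below pins the default value; a doctest-style sketch of that check:

```python
>>> from pyspark.ml.regression import GBTRegressor
>>> gbt = GBTRegressor(maxIter=5, maxDepth=2, seed=42)
>>> print(gbt.getImpurity())
variance
```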

Author: Holden Karau <holden@us.ibm.com>

Closes #13071 from holdenk/SPARK-15281-GBTRegressor-impurity.
Authored by Holden Karau on 2016-05-12 09:19:27 +02:00; committed by Nick Pentreath
parent 46991448aa
commit 5207a005cc

@@ -40,6 +40,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
                        HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept,
                        HasStandardization, HasSolver, HasWeightCol, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Linear regression.
 
     The learning objective is to minimize the squared error, with regularization.
@@ -123,6 +125,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
 class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by LinearRegression.
 
     .. versionadded:: 1.4.0
@@ -631,6 +635,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
                             DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval,
                             HasSeed, JavaMLWritable, JavaMLReadable, HasVarianceCol):
     """
+    .. note:: Experimental
+
     `Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_
     learning algorithm for regression.
     It supports both continuous and categorical features.
@@ -713,7 +719,10 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
 
 @inherit_doc
 class DecisionTreeModel(JavaModel):
-    """Abstraction for Decision Tree models.
+    """
+    .. note:: Experimental
+
+    Abstraction for Decision Tree models.
 
     .. versionadded:: 1.5.0
     """
@@ -736,7 +745,10 @@ class DecisionTreeModel(JavaModel):
 
 @inherit_doc
 class TreeEnsembleModels(JavaModel):
-    """Represents a tree ensemble model.
+    """
+    .. note:: Experimental
+
+    Represents a tree ensemble model.
 
     .. versionadded:: 1.5.0
     """
@@ -754,6 +766,8 @@ class TreeEnsembleModels(JavaModel):
 @inherit_doc
 class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by DecisionTreeRegressor.
 
     .. versionadded:: 1.4.0
@@ -786,6 +800,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
                             RandomForestParams, TreeRegressorParams, HasCheckpointInterval,
                             JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     `Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_
     learning algorithm for regression.
     It supports both continuous and categorical features.
@@ -868,6 +884,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
 class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by RandomForestRegressor.
 
     .. versionadded:: 1.4.0
@@ -892,8 +910,10 @@ class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLRead
 @inherit_doc
 class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
                    GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable,
-                   JavaMLReadable):
+                   JavaMLReadable, TreeRegressorParams):
     """
+    .. note:: Experimental
+
     `Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_
     learning algorithm for regression.
     It supports both continuous and categorical features.
@@ -904,6 +924,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
     ... (1.0, Vectors.dense(1.0)),
     ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
     >>> gbt = GBTRegressor(maxIter=5, maxDepth=2, seed=42)
+    >>> print(gbt.getImpurity())
+    variance
     >>> model = gbt.fit(df)
     >>> model.featureImportances
     SparseVector(1, {0: 1.0})
@@ -940,19 +962,21 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
     def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
                  maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
-                 checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None):
+                 checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None,
+                 impurity="variance"):
         """
         __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
                  maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \
-                 checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None)
+                 checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \
+                 impurity="variance")
         """
         super(GBTRegressor, self).__init__()
         self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid)
         self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
                          maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
                          checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1,
-                         seed=None)
+                         seed=None, impurity="variance")
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
 
@@ -961,12 +985,14 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
     def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                   maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
                   maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
-                  checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None):
+                  checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None,
+                  impurity="variance"):
         """
         setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                   maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
                   maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \
-                  checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None)
+                  checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \
+                  impurity="variance")
         Sets params for Gradient Boosted Tree Regression.
         """
         kwargs = self.setParams._input_kwargs
@@ -992,6 +1018,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
 class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by GBTRegressor.
 
     .. versionadded:: 1.4.0
@@ -1017,6 +1045,8 @@ class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
 class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                             HasFitIntercept, HasMaxIter, HasTol, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Accelerated Failure Time (AFT) Model Survival Regression
 
     Fit a parametric AFT survival regression model based on the Weibull distribution
@@ -1157,6 +1187,8 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
 class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by AFTSurvivalRegression.
 
     .. versionadded:: 1.6.0
@@ -1204,6 +1236,8 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha
                                   HasFitIntercept, HasMaxIter, HasTol, HasRegParam, HasWeightCol,
                                   HasSolver, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Generalized Linear Regression.
 
     Fit a Generalized Linear Model specified by giving a symbolic description of the linear
@@ -1320,6 +1354,8 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha
 class GeneralizedLinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by GeneralizedLinearRegression.
 
     .. versionadded:: 2.0.0