[SPARK-15281][PYSPARK][ML][TRIVIAL] Add impurity param to GBTRegressor & add experimental inside of regression.py
## What changes were proposed in this pull request? Add impurity param to GBTRegressor and mark all of the models & regressors in regression.py as experimental to match Scaladoc. ## How was this patch tested? Added default value to init, tested with unit/doc tests. Author: Holden Karau <holden@us.ibm.com> Closes #13071 from holdenk/SPARK-15281-GBTRegressor-impurity.
This commit is contained in:
parent
46991448aa
commit
5207a005cc
|
@ -40,6 +40,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
|
||||||
HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept,
|
HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept,
|
||||||
HasStandardization, HasSolver, HasWeightCol, JavaMLWritable, JavaMLReadable):
|
HasStandardization, HasSolver, HasWeightCol, JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
Linear regression.
|
Linear regression.
|
||||||
|
|
||||||
The learning objective is to minimize the squared error, with regularization.
|
The learning objective is to minimize the squared error, with regularization.
|
||||||
|
@ -123,6 +125,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
|
||||||
|
|
||||||
class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
|
class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
Model fitted by LinearRegression.
|
Model fitted by LinearRegression.
|
||||||
|
|
||||||
.. versionadded:: 1.4.0
|
.. versionadded:: 1.4.0
|
||||||
|
@ -631,6 +635,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
|
||||||
DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval,
|
DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval,
|
||||||
HasSeed, JavaMLWritable, JavaMLReadable, HasVarianceCol):
|
HasSeed, JavaMLWritable, JavaMLReadable, HasVarianceCol):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
`Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_
|
`Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_
|
||||||
learning algorithm for regression.
|
learning algorithm for regression.
|
||||||
It supports both continuous and categorical features.
|
It supports both continuous and categorical features.
|
||||||
|
@ -713,7 +719,10 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
|
||||||
|
|
||||||
@inherit_doc
|
@inherit_doc
|
||||||
class DecisionTreeModel(JavaModel):
|
class DecisionTreeModel(JavaModel):
|
||||||
"""Abstraction for Decision Tree models.
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
|
Abstraction for Decision Tree models.
|
||||||
|
|
||||||
.. versionadded:: 1.5.0
|
.. versionadded:: 1.5.0
|
||||||
"""
|
"""
|
||||||
|
@ -736,7 +745,10 @@ class DecisionTreeModel(JavaModel):
|
||||||
|
|
||||||
@inherit_doc
|
@inherit_doc
|
||||||
class TreeEnsembleModels(JavaModel):
|
class TreeEnsembleModels(JavaModel):
|
||||||
"""Represents a tree ensemble model.
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
|
Represents a tree ensemble model.
|
||||||
|
|
||||||
.. versionadded:: 1.5.0
|
.. versionadded:: 1.5.0
|
||||||
"""
|
"""
|
||||||
|
@ -754,6 +766,8 @@ class TreeEnsembleModels(JavaModel):
|
||||||
@inherit_doc
|
@inherit_doc
|
||||||
class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
|
class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
Model fitted by DecisionTreeRegressor.
|
Model fitted by DecisionTreeRegressor.
|
||||||
|
|
||||||
.. versionadded:: 1.4.0
|
.. versionadded:: 1.4.0
|
||||||
|
@ -786,6 +800,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
|
||||||
RandomForestParams, TreeRegressorParams, HasCheckpointInterval,
|
RandomForestParams, TreeRegressorParams, HasCheckpointInterval,
|
||||||
JavaMLWritable, JavaMLReadable):
|
JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
`Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_
|
`Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_
|
||||||
learning algorithm for regression.
|
learning algorithm for regression.
|
||||||
It supports both continuous and categorical features.
|
It supports both continuous and categorical features.
|
||||||
|
@ -868,6 +884,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
|
||||||
|
|
||||||
class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
|
class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
Model fitted by RandomForestRegressor.
|
Model fitted by RandomForestRegressor.
|
||||||
|
|
||||||
.. versionadded:: 1.4.0
|
.. versionadded:: 1.4.0
|
||||||
|
@ -892,8 +910,10 @@ class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLRead
|
||||||
@inherit_doc
|
@inherit_doc
|
||||||
class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
|
class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
|
||||||
GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable,
|
GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable,
|
||||||
JavaMLReadable):
|
JavaMLReadable, TreeRegressorParams):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
`Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_
|
`Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_
|
||||||
learning algorithm for regression.
|
learning algorithm for regression.
|
||||||
It supports both continuous and categorical features.
|
It supports both continuous and categorical features.
|
||||||
|
@ -904,6 +924,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
|
||||||
... (1.0, Vectors.dense(1.0)),
|
... (1.0, Vectors.dense(1.0)),
|
||||||
... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
|
... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
|
||||||
>>> gbt = GBTRegressor(maxIter=5, maxDepth=2, seed=42)
|
>>> gbt = GBTRegressor(maxIter=5, maxDepth=2, seed=42)
|
||||||
|
>>> print(gbt.getImpurity())
|
||||||
|
variance
|
||||||
>>> model = gbt.fit(df)
|
>>> model = gbt.fit(df)
|
||||||
>>> model.featureImportances
|
>>> model.featureImportances
|
||||||
SparseVector(1, {0: 1.0})
|
SparseVector(1, {0: 1.0})
|
||||||
|
@ -940,19 +962,21 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
|
||||||
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
|
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
|
||||||
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
|
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
|
||||||
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
|
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
|
||||||
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None):
|
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None,
|
||||||
|
impurity="variance"):
|
||||||
"""
|
"""
|
||||||
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
|
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
|
||||||
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
|
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
|
||||||
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \
|
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \
|
||||||
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None)
|
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \
|
||||||
|
impurity="variance")
|
||||||
"""
|
"""
|
||||||
super(GBTRegressor, self).__init__()
|
super(GBTRegressor, self).__init__()
|
||||||
self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid)
|
self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid)
|
||||||
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
|
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
|
||||||
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
|
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
|
||||||
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1,
|
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1,
|
||||||
seed=None)
|
seed=None, impurity="variance")
|
||||||
kwargs = self.__init__._input_kwargs
|
kwargs = self.__init__._input_kwargs
|
||||||
self.setParams(**kwargs)
|
self.setParams(**kwargs)
|
||||||
|
|
||||||
|
@ -961,12 +985,14 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
|
||||||
def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
|
def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
|
||||||
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
|
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
|
||||||
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
|
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
|
||||||
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None):
|
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None,
|
||||||
|
impurity="variance"):
|
||||||
"""
|
"""
|
||||||
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
|
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
|
||||||
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
|
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
|
||||||
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \
|
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \
|
||||||
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None)
|
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \
|
||||||
|
impurity="variance")
|
||||||
Sets params for Gradient Boosted Tree Regression.
|
Sets params for Gradient Boosted Tree Regression.
|
||||||
"""
|
"""
|
||||||
kwargs = self.setParams._input_kwargs
|
kwargs = self.setParams._input_kwargs
|
||||||
|
@ -992,6 +1018,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
|
||||||
|
|
||||||
class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
|
class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
Model fitted by GBTRegressor.
|
Model fitted by GBTRegressor.
|
||||||
|
|
||||||
.. versionadded:: 1.4.0
|
.. versionadded:: 1.4.0
|
||||||
|
@ -1017,6 +1045,8 @@ class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
|
||||||
class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
|
class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
|
||||||
HasFitIntercept, HasMaxIter, HasTol, JavaMLWritable, JavaMLReadable):
|
HasFitIntercept, HasMaxIter, HasTol, JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
Accelerated Failure Time (AFT) Model Survival Regression
|
Accelerated Failure Time (AFT) Model Survival Regression
|
||||||
|
|
||||||
Fit a parametric AFT survival regression model based on the Weibull distribution
|
Fit a parametric AFT survival regression model based on the Weibull distribution
|
||||||
|
@ -1157,6 +1187,8 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
|
||||||
|
|
||||||
class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
|
class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
Model fitted by AFTSurvivalRegression.
|
Model fitted by AFTSurvivalRegression.
|
||||||
|
|
||||||
.. versionadded:: 1.6.0
|
.. versionadded:: 1.6.0
|
||||||
|
@ -1204,6 +1236,8 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha
|
||||||
HasFitIntercept, HasMaxIter, HasTol, HasRegParam, HasWeightCol,
|
HasFitIntercept, HasMaxIter, HasTol, HasRegParam, HasWeightCol,
|
||||||
HasSolver, JavaMLWritable, JavaMLReadable):
|
HasSolver, JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
Generalized Linear Regression.
|
Generalized Linear Regression.
|
||||||
|
|
||||||
Fit a Generalized Linear Model specified by giving a symbolic description of the linear
|
Fit a Generalized Linear Model specified by giving a symbolic description of the linear
|
||||||
|
@ -1320,6 +1354,8 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha
|
||||||
|
|
||||||
class GeneralizedLinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
|
class GeneralizedLinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
|
||||||
"""
|
"""
|
||||||
|
.. note:: Experimental
|
||||||
|
|
||||||
Model fitted by GeneralizedLinearRegression.
|
Model fitted by GeneralizedLinearRegression.
|
||||||
|
|
||||||
.. versionadded:: 2.0.0
|
.. versionadded:: 2.0.0
|
||||||
|
|
Loading…
Reference in a new issue