[SPARK-12296][PYSPARK][MLLIB] Feature parity for pyspark mllib standard scaler model
Some methods are missing from the Python API, such as ways to access the std, mean, etc. This PR adds them for feature parity in pyspark.mllib.feature.StandardScaler and StandardScalerModel. Author: Holden Karau <holden@us.ibm.com> Closes #10298 from holdenk/SPARK-12296-feature-parity-pyspark-mllib-StandardScalerModel.
This commit is contained in:
parent
2235cd4440
commit
969d5665bb
|
@ -172,6 +172,38 @@ class StandardScalerModel(JavaVectorTransformer):
|
|||
self.call("setWithStd", withStd)
|
||||
return self
|
||||
|
||||
@property
@since('2.0.0')
def withStd(self):
    """
    Whether the model scales the data to unit standard deviation.

    The value is obtained by invoking ``withStd`` through ``self.call``.
    """
    scales_to_unit_std = self.call("withStd")
    return scales_to_unit_std
|
||||
|
||||
@property
@since('2.0.0')
def withMean(self):
    """
    Whether the model centers the data before scaling.

    The value is obtained by invoking ``withMean`` through ``self.call``.
    """
    centers_before_scaling = self.call("withMean")
    return centers_before_scaling
|
||||
|
||||
@property
@since('2.0.0')
def std(self):
    """
    The column standard deviation values.

    The value is obtained by invoking ``std`` through ``self.call``.
    """
    column_std = self.call("std")
    return column_std
|
||||
|
||||
@property
@since('2.0.0')
def mean(self):
    """
    The column mean values.

    The value is obtained by invoking ``mean`` through ``self.call``.
    """
    column_mean = self.call("mean")
    return column_mean
|
||||
|
||||
|
||||
class StandardScaler(object):
|
||||
"""
|
||||
|
@ -196,6 +228,14 @@ class StandardScaler(object):
|
|||
>>> for r in result.collect(): r
|
||||
DenseVector([-0.7071, 0.7071, -0.7071])
|
||||
DenseVector([0.7071, -0.7071, 0.7071])
|
||||
>>> int(model.std[0])
|
||||
4
|
||||
>>> int(model.mean[0]*10)
|
||||
9
|
||||
>>> model.withStd
|
||||
True
|
||||
>>> model.withMean
|
||||
True
|
||||
|
||||
.. versionadded:: 1.2.0
|
||||
"""
|
||||
|
|
Loading…
Reference in a new issue