spark-instrumented-optimizer/python/pyspark/ml/classification.pyi
zhengruifeng a2887164bc [SPARK-32907][ML][PYTHON] adaptively blockify instances - LinearSVC
### What changes were proposed in this pull request?
1. Use `maxBlockSizeInMB` instead of `blockSize` (a row count) to control the stacking of vectors;
2. Infer an appropriate `maxBlockSizeInMB` when it is set to 0. A usage sketch follows this list.
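
A minimal usage sketch of the new parameter (toy data and illustrative values only, assuming a local `SparkSession`):

```python
from pyspark.ml.classification import LinearSVC
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Tiny toy DataFrame just to show the parameter; the benchmarks below use the epsilon datasets.
df = spark.createDataFrame(
    [(1.0, Vectors.dense([0.0, 1.0])),
     (0.0, Vectors.dense([1.0, 0.0]))],
    ["label", "features"],
)

# maxBlockSizeInMB=0.0 (the default) lets the estimator infer a block size;
# a positive value caps each stacked block at roughly that many MB.
svc = LinearSVC(maxIter=10, regParam=0.01, maxBlockSizeInMB=0.0)
model = svc.fit(df)
print(model.coefficients, model.intercept)
```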

### Why are the changes needed?
The performance gain is mainly related to the number of non-zeros (nnz) per block; a rough sketch of why follows, and the benchmarks below bear it out.
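
As a back-of-the-envelope sketch (an illustration of the idea, not Spark's actual sizing logic), the number of rows that fit into one block under a fixed memory budget shrinks as the average nnz per row grows:

```python
def approx_rows_per_block(max_block_size_mb: float, avg_nnz: float,
                          bytes_per_nonzero: int = 12) -> int:
    """Rough estimate of how many instances can be stacked into one block.

    Assumes each stored non-zero costs about 12 bytes (an 8-byte value plus
    a 4-byte index); purely illustrative, not the exact Spark accounting.
    """
    budget_bytes = max_block_size_mb * 1024 * 1024
    return max(1, int(budget_bytes / (avg_nnz * bytes_per_nonzero)))

# Denser rows leave room for fewer rows per block under the same budget.
print(approx_rows_per_block(1.0, avg_nnz=2000))  # -> 43
print(approx_rows_per_block(1.0, avg_nnz=20))    # -> 4369
```
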

**f2jBLAS**

Duration (millisecond) | branch 3.0 Impl | blockSizeInMB=0.0625 | blockSizeInMB=0.125 | blockSizeInMB=0.25 | blockSizeInMB=0.5 | blockSizeInMB=1 | blockSizeInMB=2 | blockSizeInMB=4 | blockSizeInMB=8 | blockSizeInMB=16 | blockSizeInMB=32 | blockSizeInMB=64 | blockSizeInMB=128
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
epsilon(100%) | 326481 | 26143 | 25710 | 24726 | 25395 | 25840 | 26846 | 25927 | 27431 | 26190 | 26056 | 26347 | 27204
epsilon3000(67%) | 455247 | 35893 | 34366 | 34985 | 38387 | 38901 | 40426 | 40044 | 39161 | 38767 | 39965 | 39523 | 39108
epsilon4000(50%) | 306390 | 42256 | 41164 | 43748 | 48638 | 50892 | 50986 | 51091 | 51072 | 51289 | 51652 | 53312 | 52146
epsilon5000(40%) | 307619 | 43639 | 42992 | 44743 | 50800 | 51939 | 51871 | 52190 | 53850 | 52607 | 51062 | 52509 | 51570
epsilon10000(20%) | 310070 | 58371 | 55921 | 56317 | 56618 | 53694 | 52131 | 51768 | 51728 | 52233 | 51881 | 51653 | 52440
epsilon20000(10%) | 316565 | 109193 | 95121 | 82764 | 69653 | 60764 | 56066 | 53371 | 52822 | 52872 | 52769 | 52527 | 53508
epsilon200000(1%) | 336181 | 1569721 | 1069355 | 673718 | 375043 | 218230 | 145393 | 110926 | 94327 | 87039 | 83926 | 81890 | 81787

Speedup | branch 3.0 Impl | blockSizeInMB=0.0625 | blockSizeInMB=0.125 | blockSizeInMB=0.25 | blockSizeInMB=0.5 | blockSizeInMB=1 | blockSizeInMB=2 | blockSizeInMB=4 | blockSizeInMB=8 | blockSizeInMB=16 | blockSizeInMB=32 | blockSizeInMB=64 | blockSizeInMB=128
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
epsilon(100%) | 1 | 12.48827602 | 12.69859977 | **13.20395535** | 12.85611341 | 12.63471362 | 12.16125307 | 12.59231689 | 11.90189931 | 12.46586483 | 12.5299739 | 12.39158158 | 12.00121306
epsilon3000(67%) | 1 | 12.68344803 | **13.2470174** | 13.01263399 | 11.85940553 | 11.70270687 | 11.26124276 | 11.36866946 | 11.62500958 | 11.74315784 | 11.39114225 | 11.51853351 | 11.64076404
epsilon4000(50%) | 1 | 7.250804619 | **7.443154212** | 7.003520161 | 6.299395534 | 6.020396133 | 6.00929667 | 5.996946625 | 5.999177632 | 5.973795551 | 5.931812902 | 5.747111345 | 5.875618456
epsilon5000(40%) | 1 | 7.049176196 | **7.155261444** | 6.875243055 | 6.055492126 | 5.92269778 | 5.930462108 | 5.894213451 | 5.712516249 | 5.847491779 | 6.024421292 | 5.858405226 | 5.965076595
epsilon10000(20%) | 1 | 5.312055644 | 5.544786395 | 5.505797539 | 5.4765269 | 5.774760681 | 5.947900481 | 5.98960748 | 5.994239097 | 5.93628549 | 5.976561747 | **6.002942714** | 5.912852784
epsilon20000(10%) | 1 | 2.899132728 | 3.328024306 | 3.824911797 | 4.544886796 | 5.209745902 | 5.64629187 | 5.931404695 | 5.993052137 | 5.987384627 | 5.999071425 | **6.026710073** | 5.916218136
epsilon200000(1%) | 1 | 0.214166084 | 0.314377358 | 0.498993644 | 0.896379882 | 1.540489392 | 2.312222734 | 3.03067811 | 3.563995463 | 3.862417997 | 4.005683578 | 4.105275369 | **4.110445425**

**OpenBLAS**

Duration (millisecond) | branch 3.0 Impl | blockSizeInMB=0.0625 | blockSizeInMB=0.125 | blockSizeInMB=0.25 | blockSizeInMB=0.5 | blockSizeInMB=1 | blockSizeInMB=2 | blockSizeInMB=4 | blockSizeInMB=8 | blockSizeInMB=16 | blockSizeInMB=32 | blockSizeInMB=64 | blockSizeInMB=128
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
epsilon(100%) | 299119 | 26047 | 25049 | 25239 | 28001 | 35138 | 36438 | 36279 | 36114 | 35111 | 35428 | 36295 | 35197
epsilon3000(67%) | 439798 | 33321 | 34423 | 34336 | 38906 | 51756 | 54138 | 54085 | 53412 | 54766 | 54425 | 54221 | 54842
epsilon4000(50%) | 302963 | 42960 | 40678 | 43483 | 48254 | 50888 | 54990 | 52647 | 51947 | 51843 | 52891 | 53410 | 52020
epsilon5000(40%) | 303569 | 44225 | 44961 | 45065 | 51768 | 52776 | 51930 | 53587 | 53104 | 51833 | 52138 | 52574 | 53756
epsilon10000(20%) | 307403 | 58447 | 55993 | 56757 | 56694 | 54038 | 52734 | 52073 | 52051 | 52150 | 51986 | 52407 | 52390
epsilon20000(10%) | 313344 | 107580 | 94679 | 83329 | 70226 | 60996 | 57130 | 55461 | 54641 | 52712 | 52541 | 53101 | 53312
epsilon200000(1%) | 334679 | 1642726 | 1073148 | 654481 | 364974 | 213881 | 140248 | 107579 | 91757 | 85090 | 81940 | 80492 | 80250

Speedup | branch 3.0 Impl | blockSizeInMB=0.0625 | blockSizeInMB=0.125 | blockSizeInMB=0.25 | blockSizeInMB=0.5 | blockSizeInMB=1 | blockSizeInMB=2 | blockSizeInMB=4 | blockSizeInMB=8 | blockSizeInMB=16 | blockSizeInMB=32 | blockSizeInMB=64 | blockSizeInMB=128
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
epsilon(100%) | 1 | 11.48381771 | **11.94135494** | 11.85146004 | 10.68243991 | 8.512692811 | 8.208985125 | 8.244962651 | 8.282632774 | 8.519238985 | 8.443011178 | 8.241328007 | 8.498423161
epsilon3000(67%) | 1 | 13.19882356 | 12.7762833 | **12.80865564** | 11.30411762 | 8.497526857 | 8.123646976 | 8.131607655 | 8.234067251 | 8.030493372 | 8.080808452 | 8.111211523 | 8.01936472
epsilon4000(50%) | 1 | 7.052211359 | **7.44783421** | 6.967389555 | 6.278505409 | 5.953525389 | 5.509419895 | 5.754610899 | 5.832155851 | 5.843855487 | 5.728063376 | 5.672402172 | 5.823971549
epsilon5000(40%) | 1 | **6.86419446** | 6.751829363 | 6.736247642 | 5.864027971 | 5.752027437 | 5.845734643 | 5.664974714 | 5.716499699 | 5.856674319 | 5.822413595 | 5.774127896 | 5.647164968
epsilon10000(20%) | 1 | 5.259517169 | 5.490025539 | 5.416124883 | 5.422143437 | 5.688645028 | 5.829313157 | 5.903308816 | 5.905803923 | 5.894592522 | **5.913188166** | 5.865685882 | 5.867589235
epsilon20000(10%) | 1 | 2.912660346 | 3.309540658 | 3.760323537 | 4.461937174 | 5.137123746 | 5.48475407 | 5.649807973 | 5.734594901 | 5.944452876 | **5.963799699** | 5.900905821 | 5.87755102
epsilon200000(1%) | 1 | 0.203733915 | 0.311866583 | 0.511365494 | 0.916994087 | 1.564790701 | 2.38633706 | 3.111006795 | 3.647449241 | 3.933235398 | 4.084439834 | 4.157916315 | **4.170454829**

### Does this PR introduce _any_ user-facing change?
Yes: in master, the param `blockSize` is replaced by `maxBlockSizeInMB`.

### How was this patch tested?
Added test suites and a performance test (results attached in the [ticket](https://issues.apache.org/jira/browse/SPARK-32907)); a sketch of the kind of check involved follows.
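
A minimal sketch (toy data and a loosely chosen tolerance, not the actual suite) of the kind of equivalence check involved: fits with different `maxBlockSizeInMB` settings should land on essentially the same model:

```python
import numpy as np
from pyspark.ml.classification import LinearSVC
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Small synthetic dataset: the label depends on which feature is larger.
rng = np.random.RandomState(0)
rows = [(float(x[0] > x[1]), Vectors.dense([float(x[0]), float(x[1])]))
        for x in rng.rand(200, 2)]
df = spark.createDataFrame(rows, ["label", "features"])

# The block size should only affect performance, not the fitted solution.
base = LinearSVC(maxIter=50, regParam=0.1, maxBlockSizeInMB=0.0).fit(df)
for mb in (0.25, 1.0, 4.0):
    other = LinearSVC(maxIter=50, regParam=0.1, maxBlockSizeInMB=mb).fit(df)
    assert np.allclose(base.coefficients.toArray(),
                       other.coefficients.toArray(), atol=1e-4)
```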

Closes #30009 from zhengruifeng/adaptively_blockify_linear_svc_II.

Lead-authored-by: zhengruifeng <ruifengz@foxmail.com>
Co-authored-by: Weichen Xu <weichen.xu@databricks.com>
Signed-off-by: Weichen Xu <weichen.xu@databricks.com>
2020-11-12 19:14:07 +08:00

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any, List, Optional
from pyspark.ml._typing import JM, M, P, T, ParamMap
import abc
from abc import abstractmethod
from pyspark.ml import Estimator, Model, PredictionModel, Predictor, Transformer
from pyspark.ml.base import _PredictorParams
from pyspark.ml.param.shared import (
HasAggregationDepth,
HasBlockSize,
HasMaxBlockSizeInMB,
HasElasticNetParam,
HasFitIntercept,
HasMaxIter,
HasParallelism,
HasProbabilityCol,
HasRawPredictionCol,
HasRegParam,
HasSeed,
HasSolver,
HasStandardization,
HasStepSize,
HasThreshold,
HasThresholds,
HasTol,
HasWeightCol,
)
from pyspark.ml.regression import _FactorizationMachinesParams
from pyspark.ml.tree import (
_DecisionTreeModel,
_DecisionTreeParams,
_GBTParams,
_HasVarianceImpurity,
_RandomForestParams,
_TreeClassifierParams,
_TreeEnsembleModel,
)
from pyspark.ml.util import HasTrainingSummary, JavaMLReadable, JavaMLWritable
from pyspark.ml.wrapper import JavaPredictionModel, JavaPredictor, JavaWrapper
from pyspark.ml.linalg import Matrix, Vector
from pyspark.ml.param import Param
from pyspark.ml.regression import DecisionTreeRegressionModel
from pyspark.sql.dataframe import DataFrame
class _ClassifierParams(HasRawPredictionCol, _PredictorParams): ...
class Classifier(Predictor, _ClassifierParams, metaclass=abc.ABCMeta):
def setRawPredictionCol(self: P, value: str) -> P: ...
class ClassificationModel(PredictionModel, _ClassifierParams, metaclass=abc.ABCMeta):
def setRawPredictionCol(self: P, value: str) -> P: ...
@property
@abc.abstractmethod
def numClasses(self) -> int: ...
@abstractmethod
def predictRaw(self, value: Vector) -> Vector: ...
class _ProbabilisticClassifierParams(
HasProbabilityCol, HasThresholds, _ClassifierParams
): ...
class ProbabilisticClassifier(
Classifier, _ProbabilisticClassifierParams, metaclass=abc.ABCMeta
):
def setProbabilityCol(self: P, value: str) -> P: ...
def setThresholds(self: P, value: List[float]) -> P: ...
class ProbabilisticClassificationModel(
ClassificationModel, _ProbabilisticClassifierParams, metaclass=abc.ABCMeta
):
def setProbabilityCol(self: M, value: str) -> M: ...
def setThresholds(self: M, value: List[float]) -> M: ...
@abstractmethod
def predictProbability(self, value: Vector) -> Vector: ...
class _JavaClassifier(Classifier, JavaPredictor[JM], metaclass=abc.ABCMeta):
def setRawPredictionCol(self: P, value: str) -> P: ...
class _JavaClassificationModel(ClassificationModel, JavaPredictionModel[T]):
@property
def numClasses(self) -> int: ...
def predictRaw(self, value: Vector) -> Vector: ...
class _JavaProbabilisticClassifier(
ProbabilisticClassifier, _JavaClassifier[JM], metaclass=abc.ABCMeta
): ...
class _JavaProbabilisticClassificationModel(
ProbabilisticClassificationModel, _JavaClassificationModel[T]
):
def predictProbability(self, value: Any): ...
class _ClassificationSummary(JavaWrapper):
@property
def predictions(self) -> DataFrame: ...
@property
def predictionCol(self) -> str: ...
@property
def labelCol(self) -> str: ...
@property
def weightCol(self) -> str: ...
@property
def labels(self) -> List[str]: ...
@property
def truePositiveRateByLabel(self) -> List[float]: ...
@property
def falsePositiveRateByLabel(self) -> List[float]: ...
@property
def precisionByLabel(self) -> List[float]: ...
@property
def recallByLabel(self) -> List[float]: ...
def fMeasureByLabel(self, beta: float = ...) -> List[float]: ...
@property
def accuracy(self) -> float: ...
@property
def weightedTruePositiveRate(self) -> float: ...
@property
def weightedFalsePositiveRate(self) -> float: ...
@property
def weightedRecall(self) -> float: ...
@property
def weightedPrecision(self) -> float: ...
def weightedFMeasure(self, beta: float = ...) -> float: ...
class _TrainingSummary(JavaWrapper):
@property
def objectiveHistory(self) -> List[float]: ...
@property
def totalIterations(self) -> int: ...
class _BinaryClassificationSummary(_ClassificationSummary):
@property
def scoreCol(self) -> str: ...
@property
def roc(self) -> DataFrame: ...
@property
def areaUnderROC(self) -> float: ...
@property
def pr(self) -> DataFrame: ...
@property
def fMeasureByThreshold(self) -> DataFrame: ...
@property
def precisionByThreshold(self) -> DataFrame: ...
@property
def recallByThreshold(self) -> DataFrame: ...
class _LinearSVCParams(
_ClassifierParams,
HasRegParam,
HasMaxIter,
HasFitIntercept,
HasTol,
HasStandardization,
HasWeightCol,
HasAggregationDepth,
HasThreshold,
HasMaxBlockSizeInMB,
):
threshold: Param[float]
def __init__(self, *args: Any) -> None: ...
class LinearSVC(
_JavaClassifier[LinearSVCModel],
_LinearSVCParams,
JavaMLWritable,
JavaMLReadable[LinearSVC],
):
def __init__(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
maxIter: int = ...,
regParam: float = ...,
tol: float = ...,
rawPredictionCol: str = ...,
fitIntercept: bool = ...,
standardization: bool = ...,
threshold: float = ...,
weightCol: Optional[str] = ...,
aggregationDepth: int = ...,
maxBlockSizeInMB: float = ...
) -> None: ...
def setParams(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
maxIter: int = ...,
regParam: float = ...,
tol: float = ...,
rawPredictionCol: str = ...,
fitIntercept: bool = ...,
standardization: bool = ...,
threshold: float = ...,
weightCol: Optional[str] = ...,
aggregationDepth: int = ...,
maxBlockSizeInMB: float = ...
) -> LinearSVC: ...
def setMaxIter(self, value: int) -> LinearSVC: ...
def setRegParam(self, value: float) -> LinearSVC: ...
def setTol(self, value: float) -> LinearSVC: ...
def setFitIntercept(self, value: bool) -> LinearSVC: ...
def setStandardization(self, value: bool) -> LinearSVC: ...
def setThreshold(self, value: float) -> LinearSVC: ...
def setWeightCol(self, value: str) -> LinearSVC: ...
def setAggregationDepth(self, value: int) -> LinearSVC: ...
def setMaxBlockSizeInMB(self, value: float) -> LinearSVC: ...
class LinearSVCModel(
_JavaClassificationModel[Vector],
_LinearSVCParams,
JavaMLWritable,
JavaMLReadable[LinearSVCModel],
HasTrainingSummary[LinearSVCTrainingSummary],
):
def setThreshold(self, value: float) -> LinearSVCModel: ...
@property
def coefficients(self) -> Vector: ...
@property
def intercept(self) -> float: ...
def summary(self) -> LinearSVCTrainingSummary: ...
def evaluate(self, dataset: DataFrame) -> LinearSVCSummary: ...
class LinearSVCSummary(_BinaryClassificationSummary): ...
class LinearSVCTrainingSummary(LinearSVCSummary, _TrainingSummary): ...
class _LogisticRegressionParams(
_ProbabilisticClassifierParams,
HasRegParam,
HasElasticNetParam,
HasMaxIter,
HasFitIntercept,
HasTol,
HasStandardization,
HasWeightCol,
HasAggregationDepth,
HasThreshold,
HasBlockSize,
):
threshold: Param[float]
family: Param[str]
lowerBoundsOnCoefficients: Param[Matrix]
upperBoundsOnCoefficients: Param[Matrix]
lowerBoundsOnIntercepts: Param[Vector]
upperBoundsOnIntercepts: Param[Vector]
def __init__(self, *args: Any): ...
def setThreshold(self: P, value: float) -> P: ...
def getThreshold(self) -> float: ...
def setThresholds(self: P, value: List[float]) -> P: ...
def getThresholds(self) -> List[float]: ...
def getFamily(self) -> str: ...
def getLowerBoundsOnCoefficients(self) -> Matrix: ...
def getUpperBoundsOnCoefficients(self) -> Matrix: ...
def getLowerBoundsOnIntercepts(self) -> Vector: ...
def getUpperBoundsOnIntercepts(self) -> Vector: ...
class LogisticRegression(
_JavaProbabilisticClassifier[LogisticRegressionModel],
_LogisticRegressionParams,
JavaMLWritable,
JavaMLReadable[LogisticRegression],
):
def __init__(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
maxIter: int = ...,
regParam: float = ...,
elasticNetParam: float = ...,
tol: float = ...,
fitIntercept: bool = ...,
threshold: float = ...,
thresholds: Optional[List[float]] = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
standardization: bool = ...,
weightCol: Optional[str] = ...,
aggregationDepth: int = ...,
family: str = ...,
lowerBoundsOnCoefficients: Optional[Matrix] = ...,
upperBoundsOnCoefficients: Optional[Matrix] = ...,
lowerBoundsOnIntercepts: Optional[Vector] = ...,
upperBoundsOnIntercepts: Optional[Vector] = ...,
blockSize: int = ...
) -> None: ...
def setParams(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
maxIter: int = ...,
regParam: float = ...,
elasticNetParam: float = ...,
tol: float = ...,
fitIntercept: bool = ...,
threshold: float = ...,
thresholds: Optional[List[float]] = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
standardization: bool = ...,
weightCol: Optional[str] = ...,
aggregationDepth: int = ...,
family: str = ...,
lowerBoundsOnCoefficients: Optional[Matrix] = ...,
upperBoundsOnCoefficients: Optional[Matrix] = ...,
lowerBoundsOnIntercepts: Optional[Vector] = ...,
upperBoundsOnIntercepts: Optional[Vector] = ...,
blockSize: int = ...
) -> LogisticRegression: ...
def setFamily(self, value: str) -> LogisticRegression: ...
def setLowerBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ...
def setUpperBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ...
def setLowerBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ...
def setUpperBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ...
def setMaxIter(self, value: int) -> LogisticRegression: ...
def setRegParam(self, value: float) -> LogisticRegression: ...
def setTol(self, value: float) -> LogisticRegression: ...
def setElasticNetParam(self, value: float) -> LogisticRegression: ...
def setFitIntercept(self, value: bool) -> LogisticRegression: ...
def setStandardization(self, value: bool) -> LogisticRegression: ...
def setWeightCol(self, value: str) -> LogisticRegression: ...
def setAggregationDepth(self, value: int) -> LogisticRegression: ...
def setBlockSize(self, value: int) -> LogisticRegression: ...
class LogisticRegressionModel(
_JavaProbabilisticClassificationModel[Vector],
_LogisticRegressionParams,
JavaMLWritable,
JavaMLReadable[LogisticRegressionModel],
HasTrainingSummary[LogisticRegressionTrainingSummary],
):
@property
def coefficients(self) -> Vector: ...
@property
def intercept(self) -> float: ...
@property
def coefficientMatrix(self) -> Matrix: ...
@property
def interceptVector(self) -> Vector: ...
@property
def summary(self) -> LogisticRegressionTrainingSummary: ...
def evaluate(self, dataset: DataFrame) -> LogisticRegressionSummary: ...
class LogisticRegressionSummary(_ClassificationSummary):
@property
def probabilityCol(self) -> str: ...
@property
def featuresCol(self) -> str: ...
class LogisticRegressionTrainingSummary(
LogisticRegressionSummary, _TrainingSummary
): ...
class BinaryLogisticRegressionSummary(
_BinaryClassificationSummary, LogisticRegressionSummary
): ...
class BinaryLogisticRegressionTrainingSummary(
BinaryLogisticRegressionSummary, LogisticRegressionTrainingSummary
): ...
class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams):
def __init__(self, *args: Any): ...
class DecisionTreeClassifier(
_JavaProbabilisticClassifier[DecisionTreeClassificationModel],
_DecisionTreeClassifierParams,
JavaMLWritable,
JavaMLReadable[DecisionTreeClassifier],
):
def __init__(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
maxDepth: int = ...,
maxBins: int = ...,
minInstancesPerNode: int = ...,
minInfoGain: float = ...,
maxMemoryInMB: int = ...,
cacheNodeIds: bool = ...,
checkpointInterval: int = ...,
impurity: str = ...,
seed: Optional[int] = ...,
weightCol: Optional[str] = ...,
leafCol: str = ...,
minWeightFractionPerNode: float = ...
) -> None: ...
def setParams(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
maxDepth: int = ...,
maxBins: int = ...,
minInstancesPerNode: int = ...,
minInfoGain: float = ...,
maxMemoryInMB: int = ...,
cacheNodeIds: bool = ...,
checkpointInterval: int = ...,
impurity: str = ...,
seed: Optional[int] = ...,
weightCol: Optional[str] = ...,
leafCol: str = ...,
minWeightFractionPerNode: float = ...
) -> DecisionTreeClassifier: ...
def setMaxDepth(self, value: int) -> DecisionTreeClassifier: ...
def setMaxBins(self, value: int) -> DecisionTreeClassifier: ...
def setMinInstancesPerNode(self, value: int) -> DecisionTreeClassifier: ...
def setMinWeightFractionPerNode(self, value: float) -> DecisionTreeClassifier: ...
def setMinInfoGain(self, value: float) -> DecisionTreeClassifier: ...
def setMaxMemoryInMB(self, value: int) -> DecisionTreeClassifier: ...
def setCacheNodeIds(self, value: bool) -> DecisionTreeClassifier: ...
def setImpurity(self, value: str) -> DecisionTreeClassifier: ...
def setCheckpointInterval(self, value: int) -> DecisionTreeClassifier: ...
def setSeed(self, value: int) -> DecisionTreeClassifier: ...
def setWeightCol(self, value: str) -> DecisionTreeClassifier: ...
class DecisionTreeClassificationModel(
_DecisionTreeModel,
_JavaProbabilisticClassificationModel[Vector],
_DecisionTreeClassifierParams,
JavaMLWritable,
JavaMLReadable[DecisionTreeClassificationModel],
):
@property
def featureImportances(self) -> Vector: ...
class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams):
def __init__(self, *args: Any): ...
class RandomForestClassifier(
_JavaProbabilisticClassifier[RandomForestClassificationModel],
_RandomForestClassifierParams,
JavaMLWritable,
JavaMLReadable[RandomForestClassifier],
):
def __init__(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
maxDepth: int = ...,
maxBins: int = ...,
minInstancesPerNode: int = ...,
minInfoGain: float = ...,
maxMemoryInMB: int = ...,
cacheNodeIds: bool = ...,
checkpointInterval: int = ...,
impurity: str = ...,
numTrees: int = ...,
featureSubsetStrategy: str = ...,
seed: Optional[int] = ...,
subsamplingRate: float = ...,
leafCol: str = ...,
minWeightFractionPerNode: float = ...,
weightCol: Optional[str] = ...,
bootstrap: Optional[bool] = ...
) -> None: ...
def setParams(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
maxDepth: int = ...,
maxBins: int = ...,
minInstancesPerNode: int = ...,
minInfoGain: float = ...,
maxMemoryInMB: int = ...,
cacheNodeIds: bool = ...,
checkpointInterval: int = ...,
seed: Optional[int] = ...,
impurity: str = ...,
numTrees: int = ...,
featureSubsetStrategy: str = ...,
subsamplingRate: float = ...,
leafCol: str = ...,
minWeightFractionPerNode: float = ...,
weightCol: Optional[str] = ...,
bootstrap: Optional[bool] = ...
) -> RandomForestClassifier: ...
def setMaxDepth(self, value: int) -> RandomForestClassifier: ...
def setMaxBins(self, value: int) -> RandomForestClassifier: ...
def setMinInstancesPerNode(self, value: int) -> RandomForestClassifier: ...
def setMinInfoGain(self, value: float) -> RandomForestClassifier: ...
def setMaxMemoryInMB(self, value: int) -> RandomForestClassifier: ...
def setCacheNodeIds(self, value: bool) -> RandomForestClassifier: ...
def setImpurity(self, value: str) -> RandomForestClassifier: ...
def setNumTrees(self, value: int) -> RandomForestClassifier: ...
def setBootstrap(self, value: bool) -> RandomForestClassifier: ...
def setSubsamplingRate(self, value: float) -> RandomForestClassifier: ...
def setFeatureSubsetStrategy(self, value: str) -> RandomForestClassifier: ...
def setSeed(self, value: int) -> RandomForestClassifier: ...
def setCheckpointInterval(self, value: int) -> RandomForestClassifier: ...
def setWeightCol(self, value: str) -> RandomForestClassifier: ...
def setMinWeightFractionPerNode(self, value: float) -> RandomForestClassifier: ...
class RandomForestClassificationModel(
_TreeEnsembleModel,
_JavaProbabilisticClassificationModel[Vector],
_RandomForestClassifierParams,
JavaMLWritable,
JavaMLReadable[RandomForestClassificationModel],
HasTrainingSummary[RandomForestClassificationTrainingSummary],
):
@property
def featureImportances(self) -> Vector: ...
@property
def trees(self) -> List[DecisionTreeClassificationModel]: ...
def summary(self) -> RandomForestClassificationTrainingSummary: ...
def evaluate(self, dataset: DataFrame) -> RandomForestClassificationSummary: ...
class RandomForestClassificationSummary(_ClassificationSummary): ...
class RandomForestClassificationTrainingSummary(
RandomForestClassificationSummary, _TrainingSummary
): ...
class BinaryRandomForestClassificationSummary(_BinaryClassificationSummary): ...
class BinaryRandomForestClassificationTrainingSummary(
BinaryRandomForestClassificationSummary, RandomForestClassificationTrainingSummary
): ...
class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity):
supportedLossTypes: List[str]
lossType: Param[str]
def __init__(self, *args: Any): ...
def getLossType(self) -> str: ...
class GBTClassifier(
_JavaProbabilisticClassifier[GBTClassificationModel],
_GBTClassifierParams,
JavaMLWritable,
JavaMLReadable[GBTClassifier],
):
def __init__(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
maxDepth: int = ...,
maxBins: int = ...,
minInstancesPerNode: int = ...,
minInfoGain: float = ...,
maxMemoryInMB: int = ...,
cacheNodeIds: bool = ...,
checkpointInterval: int = ...,
lossType: str = ...,
maxIter: int = ...,
stepSize: float = ...,
seed: Optional[int] = ...,
subsamplingRate: float = ...,
featureSubsetStrategy: str = ...,
validationTol: float = ...,
validationIndicatorCol: Optional[str] = ...,
leafCol: str = ...,
minWeightFractionPerNode: float = ...,
weightCol: Optional[str] = ...
) -> None: ...
def setParams(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
maxDepth: int = ...,
maxBins: int = ...,
minInstancesPerNode: int = ...,
minInfoGain: float = ...,
maxMemoryInMB: int = ...,
cacheNodeIds: bool = ...,
checkpointInterval: int = ...,
lossType: str = ...,
maxIter: int = ...,
stepSize: float = ...,
seed: Optional[int] = ...,
subsamplingRate: float = ...,
featureSubsetStrategy: str = ...,
validationTol: float = ...,
validationIndicatorCol: Optional[str] = ...,
leafCol: str = ...,
minWeightFractionPerNode: float = ...,
weightCol: Optional[str] = ...
) -> GBTClassifier: ...
def setMaxDepth(self, value: int) -> GBTClassifier: ...
def setMaxBins(self, value: int) -> GBTClassifier: ...
def setMinInstancesPerNode(self, value: int) -> GBTClassifier: ...
def setMinInfoGain(self, value: float) -> GBTClassifier: ...
def setMaxMemoryInMB(self, value: int) -> GBTClassifier: ...
def setCacheNodeIds(self, value: bool) -> GBTClassifier: ...
def setImpurity(self, value: str) -> GBTClassifier: ...
def setLossType(self, value: str) -> GBTClassifier: ...
def setSubsamplingRate(self, value: float) -> GBTClassifier: ...
def setFeatureSubsetStrategy(self, value: str) -> GBTClassifier: ...
def setValidationIndicatorCol(self, value: str) -> GBTClassifier: ...
def setMaxIter(self, value: int) -> GBTClassifier: ...
def setCheckpointInterval(self, value: int) -> GBTClassifier: ...
def setSeed(self, value: int) -> GBTClassifier: ...
def setStepSize(self, value: float) -> GBTClassifier: ...
def setWeightCol(self, value: str) -> GBTClassifier: ...
def setMinWeightFractionPerNode(self, value: float) -> GBTClassifier: ...
class GBTClassificationModel(
_TreeEnsembleModel,
_JavaProbabilisticClassificationModel[Vector],
_GBTClassifierParams,
JavaMLWritable,
JavaMLReadable[GBTClassificationModel],
):
@property
def featureImportances(self) -> Vector: ...
@property
def trees(self) -> List[DecisionTreeRegressionModel]: ...
def evaluateEachIteration(self, dataset: DataFrame) -> List[float]: ...
class _NaiveBayesParams(_PredictorParams, HasWeightCol):
smoothing: Param[float]
modelType: Param[str]
def __init__(self, *args: Any): ...
def getSmoothing(self) -> float: ...
def getModelType(self) -> str: ...
class NaiveBayes(
_JavaProbabilisticClassifier[NaiveBayesModel],
_NaiveBayesParams,
HasThresholds,
HasWeightCol,
JavaMLWritable,
JavaMLReadable[NaiveBayes],
):
def __init__(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
smoothing: float = ...,
modelType: str = ...,
thresholds: Optional[List[float]] = ...,
weightCol: Optional[str] = ...
) -> None: ...
def setParams(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
smoothing: float = ...,
modelType: str = ...,
thresholds: Optional[List[float]] = ...,
weightCol: Optional[str] = ...
) -> NaiveBayes: ...
def setSmoothing(self, value: float) -> NaiveBayes: ...
def setModelType(self, value: str) -> NaiveBayes: ...
def setWeightCol(self, value: str) -> NaiveBayes: ...
class NaiveBayesModel(
_JavaProbabilisticClassificationModel[Vector],
_NaiveBayesParams,
JavaMLWritable,
JavaMLReadable[NaiveBayesModel],
):
@property
def pi(self) -> Vector: ...
@property
def theta(self) -> Matrix: ...
@property
def sigma(self) -> Matrix: ...
class _MultilayerPerceptronParams(
_ProbabilisticClassifierParams,
HasSeed,
HasMaxIter,
HasTol,
HasStepSize,
HasSolver,
HasBlockSize,
):
layers: Param[List[int]]
solver: Param[str]
initialWeights: Param[Vector]
def __init__(self, *args: Any): ...
def getLayers(self) -> List[int]: ...
def getInitialWeights(self) -> Vector: ...
class MultilayerPerceptronClassifier(
_JavaProbabilisticClassifier[MultilayerPerceptronClassificationModel],
_MultilayerPerceptronParams,
JavaMLWritable,
JavaMLReadable[MultilayerPerceptronClassifier],
):
def __init__(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
maxIter: int = ...,
tol: float = ...,
seed: Optional[int] = ...,
layers: Optional[List[int]] = ...,
blockSize: int = ...,
stepSize: float = ...,
solver: str = ...,
initialWeights: Optional[Vector] = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...
) -> None: ...
def setParams(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
maxIter: int = ...,
tol: float = ...,
seed: Optional[int] = ...,
layers: Optional[List[int]] = ...,
blockSize: int = ...,
stepSize: float = ...,
solver: str = ...,
initialWeights: Optional[Vector] = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...
) -> MultilayerPerceptronClassifier: ...
def setLayers(self, value: List[int]) -> MultilayerPerceptronClassifier: ...
def setBlockSize(self, value: int) -> MultilayerPerceptronClassifier: ...
def setInitialWeights(self, value: Vector) -> MultilayerPerceptronClassifier: ...
def setMaxIter(self, value: int) -> MultilayerPerceptronClassifier: ...
def setSeed(self, value: int) -> MultilayerPerceptronClassifier: ...
def setTol(self, value: float) -> MultilayerPerceptronClassifier: ...
def setStepSize(self, value: float) -> MultilayerPerceptronClassifier: ...
def setSolver(self, value: str) -> MultilayerPerceptronClassifier: ...
class MultilayerPerceptronClassificationModel(
_JavaProbabilisticClassificationModel[Vector],
_MultilayerPerceptronParams,
JavaMLWritable,
JavaMLReadable[MultilayerPerceptronClassificationModel],
HasTrainingSummary[MultilayerPerceptronClassificationTrainingSummary],
):
@property
def weights(self) -> Vector: ...
def summary(self) -> MultilayerPerceptronClassificationTrainingSummary: ...
def evaluate(
self, dataset: DataFrame
) -> MultilayerPerceptronClassificationSummary: ...
class MultilayerPerceptronClassificationSummary(_ClassificationSummary): ...
class MultilayerPerceptronClassificationTrainingSummary(
MultilayerPerceptronClassificationSummary, _TrainingSummary
): ...
class _OneVsRestParams(_ClassifierParams, HasWeightCol):
classifier: Param[Estimator]
def getClassifier(self) -> Estimator[M]: ...
class OneVsRest(
Estimator[OneVsRestModel],
_OneVsRestParams,
HasParallelism,
JavaMLReadable[OneVsRest],
JavaMLWritable,
):
def __init__(
self,
*,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
rawPredictionCol: str = ...,
classifier: Optional[Estimator[M]] = ...,
weightCol: Optional[str] = ...,
parallelism: int = ...
) -> None: ...
def setParams(
self,
*,
featuresCol: Optional[str] = ...,
labelCol: Optional[str] = ...,
predictionCol: Optional[str] = ...,
rawPredictionCol: str = ...,
classifier: Optional[Estimator[M]] = ...,
weightCol: Optional[str] = ...,
parallelism: int = ...
) -> OneVsRest: ...
def setClassifier(self, value: Estimator[M]) -> OneVsRest: ...
def setLabelCol(self, value: str) -> OneVsRest: ...
def setFeaturesCol(self, value: str) -> OneVsRest: ...
def setPredictionCol(self, value: str) -> OneVsRest: ...
def setRawPredictionCol(self, value: str) -> OneVsRest: ...
def setWeightCol(self, value: str) -> OneVsRest: ...
def setParallelism(self, value: int) -> OneVsRest: ...
def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRest: ...
class OneVsRestModel(
Model, _OneVsRestParams, JavaMLReadable[OneVsRestModel], JavaMLWritable
):
models: List[Transformer]
def __init__(self, models: List[Transformer]) -> None: ...
def setFeaturesCol(self, value: str) -> OneVsRestModel: ...
def setPredictionCol(self, value: str) -> OneVsRestModel: ...
def setRawPredictionCol(self, value: str) -> OneVsRestModel: ...
def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRestModel: ...
class FMClassifier(
_JavaProbabilisticClassifier[FMClassificationModel],
_FactorizationMachinesParams,
JavaMLWritable,
JavaMLReadable[FMClassifier],
):
factorSize: Param[int]
fitLinear: Param[bool]
miniBatchFraction: Param[float]
initStd: Param[float]
solver: Param[str]
def __init__(
self,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
factorSize: int = ...,
fitIntercept: bool = ...,
fitLinear: bool = ...,
regParam: float = ...,
miniBatchFraction: float = ...,
initStd: float = ...,
maxIter: int = ...,
stepSize: float = ...,
tol: float = ...,
solver: str = ...,
thresholds: Optional[Any] = ...,
seed: Optional[Any] = ...,
) -> None: ...
def setParams(
self,
featuresCol: str = ...,
labelCol: str = ...,
predictionCol: str = ...,
probabilityCol: str = ...,
rawPredictionCol: str = ...,
factorSize: int = ...,
fitIntercept: bool = ...,
fitLinear: bool = ...,
regParam: float = ...,
miniBatchFraction: float = ...,
initStd: float = ...,
maxIter: int = ...,
stepSize: float = ...,
tol: float = ...,
solver: str = ...,
thresholds: Optional[Any] = ...,
seed: Optional[Any] = ...,
): ...
def setFactorSize(self, value: int) -> FMClassifier: ...
def setFitLinear(self, value: bool) -> FMClassifier: ...
def setMiniBatchFraction(self, value: float) -> FMClassifier: ...
def setInitStd(self, value: float) -> FMClassifier: ...
def setMaxIter(self, value: int) -> FMClassifier: ...
def setStepSize(self, value: float) -> FMClassifier: ...
def setTol(self, value: float) -> FMClassifier: ...
def setSolver(self, value: str) -> FMClassifier: ...
def setSeed(self, value: int) -> FMClassifier: ...
def setFitIntercept(self, value: bool) -> FMClassifier: ...
def setRegParam(self, value: float) -> FMClassifier: ...
class FMClassificationModel(
_JavaProbabilisticClassificationModel[Vector],
_FactorizationMachinesParams,
JavaMLWritable,
JavaMLReadable[FMClassificationModel],
):
@property
def intercept(self) -> float: ...
@property
def linear(self) -> Vector: ...
@property
def factors(self) -> Matrix: ...
def summary(self) -> FMClassificationTrainingSummary: ...
def evaluate(self, dataset: DataFrame) -> FMClassificationSummary: ...
class FMClassificationSummary(_BinaryClassificationSummary): ...
class FMClassificationTrainingSummary(FMClassificationSummary, _TrainingSummary): ...