#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
|
|
|
|
|
|
|
|
from typing import overload
|
|
|
|
from typing import Any, Dict, Generic, List, Optional, Tuple
|
|
|
|
from pyspark.ml._typing import JM, P
|
|
|
|
|
|
|
|
from pyspark.ml.param.shared import (
|
|
|
|
HasFeaturesCol,
|
|
|
|
HasHandleInvalid,
|
|
|
|
HasInputCol,
|
|
|
|
HasInputCols,
|
|
|
|
HasLabelCol,
|
|
|
|
HasMaxIter,
|
|
|
|
HasNumFeatures,
|
|
|
|
HasOutputCol,
|
|
|
|
HasOutputCols,
|
|
|
|
HasRelativeError,
|
|
|
|
HasSeed,
|
|
|
|
HasStepSize,
|
|
|
|
HasThreshold,
|
|
|
|
HasThresholds,
|
|
|
|
)
|
|
|
|
from pyspark.ml.util import JavaMLReadable, JavaMLWritable
|
|
|
|
from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaTransformer
|
|
|
|
from pyspark.ml.linalg import Vector, DenseVector, DenseMatrix
|
|
|
|
from pyspark.sql.dataframe import DataFrame
|
|
|
|
from pyspark.ml.param import Param
|
|
|
|
|
|
|
|
class Binarizer(
    JavaTransformer, HasThreshold, HasThresholds, HasInputCol, HasOutputCol,
    HasInputCols, HasOutputCols, JavaMLReadable[Binarizer], JavaMLWritable,
):
    # Typing declaration for Binarizer (all bodies are elided with `...`).
    # It exposes a scalar `threshold` param and a list-valued `thresholds` param.
    threshold: Param[float]
    thresholds: Param[List[float]]

    # Constructor, single-column form: threshold / inputCol / outputCol.
    @overload
    def __init__(
        self, *, threshold: float = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    # Constructor, multi-column form: thresholds / inputCols / outputCols.
    @overload
    def __init__(
        self, *, thresholds: Optional[List[float]] = ...,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
    ) -> None: ...

    # setParams mirrors the two constructor forms and returns the Binarizer.
    @overload
    def setParams(
        self, *, threshold: float = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> Binarizer: ...
    @overload
    def setParams(
        self, *, thresholds: Optional[List[float]] = ...,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
    ) -> Binarizer: ...

    # Fluent setters: each one is declared to return a Binarizer.
    def setThreshold(self, value: float) -> Binarizer: ...
    def setThresholds(self, value: List[float]) -> Binarizer: ...
    def setInputCol(self, value: str) -> Binarizer: ...
    def setInputCols(self, value: List[str]) -> Binarizer: ...
    def setOutputCol(self, value: str) -> Binarizer: ...
    def setOutputCols(self, value: List[str]) -> Binarizer: ...
|
|
|
|
|
|
|
|
class _LSHParams(HasInputCol, HasOutputCol):
    # Param mixin for the LSH classes in this file: adds `numHashTables`
    # on top of the input/output column params.
    numHashTables: Param[int]

    # Return type spelled out explicitly, matching the other annotated
    # `__init__` declarations in this file.
    def __init__(self, *args: Any) -> None: ...
    def getNumHashTables(self) -> int: ...
|
|
|
|
|
|
|
|
class _LSH(
    Generic[JM],
    JavaEstimator[JM],
    _LSHParams,
    JavaMLReadable,
    JavaMLWritable,
):
    # Generic base for the LSH estimators; `JM` is forwarded to JavaEstimator.
    # The setters are annotated with `P` on both `self` and the return value
    # (`P` is imported from pyspark.ml._typing; presumably a self-type
    # TypeVar -- confirm there).
    def setNumHashTables(self: P, value: int) -> P: ...
    def setInputCol(self: P, value: str) -> P: ...
    def setOutputCol(self: P, value: str) -> P: ...
|
2020-09-24 01:15:36 -04:00
|
|
|
|
|
|
|
class _LSHModel(JavaModel, _LSHParams):
    # Base declaration for fitted LSH models: column setters plus two
    # DataFrame-returning query methods.
    def setInputCol(self: P, value: str) -> P: ...
    def setOutputCol(self: P, value: str) -> P: ...

    # Takes a dataset, a key vector and a neighbour count; `distCol` has a default.
    def approxNearestNeighbors(
        self, dataset: DataFrame, key: Vector,
        numNearestNeighbors: int, distCol: str = ...,
    ) -> DataFrame: ...

    # Takes two datasets and a float threshold; `distCol` has a default.
    def approxSimilarityJoin(
        self, datasetA: DataFrame, datasetB: DataFrame,
        threshold: float, distCol: str = ...,
    ) -> DataFrame: ...
|
|
|
|
|
|
|
|
class _BucketedRandomProjectionLSHParams:
    # Standalone param mixin (no bases): declares `bucketLength` and its getter.
    bucketLength: Param[float]

    def getBucketLength(self) -> float: ...
|
|
|
|
|
|
|
|
class BucketedRandomProjectionLSH(
    _LSH[BucketedRandomProjectionLSHModel],
    # `_LSH` already brings in `_LSHParams`; the bucket-length mixin is what
    # declares `bucketLength` / `getBucketLength` for this estimator, the same
    # way BucketedRandomProjectionLSHModel gets them.
    _BucketedRandomProjectionLSHParams,
    HasSeed,
    JavaMLReadable[BucketedRandomProjectionLSH],
    JavaMLWritable,
):
    # Typing declaration for the bucketed-random-projection LSH estimator.
    # Constructor and setParams take the same keyword-only arguments.
    def __init__(
        self, *,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        seed: Optional[int] = ..., numHashTables: int = ...,
        bucketLength: Optional[float] = ...,
    ) -> None: ...
    def setParams(
        self, *,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        seed: Optional[int] = ..., numHashTables: int = ...,
        bucketLength: Optional[float] = ...,
    ) -> BucketedRandomProjectionLSH: ...

    # Fluent setters specific to this estimator.
    def setBucketLength(self, value: float) -> BucketedRandomProjectionLSH: ...
    def setSeed(self, value: int) -> BucketedRandomProjectionLSH: ...
|
|
|
|
|
|
|
|
class BucketedRandomProjectionLSHModel(
    _LSHModel, _BucketedRandomProjectionLSHParams,
    JavaMLReadable[BucketedRandomProjectionLSHModel], JavaMLWritable,
):
    # Model type parameterizing BucketedRandomProjectionLSH; it declares no
    # members of its own, everything comes from its bases.
    ...
|
|
|
|
|
|
|
|
class Bucketizer(
    JavaTransformer, HasInputCol, HasOutputCol, HasInputCols, HasOutputCols,
    HasHandleInvalid, JavaMLReadable[Bucketizer], JavaMLWritable,
):
    # Typing declaration for Bucketizer. `splits` is a flat list of floats,
    # `splitsArray` is a list of such lists.
    splits: Param[List[float]]
    handleInvalid: Param[str]
    splitsArray: Param[List[List[float]]]

    # Constructor, single-column form: splits / inputCol / outputCol.
    @overload
    def __init__(
        self, *, splits: Optional[List[float]] = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        handleInvalid: str = ...,
    ) -> None: ...
    # Constructor, multi-column form: splitsArray / inputCols / outputCols.
    @overload
    def __init__(
        self, *, handleInvalid: str = ...,
        splitsArray: Optional[List[List[float]]] = ...,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
    ) -> None: ...

    # setParams mirrors the two constructor forms and returns the Bucketizer.
    @overload
    def setParams(
        self, *, splits: Optional[List[float]] = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        handleInvalid: str = ...,
    ) -> Bucketizer: ...
    @overload
    def setParams(
        self, *, handleInvalid: str = ...,
        splitsArray: Optional[List[List[float]]] = ...,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
    ) -> Bucketizer: ...

    # Accessors for the two split params.
    def setSplits(self, value: List[float]) -> Bucketizer: ...
    def getSplits(self) -> List[float]: ...
    def setSplitsArray(self, value: List[List[float]]) -> Bucketizer: ...
    def getSplitsArray(self) -> List[List[float]]: ...

    # Fluent setters for the shared column / invalid-handling params.
    def setInputCol(self, value: str) -> Bucketizer: ...
    def setInputCols(self, value: List[str]) -> Bucketizer: ...
    def setOutputCol(self, value: str) -> Bucketizer: ...
    def setOutputCols(self, value: List[str]) -> Bucketizer: ...
    def setHandleInvalid(self, value: str) -> Bucketizer: ...
|
|
|
|
|
|
|
|
class _CountVectorizerParams(
    JavaParams,
    HasInputCol,
    HasOutputCol,
):
    # Param mixin for the count-vectorizer classes: three float params,
    # an int vocabulary size and a boolean flag, each with a getter.
    minTF: Param[float]
    minDF: Param[float]
    maxDF: Param[float]
    vocabSize: Param[int]
    binary: Param[bool]

    def __init__(self, *args: Any) -> None: ...

    def getMinTF(self) -> float: ...
    def getMinDF(self) -> float: ...
    def getMaxDF(self) -> float: ...
    def getVocabSize(self) -> int: ...
    def getBinary(self) -> bool: ...
|
|
|
|
|
|
|
|
class CountVectorizer(
    JavaEstimator[CountVectorizerModel], _CountVectorizerParams,
    JavaMLReadable[CountVectorizer], JavaMLWritable,
):
    # Typing declaration for the CountVectorizer estimator, parameterized
    # by CountVectorizerModel. Constructor and setParams take the same
    # keyword-only arguments.
    def __init__(
        self, *,
        minTF: float = ..., minDF: float = ..., maxDF: float = ...,
        vocabSize: int = ..., binary: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self, *,
        minTF: float = ..., minDF: float = ..., maxDF: float = ...,
        vocabSize: int = ..., binary: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> CountVectorizer: ...

    # Fluent setters, one per param.
    def setMinTF(self, value: float) -> CountVectorizer: ...
    def setMinDF(self, value: float) -> CountVectorizer: ...
    def setMaxDF(self, value: float) -> CountVectorizer: ...
    def setVocabSize(self, value: int) -> CountVectorizer: ...
    def setBinary(self, value: bool) -> CountVectorizer: ...
    def setInputCol(self, value: str) -> CountVectorizer: ...
    def setOutputCol(self, value: str) -> CountVectorizer: ...
|
|
|
|
|
|
|
|
class CountVectorizerModel(
    JavaModel,
    # The params mixin declares the Param attributes and getters
    # (getMinTF, getBinary, ...) that pair with the setters below, as the
    # other *Model classes in this file do with their own mixins.
    _CountVectorizerParams,
    JavaMLReadable[CountVectorizerModel],
    JavaMLWritable,
):
    # Typing declaration for the model type that parameterizes CountVectorizer.

    # Fluent setters exposed on the model.
    def setInputCol(self, value: str) -> CountVectorizerModel: ...
    def setOutputCol(self, value: str) -> CountVectorizerModel: ...
    def setMinTF(self, value: float) -> CountVectorizerModel: ...
    def setBinary(self, value: bool) -> CountVectorizerModel: ...

    # Alternate constructor: `vocabulary` and `inputCol` are required,
    # the remaining arguments have defaults.
    @classmethod
    def from_vocabulary(
        cls,
        vocabulary: List[str],
        inputCol: str,
        outputCol: Optional[str] = ...,
        minTF: Optional[float] = ...,
        binary: Optional[bool] = ...,
    ) -> CountVectorizerModel: ...

    # Read-only list of vocabulary terms.
    @property
    def vocabulary(self) -> List[str]: ...
|
|
|
|
|
|
|
|
class DCT(
    JavaTransformer,
    HasInputCol,
    HasOutputCol,
    JavaMLReadable[DCT],
    JavaMLWritable,
):
    # Typing declaration for the DCT transformer; its only own param is
    # the boolean `inverse`.
    inverse: Param[bool]

    def __init__(
        self, *, inverse: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self, *, inverse: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> DCT: ...

    # Accessors for `inverse`, then the column setters.
    def setInverse(self, value: bool) -> DCT: ...
    def getInverse(self) -> bool: ...
    def setInputCol(self, value: str) -> DCT: ...
    def setOutputCol(self, value: str) -> DCT: ...
|
|
|
|
|
|
|
|
class ElementwiseProduct(
    JavaTransformer, HasInputCol, HasOutputCol,
    JavaMLReadable[ElementwiseProduct], JavaMLWritable,
):
    # Typing declaration for ElementwiseProduct; its only own param is the
    # Vector-valued `scalingVec`.
    scalingVec: Param[Vector]

    def __init__(
        self, *, scalingVec: Optional[Vector] = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self, *, scalingVec: Optional[Vector] = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> ElementwiseProduct: ...

    # Accessors for `scalingVec`, then the column setters.
    def setScalingVec(self, value: Vector) -> ElementwiseProduct: ...
    def getScalingVec(self) -> Vector: ...
    def setInputCol(self, value: str) -> ElementwiseProduct: ...
    def setOutputCol(self, value: str) -> ElementwiseProduct: ...
|
|
|
|
|
|
|
|
class FeatureHasher(
    JavaTransformer, HasInputCols, HasOutputCol, HasNumFeatures,
    JavaMLReadable[FeatureHasher], JavaMLWritable,
):
    # Typing declaration for FeatureHasher: several input columns, one
    # output column, plus a list-of-names `categoricalCols` param.
    categoricalCols: Param[List[str]]

    def __init__(
        self, *, numFeatures: int = ...,
        inputCols: Optional[List[str]] = ..., outputCol: Optional[str] = ...,
        categoricalCols: Optional[List[str]] = ...,
    ) -> None: ...
    def setParams(
        self, *, numFeatures: int = ...,
        inputCols: Optional[List[str]] = ..., outputCol: Optional[str] = ...,
        categoricalCols: Optional[List[str]] = ...,
    ) -> FeatureHasher: ...

    # Accessors for `categoricalCols`, then the shared-param setters.
    def setCategoricalCols(self, value: List[str]) -> FeatureHasher: ...
    def getCategoricalCols(self) -> List[str]: ...
    def setInputCols(self, value: List[str]) -> FeatureHasher: ...
    def setOutputCol(self, value: str) -> FeatureHasher: ...
    def setNumFeatures(self, value: int) -> FeatureHasher: ...
|
|
|
|
|
|
|
|
class HashingTF(
    JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures,
    JavaMLReadable[HashingTF], JavaMLWritable,
):
    # Typing declaration for HashingTF; its only own param is the boolean
    # `binary`.
    binary: Param[bool]

    def __init__(
        self, *, numFeatures: int = ..., binary: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self, *, numFeatures: int = ..., binary: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> HashingTF: ...

    # Accessors for `binary`, then the shared-param setters.
    def setBinary(self, value: bool) -> HashingTF: ...
    def getBinary(self) -> bool: ...
    def setInputCol(self, value: str) -> HashingTF: ...
    def setOutputCol(self, value: str) -> HashingTF: ...
    def setNumFeatures(self, value: int) -> HashingTF: ...

    # Accepts a term of any type and returns an int.
    def indexOf(self, term: Any) -> int: ...
|
|
|
|
|
|
|
|
class _IDFParams(HasInputCol, HasOutputCol):
    # Param mixin for IDF / IDFModel: adds the integer `minDocFreq` param.
    minDocFreq: Param[int]

    # Return type spelled out explicitly, matching the other annotated
    # `__init__` declarations in this file.
    def __init__(self, *args: Any) -> None: ...
    def getMinDocFreq(self) -> int: ...
|
|
|
|
|
|
|
|
class IDF(
    JavaEstimator[IDFModel],
    _IDFParams,
    JavaMLReadable[IDF],
    JavaMLWritable,
):
    # Typing declaration for the IDF estimator, parameterized by IDFModel.
    def __init__(
        self, *, minDocFreq: int = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self, *, minDocFreq: int = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> IDF: ...

    # Fluent setters.
    def setMinDocFreq(self, value: int) -> IDF: ...
    def setInputCol(self, value: str) -> IDF: ...
    def setOutputCol(self, value: str) -> IDF: ...
|
|
|
|
|
|
|
|
class IDFModel(
    JavaModel,
    _IDFParams,
    JavaMLReadable[IDFModel],
    JavaMLWritable,
):
    # Typing declaration for the model type that parameterizes IDF.
    def setInputCol(self, value: str) -> IDFModel: ...
    def setOutputCol(self, value: str) -> IDFModel: ...

    # Read-only properties: a Vector, a list of ints and an int.
    @property
    def idf(self) -> Vector: ...
    @property
    def docFreq(self) -> List[int]: ...
    @property
    def numDocs(self) -> int: ...
|
|
|
|
|
|
|
|
class _ImputerParams(
    HasInputCol,
    HasInputCols,
    HasOutputCol,
    HasOutputCols,
    HasRelativeError,
):
    # Param mixin for Imputer / ImputerModel: a string `strategy` and a
    # float `missingValue`, each with a getter.
    strategy: Param[str]
    missingValue: Param[float]

    def getStrategy(self) -> str: ...
    def getMissingValue(self) -> float: ...
|
|
|
|
|
|
|
|
class Imputer(
    JavaEstimator[ImputerModel], _ImputerParams, JavaMLReadable[Imputer], JavaMLWritable
):
    # Typing declaration for the Imputer estimator, parameterized by
    # ImputerModel. The constructor and setParams each have a multi-column
    # overload (inputCols/outputCols) and a single-column overload
    # (inputCol/outputCol).

    # Constructor, multi-column form.
    @overload
    def __init__(
        self, *, strategy: str = ..., missingValue: float = ...,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
        relativeError: float = ...,
    ) -> None: ...
    # Constructor, single-column form.
    @overload
    def __init__(
        self, *, strategy: str = ..., missingValue: float = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        relativeError: float = ...,
    ) -> None: ...

    # setParams, multi-column form.
    @overload
    def setParams(
        self, *, strategy: str = ..., missingValue: float = ...,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
        relativeError: float = ...,
    ) -> Imputer: ...
    # setParams, single-column form: pairs `inputCol` with the singular
    # `outputCol`, the same keywords as the single-column constructor.
    @overload
    def setParams(
        self, *, strategy: str = ..., missingValue: float = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        relativeError: float = ...,
    ) -> Imputer: ...

    # Fluent setters, one per param.
    def setStrategy(self, value: str) -> Imputer: ...
    def setMissingValue(self, value: float) -> Imputer: ...
    def setInputCols(self, value: List[str]) -> Imputer: ...
    def setOutputCols(self, value: List[str]) -> Imputer: ...
    def setInputCol(self, value: str) -> Imputer: ...
    def setOutputCol(self, value: str) -> Imputer: ...
    def setRelativeError(self, value: float) -> Imputer: ...
|
|
|
|
|
|
|
|
class ImputerModel(
    JavaModel,
    _ImputerParams,
    JavaMLReadable[ImputerModel],
    JavaMLWritable,
):
    # Typing declaration for the model type that parameterizes Imputer.
    def setInputCols(self, value: List[str]) -> ImputerModel: ...
    def setOutputCols(self, value: List[str]) -> ImputerModel: ...
    def setInputCol(self, value: str) -> ImputerModel: ...
    def setOutputCol(self, value: str) -> ImputerModel: ...

    # Read-only DataFrame property.
    @property
    def surrogateDF(self) -> DataFrame: ...
|
|
|
|
|
|
|
|
class Interaction(
    JavaTransformer, HasInputCols, HasOutputCol,
    JavaMLReadable[Interaction], JavaMLWritable,
):
    # Typing declaration for the Interaction transformer: several input
    # columns, one output column, no params of its own.
    def __init__(
        self,
        *,
        inputCols: Optional[List[str]] = ...,
        outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        inputCols: Optional[List[str]] = ...,
        outputCol: Optional[str] = ...,
    ) -> Interaction: ...

    # Fluent setters.
    def setInputCols(self, value: List[str]) -> Interaction: ...
    def setOutputCol(self, value: str) -> Interaction: ...
|
|
|
|
|
|
|
|
class _MaxAbsScalerParams(HasInputCol, HasOutputCol):
    # Param mixin for MaxAbsScaler / MaxAbsScalerModel; it only combines the
    # input and output column params and adds nothing of its own.
    ...
|
|
|
|
|
|
|
|
class MaxAbsScaler(
    JavaEstimator[MaxAbsScalerModel], _MaxAbsScalerParams,
    JavaMLReadable[MaxAbsScaler], JavaMLWritable,
):
    # Typing declaration for the MaxAbsScaler estimator, parameterized by
    # MaxAbsScalerModel.
    def __init__(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
    ) -> MaxAbsScaler: ...

    # Fluent setters.
    def setInputCol(self, value: str) -> MaxAbsScaler: ...
    def setOutputCol(self, value: str) -> MaxAbsScaler: ...
|
|
|
|
|
|
|
|
class MaxAbsScalerModel(
    JavaModel,
    _MaxAbsScalerParams,
    JavaMLReadable[MaxAbsScalerModel],
    JavaMLWritable,
):
    # Typing declaration for the model type that parameterizes MaxAbsScaler.
    def setInputCol(self, value: str) -> MaxAbsScalerModel: ...
    def setOutputCol(self, value: str) -> MaxAbsScalerModel: ...

    # Read-only Vector property.
    @property
    def maxAbs(self) -> Vector: ...
|
|
|
|
|
|
|
|
class MinHashLSH(
    _LSH[MinHashLSHModel], HasInputCol, HasOutputCol, HasSeed,
    JavaMLReadable[MinHashLSH], JavaMLWritable,
):
    # Typing declaration for the MinHash LSH estimator, parameterized by
    # MinHashLSHModel. Constructor and setParams take the same keyword-only
    # arguments.
    def __init__(
        self, *,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        seed: Optional[int] = ..., numHashTables: int = ...,
    ) -> None: ...
    def setParams(
        self, *,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        seed: Optional[int] = ..., numHashTables: int = ...,
    ) -> MinHashLSH: ...

    # Only the seed setter is declared directly on this class.
    def setSeed(self, value: int) -> MinHashLSH: ...
|
|
|
|
|
|
|
|
class MinHashLSHModel(
    _LSHModel,
    JavaMLReadable[MinHashLSHModel],
    JavaMLWritable,
):
    # Model type parameterizing MinHashLSH; it declares no members of its
    # own, everything comes from its bases.
    ...
|
|
|
|
|
|
|
|
class _MinMaxScalerParams(HasInputCol, HasOutputCol):
    # Param mixin for MinMaxScaler / MinMaxScalerModel: float `min` and `max`
    # params. The attribute names are part of the declared interface, so
    # they are kept even though they shadow the builtins inside the class body.
    min: Param[float]
    max: Param[float]

    # Return type spelled out explicitly, matching the other annotated
    # `__init__` declarations in this file.
    def __init__(self, *args: Any) -> None: ...
    def getMin(self) -> float: ...
    def getMax(self) -> float: ...
|
|
|
|
|
|
|
|
class MinMaxScaler(
    JavaEstimator[MinMaxScalerModel], _MinMaxScalerParams,
    JavaMLReadable[MinMaxScaler], JavaMLWritable,
):
    # Typing declaration for the MinMaxScaler estimator, parameterized by
    # MinMaxScalerModel.
    def __init__(
        self, *, min: float = ..., max: float = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self, *, min: float = ..., max: float = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> MinMaxScaler: ...

    # Fluent setters.
    def setMin(self, value: float) -> MinMaxScaler: ...
    def setMax(self, value: float) -> MinMaxScaler: ...
    def setInputCol(self, value: str) -> MinMaxScaler: ...
    def setOutputCol(self, value: str) -> MinMaxScaler: ...
|
|
|
|
|
|
|
|
class MinMaxScalerModel(
    JavaModel,
    _MinMaxScalerParams,
    JavaMLReadable[MinMaxScalerModel],
    JavaMLWritable,
):
    # Typing declaration for the model type that parameterizes MinMaxScaler.
    def setInputCol(self, value: str) -> MinMaxScalerModel: ...
    def setOutputCol(self, value: str) -> MinMaxScalerModel: ...
    def setMin(self, value: float) -> MinMaxScalerModel: ...
    def setMax(self, value: float) -> MinMaxScalerModel: ...

    # Read-only Vector properties.
    @property
    def originalMin(self) -> Vector: ...
    @property
    def originalMax(self) -> Vector: ...
|
|
|
|
|
|
|
|
class NGram(
    JavaTransformer,
    HasInputCol,
    HasOutputCol,
    JavaMLReadable[NGram],
    JavaMLWritable,
):
    # Typing declaration for the NGram transformer; its only own param is
    # the integer `n`.
    n: Param[int]

    def __init__(
        self, *, n: int = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self, *, n: int = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> NGram: ...

    # Accessors for `n`, then the column setters.
    def setN(self, value: int) -> NGram: ...
    def getN(self) -> int: ...
    def setInputCol(self, value: str) -> NGram: ...
    def setOutputCol(self, value: str) -> NGram: ...
|
|
|
|
|
|
|
|
class Normalizer(
    JavaTransformer, HasInputCol, HasOutputCol,
    JavaMLReadable[Normalizer], JavaMLWritable,
):
    # Typing declaration for the Normalizer transformer; its only own param
    # is the float `p`.
    p: Param[float]

    def __init__(
        self, *, p: float = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self, *, p: float = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> Normalizer: ...

    # Accessors for `p`, then the column setters.
    def setP(self, value: float) -> Normalizer: ...
    def getP(self) -> float: ...
    def setInputCol(self, value: str) -> Normalizer: ...
    def setOutputCol(self, value: str) -> Normalizer: ...
|
|
|
|
|
|
|
|
class _OneHotEncoderParams(
    # Single-column params are included alongside the multi-column ones
    # because OneHotEncoder and OneHotEncoderModel both declare
    # inputCol/outputCol setters (and the estimator has single-column
    # overloads); same base ordering as _ImputerParams.
    HasInputCol,
    HasInputCols,
    HasOutputCol,
    HasOutputCols,
    HasHandleInvalid,
):
    # Param mixin for OneHotEncoder / OneHotEncoderModel: re-declares
    # `handleInvalid` as Param[str] and adds the boolean `dropLast`.
    handleInvalid: Param[str]
    dropLast: Param[bool]

    # Return type spelled out explicitly, matching the other annotated
    # `__init__` declarations in this file.
    def __init__(self, *args: Any) -> None: ...
    def getDropLast(self) -> bool: ...
|
|
|
|
|
|
|
|
class OneHotEncoder(
    JavaEstimator[OneHotEncoderModel], _OneHotEncoderParams,
    JavaMLReadable[OneHotEncoder], JavaMLWritable,
):
    # Typing declaration for the OneHotEncoder estimator, parameterized by
    # OneHotEncoderModel. The constructor and setParams each have a
    # multi-column and a single-column overload.

    # Constructor, multi-column form.
    @overload
    def __init__(
        self, *,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
        handleInvalid: str = ..., dropLast: bool = ...,
    ) -> None: ...
    # Constructor, single-column form.
    @overload
    def __init__(
        self, *,
        handleInvalid: str = ..., dropLast: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...

    # setParams mirrors the two constructor forms and returns the OneHotEncoder.
    @overload
    def setParams(
        self, *,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
        handleInvalid: str = ..., dropLast: bool = ...,
    ) -> OneHotEncoder: ...
    @overload
    def setParams(
        self, *,
        handleInvalid: str = ..., dropLast: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> OneHotEncoder: ...

    # Fluent setters, one per param.
    def setDropLast(self, value: bool) -> OneHotEncoder: ...
    def setInputCols(self, value: List[str]) -> OneHotEncoder: ...
    def setOutputCols(self, value: List[str]) -> OneHotEncoder: ...
    def setHandleInvalid(self, value: str) -> OneHotEncoder: ...
    def setInputCol(self, value: str) -> OneHotEncoder: ...
    def setOutputCol(self, value: str) -> OneHotEncoder: ...
|
|
|
|
|
|
|
|
class OneHotEncoderModel(
    JavaModel,
    _OneHotEncoderParams,
    JavaMLReadable[OneHotEncoderModel],
    JavaMLWritable,
):
    # Typing declaration for the model type that parameterizes OneHotEncoder.
    def setDropLast(self, value: bool) -> OneHotEncoderModel: ...
    def setInputCols(self, value: List[str]) -> OneHotEncoderModel: ...
    def setOutputCols(self, value: List[str]) -> OneHotEncoderModel: ...
    def setInputCol(self, value: str) -> OneHotEncoderModel: ...
    def setOutputCol(self, value: str) -> OneHotEncoderModel: ...
    def setHandleInvalid(self, value: str) -> OneHotEncoderModel: ...

    # Read-only list-of-ints property.
    @property
    def categorySizes(self) -> List[int]: ...
|
|
|
|
|
|
|
|
class PolynomialExpansion(
    JavaTransformer, HasInputCol, HasOutputCol,
    JavaMLReadable[PolynomialExpansion], JavaMLWritable,
):
    # Typing declaration for the PolynomialExpansion transformer; its only
    # own param is the integer `degree`.
    degree: Param[int]

    def __init__(
        self, *, degree: int = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self, *, degree: int = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
    ) -> PolynomialExpansion: ...

    # Accessors for `degree`, then the column setters.
    def setDegree(self, value: int) -> PolynomialExpansion: ...
    def getDegree(self) -> int: ...
    def setInputCol(self, value: str) -> PolynomialExpansion: ...
    def setOutputCol(self, value: str) -> PolynomialExpansion: ...
|
|
|
|
|
|
|
|
class QuantileDiscretizer(
    JavaEstimator[Bucketizer], HasInputCol, HasOutputCol, HasInputCols, HasOutputCols,
    HasHandleInvalid, HasRelativeError, JavaMLReadable[QuantileDiscretizer], JavaMLWritable,
):
    # Typing declaration for the QuantileDiscretizer estimator. Note that it
    # is parameterized by Bucketizer rather than by a dedicated *Model class.
    numBuckets: Param[int]
    handleInvalid: Param[str]
    numBucketsArray: Param[List[int]]

    # Constructor, single-column form: numBuckets / inputCol / outputCol.
    @overload
    def __init__(
        self, *, numBuckets: int = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        relativeError: float = ..., handleInvalid: str = ...,
    ) -> None: ...
    # Constructor, multi-column form: numBucketsArray / inputCols / outputCols.
    @overload
    def __init__(
        self, *, relativeError: float = ..., handleInvalid: str = ...,
        numBucketsArray: Optional[List[int]] = ...,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
    ) -> None: ...

    # setParams mirrors the two constructor forms and returns the estimator.
    @overload
    def setParams(
        self, *, numBuckets: int = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        relativeError: float = ..., handleInvalid: str = ...,
    ) -> QuantileDiscretizer: ...
    @overload
    def setParams(
        self, *, relativeError: float = ..., handleInvalid: str = ...,
        numBucketsArray: Optional[List[int]] = ...,
        inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ...,
    ) -> QuantileDiscretizer: ...

    # Accessors for the two bucket-count params.
    def setNumBuckets(self, value: int) -> QuantileDiscretizer: ...
    def getNumBuckets(self) -> int: ...
    def setNumBucketsArray(self, value: List[int]) -> QuantileDiscretizer: ...
    def getNumBucketsArray(self) -> List[int]: ...

    # Fluent setters for the shared params.
    def setRelativeError(self, value: float) -> QuantileDiscretizer: ...
    def setInputCol(self, value: str) -> QuantileDiscretizer: ...
    def setInputCols(self, value: List[str]) -> QuantileDiscretizer: ...
    def setOutputCol(self, value: str) -> QuantileDiscretizer: ...
    def setOutputCols(self, value: List[str]) -> QuantileDiscretizer: ...
    def setHandleInvalid(self, value: str) -> QuantileDiscretizer: ...
|
|
|
|
|
|
|
|
class _RobustScalerParams(HasInputCol, HasOutputCol, HasRelativeError):
    # Param mixin for RobustScaler / RobustScalerModel: float `lower` and
    # `upper` params, boolean `withCentering` and `withScaling` flags.
    lower: Param[float]
    upper: Param[float]
    withCentering: Param[bool]
    withScaling: Param[bool]

    # Return type spelled out explicitly, matching the other annotated
    # `__init__` declarations in this file.
    def __init__(self, *args: Any) -> None: ...

    def getLower(self) -> float: ...
    def getUpper(self) -> float: ...
    def getWithCentering(self) -> bool: ...
    def getWithScaling(self) -> bool: ...
|
|
|
|
|
|
|
|
class RobustScaler(
    # Parameterized with the model type, like every other estimator in this
    # file, so the estimator's model type is not left unspecified.
    JavaEstimator[RobustScalerModel],
    _RobustScalerParams,
    JavaMLReadable[RobustScaler],
    JavaMLWritable,
):
    # Typing declaration for the RobustScaler estimator. Constructor and
    # setParams take the same keyword-only arguments.
    def __init__(
        self, *,
        lower: float = ..., upper: float = ...,
        withCentering: bool = ..., withScaling: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        relativeError: float = ...,
    ) -> None: ...
    def setParams(
        self, *,
        lower: float = ..., upper: float = ...,
        withCentering: bool = ..., withScaling: bool = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        relativeError: float = ...,
    ) -> RobustScaler: ...

    # Fluent setters, one per param.
    def setLower(self, value: float) -> RobustScaler: ...
    def setUpper(self, value: float) -> RobustScaler: ...
    def setWithCentering(self, value: bool) -> RobustScaler: ...
    def setWithScaling(self, value: bool) -> RobustScaler: ...
    def setInputCol(self, value: str) -> RobustScaler: ...
    def setOutputCol(self, value: str) -> RobustScaler: ...
    def setRelativeError(self, value: float) -> RobustScaler: ...
|
|
|
|
|
|
|
|
class RobustScalerModel(
    JavaModel,
    _RobustScalerParams,
    JavaMLReadable[RobustScalerModel],
    JavaMLWritable,
):
    # Typing declaration for the fitted robust-scaler model.
    def setInputCol(self, value: str) -> RobustScalerModel: ...
    def setOutputCol(self, value: str) -> RobustScalerModel: ...

    # Read-only Vector properties.
    @property
    def median(self) -> Vector: ...
    @property
    def range(self) -> Vector: ...
|
|
|
|
|
|
|
|
class RegexTokenizer(
    JavaTransformer, HasInputCol, HasOutputCol,
    JavaMLReadable[RegexTokenizer], JavaMLWritable,
):
    # Typing declaration for the RegexTokenizer transformer: an integer
    # minimum token length, a string pattern and two boolean flags.
    minTokenLength: Param[int]
    gaps: Param[bool]
    pattern: Param[str]
    toLowercase: Param[bool]

    def __init__(
        self, *,
        minTokenLength: int = ..., gaps: bool = ..., pattern: str = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        toLowercase: bool = ...,
    ) -> None: ...
    def setParams(
        self, *,
        minTokenLength: int = ..., gaps: bool = ..., pattern: str = ...,
        inputCol: Optional[str] = ..., outputCol: Optional[str] = ...,
        toLowercase: bool = ...,
    ) -> RegexTokenizer: ...

    # Setter/getter pairs for the four own params.
    def setMinTokenLength(self, value: int) -> RegexTokenizer: ...
    def getMinTokenLength(self) -> int: ...
    def setGaps(self, value: bool) -> RegexTokenizer: ...
    def getGaps(self) -> bool: ...
    def setPattern(self, value: str) -> RegexTokenizer: ...
    def getPattern(self) -> str: ...
    def setToLowercase(self, value: bool) -> RegexTokenizer: ...
    def getToLowercase(self) -> bool: ...

    # Column setters.
    def setInputCol(self, value: str) -> RegexTokenizer: ...
    def setOutputCol(self, value: str) -> RegexTokenizer: ...
|
|
|
|
|
|
|
|
# Stub for SQLTransformer. Its only Param is `statement` (a string), settable
# through the keyword-only constructor, setParams, or setStatement.
class SQLTransformer(
    JavaTransformer,
    JavaMLReadable[SQLTransformer],
    JavaMLWritable,
):
    statement: Param[str]
    def __init__(
        self,
        *,
        statement: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        statement: Optional[str] = ...,
    ) -> SQLTransformer: ...
    def setStatement(self, value: str) -> SQLTransformer: ...
    def getStatement(self) -> str: ...
|
|
|
|
|
|
|
|
# Shared Param declarations for StandardScaler and StandardScalerModel:
# the boolean `withMean` / `withStd` Params and their getters.
class _StandardScalerParams(HasInputCol, HasOutputCol):
    withMean: Param[bool]
    withStd: Param[bool]
    # Return type spelled out explicitly, matching _StringIndexerParams.__init__.
    def __init__(self, *args: Any) -> None: ...
    def getWithMean(self) -> bool: ...
    def getWithStd(self) -> bool: ...
|
|
|
|
|
|
|
|
# Stub for the StandardScaler estimator (JavaEstimator parameterised by
# StandardScalerModel). Construction and setParams are keyword-only; every
# setter is typed to return StandardScaler.
class StandardScaler(
    JavaEstimator[StandardScalerModel],
    _StandardScalerParams,
    JavaMLReadable[StandardScaler],
    JavaMLWritable,
):
    def __init__(
        self,
        *,
        withMean: bool = ...,
        withStd: bool = ...,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        withMean: bool = ...,
        withStd: bool = ...,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
    ) -> StandardScaler: ...
    def setWithMean(self, value: bool) -> StandardScaler: ...
    def setWithStd(self, value: bool) -> StandardScaler: ...
    def setInputCol(self, value: str) -> StandardScaler: ...
    def setOutputCol(self, value: str) -> StandardScaler: ...
|
|
|
|
|
|
|
|
# Stub for StandardScalerModel. Exposes column setters typed to return the
# model class and two read-only Vector properties, `std` and `mean`.
class StandardScalerModel(
    JavaModel, _StandardScalerParams, JavaMLReadable[StandardScalerModel], JavaMLWritable
):
    def setInputCol(self, value: str) -> StandardScalerModel: ...
    def setOutputCol(self, value: str) -> StandardScalerModel: ...
    @property
    def std(self) -> Vector: ...
    @property
    def mean(self) -> Vector: ...
|
|
|
|
|
|
|
|
# Shared Param declarations for StringIndexer and StringIndexerModel:
# `stringOrderType` and `handleInvalid` (both string Params) on top of the
# single- and multi-column input/output mixins.
class _StringIndexerParams(
    JavaParams,
    HasHandleInvalid,
    HasInputCol,
    HasOutputCol,
    HasInputCols,
    HasOutputCols,
):
    stringOrderType: Param[str]
    handleInvalid: Param[str]
    def __init__(self, *args: Any) -> None: ...
    def getStringOrderType(self) -> str: ...
|
|
|
|
|
|
|
|
# Stub for the StringIndexer estimator (JavaEstimator parameterised by
# StringIndexerModel). __init__ and setParams are each overloaded twice: one
# signature takes inputCol/outputCol, the other takes inputCols/outputCols.
class StringIndexer(
    JavaEstimator[StringIndexerModel],
    _StringIndexerParams,
    JavaMLReadable[StringIndexer],
    JavaMLWritable,
):
    # Single-column form.
    @overload
    def __init__(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        handleInvalid: str = ...,
        stringOrderType: str = ...,
    ) -> None: ...
    # Multi-column form.
    @overload
    def __init__(
        self,
        *,
        inputCols: Optional[List[str]] = ...,
        outputCols: Optional[List[str]] = ...,
        handleInvalid: str = ...,
        stringOrderType: str = ...,
    ) -> None: ...
    # Single-column form.
    @overload
    def setParams(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        handleInvalid: str = ...,
        stringOrderType: str = ...,
    ) -> StringIndexer: ...
    # Multi-column form.
    @overload
    def setParams(
        self,
        *,
        inputCols: Optional[List[str]] = ...,
        outputCols: Optional[List[str]] = ...,
        handleInvalid: str = ...,
        stringOrderType: str = ...,
    ) -> StringIndexer: ...
    def setStringOrderType(self, value: str) -> StringIndexer: ...
    def setInputCol(self, value: str) -> StringIndexer: ...
    def setInputCols(self, value: List[str]) -> StringIndexer: ...
    def setOutputCol(self, value: str) -> StringIndexer: ...
    def setOutputCols(self, value: List[str]) -> StringIndexer: ...
    def setHandleInvalid(self, value: str) -> StringIndexer: ...
|
|
|
|
|
|
|
|
# Stub for StringIndexerModel. Besides the column/handleInvalid setters it
# declares two alternate constructors (`from_labels`, `from_arrays_of_labels`)
# and a read-only `labels` property typed as a list of strings.
class StringIndexerModel(
    JavaModel,
    _StringIndexerParams,
    JavaMLReadable[StringIndexerModel],
    JavaMLWritable,
):
    def setInputCol(self, value: str) -> StringIndexerModel: ...
    def setInputCols(self, value: List[str]) -> StringIndexerModel: ...
    def setOutputCol(self, value: str) -> StringIndexerModel: ...
    def setOutputCols(self, value: List[str]) -> StringIndexerModel: ...
    def setHandleInvalid(self, value: str) -> StringIndexerModel: ...
    @classmethod
    def from_labels(
        cls,
        labels: List[str],
        inputCol: str,
        outputCol: Optional[str] = ...,
        handleInvalid: Optional[str] = ...
    ) -> StringIndexerModel: ...
    @classmethod
    def from_arrays_of_labels(
        cls,
        arrayOfLabels: List[List[str]],
        inputCols: List[str],
        outputCols: Optional[List[str]] = ...,
        handleInvalid: Optional[str] = ...
    ) -> StringIndexerModel: ...
    @property
    def labels(self) -> List[str]: ...
|
|
|
|
|
|
|
|
# Stub for the IndexToString transformer. Declares one Param, `labels`
# (a list of strings), with get/set accessors; construction is keyword-only.
class IndexToString(
    JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable[IndexToString], JavaMLWritable
):
    labels: Param[List[str]]
    def __init__(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        labels: Optional[List[str]] = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        labels: Optional[List[str]] = ...,
    ) -> IndexToString: ...
    def setLabels(self, value: List[str]) -> IndexToString: ...
    def getLabels(self) -> List[str]: ...
    def setInputCol(self, value: str) -> IndexToString: ...
    def setOutputCol(self, value: str) -> IndexToString: ...
|
|
|
|
|
|
|
|
# Stub for the StopWordsRemover transformer. Declares the `stopWords`,
# `caseSensitive` and `locale` Params. __init__ and setParams are overloaded:
# one signature takes inputCol/outputCol, the other inputCols/outputCols.
class StopWordsRemover(
    JavaTransformer,
    HasInputCol,
    HasOutputCol,
    HasInputCols,
    HasOutputCols,
    JavaMLReadable[StopWordsRemover],
    JavaMLWritable,
):
    stopWords: Param[List[str]]
    caseSensitive: Param[bool]
    locale: Param[str]
    # Single-column form.
    @overload
    def __init__(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        stopWords: Optional[List[str]] = ...,
        caseSensitive: bool = ...,
        locale: Optional[str] = ...,
    ) -> None: ...
    # Multi-column form.
    @overload
    def __init__(
        self,
        *,
        stopWords: Optional[List[str]] = ...,
        caseSensitive: bool = ...,
        locale: Optional[str] = ...,
        inputCols: Optional[List[str]] = ...,
        outputCols: Optional[List[str]] = ...,
    ) -> None: ...
    # Single-column form.
    @overload
    def setParams(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        stopWords: Optional[List[str]] = ...,
        caseSensitive: bool = ...,
        locale: Optional[str] = ...,
    ) -> StopWordsRemover: ...
    # Multi-column form.
    @overload
    def setParams(
        self,
        *,
        stopWords: Optional[List[str]] = ...,
        caseSensitive: bool = ...,
        locale: Optional[str] = ...,
        inputCols: Optional[List[str]] = ...,
        outputCols: Optional[List[str]] = ...,
    ) -> StopWordsRemover: ...
    def setStopWords(self, value: List[str]) -> StopWordsRemover: ...
    def getStopWords(self) -> List[str]: ...
    def setCaseSensitive(self, value: bool) -> StopWordsRemover: ...
    def getCaseSensitive(self) -> bool: ...
    def setLocale(self, value: str) -> StopWordsRemover: ...
    def getLocale(self) -> str: ...
    def setInputCol(self, value: str) -> StopWordsRemover: ...
    def setOutputCol(self, value: str) -> StopWordsRemover: ...
    def setInputCols(self, value: List[str]) -> StopWordsRemover: ...
    def setOutputCols(self, value: List[str]) -> StopWordsRemover: ...
    # Static helper: takes a language name, returns a list of strings.
    @staticmethod
    def loadDefaultStopWords(language: str) -> List[str]: ...
|
|
|
|
|
|
|
|
# Stub for the Tokenizer transformer. It declares no Params of its own beyond
# the inputCol/outputCol mixins; construction is keyword-only.
class Tokenizer(
    JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable[Tokenizer], JavaMLWritable
):
    def __init__(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
    ) -> Tokenizer: ...
    def setInputCol(self, value: str) -> Tokenizer: ...
    def setOutputCol(self, value: str) -> Tokenizer: ...
|
|
|
|
|
|
|
|
# Stub for the VectorAssembler transformer: several input columns, a single
# output column, and a string `handleInvalid` Param. Construction is
# keyword-only.
class VectorAssembler(
    JavaTransformer,
    HasInputCols,
    HasOutputCol,
    HasHandleInvalid,
    JavaMLReadable[VectorAssembler],
    JavaMLWritable,
):
    handleInvalid: Param[str]
    def __init__(
        self,
        *,
        inputCols: Optional[List[str]] = ...,
        outputCol: Optional[str] = ...,
        handleInvalid: str = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        inputCols: Optional[List[str]] = ...,
        outputCol: Optional[str] = ...,
        handleInvalid: str = ...,
    ) -> VectorAssembler: ...
    def setInputCols(self, value: List[str]) -> VectorAssembler: ...
    def setOutputCol(self, value: str) -> VectorAssembler: ...
    def setHandleInvalid(self, value: str) -> VectorAssembler: ...
|
|
|
|
|
|
|
|
# Shared Param declarations for VectorIndexer and VectorIndexerModel:
# the integer `maxCategories` Param (with getter) and the string
# `handleInvalid` Param.
class _VectorIndexerParams(HasInputCol, HasOutputCol, HasHandleInvalid):
    maxCategories: Param[int]
    handleInvalid: Param[str]
    # Return type spelled out explicitly, matching _StringIndexerParams.__init__.
    def __init__(self, *args: Any) -> None: ...
    def getMaxCategories(self) -> int: ...
|
|
|
|
|
|
|
|
# Stub for the VectorIndexer estimator (JavaEstimator parameterised by
# VectorIndexerModel). Construction and setParams are keyword-only; every
# setter is typed to return VectorIndexer.
class VectorIndexer(
    JavaEstimator[VectorIndexerModel],
    _VectorIndexerParams,
    HasHandleInvalid,
    JavaMLReadable[VectorIndexer],
    JavaMLWritable,
):
    def __init__(
        self,
        *,
        maxCategories: int = ...,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        handleInvalid: str = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        maxCategories: int = ...,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        handleInvalid: str = ...,
    ) -> VectorIndexer: ...
    def setMaxCategories(self, value: int) -> VectorIndexer: ...
    def setInputCol(self, value: str) -> VectorIndexer: ...
    def setOutputCol(self, value: str) -> VectorIndexer: ...
    def setHandleInvalid(self, value: str) -> VectorIndexer: ...
|
|
|
|
|
|
|
|
# Stub for VectorIndexerModel. Exposes column setters plus two read-only
# properties: `numFeatures` (int) and `categoryMaps`.
class VectorIndexerModel(
    JavaModel,
    _VectorIndexerParams,
    JavaMLReadable[VectorIndexerModel],
    JavaMLWritable,
):
    def setInputCol(self, value: str) -> VectorIndexerModel: ...
    def setOutputCol(self, value: str) -> VectorIndexerModel: ...
    @property
    def numFeatures(self) -> int: ...
    # NOTE(review): the value type is declared as Tuple[float, int]; confirm
    # against the runtime object, which may be a mapping instead.
    @property
    def categoryMaps(self) -> Dict[int, Tuple[float, int]]: ...
|
|
|
|
|
|
|
|
# Stub for the VectorSlicer transformer. Declares two list-valued Params,
# `indices` (ints) and `names` (strings), each with get/set accessors;
# construction is keyword-only.
class VectorSlicer(
    JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable[VectorSlicer], JavaMLWritable
):
    indices: Param[List[int]]
    names: Param[List[str]]
    def __init__(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        indices: Optional[List[int]] = ...,
        names: Optional[List[str]] = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        indices: Optional[List[int]] = ...,
        names: Optional[List[str]] = ...,
    ) -> VectorSlicer: ...
    def setIndices(self, value: List[int]) -> VectorSlicer: ...
    def getIndices(self) -> List[int]: ...
    def setNames(self, value: List[str]) -> VectorSlicer: ...
    def getNames(self) -> List[str]: ...
    def setInputCol(self, value: str) -> VectorSlicer: ...
    def setOutputCol(self, value: str) -> VectorSlicer: ...
|
|
|
|
|
|
|
|
# Shared Param declarations for Word2Vec and Word2VecModel: five integer
# Params (vectorSize, numPartitions, minCount, windowSize, maxSentenceLength)
# and their getters.
class _Word2VecParams(HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol):
    vectorSize: Param[int]
    numPartitions: Param[int]
    minCount: Param[int]
    windowSize: Param[int]
    maxSentenceLength: Param[int]
    # Return type spelled out explicitly, matching _StringIndexerParams.__init__.
    def __init__(self, *args: Any) -> None: ...
    def getVectorSize(self) -> int: ...
    def getNumPartitions(self) -> int: ...
    def getMinCount(self) -> int: ...
    def getWindowSize(self) -> int: ...
    def getMaxSentenceLength(self) -> int: ...
|
|
|
|
|
|
|
|
# Stub for the Word2Vec estimator (JavaEstimator parameterised by
# Word2VecModel). Construction and setParams are keyword-only; every setter
# is typed to return Word2Vec.
class Word2Vec(
    JavaEstimator[Word2VecModel], _Word2VecParams, JavaMLReadable[Word2Vec], JavaMLWritable
):
    def __init__(
        self,
        *,
        vectorSize: int = ...,
        minCount: int = ...,
        numPartitions: int = ...,
        stepSize: float = ...,
        maxIter: int = ...,
        seed: Optional[int] = ...,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        windowSize: int = ...,
        maxSentenceLength: int = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        vectorSize: int = ...,
        minCount: int = ...,
        numPartitions: int = ...,
        stepSize: float = ...,
        maxIter: int = ...,
        seed: Optional[int] = ...,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
        windowSize: int = ...,
        maxSentenceLength: int = ...,
    ) -> Word2Vec: ...
    def setVectorSize(self, value: int) -> Word2Vec: ...
    def setNumPartitions(self, value: int) -> Word2Vec: ...
    def setMinCount(self, value: int) -> Word2Vec: ...
    def setWindowSize(self, value: int) -> Word2Vec: ...
    def setMaxSentenceLength(self, value: int) -> Word2Vec: ...
    def setMaxIter(self, value: int) -> Word2Vec: ...
    def setInputCol(self, value: str) -> Word2Vec: ...
    def setOutputCol(self, value: str) -> Word2Vec: ...
    def setSeed(self, value: int) -> Word2Vec: ...
    def setStepSize(self, value: float) -> Word2Vec: ...
|
|
|
|
|
|
|
|
# Stub for Word2VecModel. `findSynonyms` and `findSynonymsArray` are each
# overloaded so that `word` may be either a str or a Vector.
class Word2VecModel(
    JavaModel,
    _Word2VecParams,
    JavaMLReadable[Word2VecModel],
    JavaMLWritable,
):
    def getVectors(self) -> DataFrame: ...
    def setInputCol(self, value: str) -> Word2VecModel: ...
    def setOutputCol(self, value: str) -> Word2VecModel: ...
    # DataFrame-returning lookup: str form, then Vector form.
    @overload
    def findSynonyms(self, word: str, num: int) -> DataFrame: ...
    @overload
    def findSynonyms(self, word: Vector, num: int) -> DataFrame: ...
    # List-of-(str, float)-returning lookup: str form, then Vector form.
    @overload
    def findSynonymsArray(self, word: str, num: int) -> List[Tuple[str, float]]: ...
    @overload
    def findSynonymsArray(self, word: Vector, num: int) -> List[Tuple[str, float]]: ...
|
|
|
|
|
|
|
|
# Shared Param declarations for PCA and PCAModel: the integer `k` Param and
# its getter.
class _PCAParams(
    HasInputCol,
    HasOutputCol,
):
    k: Param[int]
    def getK(self) -> int: ...
|
|
|
|
|
|
|
|
# Stub for the PCA estimator (JavaEstimator parameterised by PCAModel).
# Construction and setParams are keyword-only; setters are typed to return PCA.
class PCA(
    JavaEstimator[PCAModel],
    _PCAParams,
    JavaMLReadable[PCA],
    JavaMLWritable,
):
    def __init__(
        self,
        *,
        k: Optional[int] = ...,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        k: Optional[int] = ...,
        inputCol: Optional[str] = ...,
        outputCol: Optional[str] = ...,
    ) -> PCA: ...
    def setK(self, value: int) -> PCA: ...
    def setInputCol(self, value: str) -> PCA: ...
    def setOutputCol(self, value: str) -> PCA: ...
|
|
|
|
|
|
|
|
# Stub for PCAModel. Exposes column setters and two read-only properties:
# `pc` (a DenseMatrix) and `explainedVariance` (a DenseVector).
class PCAModel(
    JavaModel,
    _PCAParams,
    JavaMLReadable[PCAModel],
    JavaMLWritable,
):
    def setInputCol(self, value: str) -> PCAModel: ...
    def setOutputCol(self, value: str) -> PCAModel: ...
    @property
    def pc(self) -> DenseMatrix: ...
    @property
    def explainedVariance(self) -> DenseVector: ...
|
|
|
|
|
|
|
|
# Shared Param declarations for RFormula and RFormulaModel: `formula`,
# `forceIndexLabel`, `stringIndexerOrderType` and `handleInvalid`, with
# getters for the first three.
class _RFormulaParams(HasFeaturesCol, HasLabelCol, HasHandleInvalid):
    formula: Param[str]
    forceIndexLabel: Param[bool]
    stringIndexerOrderType: Param[str]
    handleInvalid: Param[str]
    # Return type spelled out explicitly, matching _StringIndexerParams.__init__.
    def __init__(self, *args: Any) -> None: ...
    def getFormula(self) -> str: ...
    def getForceIndexLabel(self) -> bool: ...
    def getStringIndexerOrderType(self) -> str: ...
|
|
|
|
|
|
|
|
# Stub for the RFormula estimator (JavaEstimator parameterised by
# RFormulaModel). Construction and setParams are keyword-only; every setter
# is typed to return RFormula.
class RFormula(
    JavaEstimator[RFormulaModel], _RFormulaParams, JavaMLReadable[RFormula], JavaMLWritable
):
    def __init__(
        self,
        *,
        formula: Optional[str] = ...,
        featuresCol: str = ...,
        labelCol: str = ...,
        forceIndexLabel: bool = ...,
        stringIndexerOrderType: str = ...,
        handleInvalid: str = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        formula: Optional[str] = ...,
        featuresCol: str = ...,
        labelCol: str = ...,
        forceIndexLabel: bool = ...,
        stringIndexerOrderType: str = ...,
        handleInvalid: str = ...,
    ) -> RFormula: ...
    def setFormula(self, value: str) -> RFormula: ...
    def setForceIndexLabel(self, value: bool) -> RFormula: ...
    def setStringIndexerOrderType(self, value: str) -> RFormula: ...
    def setFeaturesCol(self, value: str) -> RFormula: ...
    def setLabelCol(self, value: str) -> RFormula: ...
    def setHandleInvalid(self, value: str) -> RFormula: ...
|
|
|
|
|
|
|
|
# Stub for RFormulaModel. It declares no members of its own; everything comes
# from its bases.
class RFormulaModel(
    JavaModel,
    _RFormulaParams,
    JavaMLReadable[RFormulaModel],
    JavaMLWritable,
): ...
|
|
|
|
|
|
|
|
# Shared Param declarations for the _Selector / _SelectorModel family:
# `selectorType`, `numTopFeatures`, `percentile`, `fpr`, `fdr` and `fwe`,
# each with a getter.
class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol):
    selectorType: Param[str]
    numTopFeatures: Param[int]
    percentile: Param[float]
    fpr: Param[float]
    fdr: Param[float]
    fwe: Param[float]
    # Return type spelled out explicitly, matching _StringIndexerParams.__init__.
    def __init__(self, *args: Any) -> None: ...
    def getSelectorType(self) -> str: ...
    def getNumTopFeatures(self) -> int: ...
    def getPercentile(self) -> float: ...
    def getFpr(self) -> float: ...
    def getFdr(self) -> float: ...
    def getFwe(self) -> float: ...
|
|
|
|
|
|
|
|
# Generic base stub for selector estimators, parameterised by the model type
# JM. Each setter annotates `self` with the type variable P and returns P, so
# the declared return type follows the type of the receiver.
class _Selector(
    JavaEstimator[JM],
    _SelectorParams,
    JavaMLReadable,
    JavaMLWritable,
):
    def setSelectorType(self: P, value: str) -> P: ...
    def setNumTopFeatures(self: P, value: int) -> P: ...
    def setPercentile(self: P, value: float) -> P: ...
    def setFpr(self: P, value: float) -> P: ...
    def setFdr(self: P, value: float) -> P: ...
    def setFwe(self: P, value: float) -> P: ...
    def setFeaturesCol(self: P, value: str) -> P: ...
    def setOutputCol(self: P, value: str) -> P: ...
    def setLabelCol(self: P, value: str) -> P: ...
|
|
|
|
|
|
|
|
# Base stub for selector models. The column setters return the receiver's own
# type (via the type variable P); `selectedFeatures` is a read-only list of
# ints.
class _SelectorModel(
    JavaModel,
    _SelectorParams,
):
    def setFeaturesCol(self: P, value: str) -> P: ...
    def setOutputCol(self: P, value: str) -> P: ...
    @property
    def selectedFeatures(self) -> List[int]: ...
|
|
|
|
|
|
|
|
# Stub for the ChiSqSelector estimator (_Selector parameterised by
# ChiSqSelectorModel). It re-declares every inherited setter with a concrete
# ChiSqSelector return type; construction and setParams are keyword-only.
class ChiSqSelector(
    _Selector[ChiSqSelectorModel], JavaMLReadable[ChiSqSelector], JavaMLWritable
):
    def __init__(
        self,
        *,
        numTopFeatures: int = ...,
        featuresCol: str = ...,
        outputCol: Optional[str] = ...,
        labelCol: str = ...,
        selectorType: str = ...,
        percentile: float = ...,
        fpr: float = ...,
        fdr: float = ...,
        fwe: float = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        numTopFeatures: int = ...,
        featuresCol: str = ...,
        outputCol: Optional[str] = ...,
        labelCol: str = ...,
        selectorType: str = ...,
        percentile: float = ...,
        fpr: float = ...,
        fdr: float = ...,
        fwe: float = ...,
    ) -> ChiSqSelector: ...
    def setSelectorType(self, value: str) -> ChiSqSelector: ...
    def setNumTopFeatures(self, value: int) -> ChiSqSelector: ...
    def setPercentile(self, value: float) -> ChiSqSelector: ...
    def setFpr(self, value: float) -> ChiSqSelector: ...
    def setFdr(self, value: float) -> ChiSqSelector: ...
    def setFwe(self, value: float) -> ChiSqSelector: ...
    def setFeaturesCol(self, value: str) -> ChiSqSelector: ...
    def setOutputCol(self, value: str) -> ChiSqSelector: ...
    def setLabelCol(self, value: str) -> ChiSqSelector: ...
|
|
|
|
|
|
|
|
# Stub for ChiSqSelectorModel. Re-declares the _SelectorModel setters with a
# concrete return type, plus the read-only `selectedFeatures` list of ints.
class ChiSqSelectorModel(
    _SelectorModel,
    JavaMLReadable[ChiSqSelectorModel],
    JavaMLWritable,
):
    def setFeaturesCol(self, value: str) -> ChiSqSelectorModel: ...
    def setOutputCol(self, value: str) -> ChiSqSelectorModel: ...
    @property
    def selectedFeatures(self) -> List[int]: ...
|
|
|
|
|
|
|
|
# Stub for the VectorSizeHint transformer. Declares the integer `size` Param
# (with get/set accessors) and the string `handleInvalid` Param; construction
# is keyword-only.
class VectorSizeHint(
    JavaTransformer, HasInputCol, HasHandleInvalid, JavaMLReadable[VectorSizeHint], JavaMLWritable
):
    size: Param[int]
    handleInvalid: Param[str]
    def __init__(
        self,
        *,
        inputCol: Optional[str] = ...,
        size: Optional[int] = ...,
        handleInvalid: str = ...,
    ) -> None: ...
    def setParams(
        self,
        *,
        inputCol: Optional[str] = ...,
        size: Optional[int] = ...,
        handleInvalid: str = ...,
    ) -> VectorSizeHint: ...
    def setSize(self, value: int) -> VectorSizeHint: ...
    def getSize(self) -> int: ...
    def setInputCol(self, value: str) -> VectorSizeHint: ...
    def setHandleInvalid(self, value: str) -> VectorSizeHint: ...
|
|
|
|
|
|
|
|
# Shared Param declarations for VarianceThresholdSelector and its model:
# the float `varianceThreshold` Param and its getter.
class _VarianceThresholdSelectorParams(
    HasFeaturesCol,
    HasOutputCol,
):
    varianceThreshold: Param[float] = ...
    def getVarianceThreshold(self) -> float: ...
|
|
|
|
|
|
|
|
# Stub for the VarianceThresholdSelector estimator (JavaEstimator
# parameterised by VarianceThresholdSelectorModel). Every setter is typed to
# return VarianceThresholdSelector.
class VarianceThresholdSelector(
    JavaEstimator[VarianceThresholdSelectorModel],
    _VarianceThresholdSelectorParams,
    JavaMLReadable[VarianceThresholdSelector],
    JavaMLWritable,
):
    # The bare `*` makes all arguments keyword-only, consistent with every
    # other estimator/transformer constructor declared in this stub.
    # NOTE(review): presumably the runtime constructor is also keyword-only —
    # confirm against pyspark/ml/feature.py.
    def __init__(
        self,
        *,
        featuresCol: str = ...,
        outputCol: Optional[str] = ...,
        varianceThreshold: float = ...,
    ) -> None: ...
    # Keyword-only for the same reason as __init__.
    def setParams(
        self,
        *,
        featuresCol: str = ...,
        outputCol: Optional[str] = ...,
        varianceThreshold: float = ...,
    ) -> VarianceThresholdSelector: ...
    def setVarianceThreshold(self, value: float) -> VarianceThresholdSelector: ...
    def setFeaturesCol(self, value: str) -> VarianceThresholdSelector: ...
    def setOutputCol(self, value: str) -> VarianceThresholdSelector: ...
|
|
|
|
|
|
|
|
# Stub for VarianceThresholdSelectorModel. Exposes column setters typed to
# return the model class and a read-only `selectedFeatures` list of ints.
class VarianceThresholdSelectorModel(
    JavaModel,
    _VarianceThresholdSelectorParams,
    JavaMLReadable[VarianceThresholdSelectorModel],
    JavaMLWritable,
):
    def setFeaturesCol(self, value: str) -> VarianceThresholdSelectorModel: ...

    def setOutputCol(self, value: str) -> VarianceThresholdSelectorModel: ...

    @property
    def selectedFeatures(self) -> List[int]: ...
|
[SPARK-34080][ML][PYTHON] Add UnivariateFeatureSelector
### What changes were proposed in this pull request?
Add UnivariateFeatureSelector
### Why are the changes needed?
Have one UnivariateFeatureSelector, so we don't need to have three Feature Selectors.
### Does this PR introduce _any_ user-facing change?
Yes
```
selector = UnivariateFeatureSelector(featureCols=["x", "y", "z"], labelCol=["target"], featureType="categorical", labelType="continuous", selectorType="numTopFeatures", numTopFeatures=100)
```
Or
numTopFeatures
```
selector = UnivariateFeatureSelector(featureCols=["x", "y", "z"], labelCol=["target"], scoreFunction="f_classif", selectorType="numTopFeatures", numTopFeatures=100)
```
### How was this patch tested?
Add Unit test
Closes #31160 from huaxingao/UnivariateSelector.
Authored-by: Huaxin Gao <huaxing@us.ibm.com>
Signed-off-by: Weichen Xu <weichen.xu@databricks.com>
2021-01-15 22:09:23 -05:00
|
|
|
|
|
|
|
# Shared Param declarations for UnivariateFeatureSelector and its model:
# `featureType`, `labelType`, `selectionMode` (strings) and
# `selectionThreshold` (float), each with a getter.
class _UnivariateFeatureSelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol):
    featureType: Param[str] = ...
    labelType: Param[str] = ...
    selectionMode: Param[str] = ...
    selectionThreshold: Param[float] = ...
    # Return type spelled out explicitly, matching _StringIndexerParams.__init__.
    def __init__(self, *args: Any) -> None: ...
    def getFeatureType(self) -> str: ...
    def getLabelType(self) -> str: ...
    def getSelectionMode(self) -> str: ...
    def getSelectionThreshold(self) -> float: ...
|
|
|
|
|
|
|
|
# Stub for the UnivariateFeatureSelector estimator (JavaEstimator
# parameterised by UnivariateFeatureSelectorModel). __init__ and setParams are
# keyword-only and accept featuresCol, outputCol, labelCol and selectionMode;
# featureType, labelType and selectionThreshold are set through dedicated
# setters.
class UnivariateFeatureSelector(
    JavaEstimator[UnivariateFeatureSelectorModel],
    _UnivariateFeatureSelectorParams,
    JavaMLReadable[UnivariateFeatureSelector],
    JavaMLWritable,
):
    def __init__(
        self,
        *,
        featuresCol: str = ...,
        outputCol: Optional[str] = ...,
        labelCol: str = ...,
        selectionMode: str = ...
    ) -> None: ...
    def setParams(
        self,
        *,
        featuresCol: str = ...,
        outputCol: Optional[str] = ...,
        labelCol: str = ...,
        selectionMode: str = ...
    ) -> UnivariateFeatureSelector: ...
    def setFeatureType(self, value: str) -> UnivariateFeatureSelector: ...
    def setLabelType(self, value: str) -> UnivariateFeatureSelector: ...
    def setSelectionMode(self, value: str) -> UnivariateFeatureSelector: ...
    def setSelectionThreshold(self, value: float) -> UnivariateFeatureSelector: ...
    def setFeaturesCol(self, value: str) -> UnivariateFeatureSelector: ...
    def setOutputCol(self, value: str) -> UnivariateFeatureSelector: ...
    def setLabelCol(self, value: str) -> UnivariateFeatureSelector: ...
|
|
|
|
|
|
|
|
# Stub for UnivariateFeatureSelectorModel. Exposes column setters typed to
# return the model class and a read-only `selectedFeatures` list of ints.
class UnivariateFeatureSelectorModel(
    JavaModel,
    _UnivariateFeatureSelectorParams,
    JavaMLReadable[UnivariateFeatureSelectorModel],
    JavaMLWritable,
):
    def setFeaturesCol(self, value: str) -> UnivariateFeatureSelectorModel: ...

    def setOutputCol(self, value: str) -> UnivariateFeatureSelectorModel: ...

    @property
    def selectedFeatures(self) -> List[int]: ...
|