spark-instrumented-optimizer/python/pyspark/mllib/classification.pyi
zero323 665817bd4f [SPARK-33457][PYTHON] Adjust mypy configuration
### What changes were proposed in this pull request?

This pull request:

- Adds the following flags to the main mypy configuration:
  - [`strict_optional`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-strict_optional)
  - [`no_implicit_optional`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-no_implicit_optional)
  - [`disallow_untyped_defs`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-disallow_untyped_defs)

These flags are enabled only for the public API and disabled for tests and internal modules.
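To make the effect concrete, here is a minimal, hypothetical sketch (not taken from the Spark codebase; the function names are made up) of the kinds of errors each flag reports:

```python
from typing import Optional

def get(key: str, default: str = None) -> str: ...
# error under no_implicit_optional: a None default no longer implies
# Optional; the parameter must be annotated as Optional[str].

def helper(x):
    # error under disallow_untyped_defs: functions must be fully
    # annotated (parameters and return type).
    return x

s: Optional[str] = None
s.upper()
# error under strict_optional: None is tracked as a distinct type, so
# Optional values must be narrowed (e.g. `if s is not None:`) before use.
```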

Additionally, this PR fixes missing annotations.

### Why are the changes needed?

The primary reason for proposing these changes is to use the standard configuration used by the typeshed project. This will allow us to be more strict, especially when interacting with JVM code. See for example https://github.com/apache/spark/pull/29122#pullrequestreview-513112882

Additionally, it will allow us to detect cases where annotations have been unintentionally omitted.

### Does this PR introduce _any_ user-facing change?

Annotations only.

### How was this patch tested?

`dev/lint-python`.

Closes #30382 from zero323/SPARK-33457.

Authored-by: zero323 <mszymkiewicz@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
2020-11-25 09:27:04 +09:00

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from typing import overload
from typing import Optional, Union
from pyspark.context import SparkContext
from pyspark.rdd import RDD
from pyspark.mllib._typing import VectorLike
from pyspark.mllib.linalg import Vector
from pyspark.mllib.regression import LabeledPoint, LinearModel, StreamingLinearAlgorithm
from pyspark.mllib.util import Saveable, Loader
from pyspark.streaming.dstream import DStream
from numpy import float64, ndarray  # type: ignore[import]

class LinearClassificationModel(LinearModel):
    def __init__(self, weights: Vector, intercept: float) -> None: ...
    def setThreshold(self, value: float) -> None: ...
    @property
    def threshold(self) -> Optional[float]: ...
    def clearThreshold(self) -> None: ...
    @overload
    def predict(self, test: VectorLike) -> Union[int, float, float64]: ...
    @overload
    def predict(self, test: RDD[VectorLike]) -> RDD[Union[int, float]]: ...

class LogisticRegressionModel(LinearClassificationModel):
    def __init__(
        self, weights: Vector, intercept: float, numFeatures: int, numClasses: int
    ) -> None: ...
    @property
    def numFeatures(self) -> int: ...
    @property
    def numClasses(self) -> int: ...
    @overload
    def predict(self, x: VectorLike) -> Union[int, float]: ...
    @overload
    def predict(self, x: RDD[VectorLike]) -> RDD[Union[int, float]]: ...
    def save(self, sc: SparkContext, path: str) -> None: ...
    @classmethod
    def load(cls, sc: SparkContext, path: str) -> LogisticRegressionModel: ...

class LogisticRegressionWithSGD:
    @classmethod
    def train(
        cls,
        data: RDD[LabeledPoint],
        iterations: int = ...,
        step: float = ...,
        miniBatchFraction: float = ...,
        initialWeights: Optional[VectorLike] = ...,
        regParam: float = ...,
        regType: str = ...,
        intercept: bool = ...,
        validateData: bool = ...,
        convergenceTol: float = ...,
    ) -> LogisticRegressionModel: ...

class LogisticRegressionWithLBFGS:
    @classmethod
    def train(
        cls,
        data: RDD[LabeledPoint],
        iterations: int = ...,
        initialWeights: Optional[VectorLike] = ...,
        regParam: float = ...,
        regType: str = ...,
        intercept: bool = ...,
        corrections: int = ...,
        tolerance: float = ...,
        validateData: bool = ...,
        numClasses: int = ...,
    ) -> LogisticRegressionModel: ...

class SVMModel(LinearClassificationModel):
    def __init__(self, weights: Vector, intercept: float) -> None: ...
    @overload
    def predict(self, x: VectorLike) -> float64: ...
    @overload
    def predict(self, x: RDD[VectorLike]) -> RDD[float64]: ...
    def save(self, sc: SparkContext, path: str) -> None: ...
    @classmethod
    def load(cls, sc: SparkContext, path: str) -> SVMModel: ...

class SVMWithSGD:
    @classmethod
    def train(
        cls,
        data: RDD[LabeledPoint],
        iterations: int = ...,
        step: float = ...,
        regParam: float = ...,
        miniBatchFraction: float = ...,
        initialWeights: Optional[VectorLike] = ...,
        regType: str = ...,
        intercept: bool = ...,
        validateData: bool = ...,
        convergenceTol: float = ...,
    ) -> SVMModel: ...

class NaiveBayesModel(Saveable, Loader[NaiveBayesModel]):
    labels: ndarray
    pi: ndarray
    theta: ndarray
    def __init__(self, labels: ndarray, pi: ndarray, theta: ndarray) -> None: ...
    @overload
    def predict(self, x: VectorLike) -> float64: ...
    @overload
    def predict(self, x: RDD[VectorLike]) -> RDD[float64]: ...
    def save(self, sc: SparkContext, path: str) -> None: ...
    @classmethod
    def load(cls, sc: SparkContext, path: str) -> NaiveBayesModel: ...

class NaiveBayes:
    @classmethod
    def train(cls, data: RDD[VectorLike], lambda_: float = ...) -> NaiveBayesModel: ...

class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm):
    stepSize: float
    numIterations: int
    regParam: float
    miniBatchFraction: float
    convergenceTol: float
    def __init__(
        self,
        stepSize: float = ...,
        numIterations: int = ...,
        miniBatchFraction: float = ...,
        regParam: float = ...,
        convergenceTol: float = ...,
    ) -> None: ...
    def setInitialWeights(
        self, initialWeights: VectorLike
    ) -> StreamingLogisticRegressionWithSGD: ...
    def trainOn(self, dstream: DStream[LabeledPoint]) -> None: ...
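As a usage sketch (not part of this file), here is how the `@overload` pairs above play out for a type checker: passing a single `VectorLike` yields a scalar prediction, while passing an `RDD[VectorLike]` yields an `RDD` of predictions. It assumes a local `SparkContext` and uses toy inline data:

```python
from pyspark import SparkContext
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from pyspark.mllib.regression import LabeledPoint

sc = SparkContext.getOrCreate()

# Two trivial training points, one per class.
points = sc.parallelize(
    [LabeledPoint(0.0, [0.0, 1.0]), LabeledPoint(1.0, [1.0, 0.0])]
)
model = LogisticRegressionWithLBFGS.train(points, iterations=10)

# First overload: VectorLike in, scalar Union[int, float] out.
single = model.predict([1.0, 0.0])

# Second overload: RDD[VectorLike] in, RDD[Union[int, float]] out.
batch = model.predict(points.map(lambda lp: lp.features))
print(single, batch.collect())
```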