spark-instrumented-optimizer/python/pyspark/context.pyi
zero323 665817bd4f [SPARK-33457][PYTHON] Adjust mypy configuration
### What changes were proposed in this pull request?

This pull request:

- Adds the following flags to the main mypy configuration:
  - [`strict_optional`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-strict_optional)
  - [`no_implicit_optional`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-no_implicit_optional)
  - [`disallow_untyped_defs`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-disallow_untyped_defs)

These flags are enabled only for public API and disabled for tests and internal modules.
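As a rough sketch of what such a split configuration looks like in `mypy.ini` (the module patterns below are illustrative, not the exact sections added by this PR):

```ini
[mypy]
strict_optional = True
no_implicit_optional = True
disallow_untyped_defs = True

# Relax the strictest rule for tests and internal modules
# (hypothetical module patterns, for illustration only):
[mypy-pyspark.tests.*]
disallow_untyped_defs = False

[mypy-pyspark.daemon]
disallow_untyped_defs = False
```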

Additionally, this PR fixes missing annotations.

### Why are the changes needed?

The primary reason for proposing these changes is to adopt the standard configuration used by the typeshed project. This will allow us to be stricter, especially when interacting with JVM code. See for example https://github.com/apache/spark/pull/29122#pullrequestreview-513112882

Additionally, it will allow us to detect cases where annotations have been unintentionally omitted.
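For illustration, here is the kind of mistake each flag surfaces (hypothetical functions, not PySpark code):

```python
from typing import Optional

def find_conf(key: str = None) -> str: ...
# no_implicit_optional: error — the None default makes the real type
# Optional[str], so the annotation must say so explicitly

def checkpoint_dir() -> Optional[str]: ...

def shout() -> str:
    return checkpoint_dir().upper()
# strict_optional: error — checkpoint_dir() may return None, so the
# .upper() call is flagged instead of being silently allowed

def helper(x):
    return x
# disallow_untyped_defs: error — this def has no annotations at all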

### Does this PR introduce _any_ user-facing change?

Annotations only.
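
For instance, with the updated stub below, a snippet like this (assuming a local pyspark installation) type-checks with the inferred element types shown in the comments:

```python
from pyspark import SparkContext

sc = SparkContext.getOrCreate()

nums = sc.parallelize([1, 2, 3])  # mypy infers RDD[int]
lines = sc.textFile("data.txt")   # RDD[str]

# getCheckpointDir() is typed Optional[str]; with strict_optional on,
# mypy requires a None check before the value is used as a str:
ckpt = sc.getCheckpointDir()
if ckpt is not None:
    print(ckpt.upper())
```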

### How was this patch tested?

`dev/lint-python`.

Closes #30382 from zero323/SPARK-33457.

Authored-by: zero323 <mszymkiewicz@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
2020-11-25 09:27:04 +09:00

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    NoReturn,
    Optional,
    Tuple,
    Type,
    TypeVar,
)
from types import TracebackType
from py4j.java_gateway import JavaGateway, JavaObject # type: ignore[import]
from pyspark.accumulators import Accumulator, AccumulatorParam
from pyspark.broadcast import Broadcast
from pyspark.conf import SparkConf
from pyspark.profiler import Profiler # noqa: F401
from pyspark.resource.information import ResourceInformation
from pyspark.rdd import RDD
from pyspark.serializers import Serializer
from pyspark.status import StatusTracker

T = TypeVar("T")
U = TypeVar("U")

class SparkContext:
    master: str
    appName: str
    sparkHome: str
    PACKAGE_EXTENSIONS: Iterable[str]
    def __init__(
        self,
        master: Optional[str] = ...,
        appName: Optional[str] = ...,
        sparkHome: Optional[str] = ...,
        pyFiles: Optional[List[str]] = ...,
        environment: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
        serializer: Serializer = ...,
        conf: Optional[SparkConf] = ...,
        gateway: Optional[JavaGateway] = ...,
        jsc: Optional[JavaObject] = ...,
        profiler_cls: type = ...,
    ) -> None: ...
    def __getnewargs__(self) -> NoReturn: ...
    def __enter__(self) -> SparkContext: ...
    def __exit__(
        self,
        type: Optional[Type[BaseException]],
        value: Optional[BaseException],
        trace: Optional[TracebackType],
    ) -> None: ...
    @classmethod
    def getOrCreate(cls, conf: Optional[SparkConf] = ...) -> SparkContext: ...
    def setLogLevel(self, logLevel: str) -> None: ...
    @classmethod
    def setSystemProperty(cls, key: str, value: str) -> None: ...
    @property
    def version(self) -> str: ...
    @property
    def applicationId(self) -> str: ...
    @property
    def uiWebUrl(self) -> str: ...
    @property
    def startTime(self) -> int: ...
    @property
    def defaultParallelism(self) -> int: ...
    @property
    def defaultMinPartitions(self) -> int: ...
    def stop(self) -> None: ...
    def emptyRDD(self) -> RDD[Any]: ...
    def range(
        self,
        start: int,
        end: Optional[int] = ...,
        step: int = ...,
        numSlices: Optional[int] = ...,
    ) -> RDD[int]: ...
    def parallelize(self, c: Iterable[T], numSlices: Optional[int] = ...) -> RDD[T]: ...
    def pickleFile(self, name: str, minPartitions: Optional[int] = ...) -> RDD[Any]: ...
    def textFile(
        self, name: str, minPartitions: Optional[int] = ..., use_unicode: bool = ...
    ) -> RDD[str]: ...
    def wholeTextFiles(
        self, path: str, minPartitions: Optional[int] = ..., use_unicode: bool = ...
    ) -> RDD[Tuple[str, str]]: ...
    def binaryFiles(
        self, path: str, minPartitions: Optional[int] = ...
    ) -> RDD[Tuple[str, bytes]]: ...
    def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]: ...
    def sequenceFile(
        self,
        path: str,
        keyClass: Optional[str] = ...,
        valueClass: Optional[str] = ...,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        minSplits: Optional[int] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
    def newAPIHadoopFile(
        self,
        path: str,
        inputFormatClass: str,
        keyClass: str,
        valueClass: str,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        conf: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
    def newAPIHadoopRDD(
        self,
        inputFormatClass: str,
        keyClass: str,
        valueClass: str,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        conf: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
    def hadoopFile(
        self,
        path: str,
        inputFormatClass: str,
        keyClass: str,
        valueClass: str,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        conf: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
    def hadoopRDD(
        self,
        inputFormatClass: str,
        keyClass: str,
        valueClass: str,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        conf: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
    def union(self, rdds: Iterable[RDD[T]]) -> RDD[T]: ...
    def broadcast(self, value: T) -> Broadcast[T]: ...
    def accumulator(
        self, value: T, accum_param: Optional[AccumulatorParam[T]] = ...
    ) -> Accumulator[T]: ...
    def addFile(self, path: str, recursive: bool = ...) -> None: ...
    def addPyFile(self, path: str) -> None: ...
    def setCheckpointDir(self, dirName: str) -> None: ...
    def getCheckpointDir(self) -> Optional[str]: ...
    def setJobGroup(
        self, groupId: str, description: str, interruptOnCancel: bool = ...
    ) -> None: ...
    def setLocalProperty(self, key: str, value: str) -> None: ...
    def getLocalProperty(self, key: str) -> Optional[str]: ...
    def sparkUser(self) -> str: ...
    def setJobDescription(self, value: str) -> None: ...
    def cancelJobGroup(self, groupId: str) -> None: ...
    def cancelAllJobs(self) -> None: ...
    def statusTracker(self) -> StatusTracker: ...
    def runJob(
        self,
        rdd: RDD[T],
        partitionFunc: Callable[[Iterable[T]], Iterable[U]],
        partitions: Optional[List[int]] = ...,
        allowLocal: bool = ...,
    ) -> List[U]: ...
    def show_profiles(self) -> None: ...
    def dump_profiles(self, path: str) -> None: ...
    def getConf(self) -> SparkConf: ...
    @property
    def resources(self) -> Dict[str, ResourceInformation]: ...