[SPARK-33002][PYTHON] Remove non-API annotations

### What changes were proposed in this pull request?

This PR:

- removes annotations for modules which are not part of the public API.
- removes `__init__.pyi` files if they contain no annotations beyond re-exports (see the sketch below).
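
For context, the export-only pattern targeted here looks like the sketch below, modeled on the removed `pyspark/resource/__init__.pyi` that appears in the diff further down (nothing else is assumed). Such a stub carries no type information of its own: every re-exported name stays fully typed by its defining module's stub, and the runtime `__init__.py` performs the same re-exports.

```python
# Sketch of an export-only __init__.pyi of the kind this PR removes.
# The "name as name" form marks an explicit re-export for mypy but adds
# no annotations; ResourceInformation stays typed by its own module stub.
from pyspark.resource.information import (  # noqa: F401
    ResourceInformation as ResourceInformation,
)
```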

### Why are the changes needed?

Primarily to reduce maintenance overhead, as requested in the review comments on https://github.com/apache/spark/pull/29591.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing tests and additional MyPy checks:

```
mypy --no-incremental --config python/mypy.ini python/pyspark
MYPYPATH=python/ mypy --no-incremental --config python/mypy.ini examples/src/main/python/ml examples/src/main/python/sql examples/src/main/python/sql/streaming
```
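
For illustration, a user-facing snippet like the hypothetical one below (file name and contents invented for this description, not part of the patch) should still type-check under the second command, since only stubs for internal modules were removed while the public API remains annotated:

```python
# hypothetical_check.py -- illustrative only; checked with, e.g.:
#   MYPYPATH=python/ mypy --no-incremental --config python/mypy.ini hypothetical_check.py
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.range(10).selectExpr("id * 2 AS doubled")

# reveal_type is interpreted by mypy only (it would raise NameError at runtime);
# mypy should report a pyspark.sql.dataframe.DataFrame here.
reveal_type(df)
```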

Closes #29879 from zero323/SPARK-33002.

Authored-by: zero323 <mszymkiewicz@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
parent 4e1ded67f8
commit 72da6f86cf
30 changed files with 14 additions and 1039 deletions

View file

@@ -32,5 +32,8 @@ ignore_missing_imports = True
 [mypy-pandas.*]
 ignore_missing_imports = True
-[mypy-pyarrow]
+[mypy-pyarrow.*]
 ignore_missing_imports = True
+[mypy-psutil.*]
+ignore_missing_imports = True

View file

@@ -1,27 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.

from typing import Any

__ALL__: Any

class _NoValueType:
    def __new__(cls): ...
    def __reduce__(self): ...

View file

@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
-from typing import Callable, Generic, Tuple, Type, TypeVar
+from typing import Callable, Dict, Generic, Tuple, Type, TypeVar
 import socketserver.BaseRequestHandler  # type: ignore
@@ -27,6 +27,8 @@ U = TypeVar("U", bound=SupportsIAdd)
 import socketserver as SocketServer
+_accumulatorRegistry: Dict[int, Accumulator]
 class Accumulator(Generic[T]):
     aid: int
     accum_param: AccumulatorParam[T]

View file

@@ -17,10 +17,12 @@
 # under the License.
 import threading
-from typing import Any, Generic, Optional, TypeVar
+from typing import Any, Dict, Generic, Optional, TypeVar
 T = TypeVar("T")
+_broadcastRegistry: Dict[int, Broadcast]
 class Broadcast(Generic[T]):
     def __init__(
         self,

View file

@@ -1,29 +0,0 @@
# (Apache License 2.0 header)
from pyspark.serializers import (  # noqa: F401
    UTF8Deserializer as UTF8Deserializer,
    read_int as read_int,
    write_int as write_int,
    write_with_length as write_with_length,
)
from typing import Any

def compute_real_exit_code(exit_code: Any): ...
def worker(sock: Any, authenticated: Any): ...
def manager() -> None: ...

View file

@@ -1,17 +0,0 @@
# (Apache License 2.0 header)

View file

@@ -1,24 +0,0 @@
# (Apache License 2.0 header)
from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_with_length as write_with_length # type: ignore[attr-defined]
from typing import Any, Optional
def launch_gateway(conf: Optional[Any] = ..., popen_kwargs: Optional[Any] = ...): ...
def local_connect_and_auth(port: Any, auth_secret: Any): ...
def ensure_callback_server_started(gw: Any) -> None: ...

View file

@@ -1,50 +0,0 @@
# (Apache License 2.0 header)
from typing import Hashable, Iterable, Optional, Tuple, TypeVar

from pyspark.resultiterable import ResultIterable
import pyspark.rdd

K = TypeVar("K", bound=Hashable)
V = TypeVar("V")
U = TypeVar("U")

def python_join(
    rdd: pyspark.rdd.RDD[Tuple[K, V]],
    other: pyspark.rdd.RDD[Tuple[K, U]],
    numPartitions: int,
) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, U]]]: ...
def python_right_outer_join(
    rdd: pyspark.rdd.RDD[Tuple[K, V]],
    other: pyspark.rdd.RDD[Tuple[K, U]],
    numPartitions: int,
) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, Optional[U]]]]: ...
def python_left_outer_join(
    rdd: pyspark.rdd.RDD[Tuple[K, V]],
    other: pyspark.rdd.RDD[Tuple[K, U]],
    numPartitions: int,
) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], U]]]: ...
def python_full_outer_join(
    rdd: pyspark.rdd.RDD[Tuple[K, V]],
    other: pyspark.rdd.RDD[Tuple[K, U]],
    numPartitions: int,
) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ...
def python_cogroup(
    rdds: Iterable[pyspark.rdd.RDD[Tuple[K, V]]], numPartitions: int
) -> pyspark.rdd.RDD[Tuple[K, Tuple[ResultIterable[V], ...]]]: ...

View file

@@ -1,45 +0,0 @@
# (Apache License 2.0 header)
from pyspark.ml import (  # noqa: F401
    classification as classification,
    clustering as clustering,
    evaluation as evaluation,
    feature as feature,
    fpm as fpm,
    image as image,
    linalg as linalg,
    param as param,
    recommendation as recommendation,
    regression as regression,
    stat as stat,
    tuning as tuning,
    util as util,
)
from pyspark.ml.base import (  # noqa: F401
    Estimator as Estimator,
    Model as Model,
    PredictionModel as PredictionModel,
    Predictor as Predictor,
    Transformer as Transformer,
    UnaryTransformer as UnaryTransformer,
)
from pyspark.ml.pipeline import (  # noqa: F401
    Pipeline as Pipeline,
    PipelineModel as PipelineModel,
)

View file

@@ -1,32 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.
# Names in __all__ with no definition:
# classification
# clustering
# feature
# fpm
# linalg
# random
# recommendation
# regression
# stat
# tree
# util

View file

@@ -1,54 +0,0 @@
# (Apache License 2.0 header)
from typing import Any, Dict, Iterator, Optional, Tuple, TypeVar

T = TypeVar("T")
U = TypeVar("U")
K = TypeVar("K")
V = TypeVar("V")

class RDDSamplerBase:
    def __init__(self, withReplacement: bool, seed: Optional[int] = ...) -> None: ...
    def initRandomGenerator(self, split: int) -> None: ...
    def getUniformSample(self) -> float: ...
    def getPoissonSample(self, mean: float) -> int: ...
    def func(self, split: int, iterator: Iterator[Any]) -> Iterator[Any]: ...

class RDDSampler(RDDSamplerBase):
    def __init__(
        self, withReplacement: bool, fraction: float, seed: Optional[int] = ...
    ) -> None: ...
    def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ...

class RDDRangeSampler(RDDSamplerBase):
    def __init__(
        self, lowerBound: T, upperBound: T, seed: Optional[Any] = ...
    ) -> None: ...
    def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ...

class RDDStratifiedSampler(RDDSamplerBase):
    def __init__(
        self,
        withReplacement: bool,
        fractions: Dict[K, float],
        seed: Optional[int] = ...,
    ) -> None: ...
    def func(
        self, split: int, iterator: Iterator[Tuple[K, V]]
    ) -> Iterator[Tuple[K, V]]: ...

View file

@@ -1,31 +0,0 @@
# (Apache License 2.0 header)
from pyspark.resource.information import (  # noqa: F401
    ResourceInformation as ResourceInformation,
)
from pyspark.resource.profile import (  # noqa: F401
    ResourceProfile as ResourceProfile,
    ResourceProfileBuilder as ResourceProfileBuilder,
)
from pyspark.resource.requests import (  # noqa: F401
    ExecutorResourceRequest as ExecutorResourceRequest,
    ExecutorResourceRequests as ExecutorResourceRequests,
    TaskResourceRequest as TaskResourceRequest,
    TaskResourceRequests as TaskResourceRequests,
)

View file

@@ -342,7 +342,7 @@ class NoOpSerializer(FramedSerializer):
 # Hack namedtuple, make it picklable
-__cls = {}
+__cls = {}  # type: ignore
 def _restore(name, fields, value):

View file

@@ -1,122 +0,0 @@
# (Apache License 2.0 header)
from typing import Any

class SpecialLengths:
    END_OF_DATA_SECTION: int = ...
    PYTHON_EXCEPTION_THROWN: int = ...
    TIMING_DATA: int = ...
    END_OF_STREAM: int = ...
    NULL: int = ...
    START_ARROW_STREAM: int = ...

class Serializer:
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any) -> None: ...
    def __eq__(self, other: Any) -> Any: ...
    def __ne__(self, other: Any) -> Any: ...
    def __hash__(self) -> Any: ...

class FramedSerializer(Serializer):
    def __init__(self) -> None: ...
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any) -> None: ...
    def dumps(self, obj: Any) -> None: ...
    def loads(self, obj: Any) -> None: ...

class BatchedSerializer(Serializer):
    UNLIMITED_BATCH_SIZE: int = ...
    UNKNOWN_BATCH_SIZE: int = ...
    serializer: Any = ...
    batchSize: Any = ...
    def __init__(self, serializer: Any, batchSize: Any = ...) -> None: ...
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any): ...

class FlattenedValuesSerializer(BatchedSerializer):
    def __init__(self, serializer: Any, batchSize: int = ...) -> None: ...
    def load_stream(self, stream: Any): ...

class AutoBatchedSerializer(BatchedSerializer):
    bestSize: Any = ...
    def __init__(self, serializer: Any, bestSize: Any = ...) -> None: ...
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...

class CartesianDeserializer(Serializer):
    key_ser: Any = ...
    val_ser: Any = ...
    def __init__(self, key_ser: Any, val_ser: Any) -> None: ...
    def load_stream(self, stream: Any): ...

class PairDeserializer(Serializer):
    key_ser: Any = ...
    val_ser: Any = ...
    def __init__(self, key_ser: Any, val_ser: Any) -> None: ...
    def load_stream(self, stream: Any): ...

class NoOpSerializer(FramedSerializer):
    def loads(self, obj: Any): ...
    def dumps(self, obj: Any): ...

class PickleSerializer(FramedSerializer):
    def dumps(self, obj: Any): ...
    def loads(self, obj: Any, encoding: str = ...): ...

class CloudPickleSerializer(PickleSerializer):
    def dumps(self, obj: Any): ...

class MarshalSerializer(FramedSerializer):
    def dumps(self, obj: Any): ...
    def loads(self, obj: Any): ...

class AutoSerializer(FramedSerializer):
    def __init__(self) -> None: ...
    def dumps(self, obj: Any): ...
    def loads(self, obj: Any): ...

class CompressedSerializer(FramedSerializer):
    serializer: Any = ...
    def __init__(self, serializer: Any) -> None: ...
    def dumps(self, obj: Any): ...
    def loads(self, obj: Any): ...

class UTF8Deserializer(Serializer):
    use_unicode: Any = ...
    def __init__(self, use_unicode: bool = ...) -> None: ...
    def loads(self, stream: Any): ...
    def load_stream(self, stream: Any) -> None: ...

class ChunkedStream:
    buffer_size: Any = ...
    buffer: Any = ...
    current_pos: int = ...
    wrapped: Any = ...
    def __init__(self, wrapped: Any, buffer_size: Any) -> None: ...
    def write(self, bytes: Any) -> None: ...
    def close(self) -> None: ...
    @property
    def closed(self): ...

def write_with_length(obj: Any, stream: Any): ...
def pack_long(value): ...
def read_int(stream): ...
def read_long(stream): ...
def read_bool(stream): ...
def write_int(value, stream): ...
def write_long(value, stream): ...

View file

@@ -32,10 +32,10 @@ from pyspark.sql import SparkSession
 if os.environ.get("SPARK_EXECUTOR_URI"):
     SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"])
-SparkContext._ensure_initialized()
+SparkContext._ensure_initialized()  # type: ignore
 try:
-    spark = SparkSession._create_shell_session()
+    spark = SparkSession._create_shell_session()  # type: ignore
 except Exception:
     import sys
     import traceback

View file

@@ -1,31 +0,0 @@
# (Apache License 2.0 header)
from pyspark import SparkConf as SparkConf  # noqa: F401
from pyspark.context import SparkContext as SparkContext
from pyspark.sql import SQLContext as SQLContext, SparkSession as SparkSession
from typing import Any, Callable
from pyspark.sql.dataframe import DataFrame

spark: SparkSession
sc: SparkContext
sql: Callable[[str], DataFrame]
sqlContext: SQLContext
sqlCtx: SQLContext
code: Any

View file

@@ -1,109 +0,0 @@
# (Apache License 2.0 header)
from pyspark.serializers import (  # noqa: F401
    AutoBatchedSerializer as AutoBatchedSerializer,
    BatchedSerializer as BatchedSerializer,
    CompressedSerializer as CompressedSerializer,
    FlattenedValuesSerializer as FlattenedValuesSerializer,
    PickleSerializer as PickleSerializer,
)
from pyspark.util import fail_on_stopiteration as fail_on_stopiteration  # noqa: F401
from typing import Any, Optional

process: Any

def get_used_memory(): ...

MemoryBytesSpilled: int
DiskBytesSpilled: int

class Aggregator:
    createCombiner: Any = ...
    mergeValue: Any = ...
    mergeCombiners: Any = ...
    def __init__(
        self, createCombiner: Any, mergeValue: Any, mergeCombiners: Any
    ) -> None: ...

class SimpleAggregator(Aggregator):
    def __init__(self, combiner: Any): ...

class Merger:
    agg: Any = ...
    def __init__(self, aggregator: Any) -> None: ...
    def mergeValues(self, iterator: Any) -> None: ...
    def mergeCombiners(self, iterator: Any) -> None: ...
    def items(self) -> None: ...

class ExternalMerger(Merger):
    MAX_TOTAL_PARTITIONS: int = ...
    memory_limit: Any = ...
    serializer: Any = ...
    localdirs: Any = ...
    partitions: Any = ...
    batch: Any = ...
    scale: Any = ...
    data: Any = ...
    pdata: Any = ...
    spills: int = ...
    def __init__(
        self,
        aggregator: Any,
        memory_limit: int = ...,
        serializer: Optional[Any] = ...,
        localdirs: Optional[Any] = ...,
        scale: int = ...,
        partitions: int = ...,
        batch: int = ...,
    ) -> None: ...
    def mergeValues(self, iterator: Any) -> None: ...
    def mergeCombiners(self, iterator: Any, limit: Optional[Any] = ...) -> None: ...
    def items(self): ...

class ExternalSorter:
    memory_limit: Any = ...
    local_dirs: Any = ...
    serializer: Any = ...
    def __init__(self, memory_limit: Any, serializer: Optional[Any] = ...) -> None: ...
    def sorted(self, iterator: Any, key: Optional[Any] = ..., reverse: bool = ...): ...

class ExternalList:
    LIMIT: int = ...
    values: Any = ...
    count: Any = ...
    def __init__(self, values: Any) -> None: ...
    def __iter__(self) -> Any: ...
    def __len__(self): ...
    def append(self, value: Any) -> None: ...
    def __del__(self) -> None: ...

class ExternalListOfList(ExternalList):
    count: Any = ...
    def __init__(self, values: Any) -> None: ...
    def append(self, value: Any) -> None: ...
    def __iter__(self) -> Any: ...

class GroupByKey:
    iterator: Any = ...
    def __init__(self, iterator: Any) -> None: ...
    def __iter__(self) -> Any: ...

class ExternalGroupBy(ExternalMerger):
    SORT_KEY_LIMIT: int = ...
    def flattened_serializer(self): ...

View file

@@ -1,22 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.
# Names in __all__ with no definition:
# functions

View file

@@ -1,17 +0,0 @@
# (Apache License 2.0 header)

View file

@@ -1,65 +0,0 @@
# (Apache License 2.0 header)
from pyspark.serializers import (  # noqa: F401
    Serializer as Serializer,
    UTF8Deserializer as UTF8Deserializer,
    read_int as read_int,
    write_int as write_int,
)
from typing import Any

class SpecialLengths:
    END_OF_DATA_SECTION: int = ...
    PYTHON_EXCEPTION_THROWN: int = ...
    TIMING_DATA: int = ...
    END_OF_STREAM: int = ...
    NULL: int = ...
    START_ARROW_STREAM: int = ...

class ArrowCollectSerializer(Serializer):
    serializer: Any = ...
    def __init__(self) -> None: ...
    def dump_stream(self, iterator: Any, stream: Any): ...
    def load_stream(self, stream: Any) -> None: ...

class ArrowStreamSerializer(Serializer):
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any) -> None: ...

class ArrowStreamPandasSerializer(ArrowStreamSerializer):
    def __init__(
        self, timezone: Any, safecheck: Any, assign_cols_by_name: Any
    ) -> None: ...
    def arrow_to_pandas(self, arrow_column: Any): ...
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any) -> None: ...

class ArrowStreamPandasUDFSerializer(ArrowStreamPandasSerializer):
    def __init__(
        self,
        timezone: Any,
        safecheck: Any,
        assign_cols_by_name: Any,
        df_for_struct: bool = ...,
    ) -> None: ...
    def arrow_to_pandas(self, arrow_column: Any): ...
    def dump_stream(self, iterator: Any, stream: Any): ...

class CogroupUDFSerializer(ArrowStreamPandasUDFSerializer):
    def load_stream(self, stream: Any) -> None: ...

View file

@@ -1,33 +0,0 @@
# (Apache License 2.0 header)
from pyspark.sql.pandas.utils import (  # noqa: F401
    require_minimum_pandas_version as require_minimum_pandas_version,
)
from typing import Any, Optional

def infer_eval_type(sig: Any): ...
def check_tuple_annotation(
    annotation: Any, parameter_check_func: Optional[Any] = ...
): ...
def check_iterator_annotation(
    annotation: Any, parameter_check_func: Optional[Any] = ...
): ...
def check_union_annotation(
    annotation: Any, parameter_check_func: Optional[Any] = ...
): ...

View file

@@ -1,41 +0,0 @@
# (Apache License 2.0 header)
from pyspark.sql.types import (  # noqa: F401
    ArrayType as ArrayType,
    BinaryType as BinaryType,
    BooleanType as BooleanType,
    ByteType as ByteType,
    DateType as DateType,
    DecimalType as DecimalType,
    DoubleType as DoubleType,
    FloatType as FloatType,
    IntegerType as IntegerType,
    LongType as LongType,
    ShortType as ShortType,
    StringType as StringType,
    StructField as StructField,
    StructType as StructType,
    TimestampType as TimestampType,
)
from typing import Any

def to_arrow_type(dt: Any): ...
def to_arrow_schema(schema: Any): ...
def from_arrow_type(at: Any): ...
def from_arrow_schema(arrow_schema: Any): ...

View file

@@ -1,20 +0,0 @@
# (Apache License 2.0 header)
def require_minimum_pandas_version() -> None: ...
def require_minimum_pyarrow_version() -> None: ...

View file

@@ -1,55 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.

from pyspark import SparkContext as SparkContext  # noqa: F401
from typing import Any, Optional

class CapturedException(Exception):
    desc: Any = ...
    stackTrace: Any = ...
    cause: Any = ...
    def __init__(
        self, desc: Any, stackTrace: Any, cause: Optional[Any] = ...
    ) -> None: ...

class AnalysisException(CapturedException): ...
class ParseException(CapturedException): ...
class IllegalArgumentException(CapturedException): ...
class StreamingQueryException(CapturedException): ...
class QueryExecutionException(CapturedException): ...
class PythonException(CapturedException): ...
class UnknownException(CapturedException): ...

def convert_exception(e: Any): ...
def capture_sql_exception(f: Any): ...
def install_exception_handler() -> None: ...
def toJArray(gateway: Any, jtype: Any, arr: Any): ...
def require_test_compiled() -> None: ...

class ForeachBatchFunction:
    sql_ctx: Any = ...
    func: Any = ...
    def __init__(self, sql_ctx: Any, func: Any) -> None: ...
    error: Any = ...
    def call(self, jdf: Any, batch_id: Any) -> None: ...
    class Java:
        implements: Any = ...

def to_str(value: Any): ...

View file

@@ -1,23 +0,0 @@
# (Apache License 2.0 header)
from pyspark.streaming.context import StreamingContext as StreamingContext  # noqa: F401
from pyspark.streaming.dstream import DStream as DStream  # noqa: F401
from pyspark.streaming.listener import (  # noqa: F401
    StreamingListener as StreamingListener,
)

View file

@@ -1,48 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.

from typing import Any, Optional

class TransformFunction:
    ctx: Any
    func: Any
    deserializers: Any
    rdd_wrap_func: Any
    failure: Any
    def __init__(self, ctx, func, *deserializers) -> None: ...
    def rdd_wrapper(self, func): ...
    def call(self, milliseconds, jrdds): ...
    def getLastFailure(self): ...
    class Java:
        implements: Any

class TransformFunctionSerializer:
    ctx: Any
    serializer: Any
    gateway: Any
    failure: Any
    def __init__(self, ctx, serializer, gateway: Optional[Any] = ...) -> None: ...
    def dumps(self, id): ...
    def loads(self, data): ...
    def getLastFailure(self): ...
    class Java:
        implements: Any

def rddToFileName(prefix, suffix, timestamp): ...

View file

@@ -1,29 +0,0 @@
# (Apache License 2.0 header)
from collections import namedtuple
from typing import Any

CallSite = namedtuple("CallSite", "function file linenum")

def first_spark_call(): ...

class SCCallSiteSync:
    def __init__(self, sc: Any) -> None: ...
    def __enter__(self) -> None: ...
    def __exit__(self, type: Any, value: Any, tb: Any) -> None: ...

View file

@@ -23,7 +23,7 @@ import traceback
 from py4j.clientserver import ClientServer
-__all__ = []
+__all__ = []  # type: ignore
 def print_exec(stream):

View file

@@ -1,35 +0,0 @@
# (Apache License 2.0 header)
from typing import Any, Tuple
from pyspark._typing import F
import threading

def print_exec(stream: Any) -> None: ...

class VersionUtils:
    @staticmethod
    def majorMinorVersion(sparkVersion: str) -> Tuple[int, int]: ...

def fail_on_stopiteration(f: F) -> F: ...

class InheritableThread(threading.Thread):
    def __init__(self, target: Any, *args: Any, **kwargs: Any): ...
    def __del__(self) -> None: ...

View file

@@ -1,73 +0,0 @@
# (Apache License 2.0 header)
from pyspark import shuffle as shuffle
from pyspark.broadcast import Broadcast as Broadcast
from pyspark.files import SparkFiles as SparkFiles
from pyspark.java_gateway import local_connect_and_auth as local_connect_and_auth
from pyspark.rdd import PythonEvalType as PythonEvalType
from pyspark.resource import ResourceInformation as ResourceInformation
from pyspark.serializers import (
    BatchedSerializer as BatchedSerializer,
    PickleSerializer as PickleSerializer,
    SpecialLengths as SpecialLengths,
    UTF8Deserializer as UTF8Deserializer,
    read_bool as read_bool,
    read_int as read_int,
    read_long as read_long,
    write_int as write_int,
    write_long as write_long,
    write_with_length as write_with_length,
)
from pyspark.sql.pandas.serializers import (
    ArrowStreamPandasUDFSerializer as ArrowStreamPandasUDFSerializer,
    CogroupUDFSerializer as CogroupUDFSerializer,
)
from pyspark.sql.pandas.types import to_arrow_type as to_arrow_type
from pyspark.sql.types import StructType as StructType
from pyspark.taskcontext import (
    BarrierTaskContext as BarrierTaskContext,
    TaskContext as TaskContext,
)
from pyspark.util import fail_on_stopiteration as fail_on_stopiteration
from typing import Any

has_resource_module: bool
pickleSer: Any
utf8_deserializer: Any

def report_times(outfile: Any, boot: Any, init: Any, finish: Any) -> None: ...
def add_path(path: Any) -> None: ...
def read_command(serializer: Any, file: Any): ...
def chain(f: Any, g: Any): ...
def wrap_udf(f: Any, return_type: Any): ...
def wrap_scalar_pandas_udf(f: Any, return_type: Any): ...
def wrap_pandas_iter_udf(f: Any, return_type: Any): ...
def wrap_cogrouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ...
def wrap_grouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ...
def wrap_grouped_agg_pandas_udf(f: Any, return_type: Any): ...
def wrap_window_agg_pandas_udf(
    f: Any, return_type: Any, runner_conf: Any, udf_index: Any
): ...
def wrap_unbounded_window_agg_pandas_udf(f: Any, return_type: Any): ...
def wrap_bounded_window_agg_pandas_udf(f: Any, return_type: Any): ...
def read_single_udf(
    pickleSer: Any, infile: Any, eval_type: Any, runner_conf: Any, udf_index: Any
): ...
def read_udfs(pickleSer: Any, infile: Any, eval_type: Any): ...
def main(infile: Any, outfile: Any) -> None: ...