[SPARK-33002][PYTHON] Remove non-API annotations

### What changes were proposed in this pull request?

This PR:

- removes annotations for modules which are not part of the public API.
- removes `__init__.pyi` files if they contain no annotations beyond re-exports (see the sketch below).
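
For context, the export-only pattern targeted here looks like the sketch below, modeled on the removed `pyspark/resource/__init__.pyi` that appears in the diff further down (nothing else is assumed). Such a stub carries no type information of its own: every re-exported name stays fully typed by its defining module's stub, and the runtime `__init__.py` performs the same re-exports.

```python
# Sketch of an export-only __init__.pyi of the kind this PR removes.
# The "name as name" form marks an explicit re-export for mypy but adds
# no annotations; ResourceInformation stays typed by its own module stub.
from pyspark.resource.information import (  # noqa: F401
    ResourceInformation as ResourceInformation,
)
```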

### Why are the changes needed?

Primarily to reduce maintenance overhead, as requested in the review comments on https://github.com/apache/spark/pull/29591.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing tests and additional MyPy checks:

```
mypy --no-incremental --config python/mypy.ini python/pyspark
MYPYPATH=python/ mypy --no-incremental --config python/mypy.ini examples/src/main/python/ml examples/src/main/python/sql examples/src/main/python/sql/streaming
```
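
For illustration, a user-facing snippet like the hypothetical one below (file name and contents invented for this description, not part of the patch) should still type-check under the second command, since only stubs for internal modules were removed while the public API remains annotated:

```python
# hypothetical_check.py -- illustrative only; checked with, e.g.:
#   MYPYPATH=python/ mypy --no-incremental --config python/mypy.ini hypothetical_check.py
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.range(10).selectExpr("id * 2 AS doubled")

# reveal_type is interpreted by mypy only (it would raise NameError at runtime);
# mypy should report a pyspark.sql.dataframe.DataFrame here.
reveal_type(df)
```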

Closes #29879 from zero323/SPARK-33002.

Authored-by: zero323 <mszymkiewicz@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
parent 4e1ded67f8
commit 72da6f86cf
30 changed files with 14 additions and 1039 deletions

View file

@@ -32,5 +32,8 @@ ignore_missing_imports = True
 [mypy-pandas.*]
 ignore_missing_imports = True
-[mypy-pyarrow]
+[mypy-pyarrow.*]
 ignore_missing_imports = True
+[mypy-psutil.*]
+ignore_missing_imports = True

View file

@@ -1,27 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.

from typing import Any

__ALL__: Any

class _NoValueType:
    def __new__(cls): ...
    def __reduce__(self): ...

View file

@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
-from typing import Callable, Generic, Tuple, Type, TypeVar
+from typing import Callable, Dict, Generic, Tuple, Type, TypeVar
 import socketserver.BaseRequestHandler  # type: ignore
@@ -27,6 +27,8 @@ U = TypeVar("U", bound=SupportsIAdd)
 import socketserver as SocketServer
+_accumulatorRegistry: Dict[int, Accumulator]
 class Accumulator(Generic[T]):
     aid: int
     accum_param: AccumulatorParam[T]

View file

@@ -17,10 +17,12 @@
 # under the License.
 import threading
-from typing import Any, Generic, Optional, TypeVar
+from typing import Any, Dict, Generic, Optional, TypeVar
 T = TypeVar("T")
+_broadcastRegistry: Dict[int, Broadcast]
 class Broadcast(Generic[T]):
     def __init__(
         self,

View file

@@ -1,29 +0,0 @@
# (Apache License 2.0 header)
from pyspark.serializers import (  # noqa: F401
    UTF8Deserializer as UTF8Deserializer,
    read_int as read_int,
    write_int as write_int,
    write_with_length as write_with_length,
)
from typing import Any

def compute_real_exit_code(exit_code: Any): ...
def worker(sock: Any, authenticated: Any): ...
def manager() -> None: ...

View file

@@ -1,17 +0,0 @@
# (Apache License 2.0 header)

View file

@@ -1,24 +0,0 @@
# (Apache License 2.0 header)
from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_with_length as write_with_length # type: ignore[attr-defined]
from typing import Any, Optional
def launch_gateway(conf: Optional[Any] = ..., popen_kwargs: Optional[Any] = ...): ...
def local_connect_and_auth(port: Any, auth_secret: Any): ...
def ensure_callback_server_started(gw: Any) -> None: ...

View file

@@ -1,50 +0,0 @@
# (Apache License 2.0 header)
from typing import Hashable, Iterable, Optional, Tuple, TypeVar

from pyspark.resultiterable import ResultIterable
import pyspark.rdd

K = TypeVar("K", bound=Hashable)
V = TypeVar("V")
U = TypeVar("U")

def python_join(
    rdd: pyspark.rdd.RDD[Tuple[K, V]],
    other: pyspark.rdd.RDD[Tuple[K, U]],
    numPartitions: int,
) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, U]]]: ...
def python_right_outer_join(
    rdd: pyspark.rdd.RDD[Tuple[K, V]],
    other: pyspark.rdd.RDD[Tuple[K, U]],
    numPartitions: int,
) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, Optional[U]]]]: ...
def python_left_outer_join(
    rdd: pyspark.rdd.RDD[Tuple[K, V]],
    other: pyspark.rdd.RDD[Tuple[K, U]],
    numPartitions: int,
) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], U]]]: ...
def python_full_outer_join(
    rdd: pyspark.rdd.RDD[Tuple[K, V]],
    other: pyspark.rdd.RDD[Tuple[K, U]],
    numPartitions: int,
) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ...
def python_cogroup(
    rdds: Iterable[pyspark.rdd.RDD[Tuple[K, V]]], numPartitions: int
) -> pyspark.rdd.RDD[Tuple[K, Tuple[ResultIterable[V], ...]]]: ...

View file

@@ -1,45 +0,0 @@
# (Apache License 2.0 header)
from pyspark.ml import (  # noqa: F401
    classification as classification,
    clustering as clustering,
    evaluation as evaluation,
    feature as feature,
    fpm as fpm,
    image as image,
    linalg as linalg,
    param as param,
    recommendation as recommendation,
    regression as regression,
    stat as stat,
    tuning as tuning,
    util as util,
)
from pyspark.ml.base import (  # noqa: F401
    Estimator as Estimator,
    Model as Model,
    PredictionModel as PredictionModel,
    Predictor as Predictor,
    Transformer as Transformer,
    UnaryTransformer as UnaryTransformer,
)
from pyspark.ml.pipeline import (  # noqa: F401
    Pipeline as Pipeline,
    PipelineModel as PipelineModel,
)

View file

@@ -1,32 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.
# Names in __all__ with no definition:
# classification
# clustering
# feature
# fpm
# linalg
# random
# recommendation
# regression
# stat
# tree
# util

View file

@@ -1,54 +0,0 @@
# (Apache License 2.0 header)
from typing import Any, Dict, Iterator, Optional, Tuple, TypeVar

T = TypeVar("T")
U = TypeVar("U")
K = TypeVar("K")
V = TypeVar("V")

class RDDSamplerBase:
    def __init__(self, withReplacement: bool, seed: Optional[int] = ...) -> None: ...
    def initRandomGenerator(self, split: int) -> None: ...
    def getUniformSample(self) -> float: ...
    def getPoissonSample(self, mean: float) -> int: ...
    def func(self, split: int, iterator: Iterator[Any]) -> Iterator[Any]: ...

class RDDSampler(RDDSamplerBase):
    def __init__(
        self, withReplacement: bool, fraction: float, seed: Optional[int] = ...
    ) -> None: ...
    def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ...

class RDDRangeSampler(RDDSamplerBase):
    def __init__(
        self, lowerBound: T, upperBound: T, seed: Optional[Any] = ...
    ) -> None: ...
    def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ...

class RDDStratifiedSampler(RDDSamplerBase):
    def __init__(
        self,
        withReplacement: bool,
        fractions: Dict[K, float],
        seed: Optional[int] = ...,
    ) -> None: ...
    def func(
        self, split: int, iterator: Iterator[Tuple[K, V]]
    ) -> Iterator[Tuple[K, V]]: ...

View file

@@ -1,31 +0,0 @@
# (Apache License 2.0 header)
from pyspark.resource.information import (  # noqa: F401
    ResourceInformation as ResourceInformation,
)
from pyspark.resource.profile import (  # noqa: F401
    ResourceProfile as ResourceProfile,
    ResourceProfileBuilder as ResourceProfileBuilder,
)
from pyspark.resource.requests import (  # noqa: F401
    ExecutorResourceRequest as ExecutorResourceRequest,
    ExecutorResourceRequests as ExecutorResourceRequests,
    TaskResourceRequest as TaskResourceRequest,
    TaskResourceRequests as TaskResourceRequests,
)

View file

@@ -342,7 +342,7 @@ class NoOpSerializer(FramedSerializer):
 # Hack namedtuple, make it picklable
-__cls = {}
+__cls = {}  # type: ignore
 def _restore(name, fields, value):

View file

@@ -1,122 +0,0 @@
# (Apache License 2.0 header)
from typing import Any

class SpecialLengths:
    END_OF_DATA_SECTION: int = ...
    PYTHON_EXCEPTION_THROWN: int = ...
    TIMING_DATA: int = ...
    END_OF_STREAM: int = ...
    NULL: int = ...
    START_ARROW_STREAM: int = ...

class Serializer:
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any) -> None: ...
    def __eq__(self, other: Any) -> Any: ...
    def __ne__(self, other: Any) -> Any: ...
    def __hash__(self) -> Any: ...

class FramedSerializer(Serializer):
    def __init__(self) -> None: ...
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any) -> None: ...
    def dumps(self, obj: Any) -> None: ...
    def loads(self, obj: Any) -> None: ...

class BatchedSerializer(Serializer):
    UNLIMITED_BATCH_SIZE: int = ...
    UNKNOWN_BATCH_SIZE: int = ...
    serializer: Any = ...
    batchSize: Any = ...
    def __init__(self, serializer: Any, batchSize: Any = ...) -> None: ...
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any): ...

class FlattenedValuesSerializer(BatchedSerializer):
    def __init__(self, serializer: Any, batchSize: int = ...) -> None: ...
    def load_stream(self, stream: Any): ...

class AutoBatchedSerializer(BatchedSerializer):
    bestSize: Any = ...
    def __init__(self, serializer: Any, bestSize: Any = ...) -> None: ...
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...

class CartesianDeserializer(Serializer):
    key_ser: Any = ...
    val_ser: Any = ...
    def __init__(self, key_ser: Any, val_ser: Any) -> None: ...
    def load_stream(self, stream: Any): ...

class PairDeserializer(Serializer):
    key_ser: Any = ...
    val_ser: Any = ...
    def __init__(self, key_ser: Any, val_ser: Any) -> None: ...
    def load_stream(self, stream: Any): ...

class NoOpSerializer(FramedSerializer):
    def loads(self, obj: Any): ...
    def dumps(self, obj: Any): ...

class PickleSerializer(FramedSerializer):
    def dumps(self, obj: Any): ...
    def loads(self, obj: Any, encoding: str = ...): ...

class CloudPickleSerializer(PickleSerializer):
    def dumps(self, obj: Any): ...

class MarshalSerializer(FramedSerializer):
    def dumps(self, obj: Any): ...
    def loads(self, obj: Any): ...

class AutoSerializer(FramedSerializer):
    def __init__(self) -> None: ...
    def dumps(self, obj: Any): ...
    def loads(self, obj: Any): ...

class CompressedSerializer(FramedSerializer):
    serializer: Any = ...
    def __init__(self, serializer: Any) -> None: ...
    def dumps(self, obj: Any): ...
    def loads(self, obj: Any): ...

class UTF8Deserializer(Serializer):
    use_unicode: Any = ...
    def __init__(self, use_unicode: bool = ...) -> None: ...
    def loads(self, stream: Any): ...
    def load_stream(self, stream: Any) -> None: ...

class ChunkedStream:
    buffer_size: Any = ...
    buffer: Any = ...
    current_pos: int = ...
    wrapped: Any = ...
    def __init__(self, wrapped: Any, buffer_size: Any) -> None: ...
    def write(self, bytes: Any) -> None: ...
    def close(self) -> None: ...
    @property
    def closed(self): ...

def write_with_length(obj: Any, stream: Any): ...
def pack_long(value): ...
def read_int(stream): ...
def read_long(stream): ...
def read_bool(stream): ...
def write_int(value, stream): ...
def write_long(value, stream): ...

View file

@@ -32,10 +32,10 @@ from pyspark.sql import SparkSession
 if os.environ.get("SPARK_EXECUTOR_URI"):
     SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"])
-SparkContext._ensure_initialized()
+SparkContext._ensure_initialized()  # type: ignore
 try:
-    spark = SparkSession._create_shell_session()
+    spark = SparkSession._create_shell_session()  # type: ignore
 except Exception:
     import sys
     import traceback

View file

@@ -1,31 +0,0 @@
# (Apache License 2.0 header)
from pyspark import SparkConf as SparkConf  # noqa: F401
from pyspark.context import SparkContext as SparkContext
from pyspark.sql import SQLContext as SQLContext, SparkSession as SparkSession
from typing import Any, Callable
from pyspark.sql.dataframe import DataFrame

spark: SparkSession
sc: SparkContext
sql: Callable[[str], DataFrame]
sqlContext: SQLContext
sqlCtx: SQLContext
code: Any

View file

@@ -1,109 +0,0 @@
# (Apache License 2.0 header)
from pyspark.serializers import (  # noqa: F401
    AutoBatchedSerializer as AutoBatchedSerializer,
    BatchedSerializer as BatchedSerializer,
    CompressedSerializer as CompressedSerializer,
    FlattenedValuesSerializer as FlattenedValuesSerializer,
    PickleSerializer as PickleSerializer,
)
from pyspark.util import fail_on_stopiteration as fail_on_stopiteration  # noqa: F401
from typing import Any, Optional

process: Any

def get_used_memory(): ...

MemoryBytesSpilled: int
DiskBytesSpilled: int

class Aggregator:
    createCombiner: Any = ...
    mergeValue: Any = ...
    mergeCombiners: Any = ...
    def __init__(
        self, createCombiner: Any, mergeValue: Any, mergeCombiners: Any
    ) -> None: ...

class SimpleAggregator(Aggregator):
    def __init__(self, combiner: Any): ...

class Merger:
    agg: Any = ...
    def __init__(self, aggregator: Any) -> None: ...
    def mergeValues(self, iterator: Any) -> None: ...
    def mergeCombiners(self, iterator: Any) -> None: ...
    def items(self) -> None: ...

class ExternalMerger(Merger):
    MAX_TOTAL_PARTITIONS: int = ...
    memory_limit: Any = ...
    serializer: Any = ...
    localdirs: Any = ...
    partitions: Any = ...
    batch: Any = ...
    scale: Any = ...
    data: Any = ...
    pdata: Any = ...
    spills: int = ...
    def __init__(
        self,
        aggregator: Any,
        memory_limit: int = ...,
        serializer: Optional[Any] = ...,
        localdirs: Optional[Any] = ...,
        scale: int = ...,
        partitions: int = ...,
        batch: int = ...,
    ) -> None: ...
    def mergeValues(self, iterator: Any) -> None: ...
    def mergeCombiners(self, iterator: Any, limit: Optional[Any] = ...) -> None: ...
    def items(self): ...

class ExternalSorter:
    memory_limit: Any = ...
    local_dirs: Any = ...
    serializer: Any = ...
    def __init__(self, memory_limit: Any, serializer: Optional[Any] = ...) -> None: ...
    def sorted(self, iterator: Any, key: Optional[Any] = ..., reverse: bool = ...): ...

class ExternalList:
    LIMIT: int = ...
    values: Any = ...
    count: Any = ...
    def __init__(self, values: Any) -> None: ...
    def __iter__(self) -> Any: ...
    def __len__(self): ...
    def append(self, value: Any) -> None: ...
    def __del__(self) -> None: ...

class ExternalListOfList(ExternalList):
    count: Any = ...
    def __init__(self, values: Any) -> None: ...
    def append(self, value: Any) -> None: ...
    def __iter__(self) -> Any: ...

class GroupByKey:
    iterator: Any = ...
    def __init__(self, iterator: Any) -> None: ...
    def __iter__(self) -> Any: ...

class ExternalGroupBy(ExternalMerger):
    SORT_KEY_LIMIT: int = ...
    def flattened_serializer(self): ...

View file

@@ -1,22 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.
# Names in __all__ with no definition:
# functions

View file

@@ -1,17 +0,0 @@
# (Apache License 2.0 header)

View file

@@ -1,65 +0,0 @@
# (Apache License 2.0 header)
from pyspark.serializers import (  # noqa: F401
    Serializer as Serializer,
    UTF8Deserializer as UTF8Deserializer,
    read_int as read_int,
    write_int as write_int,
)
from typing import Any

class SpecialLengths:
    END_OF_DATA_SECTION: int = ...
    PYTHON_EXCEPTION_THROWN: int = ...
    TIMING_DATA: int = ...
    END_OF_STREAM: int = ...
    NULL: int = ...
    START_ARROW_STREAM: int = ...

class ArrowCollectSerializer(Serializer):
    serializer: Any = ...
    def __init__(self) -> None: ...
    def dump_stream(self, iterator: Any, stream: Any): ...
    def load_stream(self, stream: Any) -> None: ...

class ArrowStreamSerializer(Serializer):
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any) -> None: ...

class ArrowStreamPandasSerializer(ArrowStreamSerializer):
    def __init__(
        self, timezone: Any, safecheck: Any, assign_cols_by_name: Any
    ) -> None: ...
    def arrow_to_pandas(self, arrow_column: Any): ...
    def dump_stream(self, iterator: Any, stream: Any) -> None: ...
    def load_stream(self, stream: Any) -> None: ...

class ArrowStreamPandasUDFSerializer(ArrowStreamPandasSerializer):
    def __init__(
        self,
        timezone: Any,
        safecheck: Any,
        assign_cols_by_name: Any,
        df_for_struct: bool = ...,
    ) -> None: ...
    def arrow_to_pandas(self, arrow_column: Any): ...
    def dump_stream(self, iterator: Any, stream: Any): ...

class CogroupUDFSerializer(ArrowStreamPandasUDFSerializer):
    def load_stream(self, stream: Any) -> None: ...

View file

@@ -1,33 +0,0 @@
# (Apache License 2.0 header)
from pyspark.sql.pandas.utils import (  # noqa: F401
    require_minimum_pandas_version as require_minimum_pandas_version,
)
from typing import Any, Optional

def infer_eval_type(sig: Any): ...
def check_tuple_annotation(
    annotation: Any, parameter_check_func: Optional[Any] = ...
): ...
def check_iterator_annotation(
    annotation: Any, parameter_check_func: Optional[Any] = ...
): ...
def check_union_annotation(
    annotation: Any, parameter_check_func: Optional[Any] = ...
): ...

View file

@@ -1,41 +0,0 @@
# (Apache License 2.0 header)
from pyspark.sql.types import (  # noqa: F401
    ArrayType as ArrayType,
    BinaryType as BinaryType,
    BooleanType as BooleanType,
    ByteType as ByteType,
    DateType as DateType,
    DecimalType as DecimalType,
    DoubleType as DoubleType,
    FloatType as FloatType,
    IntegerType as IntegerType,
    LongType as LongType,
    ShortType as ShortType,
    StringType as StringType,
    StructField as StructField,
    StructType as StructType,
    TimestampType as TimestampType,
)
from typing import Any

def to_arrow_type(dt: Any): ...
def to_arrow_schema(schema: Any): ...
def from_arrow_type(at: Any): ...
def from_arrow_schema(arrow_schema: Any): ...

View file

@@ -1,20 +0,0 @@
# (Apache License 2.0 header)
def require_minimum_pandas_version() -> None: ...
def require_minimum_pyarrow_version() -> None: ...

View file

@@ -1,55 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.

from pyspark import SparkContext as SparkContext  # noqa: F401
from typing import Any, Optional

class CapturedException(Exception):
    desc: Any = ...
    stackTrace: Any = ...
    cause: Any = ...
    def __init__(
        self, desc: Any, stackTrace: Any, cause: Optional[Any] = ...
    ) -> None: ...

class AnalysisException(CapturedException): ...
class ParseException(CapturedException): ...
class IllegalArgumentException(CapturedException): ...
class StreamingQueryException(CapturedException): ...
class QueryExecutionException(CapturedException): ...
class PythonException(CapturedException): ...
class UnknownException(CapturedException): ...

def convert_exception(e: Any): ...
def capture_sql_exception(f: Any): ...
def install_exception_handler() -> None: ...
def toJArray(gateway: Any, jtype: Any, arr: Any): ...
def require_test_compiled() -> None: ...

class ForeachBatchFunction:
    sql_ctx: Any = ...
    func: Any = ...
    def __init__(self, sql_ctx: Any, func: Any) -> None: ...
    error: Any = ...
    def call(self, jdf: Any, batch_id: Any) -> None: ...
    class Java:
        implements: Any = ...

def to_str(value: Any): ...

View file

@@ -1,23 +0,0 @@
# (Apache License 2.0 header)
from pyspark.streaming.context import StreamingContext as StreamingContext  # noqa: F401
from pyspark.streaming.dstream import DStream as DStream  # noqa: F401
from pyspark.streaming.listener import (  # noqa: F401
    StreamingListener as StreamingListener,
)

View file

@@ -1,48 +0,0 @@
# (Apache License 2.0 header)
# NOTE: This dynamically typed stub was automatically generated by stubgen.

from typing import Any, Optional

class TransformFunction:
    ctx: Any
    func: Any
    deserializers: Any
    rdd_wrap_func: Any
    failure: Any
    def __init__(self, ctx, func, *deserializers) -> None: ...
    def rdd_wrapper(self, func): ...
    def call(self, milliseconds, jrdds): ...
    def getLastFailure(self): ...
    class Java:
        implements: Any

class TransformFunctionSerializer:
    ctx: Any
    serializer: Any
    gateway: Any
    failure: Any
    def __init__(self, ctx, serializer, gateway: Optional[Any] = ...) -> None: ...
    def dumps(self, id): ...
    def loads(self, data): ...
    def getLastFailure(self): ...
    class Java:
        implements: Any

def rddToFileName(prefix, suffix, timestamp): ...

View file

@@ -1,29 +0,0 @@
# (Apache License 2.0 header)
from collections import namedtuple
from typing import Any

CallSite = namedtuple("CallSite", "function file linenum")

def first_spark_call(): ...

class SCCallSiteSync:
    def __init__(self, sc: Any) -> None: ...
    def __enter__(self) -> None: ...
    def __exit__(self, type: Any, value: Any, tb: Any) -> None: ...

View file

@@ -23,7 +23,7 @@ import traceback
 from py4j.clientserver import ClientServer
-__all__ = []
+__all__ = []  # type: ignore
 def print_exec(stream):

View file

@@ -1,35 +0,0 @@
# (Apache License 2.0 header)
from typing import Any, Tuple
from pyspark._typing import F
import threading

def print_exec(stream: Any) -> None: ...

class VersionUtils:
    @staticmethod
    def majorMinorVersion(sparkVersion: str) -> Tuple[int, int]: ...

def fail_on_stopiteration(f: F) -> F: ...

class InheritableThread(threading.Thread):
    def __init__(self, target: Any, *args: Any, **kwargs: Any): ...
    def __del__(self) -> None: ...

View file

@@ -1,73 +0,0 @@
# (Apache License 2.0 header)
from pyspark import shuffle as shuffle
from pyspark.broadcast import Broadcast as Broadcast
from pyspark.files import SparkFiles as SparkFiles
from pyspark.java_gateway import local_connect_and_auth as local_connect_and_auth
from pyspark.rdd import PythonEvalType as PythonEvalType
from pyspark.resource import ResourceInformation as ResourceInformation
from pyspark.serializers import (
    BatchedSerializer as BatchedSerializer,
    PickleSerializer as PickleSerializer,
    SpecialLengths as SpecialLengths,
    UTF8Deserializer as UTF8Deserializer,
    read_bool as read_bool,
    read_int as read_int,
    read_long as read_long,
    write_int as write_int,
    write_long as write_long,
    write_with_length as write_with_length,
)
from pyspark.sql.pandas.serializers import (
    ArrowStreamPandasUDFSerializer as ArrowStreamPandasUDFSerializer,
    CogroupUDFSerializer as CogroupUDFSerializer,
)
from pyspark.sql.pandas.types import to_arrow_type as to_arrow_type
from pyspark.sql.types import StructType as StructType
from pyspark.taskcontext import (
    BarrierTaskContext as BarrierTaskContext,
    TaskContext as TaskContext,
)
from pyspark.util import fail_on_stopiteration as fail_on_stopiteration
from typing import Any

has_resource_module: bool
pickleSer: Any
utf8_deserializer: Any

def report_times(outfile: Any, boot: Any, init: Any, finish: Any) -> None: ...
def add_path(path: Any) -> None: ...
def read_command(serializer: Any, file: Any): ...
def chain(f: Any, g: Any): ...
def wrap_udf(f: Any, return_type: Any): ...
def wrap_scalar_pandas_udf(f: Any, return_type: Any): ...
def wrap_pandas_iter_udf(f: Any, return_type: Any): ...
def wrap_cogrouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ...
def wrap_grouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ...
def wrap_grouped_agg_pandas_udf(f: Any, return_type: Any): ...
def wrap_window_agg_pandas_udf(
    f: Any, return_type: Any, runner_conf: Any, udf_index: Any
): ...
def wrap_unbounded_window_agg_pandas_udf(f: Any, return_type: Any): ...
def wrap_bounded_window_agg_pandas_udf(f: Any, return_type: Any): ...
def read_single_udf(
    pickleSer: Any, infile: Any, eval_type: Any, runner_conf: Any, udf_index: Any
): ...
def read_udfs(pickleSer: Any, infile: Any, eval_type: Any): ...
def main(infile: Any, outfile: Any) -> None: ...