#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from typing import overload
from typing import (
    Callable,
    Generic,
    Hashable,
    Iterable,
    List,
    Optional,
    Tuple,
    TypeVar,
    Union,
)
import datetime

from pyspark.rdd import RDD
import pyspark.serializers
from pyspark.storagelevel import StorageLevel
import pyspark.streaming.context

from py4j.java_gateway import JavaObject

S = TypeVar("S")
T = TypeVar("T")
U = TypeVar("U")
K = TypeVar("K", bound=Hashable)
V = TypeVar("V")

class DStream(Generic[T]):
    is_cached: bool
    is_checkpointed: bool
    def __init__(
        self,
        jdstream: JavaObject,
        ssc: pyspark.streaming.context.StreamingContext,
        jrdd_deserializer: pyspark.serializers.Serializer,
    ) -> None: ...
    def context(self) -> pyspark.streaming.context.StreamingContext: ...
    def count(self) -> DStream[int]: ...
    def filter(self, f: Callable[[T], bool]) -> DStream[T]: ...
    def flatMap(
        self: DStream[T],
        f: Callable[[T], Iterable[U]],
        preservesPartitioning: bool = ...,
    ) -> DStream[U]: ...
    def map(
        self: DStream[T], f: Callable[[T], U], preservesPartitioning: bool = ...
    ) -> DStream[U]: ...
    def mapPartitions(
        self, f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = ...
    ) -> DStream[U]: ...
    def mapPartitionsWithIndex(
        self,
        f: Callable[[int, Iterable[T]], Iterable[U]],
        preservesPartitioning: bool = ...,
    ) -> DStream[U]: ...
    def reduce(self, func: Callable[[T, T], T]) -> DStream[T]: ...
    def reduceByKey(
        self: DStream[Tuple[K, V]],
        func: Callable[[V, V], V],
        numPartitions: Optional[int] = ...,
    ) -> DStream[Tuple[K, V]]: ...
    def combineByKey(
        self: DStream[Tuple[K, V]],
        createCombiner: Callable[[V], U],
        mergeValue: Callable[[U, V], U],
        mergeCombiners: Callable[[U, U], U],
        numPartitions: Optional[int] = ...,
    ) -> DStream[Tuple[K, U]]: ...
    def partitionBy(
        self: DStream[Tuple[K, V]],
        numPartitions: int,
        partitionFunc: Callable[[K], int] = ...,
    ) -> DStream[Tuple[K, V]]: ...
    @overload
    def foreachRDD(self, func: Callable[[RDD[T]], None]) -> None: ...
    @overload
    def foreachRDD(
        self, func: Callable[[datetime.datetime, RDD[T]], None]
    ) -> None: ...
    def pprint(self, num: int = ...) -> None: ...
    def mapValues(
        self: DStream[Tuple[K, V]], f: Callable[[V], U]
    ) -> DStream[Tuple[K, U]]: ...
    def flatMapValues(
        self: DStream[Tuple[K, V]], f: Callable[[V], Iterable[U]]
    ) -> DStream[Tuple[K, U]]: ...
    def glom(self) -> DStream[List[T]]: ...
    def cache(self) -> DStream[T]: ...
    def persist(self, storageLevel: StorageLevel) -> DStream[T]: ...
    def checkpoint(self, interval: int) -> DStream[T]: ...
    def groupByKey(
        self: DStream[Tuple[K, V]], numPartitions: Optional[int] = ...
    ) -> DStream[Tuple[K, Iterable[V]]]: ...
    def countByValue(self) -> DStream[Tuple[T, int]]: ...
    def saveAsTextFiles(self, prefix: str, suffix: Optional[str] = ...) -> None: ...
    @overload
    def transform(
        self, func: Callable[[RDD[T]], RDD[U]]
    ) -> TransformedDStream[U]: ...
    @overload
    def transform(
        self, func: Callable[[datetime.datetime, RDD[T]], RDD[U]]
    ) -> TransformedDStream[U]: ...
    @overload
    def transformWith(
        self,
        func: Callable[[RDD[T], RDD[U]], RDD[V]],
        other: RDD[U],
        keepSerializer: bool = ...,
    ) -> DStream[V]: ...
    @overload
    def transformWith(
        self,
        func: Callable[[datetime.datetime, RDD[T], RDD[U]], RDD[V]],
        other: RDD[U],
        keepSerializer: bool = ...,
    ) -> DStream[V]: ...
    def repartition(self, numPartitions: int) -> DStream[T]: ...
    def union(self, other: DStream[U]) -> DStream[Union[T, U]]: ...
    def cogroup(
        self: DStream[Tuple[K, V]],
        other: DStream[Tuple[K, U]],
        numPartitions: Optional[int] = ...,
    ) -> DStream[Tuple[K, Tuple[List[V], List[U]]]]: ...
    def join(
        self: DStream[Tuple[K, V]],
        other: DStream[Tuple[K, U]],
        numPartitions: Optional[int] = ...,
    ) -> DStream[Tuple[K, Tuple[V, U]]]: ...
    def leftOuterJoin(
        self: DStream[Tuple[K, V]],
        other: DStream[Tuple[K, U]],
        numPartitions: Optional[int] = ...,
    ) -> DStream[Tuple[K, Tuple[V, Optional[U]]]]: ...
    def rightOuterJoin(
        self: DStream[Tuple[K, V]],
        other: DStream[Tuple[K, U]],
        numPartitions: Optional[int] = ...,
    ) -> DStream[Tuple[K, Tuple[Optional[V], U]]]: ...
    def fullOuterJoin(
        self: DStream[Tuple[K, V]],
        other: DStream[Tuple[K, U]],
        numPartitions: Optional[int] = ...,
    ) -> DStream[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ...
    def slice(
        self, begin: Union[datetime.datetime, int], end: Union[datetime.datetime, int]
    ) -> List[RDD[T]]: ...
    def window(
        self, windowDuration: int, slideDuration: Optional[int] = ...
    ) -> DStream[T]: ...
    def reduceByWindow(
        self,
        reduceFunc: Callable[[T, T], T],
        invReduceFunc: Optional[Callable[[T, T], T]],
        windowDuration: int,
        slideDuration: int,
    ) -> DStream[T]: ...
    def countByWindow(
        self, windowDuration: int, slideDuration: int
    ) -> DStream[int]: ...
    def countByValueAndWindow(
        self,
        windowDuration: int,
        slideDuration: int,
        numPartitions: Optional[int] = ...,
    ) -> DStream[Tuple[T, int]]: ...
    def groupByKeyAndWindow(
        self: DStream[Tuple[K, V]],
        windowDuration: int,
        slideDuration: int,
        numPartitions: Optional[int] = ...,
    ) -> DStream[Tuple[K, Iterable[V]]]: ...
    def reduceByKeyAndWindow(
        self: DStream[Tuple[K, V]],
        func: Callable[[V, V], V],
        invFunc: Optional[Callable[[V, V], V]],
        windowDuration: int,
        slideDuration: Optional[int] = ...,
        numPartitions: Optional[int] = ...,
        filterFunc: Optional[Callable[[Tuple[K, V]], bool]] = ...,
    ) -> DStream[Tuple[K, V]]: ...
    def updateStateByKey(
        self: DStream[Tuple[K, V]],
        updateFunc: Callable[[Iterable[V], Optional[S]], S],
        numPartitions: Optional[int] = ...,
        initialRDD: Optional[RDD[Tuple[K, S]]] = ...,
    ) -> DStream[Tuple[K, S]]: ...

class TransformedDStream(DStream[U]):
    is_cached: bool
    is_checkpointed: bool
    func: Callable
    prev: DStream
    @overload
    def __init__(
        self: DStream[U], prev: DStream[T], func: Callable[[RDD[T]], RDD[U]]
    ) -> None: ...
    @overload
    def __init__(
        self: DStream[U],
        prev: DStream[T],
        func: Callable[[datetime.datetime, RDD[T]], RDD[U]],
    ) -> None: ...
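
# Illustrative usage sketch (comments only, since a .pyi stub carries no runtime
# code). The DStream API declared above is driven from a StreamingContext; the
# classic word-count pipeline from the Spark Streaming programming guide
# exercises flatMap, map, reduceByKey, and pprint as typed here. The host/port
# of the socket source are placeholder values:
#
#     from pyspark import SparkContext
#     from pyspark.streaming import StreamingContext
#
#     sc = SparkContext("local[2]", "WordCount")
#     ssc = StreamingContext(sc, batchDuration=1)
#     lines = ssc.socketTextStream("localhost", 9999)
#     counts = (
#         lines.flatMap(lambda line: line.split())   # DStream[str]
#         .map(lambda word: (word, 1))               # DStream[Tuple[str, int]]
#         .reduceByKey(lambda a, b: a + b)           # DStream[Tuple[str, int]]
#     )
#     counts.pprint()
#     ssc.start()
#     ssc.awaitTermination()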