2020-09-24 01:15:36 -04:00
|
|
|
#
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
|
|
# or more contributor license agreements. See the NOTICE file
|
|
|
|
# distributed with this work for additional information
|
|
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
|
|
# to you under the Apache License, Version 2.0 (the
|
|
|
|
# "License"); you may not use this file except in compliance
|
|
|
|
# with the License. You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing,
|
|
|
|
# software distributed under the License is distributed on an
|
|
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
|
|
# KIND, either express or implied. See the License for the
|
|
|
|
# specific language governing permissions and limitations
|
|
|
|
# under the License.
|
|
|
|
|
|
|
|
from typing import overload
|
|
|
|
from typing import Any, Callable, Dict, List, Optional, Union
|
|
|
|
|
|
|
|
from pyspark.sql._typing import (
|
|
|
|
ColumnOrName,
|
|
|
|
DataTypeOrString,
|
|
|
|
)
|
|
|
|
from pyspark.sql.pandas.functions import ( # noqa: F401
|
|
|
|
pandas_udf as pandas_udf,
|
|
|
|
PandasUDFType as PandasUDFType,
|
|
|
|
)
|
|
|
|
from pyspark.sql.column import Column
|
|
|
|
from pyspark.sql.dataframe import DataFrame
|
|
|
|
from pyspark.sql.types import ( # noqa: F401
|
|
|
|
ArrayType,
|
|
|
|
StringType,
|
|
|
|
StructType,
|
|
|
|
DataType,
|
|
|
|
)
|
|
|
|
from pyspark.sql.utils import to_str # noqa: F401
|
|
|
|
|
|
|
|
# Aggregate and miscellaneous column functions.
# `approxCountDistinct` is the deprecated alias of `approx_count_distinct`.
def approxCountDistinct(col: ColumnOrName, rsd: Optional[float] = ...) -> Column: ...
def approx_count_distinct(col: ColumnOrName, rsd: Optional[float] = ...) -> Column: ...
def broadcast(df: DataFrame) -> DataFrame: ...
def coalesce(*cols: ColumnOrName) -> Column: ...
def corr(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
def covar_pop(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
def countDistinct(col: ColumnOrName, *cols: ColumnOrName) -> Column: ...
def first(col: ColumnOrName, ignorenulls: bool = ...) -> Column: ...
def grouping(col: ColumnOrName) -> Column: ...
def grouping_id(*cols: ColumnOrName) -> Column: ...
def input_file_name() -> Column: ...
def isnan(col: ColumnOrName) -> Column: ...
def isnull(col: ColumnOrName) -> Column: ...
def last(col: ColumnOrName, ignorenulls: bool = ...) -> Column: ...
def monotonically_increasing_id() -> Column: ...
def nanvl(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
|
|
|
|
# Approximate percentile; `percentage` may be a scalar, a list, or a Column.
def percentile_approx(
    col: ColumnOrName,
    percentage: Union[Column, float, List[float]],
    accuracy: Union[Column, float] = ...,
) -> Column: ...
|
|
|
|
# Random-value generators, rounding, and bit-shift functions.
def rand(seed: Optional[int] = ...) -> Column: ...
def randn(seed: Optional[int] = ...) -> Column: ...
def round(col: ColumnOrName, scale: int = ...) -> Column: ...
def bround(col: ColumnOrName, scale: int = ...) -> Column: ...
def shiftLeft(col: ColumnOrName, numBits: int) -> Column: ...
def shiftRight(col: ColumnOrName, numBits: int) -> Column: ...
def shiftRightUnsigned(col: ColumnOrName, numBits: int) -> Column: ...
def spark_partition_id() -> Column: ...
|
|
|
|
# SQL-expression parsing and multi-column constructors.
def expr(str: str) -> Column: ...
def struct(*cols: ColumnOrName) -> Column: ...
def greatest(*cols: ColumnOrName) -> Column: ...
|
|
|
|
# Returns the minimum value of the list of columns; like its counterpart
# `greatest`, the runtime accepts column names as well as Column objects,
# so the parameter is widened from `Column` to `ColumnOrName` for
# consistency with `greatest(*cols: ColumnOrName)` above (backward
# compatible: every Column is a ColumnOrName).
def least(*cols: ColumnOrName) -> Column: ...
|
2020-11-24 19:27:04 -05:00
|
|
|
# Conditional expression and logarithm (one-arg natural log, or
# two-arg log with an explicit base as the first argument).
def when(condition: Column, value: Any) -> Column: ...
@overload
def log(arg1: ColumnOrName) -> Column: ...
@overload
def log(arg1: float, arg2: ColumnOrName) -> Column: ...
|
|
|
|
# Base-2 log, numeral-base conversion, and factorial.
def log2(col: ColumnOrName) -> Column: ...
def conv(col: ColumnOrName, fromBase: int, toBase: int) -> Column: ...
def factorial(col: ColumnOrName) -> Column: ...
|
|
|
|
# Window (analytic) functions operating on row offsets.
def lag(
    col: ColumnOrName, offset: int = ..., default: Optional[Any] = ...
) -> Column: ...
def lead(
    col: ColumnOrName, offset: int = ..., default: Optional[Any] = ...
) -> Column: ...
def nth_value(
    col: ColumnOrName, offset: int, ignoreNulls: Optional[bool] = ...
) -> Column: ...
def ntile(n: int) -> Column: ...
|
|
|
|
# Date/time functions: extraction, arithmetic, parsing/formatting,
# timezone conversion, and time-window bucketing.
def current_date() -> Column: ...
def current_timestamp() -> Column: ...
def date_format(date: ColumnOrName, format: str) -> Column: ...
def year(col: ColumnOrName) -> Column: ...
def quarter(col: ColumnOrName) -> Column: ...
def month(col: ColumnOrName) -> Column: ...
def dayofweek(col: ColumnOrName) -> Column: ...
def dayofmonth(col: ColumnOrName) -> Column: ...
def dayofyear(col: ColumnOrName) -> Column: ...
def hour(col: ColumnOrName) -> Column: ...
def minute(col: ColumnOrName) -> Column: ...
def second(col: ColumnOrName) -> Column: ...
def weekofyear(col: ColumnOrName) -> Column: ...
def date_add(start: ColumnOrName, days: int) -> Column: ...
def date_sub(start: ColumnOrName, days: int) -> Column: ...
def datediff(end: ColumnOrName, start: ColumnOrName) -> Column: ...
def add_months(start: ColumnOrName, months: int) -> Column: ...
def months_between(
    date1: ColumnOrName, date2: ColumnOrName, roundOff: bool = ...
) -> Column: ...
def to_date(col: ColumnOrName, format: Optional[str] = ...) -> Column: ...
@overload
def to_timestamp(col: ColumnOrName) -> Column: ...
@overload
def to_timestamp(col: ColumnOrName, format: str) -> Column: ...
def trunc(date: ColumnOrName, format: str) -> Column: ...
def date_trunc(format: str, timestamp: ColumnOrName) -> Column: ...
def next_day(date: ColumnOrName, dayOfWeek: str) -> Column: ...
def last_day(date: ColumnOrName) -> Column: ...
def from_unixtime(timestamp: ColumnOrName, format: str = ...) -> Column: ...
def unix_timestamp(
    timestamp: Optional[ColumnOrName] = ..., format: str = ...
) -> Column: ...
def from_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column: ...
def to_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column: ...
def timestamp_seconds(col: ColumnOrName) -> Column: ...
def window(
    timeColumn: ColumnOrName,
    windowDuration: str,
    slideDuration: Optional[str] = ...,
    startTime: Optional[str] = ...,
) -> Column: ...
|
|
|
|
# Checksum/hash functions and error-raising helpers.
# NOTE: `hash` intentionally shadows the builtin — it mirrors the
# runtime pyspark.sql.functions API and cannot be renamed here.
def crc32(col: ColumnOrName) -> Column: ...
def md5(col: ColumnOrName) -> Column: ...
def sha1(col: ColumnOrName) -> Column: ...
def sha2(col: ColumnOrName, numBits: int) -> Column: ...
def hash(*cols: ColumnOrName) -> Column: ...
def xxhash64(*cols: ColumnOrName) -> Column: ...
def assert_true(col: ColumnOrName, errMsg: Union[Column, str] = ...) -> Column: ...
def raise_error(errMsg: Union[Column, str]) -> Column: ...
|
2020-09-24 01:15:36 -04:00
|
|
|
# String functions: concatenation, encoding, formatting, search,
# substring/padding, regex, and character-level transforms.
# NOTE: parameter names like `str`, `format`, `len`, `hex` shadow
# builtins to match the runtime pyspark API exactly.
def concat(*cols: ColumnOrName) -> Column: ...
def concat_ws(sep: str, *cols: ColumnOrName) -> Column: ...
def decode(col: ColumnOrName, charset: str) -> Column: ...
def encode(col: ColumnOrName, charset: str) -> Column: ...
def format_number(col: ColumnOrName, d: int) -> Column: ...
def format_string(format: str, *cols: ColumnOrName) -> Column: ...
def instr(str: ColumnOrName, substr: str) -> Column: ...
def overlay(
    src: ColumnOrName,
    replace: ColumnOrName,
    pos: Union[Column, int],
    len: Union[Column, int] = ...,
) -> Column: ...
def substring(str: ColumnOrName, pos: int, len: int) -> Column: ...
def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: ...
def levenshtein(left: ColumnOrName, right: ColumnOrName) -> Column: ...
def locate(substr: str, str: ColumnOrName, pos: int = ...) -> Column: ...
def lpad(col: ColumnOrName, len: int, pad: str) -> Column: ...
def rpad(col: ColumnOrName, len: int, pad: str) -> Column: ...
def repeat(col: ColumnOrName, n: int) -> Column: ...
def split(str: ColumnOrName, pattern: str, limit: int = ...) -> Column: ...
def regexp_extract(str: ColumnOrName, pattern: str, idx: int) -> Column: ...
def regexp_replace(str: ColumnOrName, pattern: str, replacement: str) -> Column: ...
def initcap(col: ColumnOrName) -> Column: ...
def soundex(col: ColumnOrName) -> Column: ...
def bin(col: ColumnOrName) -> Column: ...
def hex(col: ColumnOrName) -> Column: ...
def unhex(col: ColumnOrName) -> Column: ...
def length(col: ColumnOrName) -> Column: ...
def translate(srcCol: ColumnOrName, matching: str, replace: str) -> Column: ...
|
|
|
|
# Array/map constructors, set-style array operations, explode
# generators, and JSON path extraction.
def map_from_arrays(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
def create_map(*cols: ColumnOrName) -> Column: ...
def array(*cols: ColumnOrName) -> Column: ...
def array_contains(col: ColumnOrName, value: Any) -> Column: ...
def arrays_overlap(a1: ColumnOrName, a2: ColumnOrName) -> Column: ...
def slice(
    x: ColumnOrName, start: Union[Column, int], length: Union[Column, int]
) -> Column: ...
def array_join(
    col: ColumnOrName, delimiter: str, null_replacement: Optional[str] = ...
) -> Column: ...
def array_position(col: ColumnOrName, value: Any) -> Column: ...
def element_at(col: ColumnOrName, extraction: Any) -> Column: ...
def array_remove(col: ColumnOrName, element: Any) -> Column: ...
def array_distinct(col: ColumnOrName) -> Column: ...
def array_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
def array_union(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
def array_except(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
def explode(col: ColumnOrName) -> Column: ...
def explode_outer(col: ColumnOrName) -> Column: ...
def posexplode(col: ColumnOrName) -> Column: ...
def posexplode_outer(col: ColumnOrName) -> Column: ...
def get_json_object(col: ColumnOrName, path: str) -> Column: ...
def json_tuple(col: ColumnOrName, *fields: str) -> Column: ...
|
|
|
|
# JSON/CSV conversion functions. `options` is Optional (not a mutable
# `{}` default) per SPARK-32320; `...` stands for the None default.
def from_json(
    col: ColumnOrName,
    schema: Union[ArrayType, StructType, Column, str],
    options: Optional[Dict[str, str]] = ...,
) -> Column: ...
def to_json(col: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ...
def schema_of_json(json: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ...
def schema_of_csv(csv: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ...
def to_csv(col: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ...
|
2020-09-24 01:15:36 -04:00
|
|
|
# Collection operations: sizing, min/max, sorting/shuffling, map
# accessors, zipping, and sequence generation.
def size(col: ColumnOrName) -> Column: ...
def array_min(col: ColumnOrName) -> Column: ...
def array_max(col: ColumnOrName) -> Column: ...
def sort_array(col: ColumnOrName, asc: bool = ...) -> Column: ...
def array_sort(col: ColumnOrName) -> Column: ...
def shuffle(col: ColumnOrName) -> Column: ...
def reverse(col: ColumnOrName) -> Column: ...
def flatten(col: ColumnOrName) -> Column: ...
def map_keys(col: ColumnOrName) -> Column: ...
def map_values(col: ColumnOrName) -> Column: ...
def map_entries(col: ColumnOrName) -> Column: ...
def map_from_entries(col: ColumnOrName) -> Column: ...
def array_repeat(col: ColumnOrName, count: Union[Column, int]) -> Column: ...
def arrays_zip(*cols: ColumnOrName) -> Column: ...
def map_concat(*cols: ColumnOrName) -> Column: ...
def sequence(
    start: ColumnOrName, stop: ColumnOrName, step: Optional[ColumnOrName] = ...
) -> Column: ...
|
|
|
|
# CSV parsing into a struct column; `options` is Optional (not a
# mutable `{}` default) per SPARK-32320.
def from_csv(
    col: ColumnOrName,
    schema: Union[StructType, Column, str],
    options: Optional[Dict[str, str]] = ...,
) -> Column: ...
|
|
|
|
# Higher-order array predicates/transforms; the lambda receives either
# the element, or (element, index) / (key, value) in the 2-arg overloads.
@overload
def transform(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ...
@overload
def transform(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ...
def exists(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ...
def forall(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ...
@overload
def filter(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ...
@overload
def filter(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ...
|
|
|
|
# Higher-order fold/zip/map functions over arrays and maps.
def aggregate(
    col: ColumnOrName,
    initialValue: ColumnOrName,
    merge: Callable[[Column, Column], Column],
    finish: Optional[Callable[[Column], Column]] = ...,
) -> Column: ...
def zip_with(
    left: ColumnOrName,
    right: ColumnOrName,
    f: Callable[[Column, Column], Column],
) -> Column: ...
def transform_keys(
    col: ColumnOrName, f: Callable[[Column, Column], Column]
) -> Column: ...
def transform_values(
    col: ColumnOrName, f: Callable[[Column, Column], Column]
) -> Column: ...
def map_filter(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ...
def map_zip_with(
    col1: ColumnOrName,
    col2: ColumnOrName,
    f: Callable[[Column, Column, Column], Column],
) -> Column: ...
|
|
|
|
# Math and sort-order functions. Names like `abs` and `ascii` shadow
# builtins to match the runtime API. `atan2` accepts a Column or a
# float literal in either position.
def abs(col: ColumnOrName) -> Column: ...
def acos(col: ColumnOrName) -> Column: ...
def acosh(col: ColumnOrName) -> Column: ...
def asc(col: ColumnOrName) -> Column: ...
def asc_nulls_first(col: ColumnOrName) -> Column: ...
def asc_nulls_last(col: ColumnOrName) -> Column: ...
def ascii(col: ColumnOrName) -> Column: ...
def asin(col: ColumnOrName) -> Column: ...
def asinh(col: ColumnOrName) -> Column: ...
def atan(col: ColumnOrName) -> Column: ...
def atanh(col: ColumnOrName) -> Column: ...
@overload
def atan2(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
@overload
def atan2(col1: float, col2: ColumnOrName) -> Column: ...
@overload
def atan2(col1: ColumnOrName, col2: float) -> Column: ...
|
|
|
|
# Aggregates, column constructors, trig, and ranking window functions.
def avg(col: ColumnOrName) -> Column: ...
def base64(col: ColumnOrName) -> Column: ...
def bitwiseNOT(col: ColumnOrName) -> Column: ...
def cbrt(col: ColumnOrName) -> Column: ...
def ceil(col: ColumnOrName) -> Column: ...
def col(col: str) -> Column: ...
def collect_list(col: ColumnOrName) -> Column: ...
def collect_set(col: ColumnOrName) -> Column: ...
def column(col: str) -> Column: ...
def cos(col: ColumnOrName) -> Column: ...
def cosh(col: ColumnOrName) -> Column: ...
def count(col: ColumnOrName) -> Column: ...
def cume_dist() -> Column: ...
def degrees(col: ColumnOrName) -> Column: ...
def dense_rank() -> Column: ...
|
2020-11-03 08:50:59 -05:00
|
|
|
# Descending sort orders, exponentials, and literal construction.
# `hypot` accepts a Column or a float literal in either position.
def desc(col: ColumnOrName) -> Column: ...
def desc_nulls_first(col: ColumnOrName) -> Column: ...
def desc_nulls_last(col: ColumnOrName) -> Column: ...
def exp(col: ColumnOrName) -> Column: ...
def expm1(col: ColumnOrName) -> Column: ...
def floor(col: ColumnOrName) -> Column: ...
@overload
def hypot(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
@overload
def hypot(col1: float, col2: ColumnOrName) -> Column: ...
@overload
def hypot(col1: ColumnOrName, col2: float) -> Column: ...
def kurtosis(col: ColumnOrName) -> Column: ...
def lit(col: Any) -> Column: ...
|
|
|
|
# Remaining math, string-trim, statistical, and ranking functions.
# Names like `max`, `min`, `sum`, `pow`, `round` shadow builtins to
# mirror the runtime pyspark.sql.functions namespace.
def log10(col: ColumnOrName) -> Column: ...
def log1p(col: ColumnOrName) -> Column: ...
def lower(col: ColumnOrName) -> Column: ...
def ltrim(col: ColumnOrName) -> Column: ...
def max(col: ColumnOrName) -> Column: ...
def mean(col: ColumnOrName) -> Column: ...
def min(col: ColumnOrName) -> Column: ...
def percent_rank() -> Column: ...
@overload
def pow(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
@overload
def pow(col1: float, col2: ColumnOrName) -> Column: ...
@overload
def pow(col1: ColumnOrName, col2: float) -> Column: ...
def radians(col: ColumnOrName) -> Column: ...
def rank() -> Column: ...
def rint(col: ColumnOrName) -> Column: ...
def row_number() -> Column: ...
def rtrim(col: ColumnOrName) -> Column: ...
def signum(col: ColumnOrName) -> Column: ...
def sin(col: ColumnOrName) -> Column: ...
def sinh(col: ColumnOrName) -> Column: ...
def skewness(col: ColumnOrName) -> Column: ...
def sqrt(col: ColumnOrName) -> Column: ...
def stddev(col: ColumnOrName) -> Column: ...
def stddev_pop(col: ColumnOrName) -> Column: ...
def stddev_samp(col: ColumnOrName) -> Column: ...
def sum(col: ColumnOrName) -> Column: ...
def sumDistinct(col: ColumnOrName) -> Column: ...
def tan(col: ColumnOrName) -> Column: ...
def tanh(col: ColumnOrName) -> Column: ...
def toDegrees(col: ColumnOrName) -> Column: ...
def toRadians(col: ColumnOrName) -> Column: ...
def trim(col: ColumnOrName) -> Column: ...
def unbase64(col: ColumnOrName) -> Column: ...
def upper(col: ColumnOrName) -> Column: ...
def var_pop(col: ColumnOrName) -> Column: ...
def var_samp(col: ColumnOrName) -> Column: ...
def variance(col: ColumnOrName) -> Column: ...
|
|
|
|
# UDF registration: called directly with a function it returns the
# wrapped UDF; called with only a return type it acts as a decorator
# factory.
@overload
def udf(
    f: Callable[..., Any], returnType: DataTypeOrString = ...
) -> Callable[..., Column]: ...
@overload
def udf(
    f: DataTypeOrString = ...,
) -> Callable[[Callable[..., Any]], Callable[..., Column]]: ...
|