[SPARK-35810][PYTHON] Deprecate ps.broadcast API
### What changes were proposed in this pull request?
The `broadcast` function in `pyspark.pandas` duplicates `DataFrame.spark.hint` called with `"broadcast"`.
```python
# The two lines below are equivalent
df.spark.hint("broadcast")
ps.broadcast(df)
```
So we should remove `broadcast` in the future, and show a deprecation warning for now.
### Why are the changes needed?
To remove duplicated functionality: `broadcast` and `DataFrame.spark.hint("broadcast")` do the same thing.
### Does this PR introduce _any_ user-facing change?
Yes. Users now see a deprecation warning when using `broadcast` in `pyspark.pandas`.
```python
>>> ps.broadcast(df)
FutureWarning: `broadcast` has been deprecated and might be removed in a future version. Use `DataFrame.spark.hint` with 'broadcast' for `name` parameter instead.
warnings.warn(
```
### How was this patch tested?
Manually checked the warning message and confirmed that the build passed.
Closes #33379 from itholic/SPARK-35810.
Lead-authored-by: itholic <haejoon.lee@databricks.com>
Co-authored-by: Hyukjin Kwon <gurwls223@gmail.com>
Co-authored-by: Haejoon Lee <44108233+itholic@users.noreply.github.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
(cherry picked from commit 67e6120a85)
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
This commit is contained in:
parent
d5cec45c0b
commit
80a9644372
|
@ -860,6 +860,11 @@ class Frame(object, metaclass=ABCMeta):
|
|||
)
|
||||
|
||||
if num_files is not None:
|
||||
warnings.warn(
|
||||
"`num_files` has been deprecated and might be removed in a future version. "
|
||||
"Use `DataFrame.spark.repartition` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
sdf = sdf.repartition(num_files)
|
||||
|
||||
builder = sdf.write.mode(mode)
|
||||
|
@ -998,6 +1003,11 @@ class Frame(object, metaclass=ABCMeta):
|
|||
sdf = psdf.to_spark(index_col=index_col) # type: ignore
|
||||
|
||||
if num_files is not None:
|
||||
warnings.warn(
|
||||
"`num_files` has been deprecated and might be removed in a future version. "
|
||||
"Use `DataFrame.spark.repartition` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
sdf = sdf.repartition(num_files)
|
||||
|
||||
builder = sdf.write.mode(mode)
|
||||
|
|
|
@ -39,6 +39,7 @@ from distutils.version import LooseVersion
|
|||
from functools import reduce
|
||||
from io import BytesIO
|
||||
import json
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
@ -2822,6 +2823,8 @@ def broadcast(obj: DataFrame) -> DataFrame:
|
|||
"""
|
||||
Marks a DataFrame as small enough for use in broadcast joins.
|
||||
|
||||
.. deprecated:: 3.2.0
|
||||
Use :func:`DataFrame.spark.hint` instead.
|
||||
Parameters
|
||||
----------
|
||||
obj : DataFrame
|
||||
|
@ -2852,6 +2855,11 @@ def broadcast(obj: DataFrame) -> DataFrame:
|
|||
...BroadcastHashJoin...
|
||||
...
|
||||
"""
|
||||
warnings.warn(
|
||||
"`broadcast` has been deprecated and might be removed in a future version. "
|
||||
"Use `DataFrame.spark.hint` with 'broadcast' for `name` parameter instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
if not isinstance(obj, DataFrame):
|
||||
raise TypeError("Invalid type : expected DataFrame got {}".format(type(obj).__name__))
|
||||
return DataFrame(
|
||||
|
|
Loading…
Reference in a new issue