[SPARK-31963][PYSPARK][SQL] Support both pandas 0.23 and 1.0 in serializers.py
### What changes were proposed in this pull request? This PR aims to support both pandas 0.23 and 1.0. ### Why are the changes needed? ``` $ pip install pandas==0.23.2 $ python -c "import pandas.CategoricalDtype" Traceback (most recent call last): File "<string>", line 1, in <module> ModuleNotFoundError: No module named 'pandas.CategoricalDtype' $ python -c "from pandas.api.types import CategoricalDtype" ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the Jenkins. ``` $ pip freeze | grep pandas pandas==0.23.2 $ python/run-tests.py --python-executables python --modules pyspark-sql ... Tests passed in 359 seconds ``` Closes #28789 from williamhyun/williamhyun-patch-2. Authored-by: William Hyun <williamhyun3@gmail.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
c400519322
commit
2ab82fae57
|
@@ -143,6 +143,10 @@ class ArrowStreamPandasSerializer(ArrowStreamSerializer):
|
|||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
from pyspark.sql.pandas.types import _check_series_convert_timestamps_internal
|
||||
+        try:
+            from pandas import CategoricalDtype
+        except ImportError:
+            from pandas.api.types import CategoricalDtype
|
||||
# Make input conform to [(series1, type1), (series2, type2), ...]
|
||||
if not isinstance(series, (list, tuple)) or \
|
||||
(len(series) == 2 and isinstance(series[1], pa.DataType)):
|
||||
|
@@ -154,7 +158,7 @@ class ArrowStreamPandasSerializer(ArrowStreamSerializer):
|
|||
# Ensure timestamp series are in expected form for Spark internal representation
|
||||
if t is not None and pa.types.is_timestamp(t):
|
||||
s = _check_series_convert_timestamps_internal(s, self._timezone)
|
||||
-            elif type(s.dtype) == pd.CategoricalDtype:
|
||||
+            elif type(s.dtype) == CategoricalDtype:
|
||||
# Note: This can be removed once minimum pyarrow version is >= 0.16.1
|
||||
s = s.astype(s.dtypes.categories.dtype)
|
||||
try:
|
||||
|
|
Loading…
Reference in a new issue