[SPARK-36035][PYTHON] Adjust test_astype, test_neg for old pandas versions

### What changes were proposed in this pull request?
Adjust `test_astype` and `test_neg` for old pandas versions.

### Why are the changes needed?
Bugs in old pandas versions cause some tests in pandas API on Spark to fail. We ought to adjust `test_astype` and `test_neg` so that they account for those old pandas versions.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Unit tests. Please refer to https://github.com/apache/spark/pull/33272 for test results with pandas 1.0.1.

Closes #33250 from xinrong-databricks/SPARK-36035.

Authored-by: Xinrong Meng <xinrong.meng@databricks.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
(cherry picked from commit 698c4ec16b)
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
This commit is contained in:
Xinrong Meng 2021-07-09 17:24:20 +09:00 committed by Hyukjin Kwon
parent 3c825a18b6
commit 862178b2a0
3 changed files with 23 additions and 2 deletions

View file

@ -644,6 +644,10 @@ class BooleanExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
if dtype in self.fractional_extension_dtypes:
# A pandas boolean extension series cannot be cast to fractional extension dtypes
self.assert_eq([1.0, 0.0, np.nan], self.psser.astype(dtype).tolist())
elif dtype in self.string_extension_dtype:
if LooseVersion(pd.__version__) >= LooseVersion("1.1.0"):
# Limit pandas version due to https://github.com/pandas-dev/pandas/issues/31204
self.check_extension(pser.astype(dtype), psser.astype(dtype))
else:
self.check_extension(pser.astype(dtype), psser.astype(dtype))

View file

@ -402,11 +402,24 @@ class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
def test_astype(self):
    """Check that ``astype`` on integral extension series matches pandas.

    Every (pandas, pandas-on-Spark) pair in
    ``self.intergral_extension_pser_psser_pairs`` is cast to each dtype in
    ``self.extension_dtypes`` and the two results are compared with
    ``self.check_extension``.

    Casting to a string extension dtype is only compared on pandas >= 1.1.0;
    on older versions the comparison is skipped because of
    https://github.com/pandas-dev/pandas/issues/31204.
    """
    for pser, psser in self.intergral_extension_pser_psser_pairs:
        for dtype in self.extension_dtypes:
            if dtype in self.string_extension_dtype:
                if LooseVersion(pd.__version__) >= LooseVersion("1.1.0"):
                    # Limit pandas version due to
                    # https://github.com/pandas-dev/pandas/issues/31204
                    self.check_extension(pser.astype(dtype), psser.astype(dtype))
                # Older pandas: deliberately no comparison for string dtypes.
            else:
                # Non-string extension dtypes are compared on every pandas version.
                self.check_extension(pser.astype(dtype), psser.astype(dtype))
def test_neg(self):
    """Check unary negation of integral extension series against pandas.

    On pandas >= 1.1.3 the pandas-on-Spark result is compared directly with
    ``-pser``. On older pandas the pandas side cannot be used as the
    reference, so a hand-built expected series is used instead.
    """
    for pser, psser in self.intergral_extension_pser_psser_pairs:
        if LooseVersion(pd.__version__) < LooseVersion("1.1.3"):
            # pandas < 1.1.0: object dtype is returned after negation
            # pandas 1.1.1 and 1.1.2:
            #     a TypeError "bad operand type for unary -: 'IntegerArray'" is raised
            # Please refer to https://github.com/pandas-dev/pandas/issues/36063.
            #
            # ``-pser`` must therefore not be evaluated on this branch.
            # NOTE(review): the literal below presumes each fixture series
            # holds [1, 2, 3, None] - confirm against the fixture definition.
            self.check_extension(pd.Series([-1, -2, -3, None], dtype=pser.dtype), -psser)
        else:
            self.check_extension(-pser, -psser)
def test_abs(self):
for pser, psser in self.intergral_extension_pser_psser_pairs:

View file

@ -119,6 +119,10 @@ class TestCasesUtils(object):
def pser_psser_pairs(self):
return zip(self.psers, self.pssers)
@property
def string_extension_dtype(self):
    """Specifiers for the string extension dtype used by the test cases.

    Returns a list holding the ``"string"`` alias and a ``StringDtype()``
    instance when ``extension_object_dtypes_available`` is truthy, and an
    empty list otherwise.
    """
    if not extension_object_dtypes_available:
        return []
    return ["string", StringDtype()]
@property
def object_extension_dtypes(self):
return (