[SPARK-36127][PYTHON] Support comparison between a Categorical and a scalar

### What changes were proposed in this pull request?
Support comparison between a Categorical and a scalar.
There are 3 main changes:
- Change `==` and `!=` so that the scalar is compared against the **actual values** of the Categorical rather than against its **codes**.
- Support `<`, `<=`, `>`, `>=` between a Categorical and a scalar.
- Fix the `TypeError` messages raised by `gt` and `ge`, which previously reported each other's operator (`>` vs. `>=`).

### Why are the changes needed?
pandas supports comparison between a Categorical and a scalar, so pandas-on-Spark should follow the same behavior.

### Does this PR introduce _any_ user-facing change?
Yes.

Before:
```py
>>> import pyspark.pandas as ps
>>> import pandas as pd
>>> from pandas.api.types import CategoricalDtype
>>> pser = pd.Series(pd.Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True))
>>> psser = ps.from_pandas(pser)
>>> psser == 2
0     True
1    False
2    False
dtype: bool
>>> psser <= 1
Traceback (most recent call last):
...
NotImplementedError: <= can not be applied to categoricals.
```

After:
```py
>>> import pyspark.pandas as ps
>>> import pandas as pd
>>> from pandas.api.types import CategoricalDtype
>>> pser = pd.Series(pd.Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True))
>>> psser = ps.from_pandas(pser)
>>> psser == 2
0    False
1     True
2    False
dtype: bool
>>> psser <= 1
0    True
1    True
2    True
dtype: bool

```

### How was this patch tested?
Unit tests.

Closes #33373 from xinrong-databricks/categorical_eq.

Authored-by: Xinrong Meng <xinrong.meng@databricks.com>
Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
(cherry picked from commit 8dd43351d5)
Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
This commit is contained in:
Xinrong Meng 2021-07-19 15:06:44 -07:00 committed by Takuya UESHIN
parent 57794d3ec9
commit 48fadee158
6 changed files with 358 additions and 160 deletions

View file

@ -331,10 +331,10 @@ class DataTypeOps(object, metaclass=ABCMeta):
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
raise TypeError("<= can not be applied to %s." % self.pretty_name)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
raise TypeError("> can not be applied to %s." % self.pretty_name)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
raise TypeError(">= can not be applied to %s." % self.pretty_name)
def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:

View file

@ -16,10 +16,11 @@
#
from itertools import chain
from typing import Any, Union, cast
from typing import cast, Any, Callable, Union
import pandas as pd
from pandas.api.types import CategoricalDtype
import numpy as np
from pandas.api.types import is_list_like, CategoricalDtype
from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
from pyspark.pandas.base import column_op, IndexOpsMixin
@ -70,28 +71,59 @@ class CategoricalOps(DataTypeOps):
scol = map_scol.getItem(index_ops.spark.column)
return index_ops._with_new_scol(scol).astype(dtype)
def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
return _compare(left, right, Column.__eq__, is_equality_comparison=True)
def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
return _compare(left, right, Column.__ne__, is_equality_comparison=True)
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
_non_equality_comparison_input_check(left, right)
return column_op(Column.__lt__)(left, right)
return _compare(left, right, Column.__lt__)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
_non_equality_comparison_input_check(left, right)
return column_op(Column.__le__)(left, right)
return _compare(left, right, Column.__le__)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
_non_equality_comparison_input_check(left, right)
return column_op(Column.__gt__)(left, right)
return _compare(left, right, Column.__gt__)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
_non_equality_comparison_input_check(left, right)
return column_op(Column.__ge__)(left, right)
return _compare(left, right, Column.__ge__)
def _non_equality_comparison_input_check(left: IndexOpsLike, right: Any) -> None:
if not cast(CategoricalDtype, left.dtype).ordered:
raise TypeError("Unordered Categoricals can only compare equality or not.")
def _compare(
    left: IndexOpsLike,
    right: Any,
    f: Callable[..., Column],
    *,
    is_equality_comparison: bool = False
) -> SeriesOrIndex:
    """
    Apply the Spark Column comparison `f` to a Categorical `left` and `right`.

    Two kinds of `right` are accepted:

    * another Categorical Series/Index, which is handed to `f` unchanged once
      the dtype checks below pass;
    * a non-list-like scalar, which must be one of the categories of `left`
      and is replaced by its position in those categories before `f` is applied.

    Parameters
    ----------
    left : A Categorical operand
    right : The other operand to compare with
    f : The Spark Column function to apply
    is_equality_comparison : True if the comparison is == or !=, i.e. no ordering
        is required of `left`. False by default.

    Returns
    -------
    SeriesOrIndex

    Raises
    ------
    TypeError
        If an ordering comparison is requested between Categoricals while `left`
        is unordered, if the two Categorical dtypes do not hash equally, if a
        scalar `right` is not a category of `left`, or if `right` is any other
        list-like object.
    """
    other_is_categorical = isinstance(right, IndexOpsMixin) and isinstance(
        right.dtype, CategoricalDtype
    )

    if other_is_categorical:
        # Ordering operators are only meaningful when the left operand is ordered.
        if not is_equality_comparison and not cast(CategoricalDtype, left.dtype).ordered:
            raise TypeError("Unordered Categoricals can only compare equality or not.")
        # Both operands must carry the same Categorical dtype (compared via hash).
        if hash(left.dtype) != hash(right.dtype):
            raise TypeError("Categoricals can only be compared if 'categories' are the same.")
        return column_op(f)(left, right)

    if is_list_like(right):
        raise TypeError("Cannot compare a Categorical with the given type.")

    # Scalar operand: translate it to its position within the categories of `left`.
    known_categories = cast(CategoricalDtype, left.dtype).categories
    if right not in known_categories:
        raise TypeError("Cannot compare a Categorical with a scalar, which is not a category.")
    scalar_position = known_categories.get_loc(right)
    return column_op(f)(left, scalar_position)

View file

@ -355,10 +355,10 @@ class DecimalOps(FractionalOps):
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
raise TypeError("<= can not be applied to %s." % self.pretty_name)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
raise TypeError("> can not be applied to %s." % self.pretty_name)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
raise TypeError(">= can not be applied to %s." % self.pretty_name)
def isnull(self, index_ops: IndexOpsLike) -> IndexOpsLike:

View file

@ -28,42 +28,41 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase
class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
@property
def pdf(self):
return pd.DataFrame(
{
"this_numeric_cat": pd.Series([1, 2, 3], dtype="category"),
"that_numeric_cat": pd.Series([3, 2, 1], dtype="category"),
"this_ordered_numeric_cat": pd.Categorical(
[1, 2, 3], categories=[3, 2, 1], ordered=True
),
"that_ordered_numeric_cat": pd.Categorical(
[2, 3, 1], categories=[3, 2, 1], ordered=True
),
"this_string_cat": pd.Series(["x", "y", "z"], dtype="category"),
"that_string_cat": pd.Series(["z", "y", "x"], dtype="category"),
"this_ordered_string_cat": pd.Categorical(
["x", "y", "z"], categories=["x", "z", "y"], ordered=True
),
"that_ordered_string_cat": pd.Categorical(
["z", "y", "x"], categories=["x", "z", "y"], ordered=True
),
}
)
@property
def psdf(self):
return ps.from_pandas(self.pdf)
@property
def pser(self):
return pd.Series([1, "x", "y"], dtype="category")
return pd.Series([1, 2, 3], dtype="category")
@property
def psser(self):
return ps.from_pandas(self.pser)
@property
def other_pser(self):
return pd.Series(["y", "x", 1], dtype="category")
@property
def other_psser(self):
return ps.from_pandas(self.other_pser)
@property
def ordered_pser(self):
return pd.Series([1, 2, 3]).astype(CategoricalDtype([3, 2, 1], ordered=True))
@property
def ordered_psser(self):
return ps.from_pandas(self.ordered_pser)
@property
def other_ordered_pser(self):
return pd.Series([2, 1, 3]).astype(CategoricalDtype([3, 2, 1], ordered=True))
@property
def other_ordered_psser(self):
return ps.from_pandas(self.other_ordered_pser)
@property
def unordered_psser(self):
return ps.Series([1, 2, 3]).astype(CategoricalDtype([3, 2, 1]))
def test_add(self):
self.assertRaises(TypeError, lambda: self.psser + "x")
self.assertRaises(TypeError, lambda: self.psser + 1)
@ -204,150 +203,301 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
)
self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
pdf, psdf = self.pdf, self.psdf
pser, psser = pdf["this_numeric_cat"], psdf["this_numeric_cat"]
ordered_pser, ordered_psser = (
pdf["this_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"],
)
self.assert_eq(ordered_pser == 1, ordered_psser == 1)
self.assert_eq(pser == pser, psser == psser)
self.assert_eq(ordered_pser == ordered_pser, ordered_psser == ordered_psser)
pser, psser = pdf["this_string_cat"], psdf["this_string_cat"]
ordered_pser, ordered_psser = (
pdf["this_ordered_string_cat"],
psdf["this_ordered_string_cat"],
)
self.assert_eq(pser == "x", psser == "x")
self.assert_eq(pser == pser, psser == psser)
self.assert_eq(ordered_pser == ordered_pser, ordered_psser == ordered_psser)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser == 4,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser == "a",
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser == ps.Series([1, 2, 3]),
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser == [1, 2, 3],
)
self.assert_eq(
pdf["this_numeric_cat"] == pdf["that_numeric_cat"],
psdf["this_numeric_cat"] == psdf["that_numeric_cat"],
)
self.assert_eq(
pdf["this_string_cat"] == pdf["that_string_cat"],
psdf["this_string_cat"] == psdf["that_string_cat"],
)
def test_ne(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
)
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
pdf, psdf = self.pdf, self.psdf
pser, psser = pdf["this_numeric_cat"], psdf["this_numeric_cat"]
ordered_pser, ordered_psser = (
pdf["this_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"],
)
self.assert_eq(ordered_pser != 1, ordered_psser != 1)
self.assert_eq(pser != pser, psser != psser)
self.assert_eq(ordered_pser != ordered_pser, ordered_psser != ordered_psser)
pser, psser = pdf["this_string_cat"], psdf["this_string_cat"]
ordered_pser, ordered_psser = (
pdf["this_ordered_string_cat"],
psdf["this_ordered_string_cat"],
)
self.assert_eq(pser != "x", psser != "x")
self.assert_eq(pser != pser, psser != psser)
self.assert_eq(ordered_pser != ordered_pser, ordered_psser != ordered_psser)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser != 4,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser != "a",
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser != ps.Series([1, 2, 3]),
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser != [1, 2, 3],
)
self.assert_eq(
pdf["this_numeric_cat"] != pdf["that_numeric_cat"],
psdf["this_numeric_cat"] != psdf["that_numeric_cat"],
)
self.assert_eq(
pdf["this_string_cat"] != pdf["that_string_cat"],
psdf["this_string_cat"] != psdf["that_string_cat"],
)
def test_lt(self):
ordered_pser = self.ordered_pser
ordered_psser = self.ordered_psser
pdf, psdf = self.pdf, self.psdf
ordered_pser, ordered_psser = (
pdf["this_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"],
)
self.assert_eq(ordered_pser < 1, ordered_psser < 1)
self.assert_eq(ordered_pser < ordered_pser, ordered_psser < ordered_psser)
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
ordered_pser < self.other_ordered_pser, ordered_psser < self.other_ordered_psser
)
self.assertRaisesRegex(
TypeError,
"Unordered Categoricals can only compare equality or not",
lambda: self.unordered_psser < ordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Categoricals can only be compared if 'categories' are the same",
lambda: ordered_psser < self.unordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser < ps.Series([1, 2, 3]),
)
ordered_pser, ordered_psser = (
pdf["this_ordered_string_cat"],
psdf["this_ordered_string_cat"],
)
self.assert_eq(ordered_pser < "x", ordered_psser < "x")
self.assert_eq(ordered_pser < ordered_pser, ordered_psser < ordered_psser)
self.assertRaisesRegex(
TypeError,
"Unordered Categoricals can only compare equality or not",
lambda: self.psser < ordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser < 4,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser < "a",
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser < ps.Series([1, 2, 3]),
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser < [1, 2, 3],
)
self.assertRaisesRegex(
TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser < 1
self.assert_eq(
pdf["this_ordered_numeric_cat"] < pdf["that_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"] < psdf["that_ordered_numeric_cat"],
)
self.assert_eq(
pdf["this_ordered_string_cat"] < pdf["that_ordered_string_cat"],
psdf["this_ordered_string_cat"] < psdf["that_ordered_string_cat"],
)
def test_le(self):
ordered_pser = self.ordered_pser
ordered_psser = self.ordered_psser
pdf, psdf = self.pdf, self.psdf
ordered_pser, ordered_psser = (
pdf["this_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"],
)
self.assert_eq(ordered_pser <= 1, ordered_psser <= 1)
self.assert_eq(ordered_pser <= ordered_pser, ordered_psser <= ordered_psser)
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
ordered_pser <= self.other_ordered_pser, ordered_psser <= self.other_ordered_psser
)
self.assertRaisesRegex(
TypeError,
"Unordered Categoricals can only compare equality or not",
lambda: self.unordered_psser <= ordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Categoricals can only be compared if 'categories' are the same",
lambda: ordered_psser <= self.unordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser <= ps.Series([1, 2, 3]),
)
ordered_pser, ordered_psser = (
pdf["this_ordered_string_cat"],
psdf["this_ordered_string_cat"],
)
self.assert_eq(ordered_pser <= "x", ordered_psser <= "x")
self.assert_eq(ordered_pser <= ordered_pser, ordered_psser <= ordered_psser)
self.assertRaisesRegex(
TypeError,
"Unordered Categoricals can only compare equality or not",
lambda: self.psser <= ordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser <= 4,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser <= "a",
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser <= ps.Series([1, 2, 3]),
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser <= [1, 2, 3],
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser <= 1,
self.assert_eq(
pdf["this_ordered_numeric_cat"] <= pdf["that_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"] <= psdf["that_ordered_numeric_cat"],
)
self.assert_eq(
pdf["this_ordered_string_cat"] <= pdf["that_ordered_string_cat"],
psdf["this_ordered_string_cat"] <= psdf["that_ordered_string_cat"],
)
def test_gt(self):
ordered_pser = self.ordered_pser
ordered_psser = self.ordered_psser
pdf, psdf = self.pdf, self.psdf
ordered_pser, ordered_psser = (
pdf["this_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"],
)
self.assert_eq(ordered_pser > 1, ordered_psser > 1)
self.assert_eq(ordered_pser > ordered_pser, ordered_psser > ordered_psser)
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
ordered_pser > self.other_ordered_pser, ordered_psser > self.other_ordered_psser
)
self.assertRaisesRegex(
TypeError,
"Unordered Categoricals can only compare equality or not",
lambda: self.unordered_psser > ordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Categoricals can only be compared if 'categories' are the same",
lambda: ordered_psser > self.unordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser > ps.Series([1, 2, 3]),
)
ordered_pser, ordered_psser = (
pdf["this_ordered_string_cat"],
psdf["this_ordered_string_cat"],
)
self.assert_eq(ordered_pser > "x", ordered_psser > "x")
self.assert_eq(ordered_pser > ordered_pser, ordered_psser > ordered_psser)
self.assertRaisesRegex(
TypeError,
"Unordered Categoricals can only compare equality or not",
lambda: self.psser > ordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser > 4,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser > "a",
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser > ps.Series([1, 2, 3]),
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser > [1, 2, 3],
)
self.assertRaisesRegex(
TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser > 1
self.assert_eq(
pdf["this_ordered_numeric_cat"] > pdf["that_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"] > psdf["that_ordered_numeric_cat"],
)
self.assert_eq(
pdf["this_ordered_string_cat"] > pdf["that_ordered_string_cat"],
psdf["this_ordered_string_cat"] > psdf["that_ordered_string_cat"],
)
def test_ge(self):
ordered_pser = self.ordered_pser
ordered_psser = self.ordered_psser
pdf, psdf = self.pdf, self.psdf
ordered_pser, ordered_psser = (
pdf["this_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"],
)
self.assert_eq(ordered_pser >= 1, ordered_psser >= 1)
self.assert_eq(ordered_pser >= ordered_pser, ordered_psser >= ordered_psser)
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
ordered_pser >= self.other_ordered_pser, ordered_psser >= self.other_ordered_psser
)
self.assertRaisesRegex(
TypeError,
"Unordered Categoricals can only compare equality or not",
lambda: self.unordered_psser >= ordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Categoricals can only be compared if 'categories' are the same",
lambda: ordered_psser >= self.unordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser >= ps.Series([1, 2, 3]),
)
ordered_pser, ordered_psser = (
pdf["this_ordered_string_cat"],
psdf["this_ordered_string_cat"],
)
self.assert_eq(ordered_pser >= "x", ordered_psser >= "x")
self.assert_eq(ordered_pser >= ordered_pser, ordered_psser >= ordered_psser)
self.assertRaisesRegex(
TypeError,
"Unordered Categoricals can only compare equality or not",
lambda: self.psser >= ordered_psser,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser >= 4,
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with a scalar, which is not a category",
lambda: ordered_psser >= "a",
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser >= ps.Series([1, 2, 3]),
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser >= [1, 2, 3],
)
self.assertRaisesRegex(
TypeError,
"Cannot compare a Categorical with the given type",
lambda: ordered_psser >= 1,
self.assert_eq(
pdf["this_ordered_numeric_cat"] >= pdf["that_ordered_numeric_cat"],
psdf["this_ordered_numeric_cat"] >= psdf["that_ordered_numeric_cat"],
)
self.assert_eq(
pdf["this_ordered_string_cat"] >= pdf["that_ordered_string_cat"],
psdf["this_ordered_string_cat"] >= psdf["that_ordered_string_cat"],
)

View file

@ -347,7 +347,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
with option_context("compute.ops_on_diff_frames", True):
for pser, psser in self.numeric_pser_psser_pairs:
if isinstance(psser.spark.data_type, DecimalType):
self.assertRaises(TypeError, lambda: psser < psser)
self.assertRaisesRegex(
TypeError, "< can not be applied to", lambda: psser < psser
)
else:
self.assert_eq(pser < pser, (psser < psser).sort_index())
@ -355,7 +357,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
with option_context("compute.ops_on_diff_frames", True):
for pser, psser in self.numeric_pser_psser_pairs:
if isinstance(psser.spark.data_type, DecimalType):
self.assertRaises(TypeError, lambda: psser <= psser)
self.assertRaisesRegex(
TypeError, "<= can not be applied to", lambda: psser <= psser
)
else:
self.assert_eq(pser <= pser, (psser <= psser).sort_index())
@ -363,7 +367,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
with option_context("compute.ops_on_diff_frames", True):
for pser, psser in self.numeric_pser_psser_pairs:
if isinstance(psser.spark.data_type, DecimalType):
self.assertRaises(TypeError, lambda: psser > psser)
self.assertRaisesRegex(
TypeError, "> can not be applied to", lambda: psser > psser
)
else:
self.assert_eq(pser > pser, (psser > psser).sort_index())
@ -371,7 +377,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
with option_context("compute.ops_on_diff_frames", True):
for pser, psser in self.numeric_pser_psser_pairs:
if isinstance(psser.spark.data_type, DecimalType):
self.assertRaises(TypeError, lambda: psser >= psser)
self.assertRaisesRegex(
TypeError, ">= can not be applied to", lambda: psser >= psser
)
else:
self.assert_eq(pser >= pser, (psser >= psser).sort_index())

View file

@ -149,16 +149,24 @@ class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
def test_lt(self):
self.assertRaises(TypeError, lambda: self.psser < self.psser)
self.assertRaisesRegex(
TypeError, "< can not be applied to", lambda: self.psser < self.psser
)
def test_le(self):
self.assertRaises(TypeError, lambda: self.psser <= self.psser)
self.assertRaisesRegex(
TypeError, "<= can not be applied to", lambda: self.psser <= self.psser
)
def test_gt(self):
self.assertRaises(TypeError, lambda: self.psser > self.psser)
self.assertRaisesRegex(
TypeError, "> can not be applied to", lambda: self.psser > self.psser
)
def test_ge(self):
self.assertRaises(TypeError, lambda: self.psser >= self.psser)
self.assertRaisesRegex(
TypeError, ">= can not be applied to", lambda: self.psser >= self.psser
)
if __name__ == "__main__":