[SPARK-36127][PYTHON] Support comparison between a Categorical and a scalar
### What changes were proposed in this pull request?
Support comparison between a Categorical and a scalar.
There are 3 main changes:
- Change `==` and `!=` between a Categorical and a scalar to compare the scalar against the **actual values** of the Categorical instead of its **codes**.
- Support `<`, `<=`, `>`, `>=` between a Categorical and a scalar.
- Fix the TypeError messages: the `>` and `>=` messages were attached to the wrong methods (`ge` and `gt` were swapped).
### Why are the changes needed?
pandas supports comparison between a Categorical and a scalar; we should follow pandas' behavior.
### Does this PR introduce _any_ user-facing change?
Yes.
Before:
```py
>>> import pyspark.pandas as ps
>>> import pandas as pd
>>> from pandas.api.types import CategoricalDtype
>>> pser = pd.Series(pd.Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True))
>>> psser = ps.from_pandas(pser)
>>> psser == 2
0 True
1 False
2 False
dtype: bool
>>> psser <= 1
Traceback (most recent call last):
...
NotImplementedError: <= can not be applied to categoricals.
```
After:
```py
>>> import pyspark.pandas as ps
>>> import pandas as pd
>>> from pandas.api.types import CategoricalDtype
>>> pser = pd.Series(pd.Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True))
>>> psser = ps.from_pandas(pser)
>>> psser == 2
0 False
1 True
2 False
dtype: bool
>>> psser <= 1
0 True
1 True
2 True
dtype: bool
```
### How was this patch tested?
Unit tests.
Closes #33373 from xinrong-databricks/categorical_eq.
Authored-by: Xinrong Meng <xinrong.meng@databricks.com>
Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
(cherry picked from commit 8dd43351d5)
Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
This commit is contained in:
parent
57794d3ec9
commit
48fadee158
|
@ -331,10 +331,10 @@ class DataTypeOps(object, metaclass=ABCMeta):
|
|||
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
raise TypeError("<= can not be applied to %s." % self.pretty_name)
|
||||
|
||||
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
raise TypeError("> can not be applied to %s." % self.pretty_name)
|
||||
|
||||
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
raise TypeError(">= can not be applied to %s." % self.pretty_name)
|
||||
|
||||
def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
|
|
|
@ -16,10 +16,11 @@
|
|||
#
|
||||
|
||||
from itertools import chain
|
||||
from typing import Any, Union, cast
|
||||
from typing import cast, Any, Callable, Union
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.types import CategoricalDtype
|
||||
import numpy as np
|
||||
from pandas.api.types import is_list_like, CategoricalDtype
|
||||
|
||||
from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
|
||||
from pyspark.pandas.base import column_op, IndexOpsMixin
|
||||
|
@ -70,28 +71,59 @@ class CategoricalOps(DataTypeOps):
|
|||
scol = map_scol.getItem(index_ops.spark.column)
|
||||
return index_ops._with_new_scol(scol).astype(dtype)
|
||||
|
||||
def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
return _compare(left, right, Column.__eq__, is_equality_comparison=True)
|
||||
|
||||
def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
return _compare(left, right, Column.__ne__, is_equality_comparison=True)
|
||||
|
||||
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
_non_equality_comparison_input_check(left, right)
|
||||
return column_op(Column.__lt__)(left, right)
|
||||
return _compare(left, right, Column.__lt__)
|
||||
|
||||
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
_non_equality_comparison_input_check(left, right)
|
||||
return column_op(Column.__le__)(left, right)
|
||||
return _compare(left, right, Column.__le__)
|
||||
|
||||
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
_non_equality_comparison_input_check(left, right)
|
||||
return column_op(Column.__gt__)(left, right)
|
||||
return _compare(left, right, Column.__gt__)
|
||||
|
||||
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
_non_equality_comparison_input_check(left, right)
|
||||
return column_op(Column.__ge__)(left, right)
|
||||
return _compare(left, right, Column.__ge__)
|
||||
|
||||
|
||||
def _non_equality_comparison_input_check(left: IndexOpsLike, right: Any) -> None:
|
||||
if not cast(CategoricalDtype, left.dtype).ordered:
|
||||
raise TypeError("Unordered Categoricals can only compare equality or not.")
|
||||
def _compare(
|
||||
left: IndexOpsLike,
|
||||
right: Any,
|
||||
f: Callable[..., Column],
|
||||
*,
|
||||
is_equality_comparison: bool = False
|
||||
) -> SeriesOrIndex:
|
||||
"""
|
||||
Compare a Categorical operand `left` to `right` with the given Spark Column function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
left: A Categorical operand
|
||||
right: The other operand to compare with
|
||||
f : The Spark Column function to apply
|
||||
is_equality_comparison: True if it is equality comparison, ie. == or !=. False by default.
|
||||
|
||||
Returns
|
||||
-------
|
||||
SeriesOrIndex
|
||||
"""
|
||||
if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, CategoricalDtype):
|
||||
if not is_equality_comparison:
|
||||
if not cast(CategoricalDtype, left.dtype).ordered:
|
||||
raise TypeError("Unordered Categoricals can only compare equality or not.")
|
||||
# Check if categoricals have the same dtype, same categories, and same ordered
|
||||
if hash(left.dtype) != hash(right.dtype):
|
||||
raise TypeError("Categoricals can only be compared if 'categories' are the same.")
|
||||
return column_op(f)(left, right)
|
||||
elif not is_list_like(right):
|
||||
categories = cast(CategoricalDtype, left.dtype).categories
|
||||
if right not in categories:
|
||||
raise TypeError("Cannot compare a Categorical with a scalar, which is not a category.")
|
||||
right_code = categories.get_loc(right)
|
||||
return column_op(f)(left, right_code)
|
||||
else:
|
||||
raise TypeError("Cannot compare a Categorical with the given type.")
|
||||
|
|
|
@ -355,10 +355,10 @@ class DecimalOps(FractionalOps):
|
|||
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
raise TypeError("<= can not be applied to %s." % self.pretty_name)
|
||||
|
||||
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
raise TypeError("> can not be applied to %s." % self.pretty_name)
|
||||
|
||||
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
||||
raise TypeError(">= can not be applied to %s." % self.pretty_name)
|
||||
|
||||
def isnull(self, index_ops: IndexOpsLike) -> IndexOpsLike:
|
||||
|
|
|
@ -28,42 +28,41 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase
|
|||
|
||||
|
||||
class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
||||
@property
|
||||
def pdf(self):
|
||||
return pd.DataFrame(
|
||||
{
|
||||
"this_numeric_cat": pd.Series([1, 2, 3], dtype="category"),
|
||||
"that_numeric_cat": pd.Series([3, 2, 1], dtype="category"),
|
||||
"this_ordered_numeric_cat": pd.Categorical(
|
||||
[1, 2, 3], categories=[3, 2, 1], ordered=True
|
||||
),
|
||||
"that_ordered_numeric_cat": pd.Categorical(
|
||||
[2, 3, 1], categories=[3, 2, 1], ordered=True
|
||||
),
|
||||
"this_string_cat": pd.Series(["x", "y", "z"], dtype="category"),
|
||||
"that_string_cat": pd.Series(["z", "y", "x"], dtype="category"),
|
||||
"this_ordered_string_cat": pd.Categorical(
|
||||
["x", "y", "z"], categories=["x", "z", "y"], ordered=True
|
||||
),
|
||||
"that_ordered_string_cat": pd.Categorical(
|
||||
["z", "y", "x"], categories=["x", "z", "y"], ordered=True
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
@property
|
||||
def psdf(self):
|
||||
return ps.from_pandas(self.pdf)
|
||||
|
||||
@property
|
||||
def pser(self):
|
||||
return pd.Series([1, "x", "y"], dtype="category")
|
||||
return pd.Series([1, 2, 3], dtype="category")
|
||||
|
||||
@property
|
||||
def psser(self):
|
||||
return ps.from_pandas(self.pser)
|
||||
|
||||
@property
|
||||
def other_pser(self):
|
||||
return pd.Series(["y", "x", 1], dtype="category")
|
||||
|
||||
@property
|
||||
def other_psser(self):
|
||||
return ps.from_pandas(self.other_pser)
|
||||
|
||||
@property
|
||||
def ordered_pser(self):
|
||||
return pd.Series([1, 2, 3]).astype(CategoricalDtype([3, 2, 1], ordered=True))
|
||||
|
||||
@property
|
||||
def ordered_psser(self):
|
||||
return ps.from_pandas(self.ordered_pser)
|
||||
|
||||
@property
|
||||
def other_ordered_pser(self):
|
||||
return pd.Series([2, 1, 3]).astype(CategoricalDtype([3, 2, 1], ordered=True))
|
||||
|
||||
@property
|
||||
def other_ordered_psser(self):
|
||||
return ps.from_pandas(self.other_ordered_pser)
|
||||
|
||||
@property
|
||||
def unordered_psser(self):
|
||||
return ps.Series([1, 2, 3]).astype(CategoricalDtype([3, 2, 1]))
|
||||
|
||||
def test_add(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser + "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser + 1)
|
||||
|
@ -204,150 +203,301 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: ~self.psser)
|
||||
|
||||
def test_eq(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
|
||||
pdf, psdf = self.pdf, self.psdf
|
||||
|
||||
pser, psser = pdf["this_numeric_cat"], psdf["this_numeric_cat"]
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser == 1, ordered_psser == 1)
|
||||
self.assert_eq(pser == pser, psser == psser)
|
||||
self.assert_eq(ordered_pser == ordered_pser, ordered_psser == ordered_psser)
|
||||
|
||||
pser, psser = pdf["this_string_cat"], psdf["this_string_cat"]
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"],
|
||||
)
|
||||
self.assert_eq(pser == "x", psser == "x")
|
||||
self.assert_eq(pser == pser, psser == psser)
|
||||
self.assert_eq(ordered_pser == ordered_pser, ordered_psser == ordered_psser)
|
||||
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser == 4,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser == "a",
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser == ps.Series([1, 2, 3]),
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser == [1, 2, 3],
|
||||
)
|
||||
|
||||
self.assert_eq(
|
||||
pdf["this_numeric_cat"] == pdf["that_numeric_cat"],
|
||||
psdf["this_numeric_cat"] == psdf["that_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(
|
||||
pdf["this_string_cat"] == pdf["that_string_cat"],
|
||||
psdf["this_string_cat"] == psdf["that_string_cat"],
|
||||
)
|
||||
|
||||
def test_ne(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
|
||||
pdf, psdf = self.pdf, self.psdf
|
||||
|
||||
pser, psser = pdf["this_numeric_cat"], psdf["this_numeric_cat"]
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser != 1, ordered_psser != 1)
|
||||
self.assert_eq(pser != pser, psser != psser)
|
||||
self.assert_eq(ordered_pser != ordered_pser, ordered_psser != ordered_psser)
|
||||
|
||||
pser, psser = pdf["this_string_cat"], psdf["this_string_cat"]
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"],
|
||||
)
|
||||
self.assert_eq(pser != "x", psser != "x")
|
||||
self.assert_eq(pser != pser, psser != psser)
|
||||
self.assert_eq(ordered_pser != ordered_pser, ordered_psser != ordered_psser)
|
||||
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser != 4,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser != "a",
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser != ps.Series([1, 2, 3]),
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser != [1, 2, 3],
|
||||
)
|
||||
self.assert_eq(
|
||||
pdf["this_numeric_cat"] != pdf["that_numeric_cat"],
|
||||
psdf["this_numeric_cat"] != psdf["that_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(
|
||||
pdf["this_string_cat"] != pdf["that_string_cat"],
|
||||
psdf["this_string_cat"] != psdf["that_string_cat"],
|
||||
)
|
||||
|
||||
def test_lt(self):
|
||||
ordered_pser = self.ordered_pser
|
||||
ordered_psser = self.ordered_psser
|
||||
pdf, psdf = self.pdf, self.psdf
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser < 1, ordered_psser < 1)
|
||||
self.assert_eq(ordered_pser < ordered_pser, ordered_psser < ordered_psser)
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
ordered_pser < self.other_ordered_pser, ordered_psser < self.other_ordered_psser
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Unordered Categoricals can only compare equality or not",
|
||||
lambda: self.unordered_psser < ordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Categoricals can only be compared if 'categories' are the same",
|
||||
lambda: ordered_psser < self.unordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser < ps.Series([1, 2, 3]),
|
||||
)
|
||||
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser < "x", ordered_psser < "x")
|
||||
self.assert_eq(ordered_pser < ordered_pser, ordered_psser < ordered_psser)
|
||||
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Unordered Categoricals can only compare equality or not",
|
||||
lambda: self.psser < ordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser < 4,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser < "a",
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser < ps.Series([1, 2, 3]),
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser < [1, 2, 3],
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser < 1
|
||||
self.assert_eq(
|
||||
pdf["this_ordered_numeric_cat"] < pdf["that_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"] < psdf["that_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(
|
||||
pdf["this_ordered_string_cat"] < pdf["that_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"] < psdf["that_ordered_string_cat"],
|
||||
)
|
||||
|
||||
def test_le(self):
|
||||
ordered_pser = self.ordered_pser
|
||||
ordered_psser = self.ordered_psser
|
||||
pdf, psdf = self.pdf, self.psdf
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser <= 1, ordered_psser <= 1)
|
||||
self.assert_eq(ordered_pser <= ordered_pser, ordered_psser <= ordered_psser)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
ordered_pser <= self.other_ordered_pser, ordered_psser <= self.other_ordered_psser
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Unordered Categoricals can only compare equality or not",
|
||||
lambda: self.unordered_psser <= ordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Categoricals can only be compared if 'categories' are the same",
|
||||
lambda: ordered_psser <= self.unordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser <= ps.Series([1, 2, 3]),
|
||||
)
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser <= "x", ordered_psser <= "x")
|
||||
self.assert_eq(ordered_pser <= ordered_pser, ordered_psser <= ordered_psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Unordered Categoricals can only compare equality or not",
|
||||
lambda: self.psser <= ordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser <= 4,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser <= "a",
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser <= ps.Series([1, 2, 3]),
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser <= [1, 2, 3],
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser <= 1,
|
||||
self.assert_eq(
|
||||
pdf["this_ordered_numeric_cat"] <= pdf["that_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"] <= psdf["that_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(
|
||||
pdf["this_ordered_string_cat"] <= pdf["that_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"] <= psdf["that_ordered_string_cat"],
|
||||
)
|
||||
|
||||
def test_gt(self):
|
||||
ordered_pser = self.ordered_pser
|
||||
ordered_psser = self.ordered_psser
|
||||
pdf, psdf = self.pdf, self.psdf
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser > 1, ordered_psser > 1)
|
||||
self.assert_eq(ordered_pser > ordered_pser, ordered_psser > ordered_psser)
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
ordered_pser > self.other_ordered_pser, ordered_psser > self.other_ordered_psser
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Unordered Categoricals can only compare equality or not",
|
||||
lambda: self.unordered_psser > ordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Categoricals can only be compared if 'categories' are the same",
|
||||
lambda: ordered_psser > self.unordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser > ps.Series([1, 2, 3]),
|
||||
)
|
||||
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser > "x", ordered_psser > "x")
|
||||
self.assert_eq(ordered_pser > ordered_pser, ordered_psser > ordered_psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Unordered Categoricals can only compare equality or not",
|
||||
lambda: self.psser > ordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser > 4,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser > "a",
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser > ps.Series([1, 2, 3]),
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser > [1, 2, 3],
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser > 1
|
||||
self.assert_eq(
|
||||
pdf["this_ordered_numeric_cat"] > pdf["that_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"] > psdf["that_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(
|
||||
pdf["this_ordered_string_cat"] > pdf["that_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"] > psdf["that_ordered_string_cat"],
|
||||
)
|
||||
|
||||
def test_ge(self):
|
||||
ordered_pser = self.ordered_pser
|
||||
ordered_psser = self.ordered_psser
|
||||
pdf, psdf = self.pdf, self.psdf
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser >= 1, ordered_psser >= 1)
|
||||
self.assert_eq(ordered_pser >= ordered_pser, ordered_psser >= ordered_psser)
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
ordered_pser >= self.other_ordered_pser, ordered_psser >= self.other_ordered_psser
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Unordered Categoricals can only compare equality or not",
|
||||
lambda: self.unordered_psser >= ordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Categoricals can only be compared if 'categories' are the same",
|
||||
lambda: ordered_psser >= self.unordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser >= ps.Series([1, 2, 3]),
|
||||
)
|
||||
|
||||
ordered_pser, ordered_psser = (
|
||||
pdf["this_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"],
|
||||
)
|
||||
self.assert_eq(ordered_pser >= "x", ordered_psser >= "x")
|
||||
self.assert_eq(ordered_pser >= ordered_pser, ordered_psser >= ordered_psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Unordered Categoricals can only compare equality or not",
|
||||
lambda: self.psser >= ordered_psser,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser >= 4,
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with a scalar, which is not a category",
|
||||
lambda: ordered_psser >= "a",
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser >= ps.Series([1, 2, 3]),
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser >= [1, 2, 3],
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
TypeError,
|
||||
"Cannot compare a Categorical with the given type",
|
||||
lambda: ordered_psser >= 1,
|
||||
self.assert_eq(
|
||||
pdf["this_ordered_numeric_cat"] >= pdf["that_ordered_numeric_cat"],
|
||||
psdf["this_ordered_numeric_cat"] >= psdf["that_ordered_numeric_cat"],
|
||||
)
|
||||
self.assert_eq(
|
||||
pdf["this_ordered_string_cat"] >= pdf["that_ordered_string_cat"],
|
||||
psdf["this_ordered_string_cat"] >= psdf["that_ordered_string_cat"],
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -347,7 +347,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for pser, psser in self.numeric_pser_psser_pairs:
|
||||
if isinstance(psser.spark.data_type, DecimalType):
|
||||
self.assertRaises(TypeError, lambda: psser < psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, "< can not be applied to", lambda: psser < psser
|
||||
)
|
||||
else:
|
||||
self.assert_eq(pser < pser, (psser < psser).sort_index())
|
||||
|
||||
|
@ -355,7 +357,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for pser, psser in self.numeric_pser_psser_pairs:
|
||||
if isinstance(psser.spark.data_type, DecimalType):
|
||||
self.assertRaises(TypeError, lambda: psser <= psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, "<= can not be applied to", lambda: psser <= psser
|
||||
)
|
||||
else:
|
||||
self.assert_eq(pser <= pser, (psser <= psser).sort_index())
|
||||
|
||||
|
@ -363,7 +367,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for pser, psser in self.numeric_pser_psser_pairs:
|
||||
if isinstance(psser.spark.data_type, DecimalType):
|
||||
self.assertRaises(TypeError, lambda: psser > psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, "> can not be applied to", lambda: psser > psser
|
||||
)
|
||||
else:
|
||||
self.assert_eq(pser > pser, (psser > psser).sort_index())
|
||||
|
||||
|
@ -371,7 +377,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for pser, psser in self.numeric_pser_psser_pairs:
|
||||
if isinstance(psser.spark.data_type, DecimalType):
|
||||
self.assertRaises(TypeError, lambda: psser >= psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, ">= can not be applied to", lambda: psser >= psser
|
||||
)
|
||||
else:
|
||||
self.assert_eq(pser >= pser, (psser >= psser).sort_index())
|
||||
|
||||
|
|
|
@ -149,16 +149,24 @@ class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
|
||||
|
||||
def test_lt(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser < self.psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, "< can not be applied to", lambda: self.psser < self.psser
|
||||
)
|
||||
|
||||
def test_le(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser <= self.psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, "<= can not be applied to", lambda: self.psser <= self.psser
|
||||
)
|
||||
|
||||
def test_gt(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser > self.psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, "> can not be applied to", lambda: self.psser > self.psser
|
||||
)
|
||||
|
||||
def test_ge(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser >= self.psser)
|
||||
self.assertRaisesRegex(
|
||||
TypeError, ">= can not be applied to", lambda: self.psser >= self.psser
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in a new issue