diff --git a/python/pyspark/pandas/data_type_ops/base.py b/python/pyspark/pandas/data_type_ops/base.py index c79408bf19..d0e1317c5c 100644 --- a/python/pyspark/pandas/data_type_ops/base.py +++ b/python/pyspark/pandas/data_type_ops/base.py @@ -331,10 +331,10 @@ class DataTypeOps(object, metaclass=ABCMeta): def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError("<= can not be applied to %s." % self.pretty_name) - def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: + def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError("> can not be applied to %s." % self.pretty_name) - def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: + def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError(">= can not be applied to %s." % self.pretty_name) def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: diff --git a/python/pyspark/pandas/data_type_ops/categorical_ops.py b/python/pyspark/pandas/data_type_ops/categorical_ops.py index b0f32cbe2f..1e93492c1f 100644 --- a/python/pyspark/pandas/data_type_ops/categorical_ops.py +++ b/python/pyspark/pandas/data_type_ops/categorical_ops.py @@ -16,10 +16,11 @@ # from itertools import chain -from typing import Any, Union, cast +from typing import cast, Any, Callable, Union import pandas as pd -from pandas.api.types import CategoricalDtype +import numpy as np +from pandas.api.types import is_list_like, CategoricalDtype from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex from pyspark.pandas.base import column_op, IndexOpsMixin @@ -70,28 +71,59 @@ class CategoricalOps(DataTypeOps): scol = map_scol.getItem(index_ops.spark.column) return index_ops._with_new_scol(scol).astype(dtype) + def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: + return _compare(left, right, Column.__eq__, is_equality_comparison=True) + + def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: + return _compare(left, right, Column.__ne__, 
is_equality_comparison=True) + def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: - _non_equality_comparison_input_check(left, right) - return column_op(Column.__lt__)(left, right) + return _compare(left, right, Column.__lt__) def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: - _non_equality_comparison_input_check(left, right) - return column_op(Column.__le__)(left, right) + return _compare(left, right, Column.__le__) def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: - _non_equality_comparison_input_check(left, right) - return column_op(Column.__gt__)(left, right) + return _compare(left, right, Column.__gt__) def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: - _non_equality_comparison_input_check(left, right) - return column_op(Column.__ge__)(left, right) + return _compare(left, right, Column.__ge__) -def _non_equality_comparison_input_check(left: IndexOpsLike, right: Any) -> None: - if not cast(CategoricalDtype, left.dtype).ordered: - raise TypeError("Unordered Categoricals can only compare equality or not.") +def _compare( + left: IndexOpsLike, + right: Any, + f: Callable[..., Column], + *, + is_equality_comparison: bool = False +) -> SeriesOrIndex: + """ + Compare a Categorical operand `left` to `right` with the given Spark Column function. + + Parameters + ---------- + left: A Categorical operand + right: The other operand to compare with + f : The Spark Column function to apply + is_equality_comparison: True if it is an equality comparison, i.e. == or !=. False by default. 
+ + Returns + ------- + SeriesOrIndex + """ if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, CategoricalDtype): + if not is_equality_comparison: + if not cast(CategoricalDtype, left.dtype).ordered: + raise TypeError("Unordered Categoricals can only compare equality or not.") + # Check if categoricals have the same dtype, same categories, and same ordered if hash(left.dtype) != hash(right.dtype): raise TypeError("Categoricals can only be compared if 'categories' are the same.") + return column_op(f)(left, right) + elif not is_list_like(right): + categories = cast(CategoricalDtype, left.dtype).categories + if right not in categories: + raise TypeError("Cannot compare a Categorical with a scalar, which is not a category.") + right_code = categories.get_loc(right) + return column_op(f)(left, right_code) else: raise TypeError("Cannot compare a Categorical with the given type.") diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py index cd94faa5d9..8de46cdc33 100644 --- a/python/pyspark/pandas/data_type_ops/num_ops.py +++ b/python/pyspark/pandas/data_type_ops/num_ops.py @@ -355,10 +355,10 @@ class DecimalOps(FractionalOps): def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError("<= can not be applied to %s." % self.pretty_name) - def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: + def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError("> can not be applied to %s." % self.pretty_name) - def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: + def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError(">= can not be applied to %s." 
% self.pretty_name) def isnull(self, index_ops: IndexOpsLike) -> IndexOpsLike: diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py index 840722c43a..c9d150c8b3 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py @@ -28,42 +28,41 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils): + @property + def pdf(self): + return pd.DataFrame( + { + "this_numeric_cat": pd.Series([1, 2, 3], dtype="category"), + "that_numeric_cat": pd.Series([3, 2, 1], dtype="category"), + "this_ordered_numeric_cat": pd.Categorical( + [1, 2, 3], categories=[3, 2, 1], ordered=True + ), + "that_ordered_numeric_cat": pd.Categorical( + [2, 3, 1], categories=[3, 2, 1], ordered=True + ), + "this_string_cat": pd.Series(["x", "y", "z"], dtype="category"), + "that_string_cat": pd.Series(["z", "y", "x"], dtype="category"), + "this_ordered_string_cat": pd.Categorical( + ["x", "y", "z"], categories=["x", "z", "y"], ordered=True + ), + "that_ordered_string_cat": pd.Categorical( + ["z", "y", "x"], categories=["x", "z", "y"], ordered=True + ), + } + ) + + @property + def psdf(self): + return ps.from_pandas(self.pdf) + @property def pser(self): - return pd.Series([1, "x", "y"], dtype="category") + return pd.Series([1, 2, 3], dtype="category") @property def psser(self): return ps.from_pandas(self.pser) - @property - def other_pser(self): - return pd.Series(["y", "x", 1], dtype="category") - - @property - def other_psser(self): - return ps.from_pandas(self.other_pser) - - @property - def ordered_pser(self): - return pd.Series([1, 2, 3]).astype(CategoricalDtype([3, 2, 1], ordered=True)) - - @property - def ordered_psser(self): - return ps.from_pandas(self.ordered_pser) - - @property - def other_ordered_pser(self): - return pd.Series([2, 1, 
3]).astype(CategoricalDtype([3, 2, 1], ordered=True)) - - @property - def other_ordered_psser(self): - return ps.from_pandas(self.other_ordered_pser) - - @property - def unordered_psser(self): - return ps.Series([1, 2, 3]).astype(CategoricalDtype([3, 2, 1])) - def test_add(self): self.assertRaises(TypeError, lambda: self.psser + "x") self.assertRaises(TypeError, lambda: self.psser + 1) @@ -204,150 +203,301 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils): self.assertRaises(TypeError, lambda: ~self.psser) def test_eq(self): - with option_context("compute.ops_on_diff_frames", True): - self.assert_eq( - self.pser == self.other_pser, (self.psser == self.other_psser).sort_index() - ) - self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index()) + pdf, psdf = self.pdf, self.psdf + + pser, psser = pdf["this_numeric_cat"], psdf["this_numeric_cat"] + ordered_pser, ordered_psser = ( + pdf["this_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"], + ) + self.assert_eq(ordered_pser == 1, ordered_psser == 1) + self.assert_eq(pser == pser, psser == psser) + self.assert_eq(ordered_pser == ordered_pser, ordered_psser == ordered_psser) + + pser, psser = pdf["this_string_cat"], psdf["this_string_cat"] + ordered_pser, ordered_psser = ( + pdf["this_ordered_string_cat"], + psdf["this_ordered_string_cat"], + ) + self.assert_eq(pser == "x", psser == "x") + self.assert_eq(pser == pser, psser == psser) + self.assert_eq(ordered_pser == ordered_pser, ordered_psser == ordered_psser) + + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser == 4, + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser == "a", + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with the given type", + lambda: ordered_psser == ps.Series([1, 2, 3]), + ) + self.assertRaisesRegex( + 
TypeError, + "Cannot compare a Categorical with the given type", + lambda: ordered_psser == [1, 2, 3], + ) + + self.assert_eq( + pdf["this_numeric_cat"] == pdf["that_numeric_cat"], + psdf["this_numeric_cat"] == psdf["that_numeric_cat"], + ) + self.assert_eq( + pdf["this_string_cat"] == pdf["that_string_cat"], + psdf["this_string_cat"] == psdf["that_string_cat"], + ) def test_ne(self): - with option_context("compute.ops_on_diff_frames", True): - self.assert_eq( - self.pser != self.other_pser, (self.psser != self.other_psser).sort_index() - ) - self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index()) + pdf, psdf = self.pdf, self.psdf + + pser, psser = pdf["this_numeric_cat"], psdf["this_numeric_cat"] + ordered_pser, ordered_psser = ( + pdf["this_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"], + ) + self.assert_eq(ordered_pser != 1, ordered_psser != 1) + self.assert_eq(pser != pser, psser != psser) + self.assert_eq(ordered_pser != ordered_pser, ordered_psser != ordered_psser) + + pser, psser = pdf["this_string_cat"], psdf["this_string_cat"] + ordered_pser, ordered_psser = ( + pdf["this_ordered_string_cat"], + psdf["this_ordered_string_cat"], + ) + self.assert_eq(pser != "x", psser != "x") + self.assert_eq(pser != pser, psser != psser) + self.assert_eq(ordered_pser != ordered_pser, ordered_psser != ordered_psser) + + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser != 4, + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser != "a", + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with the given type", + lambda: ordered_psser != ps.Series([1, 2, 3]), + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with the given type", + lambda: ordered_psser != [1, 2, 3], + ) + self.assert_eq( + pdf["this_numeric_cat"] != 
pdf["that_numeric_cat"], + psdf["this_numeric_cat"] != psdf["that_numeric_cat"], + ) + self.assert_eq( + pdf["this_string_cat"] != pdf["that_string_cat"], + psdf["this_string_cat"] != psdf["that_string_cat"], + ) def test_lt(self): - ordered_pser = self.ordered_pser - ordered_psser = self.ordered_psser + pdf, psdf = self.pdf, self.psdf + ordered_pser, ordered_psser = ( + pdf["this_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"], + ) + self.assert_eq(ordered_pser < 1, ordered_psser < 1) self.assert_eq(ordered_pser < ordered_pser, ordered_psser < ordered_psser) - with option_context("compute.ops_on_diff_frames", True): - self.assert_eq( - ordered_pser < self.other_ordered_pser, ordered_psser < self.other_ordered_psser - ) - self.assertRaisesRegex( - TypeError, - "Unordered Categoricals can only compare equality or not", - lambda: self.unordered_psser < ordered_psser, - ) - self.assertRaisesRegex( - TypeError, - "Categoricals can only be compared if 'categories' are the same", - lambda: ordered_psser < self.unordered_psser, - ) - self.assertRaisesRegex( - TypeError, - "Cannot compare a Categorical with the given type", - lambda: ordered_psser < ps.Series([1, 2, 3]), - ) + + ordered_pser, ordered_psser = ( + pdf["this_ordered_string_cat"], + psdf["this_ordered_string_cat"], + ) + self.assert_eq(ordered_pser < "x", ordered_psser < "x") + self.assert_eq(ordered_pser < ordered_pser, ordered_psser < ordered_psser) + + self.assertRaisesRegex( + TypeError, + "Unordered Categoricals can only compare equality or not", + lambda: self.psser < ordered_psser, + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser < 4, + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser < "a", + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with the given type", + lambda: ordered_psser < 
ps.Series([1, 2, 3]), + ) self.assertRaisesRegex( TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser < [1, 2, 3], ) - self.assertRaisesRegex( - TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser < 1 + self.assert_eq( + pdf["this_ordered_numeric_cat"] < pdf["that_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"] < psdf["that_ordered_numeric_cat"], + ) + self.assert_eq( + pdf["this_ordered_string_cat"] < pdf["that_ordered_string_cat"], + psdf["this_ordered_string_cat"] < psdf["that_ordered_string_cat"], ) def test_le(self): - ordered_pser = self.ordered_pser - ordered_psser = self.ordered_psser + pdf, psdf = self.pdf, self.psdf + ordered_pser, ordered_psser = ( + pdf["this_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"], + ) + self.assert_eq(ordered_pser <= 1, ordered_psser <= 1) self.assert_eq(ordered_pser <= ordered_pser, ordered_psser <= ordered_psser) - with option_context("compute.ops_on_diff_frames", True): - self.assert_eq( - ordered_pser <= self.other_ordered_pser, ordered_psser <= self.other_ordered_psser - ) - self.assertRaisesRegex( - TypeError, - "Unordered Categoricals can only compare equality or not", - lambda: self.unordered_psser <= ordered_psser, - ) - self.assertRaisesRegex( - TypeError, - "Categoricals can only be compared if 'categories' are the same", - lambda: ordered_psser <= self.unordered_psser, - ) - self.assertRaisesRegex( - TypeError, - "Cannot compare a Categorical with the given type", - lambda: ordered_psser <= ps.Series([1, 2, 3]), - ) + ordered_pser, ordered_psser = ( + pdf["this_ordered_string_cat"], + psdf["this_ordered_string_cat"], + ) + self.assert_eq(ordered_pser <= "x", ordered_psser <= "x") + self.assert_eq(ordered_pser <= ordered_pser, ordered_psser <= ordered_psser) + self.assertRaisesRegex( + TypeError, + "Unordered Categoricals can only compare equality or not", + lambda: self.psser <= ordered_psser, + ) + self.assertRaisesRegex( + 
TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser <= 4, + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser <= "a", + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with the given type", + lambda: ordered_psser <= ps.Series([1, 2, 3]), + ) self.assertRaisesRegex( TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser <= [1, 2, 3], ) - self.assertRaisesRegex( - TypeError, - "Cannot compare a Categorical with the given type", - lambda: ordered_psser <= 1, + self.assert_eq( + pdf["this_ordered_numeric_cat"] <= pdf["that_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"] <= psdf["that_ordered_numeric_cat"], + ) + self.assert_eq( + pdf["this_ordered_string_cat"] <= pdf["that_ordered_string_cat"], + psdf["this_ordered_string_cat"] <= psdf["that_ordered_string_cat"], ) def test_gt(self): - ordered_pser = self.ordered_pser - ordered_psser = self.ordered_psser + pdf, psdf = self.pdf, self.psdf + ordered_pser, ordered_psser = ( + pdf["this_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"], + ) + self.assert_eq(ordered_pser > 1, ordered_psser > 1) self.assert_eq(ordered_pser > ordered_pser, ordered_psser > ordered_psser) - with option_context("compute.ops_on_diff_frames", True): - self.assert_eq( - ordered_pser > self.other_ordered_pser, ordered_psser > self.other_ordered_psser - ) - self.assertRaisesRegex( - TypeError, - "Unordered Categoricals can only compare equality or not", - lambda: self.unordered_psser > ordered_psser, - ) - self.assertRaisesRegex( - TypeError, - "Categoricals can only be compared if 'categories' are the same", - lambda: ordered_psser > self.unordered_psser, - ) - self.assertRaisesRegex( - TypeError, - "Cannot compare a Categorical with the given type", - lambda: ordered_psser > ps.Series([1, 2, 3]), - ) + + ordered_pser, ordered_psser = 
( + pdf["this_ordered_string_cat"], + psdf["this_ordered_string_cat"], + ) + self.assert_eq(ordered_pser > "x", ordered_psser > "x") + self.assert_eq(ordered_pser > ordered_pser, ordered_psser > ordered_psser) + self.assertRaisesRegex( + TypeError, + "Unordered Categoricals can only compare equality or not", + lambda: self.psser > ordered_psser, + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser > 4, + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser > "a", + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with the given type", + lambda: ordered_psser > ps.Series([1, 2, 3]), + ) self.assertRaisesRegex( TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser > [1, 2, 3], ) - self.assertRaisesRegex( - TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser > 1 + self.assert_eq( + pdf["this_ordered_numeric_cat"] > pdf["that_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"] > psdf["that_ordered_numeric_cat"], + ) + self.assert_eq( + pdf["this_ordered_string_cat"] > pdf["that_ordered_string_cat"], + psdf["this_ordered_string_cat"] > psdf["that_ordered_string_cat"], ) def test_ge(self): - ordered_pser = self.ordered_pser - ordered_psser = self.ordered_psser + pdf, psdf = self.pdf, self.psdf + ordered_pser, ordered_psser = ( + pdf["this_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"], + ) + self.assert_eq(ordered_pser >= 1, ordered_psser >= 1) self.assert_eq(ordered_pser >= ordered_pser, ordered_psser >= ordered_psser) - with option_context("compute.ops_on_diff_frames", True): - self.assert_eq( - ordered_pser >= self.other_ordered_pser, ordered_psser >= self.other_ordered_psser - ) - self.assertRaisesRegex( - TypeError, - "Unordered Categoricals can only compare equality or not", - lambda: 
self.unordered_psser >= ordered_psser, - ) - self.assertRaisesRegex( - TypeError, - "Categoricals can only be compared if 'categories' are the same", - lambda: ordered_psser >= self.unordered_psser, - ) - self.assertRaisesRegex( - TypeError, - "Cannot compare a Categorical with the given type", - lambda: ordered_psser >= ps.Series([1, 2, 3]), - ) + + ordered_pser, ordered_psser = ( + pdf["this_ordered_string_cat"], + psdf["this_ordered_string_cat"], + ) + self.assert_eq(ordered_pser >= "x", ordered_psser >= "x") + self.assert_eq(ordered_pser >= ordered_pser, ordered_psser >= ordered_psser) + self.assertRaisesRegex( + TypeError, + "Unordered Categoricals can only compare equality or not", + lambda: self.psser >= ordered_psser, + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser >= 4, + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with a scalar, which is not a category", + lambda: ordered_psser >= "a", + ) + self.assertRaisesRegex( + TypeError, + "Cannot compare a Categorical with the given type", + lambda: ordered_psser >= ps.Series([1, 2, 3]), + ) self.assertRaisesRegex( TypeError, "Cannot compare a Categorical with the given type", lambda: ordered_psser >= [1, 2, 3], ) - self.assertRaisesRegex( - TypeError, - "Cannot compare a Categorical with the given type", - lambda: ordered_psser >= 1, + self.assert_eq( + pdf["this_ordered_numeric_cat"] >= pdf["that_ordered_numeric_cat"], + psdf["this_ordered_numeric_cat"] >= psdf["that_ordered_numeric_cat"], + ) + self.assert_eq( + pdf["this_ordered_string_cat"] >= pdf["that_ordered_string_cat"], + psdf["this_ordered_string_cat"] >= psdf["that_ordered_string_cat"], ) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py index d1b26ece98..2a8070c630 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +++ 
b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py @@ -347,7 +347,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils): with option_context("compute.ops_on_diff_frames", True): for pser, psser in self.numeric_pser_psser_pairs: if isinstance(psser.spark.data_type, DecimalType): - self.assertRaises(TypeError, lambda: psser < psser) + self.assertRaisesRegex( + TypeError, "< can not be applied to", lambda: psser < psser + ) else: self.assert_eq(pser < pser, (psser < psser).sort_index()) @@ -355,7 +357,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils): with option_context("compute.ops_on_diff_frames", True): for pser, psser in self.numeric_pser_psser_pairs: if isinstance(psser.spark.data_type, DecimalType): - self.assertRaises(TypeError, lambda: psser <= psser) + self.assertRaisesRegex( + TypeError, "<= can not be applied to", lambda: psser <= psser + ) else: self.assert_eq(pser <= pser, (psser <= psser).sort_index()) @@ -363,7 +367,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils): with option_context("compute.ops_on_diff_frames", True): for pser, psser in self.numeric_pser_psser_pairs: if isinstance(psser.spark.data_type, DecimalType): - self.assertRaises(TypeError, lambda: psser > psser) + self.assertRaisesRegex( + TypeError, "> can not be applied to", lambda: psser > psser + ) else: self.assert_eq(pser > pser, (psser > psser).sort_index()) @@ -371,7 +377,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils): with option_context("compute.ops_on_diff_frames", True): for pser, psser in self.numeric_pser_psser_pairs: if isinstance(psser.spark.data_type, DecimalType): - self.assertRaises(TypeError, lambda: psser >= psser) + self.assertRaisesRegex( + TypeError, ">= can not be applied to", lambda: psser >= psser + ) else: self.assert_eq(pser >= pser, (psser >= psser).sort_index()) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py index 
d30cdd4a2b..232fec1fd5 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py @@ -149,16 +149,24 @@ class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils): self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index()) def test_lt(self): - self.assertRaises(TypeError, lambda: self.psser < self.psser) + self.assertRaisesRegex( + TypeError, "< can not be applied to", lambda: self.psser < self.psser + ) def test_le(self): - self.assertRaises(TypeError, lambda: self.psser <= self.psser) + self.assertRaisesRegex( + TypeError, "<= can not be applied to", lambda: self.psser <= self.psser + ) def test_gt(self): - self.assertRaises(TypeError, lambda: self.psser > self.psser) + self.assertRaisesRegex( + TypeError, "> can not be applied to", lambda: self.psser > self.psser + ) def test_ge(self): - self.assertRaises(TypeError, lambda: self.psser >= self.psser) + self.assertRaisesRegex( + TypeError, ">= can not be applied to", lambda: self.psser >= self.psser + ) if __name__ == "__main__":