From 606a99c01e84889109cfaf643a0f59075cc4d02d Mon Sep 17 00:00:00 2001
From: Xinrong Meng
Date: Mon, 12 Jul 2021 15:10:06 +0900
Subject: [PATCH] [SPARK-36003][PYTHON] Implement unary operator `invert` of integral ps.Series/Index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?
Implement the unary operator `invert` for integral ps.Series/Index.

### Why are the changes needed?
Currently, the unary operator `invert` is not supported for integral ps.Series/Index. We ought to implement it, following pandas' behavior.

### Does this PR introduce _any_ user-facing change?
Yes.

Before:
```py
>>> import pyspark.pandas as ps
>>> psser = ps.Series([1, 2, 3])
>>> ~psser
Traceback (most recent call last):
...
NotImplementedError: Unary ~ can not be applied to integrals.
```

After:
```py
>>> import pyspark.pandas as ps
>>> psser = ps.Series([1, 2, 3])
>>> ~psser
0   -2
1   -3
2   -4
dtype: int64
```
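The operator also works on an integral ps.Index, since `invert` is resolved through the shared data-type-ops layer. A quick sketch of the expected behavior (the exact Index repr shown here is an assumption and may vary with the pandas version):

```py
>>> import pyspark.pandas as ps
>>> psidx = ps.Index([1, 2, 3])
>>> ~psidx
Int64Index([-2, -3, -4], dtype='int64')
```

Under the hood, `~` maps to `pyspark.sql.functions.bitwise_not` on the underlying Spark column, so the result follows two's-complement semantics (`~x == -x - 1`), matching pandas/NumPy.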
### How was this patch tested?
Unit tests.

Closes #33285 from xinrong-databricks/numeric_invert.

Authored-by: Xinrong Meng
Signed-off-by: Hyukjin Kwon
(cherry picked from commit badb0393d46d7aef90710e51e233fb5077977423)
Signed-off-by: Hyukjin Kwon
---
 python/pyspark/pandas/data_type_ops/num_ops.py       | 18 ++++--------------
 .../pandas/tests/data_type_ops/test_num_ops.py       | 14 +++++++-------
 2 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py
index ed089e53e6..1c7f05190a 100644
--- a/python/pyspark/pandas/data_type_ops/num_ops.py
+++ b/python/pyspark/pandas/data_type_ops/num_ops.py
@@ -122,38 +122,25 @@ class NumericOps(DataTypeOps):
         right = transform_boolean_operand_to_numeric(right)
         return column_op(rmod)(left, right)
 
-    # TODO(SPARK-36003): Implement unary operator `invert` as below
     def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
-        raise NotImplementedError("Unary ~ can not be applied to %s." % self.pretty_name)
+        return cast(IndexOpsLike, column_op(F.bitwise_not)(operand))
 
     def neg(self, operand: IndexOpsLike) -> IndexOpsLike:
-        from pyspark.pandas.base import column_op
-
         return cast(IndexOpsLike, column_op(Column.__neg__)(operand))
 
     def abs(self, operand: IndexOpsLike) -> IndexOpsLike:
-        from pyspark.pandas.base import column_op
-
         return cast(IndexOpsLike, column_op(F.abs)(operand))
 
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__gt__)(left, right)
 
@@ -317,6 +304,9 @@ class FractionalOps(NumericOps):
         right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
         return numpy_column_op(rfloordiv)(left, right)
 
+    def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
+        raise TypeError("Unary ~ can not be applied to %s." % self.pretty_name)
+
     def isnull(self, index_ops: IndexOpsLike) -> IndexOpsLike:
         return index_ops._with_new_scol(
             index_ops.spark.column.isNull() | F.isnan(index_ops.spark.column),
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
index bbacc7d422..9a05c6820a 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
@@ -30,7 +30,7 @@ from pyspark.pandas.typedef.typehints import (
     extension_dtypes_available,
     extension_float_dtypes_available,
 )
-from pyspark.sql.types import DecimalType
+from pyspark.sql.types import ByteType, DecimalType, IntegerType, LongType
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
 
@@ -327,9 +327,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
         self.assert_eq(abs(pser), abs(psser))
 
     def test_invert(self):
-        for psser in self.numeric_pssers:
-            if not isinstance(psser.spark.data_type, DecimalType):
-                self.assertRaises(NotImplementedError, lambda: ~psser)
+        for pser, psser in self.numeric_pser_psser_pairs:
+            if type(psser.spark.data_type) in [ByteType, IntegerType, LongType]:
+                self.assert_eq(~pser, ~psser)
             else:
                 self.assertRaises(TypeError, lambda: ~psser)
 
@@ -426,8 +426,8 @@ class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
         self.check_extension(abs(pser), abs(psser))
 
     def test_invert(self):
-        for psser in self.intergral_extension_pssers:
-            self.assertRaises(NotImplementedError, lambda: ~psser)
+        for pser, psser in self.intergral_extension_pser_psser_pairs:
+            self.check_extension(~pser, ~psser)
 
     def test_eq(self):
         with option_context("compute.ops_on_diff_frames", True):
@@ -507,7 +507,7 @@ class FractionalExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
 
     def test_invert(self):
         for psser in self.fractional_extension_pssers:
-            self.assertRaises(NotImplementedError, lambda: ~psser)
+            self.assertRaises(TypeError, lambda: ~psser)
 
     def test_eq(self):
         with option_context("compute.ops_on_diff_frames", True):