Revert "[SPARK-35338][PYTHON] Separate arithmetic operations into data type based structures"

This reverts commit d1b24d8aba.
2021-05-19 16:49:47 -07:00 · 2021-05-19 16:49:47 -07:00 · d44e6c7f10
parent 586caae3cc
commit d44e6c7f10
23 changed files with 259 additions and 1881 deletions
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -611,12 +611,6 @@ pyspark_pandas = Module(
        "pyspark.pandas.spark.utils",
        "pyspark.pandas.typedef.typehints",
        # unittests
-        "pyspark.pandas.tests.data_type_ops.test_boolean_ops",
-        "pyspark.pandas.tests.data_type_ops.test_categorical_ops",
-        "pyspark.pandas.tests.data_type_ops.test_date_ops",
-        "pyspark.pandas.tests.data_type_ops.test_datetime_ops",
-        "pyspark.pandas.tests.data_type_ops.test_num_ops",
-        "pyspark.pandas.tests.data_type_ops.test_string_ops",
        "pyspark.pandas.tests.indexes.test_base",
        "pyspark.pandas.tests.indexes.test_category",
        "pyspark.pandas.tests.indexes.test_datetime",
--- a/python/pyspark/pandas/base.py
+++ b/python/pyspark/pandas/base.py
@@ -19,6 +19,7 @@
 Base and utility classes for pandas-on-Spark objects.
 """
 from abc import ABCMeta, abstractmethod
+import datetime
 from functools import wraps, partial
 from itertools import chain
 from typing import Any, Callable, Optional, Tuple, Union, cast, TYPE_CHECKING
@@ -34,6 +35,7 @@ from pyspark.sql.types import (
    DateType,
    DoubleType,
    FloatType,
+    IntegralType,
    LongType,
    NumericType,
    StringType,
@@ -48,9 +50,11 @@ from pyspark.pandas.internal import (
    NATURAL_ORDER_COLUMN_NAME,
    SPARK_DEFAULT_INDEX_NAME,
 )
+from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.spark.accessors import SparkIndexOpsMethods
 from pyspark.pandas.typedef import (
    Dtype,
+    as_spark_type,
    extension_dtypes,
    pandas_on_spark_type,
    spark_type_to_pandas_dtype,
@@ -318,23 +322,100 @@ class IndexOpsMixin(object, metaclass=ABCMeta):

    spark_column.__doc__ = SparkIndexOpsMethods.column.__doc__

-    @property
-    def _dtype_op(self):
-        from pyspark.pandas.data_type_ops.base import DataTypeOps
-
-        return DataTypeOps(self.dtype, self.spark.data_type)
-
    # arithmetic operators
    __neg__ = column_op(Column.__neg__)

    def __add__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__add__(self, other)
+        if not isinstance(self.spark.data_type, StringType) and (
+            (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType))
+            or isinstance(other, str)
+        ):
+            raise TypeError("string addition can only be applied to string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("addition can not be applied to date times.")
+
+        if isinstance(self.spark.data_type, StringType):
+            # Concatenate string columns
+            if isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType):
+                return column_op(F.concat)(self, other)
+            # Handle df['col'] + 'literal'
+            elif isinstance(other, str):
+                return column_op(F.concat)(self, F.lit(other))
+            else:
+                raise TypeError("string addition can only be applied to string series or literals.")
+        else:
+            return column_op(Column.__add__)(self, other)

    def __sub__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__sub__(self, other)
+        if (
+            isinstance(self.spark.data_type, StringType)
+            or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType))
+            or isinstance(other, str)
+        ):
+            raise TypeError("substraction can not be applied to string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            # Note that timestamp subtraction casts arguments to integer. This is to mimic pandas's
+            # behaviors. pandas returns 'timedelta64[ns]' from 'datetime64[ns]'s subtraction.
+            msg = (
+                "Note that there is a behavior difference of timestamp subtraction. "
+                "The timestamp subtraction returns an integer in seconds, "
+                "whereas pandas returns 'timedelta64[ns]'."
+            )
+            if isinstance(other, IndexOpsMixin) and isinstance(
+                other.spark.data_type, TimestampType
+            ):
+                warnings.warn(msg, UserWarning)
+                return self.astype("long") - other.astype("long")
+            elif isinstance(other, datetime.datetime):
+                warnings.warn(msg, UserWarning)
+                return self.astype("long") - F.lit(other).cast(as_spark_type("long"))
+            else:
+                raise TypeError("datetime subtraction can only be applied to datetime series.")
+        elif isinstance(self.spark.data_type, DateType):
+            # Note that date subtraction casts arguments to integer. This is to mimic pandas's
+            # behaviors. pandas returns 'timedelta64[ns]' in days from date's subtraction.
+            msg = (
+                "Note that there is a behavior difference of date subtraction. "
+                "The date subtraction returns an integer in days, "
+                "whereas pandas returns 'timedelta64[ns]'."
+            )
+            if isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, DateType):
+                warnings.warn(msg, UserWarning)
+                return column_op(F.datediff)(self, other).astype("long")
+            elif isinstance(other, datetime.date) and not isinstance(other, datetime.datetime):
+                warnings.warn(msg, UserWarning)
+                return column_op(F.datediff)(self, F.lit(other)).astype("long")
+            else:
+                raise TypeError("date subtraction can only be applied to date series.")
+        return column_op(Column.__sub__)(self, other)

    def __mul__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__mul__(self, other)
+        if isinstance(other, str):
+            raise TypeError("multiplication can not be applied to a string literal.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("multiplication can not be applied to date times.")
+
+        if (
+            isinstance(self.spark.data_type, IntegralType)
+            and isinstance(other, IndexOpsMixin)
+            and isinstance(other.spark.data_type, StringType)
+        ):
+            return column_op(SF.repeat)(other, self)
+
+        if isinstance(self.spark.data_type, StringType):
+            if (
+                isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, IntegralType)
+            ) or isinstance(other, int):
+                return column_op(SF.repeat)(self, other)
+            else:
+                raise TypeError(
+                    "a string series can only be multiplied to an int series or literal"
+                )
+
+        return column_op(Column.__mul__)(self, other)

    def __truediv__(self, other) -> Union["Series", "Index"]:
        """
@@ -353,22 +434,122 @@ class IndexOpsMixin(object, metaclass=ABCMeta):
        |          -10          |   null  | -np.inf |
        +-----------------------|---------|---------+
        """
-        return self._dtype_op.__truediv__(self, other)
+
+        if (
+            isinstance(self.spark.data_type, StringType)
+            or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType))
+            or isinstance(other, str)
+        ):
+            raise TypeError("division can not be applied on string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("division can not be applied to date times.")
+
+        def truediv(left, right):
+            return F.when(F.lit(right != 0) | F.lit(right).isNull(), left.__div__(right)).otherwise(
+                F.when(F.lit(left == np.inf) | F.lit(left == -np.inf), left).otherwise(
+                    F.lit(np.inf).__div__(left)
+                )
+            )
+
+        return numpy_column_op(truediv)(self, other)

    def __mod__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__mod__(self, other)
+        if (
+            isinstance(self.spark.data_type, StringType)
+            or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType))
+            or isinstance(other, str)
+        ):
+            raise TypeError("modulo can not be applied on string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("modulo can not be applied to date times.")
+
+        def mod(left, right):
+            return ((left % right) + right) % right
+
+        return column_op(mod)(self, other)

    def __radd__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__radd__(self, other)
+        # Handle 'literal' + df['col']
+        if not isinstance(self.spark.data_type, StringType) and isinstance(other, str):
+            raise TypeError("string addition can only be applied to string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("addition can not be applied to date times.")
+
+        if isinstance(self.spark.data_type, StringType):
+            if isinstance(other, str):
+                return self._with_new_scol(
+                    F.concat(F.lit(other), self.spark.column)
+                )  # TODO: dtype?
+            else:
+                raise TypeError("string addition can only be applied to string series or literals.")
+        else:
+            return column_op(Column.__radd__)(self, other)

    def __rsub__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__rsub__(self, other)
+        if isinstance(self.spark.data_type, StringType) or isinstance(other, str):
+            raise TypeError("substraction can not be applied to string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            # Note that timestamp subtraction casts arguments to integer. This is to mimic pandas's
+            # behaviors. pandas returns 'timedelta64[ns]' from 'datetime64[ns]'s subtraction.
+            msg = (
+                "Note that there is a behavior difference of timestamp subtraction. "
+                "The timestamp subtraction returns an integer in seconds, "
+                "whereas pandas returns 'timedelta64[ns]'."
+            )
+            if isinstance(other, datetime.datetime):
+                warnings.warn(msg, UserWarning)
+                return -(self.astype("long") - F.lit(other).cast(as_spark_type("long")))
+            else:
+                raise TypeError("datetime subtraction can only be applied to datetime series.")
+        elif isinstance(self.spark.data_type, DateType):
+            # Note that date subtraction casts arguments to integer. This is to mimic pandas's
+            # behaviors. pandas returns 'timedelta64[ns]' in days from date's subtraction.
+            msg = (
+                "Note that there is a behavior difference of date subtraction. "
+                "The date subtraction returns an integer in days, "
+                "whereas pandas returns 'timedelta64[ns]'."
+            )
+            if isinstance(other, datetime.date) and not isinstance(other, datetime.datetime):
+                warnings.warn(msg, UserWarning)
+                return -column_op(F.datediff)(self, F.lit(other)).astype("long")
+            else:
+                raise TypeError("date subtraction can only be applied to date series.")
+        return column_op(Column.__rsub__)(self, other)

    def __rmul__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__rmul__(self, other)
+        if isinstance(other, str):
+            raise TypeError("multiplication can not be applied to a string literal.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("multiplication can not be applied to date times.")
+
+        if isinstance(self.spark.data_type, StringType):
+            if isinstance(other, int):
+                return column_op(SF.repeat)(self, other)
+            else:
+                raise TypeError(
+                    "a string series can only be multiplied to an int series or literal"
+                )
+
+        return column_op(Column.__rmul__)(self, other)

    def __rtruediv__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__rtruediv__(self, other)
+        if isinstance(self.spark.data_type, StringType) or isinstance(other, str):
+            raise TypeError("division can not be applied on string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("division can not be applied to date times.")
+
+        def rtruediv(left, right):
+            return F.when(left == 0, F.lit(np.inf).__div__(right)).otherwise(
+                F.lit(right).__truediv__(left)
+            )
+
+        return numpy_column_op(rtruediv)(self, other)

    def __floordiv__(self, other) -> Union["Series", "Index"]:
        """
@@ -387,19 +568,66 @@ class IndexOpsMixin(object, metaclass=ABCMeta):
        |          -10          |   null  | -np.inf |
        +-----------------------|---------|---------+
        """
-        return self._dtype_op.__floordiv__(self, other)
+        if (
+            isinstance(self.spark.data_type, StringType)
+            or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType))
+            or isinstance(other, str)
+        ):
+            raise TypeError("division can not be applied on string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("division can not be applied to date times.")
+
+        def floordiv(left, right):
+            return F.when(F.lit(right is np.nan), np.nan).otherwise(
+                F.when(
+                    F.lit(right != 0) | F.lit(right).isNull(), F.floor(left.__div__(right))
+                ).otherwise(
+                    F.when(F.lit(left == np.inf) | F.lit(left == -np.inf), left).otherwise(
+                        F.lit(np.inf).__div__(left)
+                    )
+                )
+            )
+
+        return numpy_column_op(floordiv)(self, other)

    def __rfloordiv__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__rfloordiv__(self, other)
+        if isinstance(self.spark.data_type, StringType) or isinstance(other, str):
+            raise TypeError("division can not be applied on string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("division can not be applied to date times.")
+
+        def rfloordiv(left, right):
+            return F.when(F.lit(left == 0), F.lit(np.inf).__div__(right)).otherwise(
+                F.when(F.lit(left) == np.nan, np.nan).otherwise(F.floor(F.lit(right).__div__(left)))
+            )
+
+        return numpy_column_op(rfloordiv)(self, other)

    def __rmod__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__rmod__(self, other)
+        if isinstance(self.spark.data_type, StringType) or isinstance(other, str):
+            raise TypeError("modulo can not be applied on string series or literals.")
+
+        if isinstance(self.spark.data_type, TimestampType):
+            raise TypeError("modulo can not be applied to date times.")
+
+        def rmod(left, right):
+            return ((right % left) + left) % left
+
+        return column_op(rmod)(self, other)

    def __pow__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__pow__(self, other)
+        def pow_func(left, right):
+            return F.when(left == 1, left).otherwise(Column.__pow__(left, right))
+
+        return column_op(pow_func)(self, other)

    def __rpow__(self, other) -> Union["Series", "Index"]:
-        return self._dtype_op.__rpow__(self, other)
+        def rpow_func(left, right):
+            return F.when(F.lit(right == 1), right).otherwise(Column.__rpow__(left, right))
+
+        return column_op(rpow_func)(self, other)

    __abs__ = column_op(F.abs)

--- a/python/pyspark/pandas/data_type_ops/__init__.py
+++ b/python/pyspark/pandas/data_type_ops/__init__.py
@@ -1,16 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
--- a/python/pyspark/pandas/data_type_ops/base.py
+++ b/python/pyspark/pandas/data_type_ops/base.py
@@ -1,120 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from abc import ABCMeta, abstractmethod
-from typing import TYPE_CHECKING, Union
-
-from pandas.api.types import CategoricalDtype
-
-from pyspark.sql.types import (
-    BooleanType,
-    DataType,
-    DateType,
-    FractionalType,
-    IntegralType,
-    StringType,
-    TimestampType,
-)
-
-from pyspark.pandas.typedef import Dtype
-
-if TYPE_CHECKING:
-    from pyspark.pandas.indexes import Index  # noqa: F401 (SPARK-34943)
-    from pyspark.pandas.series import Series  # noqa: F401 (SPARK-34943)
-
-
-class DataTypeOps(object, metaclass=ABCMeta):
-    """The base class for binary operations of pandas-on-Spark objects (of different data types)."""
-
-    def __new__(cls, dtype: Dtype, spark_type: DataType):
-        from pyspark.pandas.data_type_ops.boolean_ops import BooleanOps
-        from pyspark.pandas.data_type_ops.categorical_ops import CategoricalOps
-        from pyspark.pandas.data_type_ops.date_ops import DateOps
-        from pyspark.pandas.data_type_ops.datetime_ops import DatetimeOps
-        from pyspark.pandas.data_type_ops.num_ops import (
-            IntegralOps,
-            FractionalOps,
-        )
-        from pyspark.pandas.data_type_ops.string_ops import StringOps
-
-        if isinstance(dtype, CategoricalDtype):
-            return object.__new__(CategoricalOps)
-        elif isinstance(spark_type, FractionalType):
-            return object.__new__(FractionalOps)
-        elif isinstance(spark_type, IntegralType):
-            return object.__new__(IntegralOps)
-        elif isinstance(spark_type, StringType):
-            return object.__new__(StringOps)
-        elif isinstance(spark_type, BooleanType):
-            return object.__new__(BooleanOps)
-        elif isinstance(spark_type, TimestampType):
-            return object.__new__(DatetimeOps)
-        elif isinstance(spark_type, DateType):
-            return object.__new__(DateOps)
-        else:
-            raise TypeError("Type %s was not understood." % dtype)
-
-    def __init__(self, dtype: Dtype, spark_type: DataType):
-        self.dtype = dtype
-        self.spark_type = spark_type
-
-    @property
-    @abstractmethod
-    def pretty_name(self) -> str:
-        raise NotImplementedError()
-
-    def __add__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Addition can not be applied to %s." % self.pretty_name)
-
-    def __sub__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Subtraction can not be applied to %s." % self.pretty_name)
-
-    def __mul__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Multiplication can not be applied to %s." % self.pretty_name)
-
-    def __truediv__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("True division can not be applied to %s." % self.pretty_name)
-
-    def __floordiv__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Floor division can not be applied to %s." % self.pretty_name)
-
-    def __mod__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Modulo can not be applied to %s." % self.pretty_name)
-
-    def __pow__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Exponentiation can not be applied to %s." % self.pretty_name)
-
-    def __radd__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Addition can not be applied to %s." % self.pretty_name)
-
-    def __rsub__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Subtraction can not be applied to %s." % self.pretty_name)
-
-    def __rmul__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Multiplication can not be applied to %s." % self.pretty_name)
-
-    def __rtruediv__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("True division can not be applied to %s." % self.pretty_name)
-
-    def __rfloordiv__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Floor division can not be applied to %s." % self.pretty_name)
-
-    def __rmod__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Modulo can not be applied to %s." % self.pretty_name)
-
-    def __rpow__(self, left, right) -> Union["Series", "Index"]:
-        raise TypeError("Exponentiation can not be applied to %s." % self.pretty_name)
--- a/python/pyspark/pandas/data_type_ops/boolean_ops.py
+++ b/python/pyspark/pandas/data_type_ops/boolean_ops.py
@@ -1,28 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from pyspark.pandas.data_type_ops.base import DataTypeOps
-
-
-class BooleanOps(DataTypeOps):
-    """
-    The class for binary operations of pandas-on-Spark objects with spark type: BooleanType.
-    """
-
-    @property
-    def pretty_name(self) -> str:
-        return 'booleans'
--- a/python/pyspark/pandas/data_type_ops/categorical_ops.py
+++ b/python/pyspark/pandas/data_type_ops/categorical_ops.py
@@ -1,28 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from pyspark.pandas.data_type_ops.base import DataTypeOps
-
-
-class CategoricalOps(DataTypeOps):
-    """
-    The class for binary operations of pandas-on-Spark objects with categorical types.
-    """
-
-    @property
-    def pretty_name(self) -> str:
-        return 'categoricals'
--- a/python/pyspark/pandas/data_type_ops/date_ops.py
+++ b/python/pyspark/pandas/data_type_ops/date_ops.py
@@ -1,71 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import datetime
-import warnings
-from typing import TYPE_CHECKING, Union
-
-from pyspark.sql import functions as F
-from pyspark.sql.types import DateType
-
-from pyspark.pandas.base import column_op, IndexOpsMixin
-from pyspark.pandas.data_type_ops.base import DataTypeOps
-
-if TYPE_CHECKING:
-    from pyspark.pandas.indexes import Index  # noqa: F401 (SPARK-34943)
-    from pyspark.pandas.series import Series  # noqa: F401 (SPARK-34943)
-
-
-class DateOps(DataTypeOps):
-    """
-    The class for binary operations of pandas-on-Spark objects with spark type: DateType.
-    """
-
-    @property
-    def pretty_name(self) -> str:
-        return 'dates'
-
-    def __sub__(self, left, right) -> Union["Series", "Index"]:
-        # Note that date subtraction casts arguments to integer. This is to mimic pandas's
-        # behaviors. pandas returns 'timedelta64[ns]' in days from date's subtraction.
-        msg = (
-            "Note that there is a behavior difference of date subtraction. "
-            "The date subtraction returns an integer in days, "
-            "whereas pandas returns 'timedelta64[ns]'."
-        )
-        if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, DateType):
-            warnings.warn(msg, UserWarning)
-            return column_op(F.datediff)(left, right).astype("long")
-        elif isinstance(right, datetime.date) and not isinstance(right, datetime.datetime):
-            warnings.warn(msg, UserWarning)
-            return column_op(F.datediff)(left, F.lit(right)).astype("long")
-        else:
-            raise TypeError("date subtraction can only be applied to date series.")
-
-    def __rsub__(self, left, right) -> Union["Series", "Index"]:
-        # Note that date subtraction casts arguments to integer. This is to mimic pandas's
-        # behaviors. pandas returns 'timedelta64[ns]' in days from date's subtraction.
-        msg = (
-            "Note that there is a behavior difference of date subtraction. "
-            "The date subtraction returns an integer in days, "
-            "whereas pandas returns 'timedelta64[ns]'."
-        )
-        if isinstance(right, datetime.date) and not isinstance(right, datetime.datetime):
-            warnings.warn(msg, UserWarning)
-            return -column_op(F.datediff)(left, F.lit(right)).astype("long")
-        else:
-            raise TypeError("date subtraction can only be applied to date series.")
--- a/python/pyspark/pandas/data_type_ops/datetime_ops.py
+++ b/python/pyspark/pandas/data_type_ops/datetime_ops.py
@@ -1,72 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import datetime
-import warnings
-from typing import TYPE_CHECKING, Union
-
-from pyspark.sql import functions as F
-from pyspark.sql.types import TimestampType
-
-from pyspark.pandas.base import IndexOpsMixin
-from pyspark.pandas.data_type_ops.base import DataTypeOps
-from pyspark.pandas.typedef import as_spark_type
-
-if TYPE_CHECKING:
-    from pyspark.pandas.indexes import Index  # noqa: F401 (SPARK-34943)
-    from pyspark.pandas.series import Series  # noqa: F401 (SPARK-34943)
-
-
-class DatetimeOps(DataTypeOps):
-    """
-    The class for binary operations of pandas-on-Spark objects with spark type: TimestampType.
-    """
-
-    @property
-    def pretty_name(self) -> str:
-        return 'datetimes'
-
-    def __sub__(self, left, right) -> Union["Series", "Index"]:
-        # Note that timestamp subtraction casts arguments to integer. This is to mimic pandas's
-        # behaviors. pandas returns 'timedelta64[ns]' from 'datetime64[ns]'s subtraction.
-        msg = (
-            "Note that there is a behavior difference of timestamp subtraction. "
-            "The timestamp subtraction returns an integer in seconds, "
-            "whereas pandas returns 'timedelta64[ns]'."
-        )
-        if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, TimestampType):
-            warnings.warn(msg, UserWarning)
-            return left.astype("long") - right.astype("long")
-        elif isinstance(right, datetime.datetime):
-            warnings.warn(msg, UserWarning)
-            return left.astype("long") - F.lit(right).cast(as_spark_type("long"))
-        else:
-            raise TypeError("datetime subtraction can only be applied to datetime series.")
-
-    def __rsub__(self, left, right) -> Union["Series", "Index"]:
-        # Note that timestamp subtraction casts arguments to integer. This is to mimic pandas's
-        # behaviors. pandas returns 'timedelta64[ns]' from 'datetime64[ns]'s subtraction.
-        msg = (
-            "Note that there is a behavior difference of timestamp subtraction. "
-            "The timestamp subtraction returns an integer in seconds, "
-            "whereas pandas returns 'timedelta64[ns]'."
-        )
-        if isinstance(right, datetime.datetime):
-            warnings.warn(msg, UserWarning)
-            return -(left.astype("long") - F.lit(right).cast(as_spark_type("long")))
-        else:
-            raise TypeError("datetime subtraction can only be applied to datetime series.")
--- a/python/pyspark/pandas/data_type_ops/num_ops.py
+++ b/python/pyspark/pandas/data_type_ops/num_ops.py
@ -1,378 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import numbers
-from typing import TYPE_CHECKING, Union
-
-import numpy as np
-from pandas.api.types import CategoricalDtype
-
-from pyspark.sql import Column, functions as F
-from pyspark.sql.types import (
-    NumericType,
-    StringType,
-    TimestampType,
-)
-
-from pyspark.pandas.base import column_op, IndexOpsMixin, numpy_column_op
-from pyspark.pandas.data_type_ops.base import DataTypeOps
-from pyspark.pandas.spark import functions as SF
-
-if TYPE_CHECKING:
-    from pyspark.pandas.indexes import Index  # noqa: F401 (SPARK-34943)
-    from pyspark.pandas.series import Series  # noqa: F401 (SPARK-34943)
-
-
-class NumericOps(DataTypeOps):
-    """
-    The class for binary operations of numeric pandas-on-Spark objects.
-    """
-
-    @property
-    def pretty_name(self) -> str:
-        return 'numerics'
-
-    def __add__(self, left, right) -> Union["Series", "Index"]:
-        if (
-            isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
-        ) or isinstance(right, str):
-            raise TypeError("string addition can only be applied to string series or literals.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or (not isinstance(right.spark.data_type, NumericType))
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("addition can not be applied to given types.")
-
-        return column_op(Column.__add__)(left, right)
-
-    def __sub__(self, left, right) -> Union["Series", "Index"]:
-        if (
-            isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
-        ) or isinstance(right, str):
-            raise TypeError("subtraction can not be applied to string series or literals.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or (not isinstance(right.spark.data_type, NumericType))
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("subtraction can not be applied to given types.")
-
-        return column_op(Column.__sub__)(left, right)
-
-    def __mod__(self, left, right) -> Union["Series", "Index"]:
-        if (
-            isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
-        ) or isinstance(right, str):
-            raise TypeError("modulo can not be applied on string series or literals.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or (not isinstance(right.spark.data_type, NumericType))
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("modulo can not be applied to given types.")
-
-        def mod(left, right):
-            return ((left % right) + right) % right
-
-        return column_op(mod)(left, right)
-
-    def __pow__(self, left, right) -> Union["Series", "Index"]:
-        if (
-            isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
-        ) or isinstance(right, str):
-            raise TypeError("exponentiation can not be applied on string series or literals.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or (not isinstance(right.spark.data_type, NumericType))
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("exponentiation can not be applied to given types.")
-
-        def pow_func(left, right):
-            return F.when(left == 1, left).otherwise(Column.__pow__(left, right))
-
-        return column_op(pow_func)(left, right)
-
-    def __radd__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("string addition can only be applied to string series or literals.")
-        if not isinstance(right, numbers.Number):
-            raise TypeError("addition can not be applied to given types.")
-
-        return column_op(Column.__radd__)(left, right)
-
-    def __rsub__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("subtraction can not be applied to string series or literals.")
-        if not isinstance(right, numbers.Number):
-            raise TypeError("subtraction can not be applied to given types.")
-        return column_op(Column.__rsub__)(left, right)
-
-    def __rmul__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("multiplication can not be applied to a string literal.")
-        if not isinstance(right, numbers.Number):
-            raise TypeError("multiplication can not be applied to given types.")
-        return column_op(Column.__rmul__)(left, right)
-
-    def __rpow__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("exponentiation can not be applied on string series or literals.")
-        if not isinstance(right, numbers.Number):
-            raise TypeError("exponentiation can not be applied to given types.")
-
-        def rpow_func(left, right):
-            return F.when(F.lit(right == 1), right).otherwise(Column.__rpow__(left, right))
-
-        return column_op(rpow_func)(left, right)
-
-    def __rmod__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("modulo can not be applied on string series or literals.")
-        if not isinstance(right, numbers.Number):
-            raise TypeError("modulo can not be applied to given types.")
-
-        def rmod(left, right):
-            return ((right % left) + left) % left
-
-        return column_op(rmod)(left, right)
-
-
-class IntegralOps(NumericOps):
-    """
-    The class for binary operations of pandas-on-Spark objects with spark types:
-    LongType, IntegerType, ByteType and ShortType.
-    """
-
-    @property
-    def pretty_name(self) -> str:
-        return 'integrals'
-
-    def __mul__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("multiplication can not be applied to a string literal.")
-
-        if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, TimestampType):
-            raise TypeError("multiplication can not be applied to date times.")
-
-        if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType):
-            return column_op(SF.repeat)(right, left)
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or not isinstance(right.spark.data_type, NumericType)
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("multiplication can not be applied to given types.")
-
-        return column_op(Column.__mul__)(left, right)
-
-    def __truediv__(self, left, right) -> Union["Series", "Index"]:
-        if (
-            isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
-        ) or isinstance(right, str):
-            raise TypeError("division can not be applied on string series or literals.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or (not isinstance(right.spark.data_type, NumericType))
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("division can not be applied to given types.")
-
-        def truediv(left, right):
-            return F.when(F.lit(right != 0) | F.lit(right).isNull(), left.__div__(right)).otherwise(
-                F.lit(np.inf).__div__(left)
-            )
-
-        return numpy_column_op(truediv)(left, right)
-
-    def __floordiv__(self, left, right) -> Union["Series", "Index"]:
-        if (
-            isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
-        ) or isinstance(right, str):
-            raise TypeError("division can not be applied on string series or literals.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or (not isinstance(right.spark.data_type, NumericType))
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("division can not be applied to given types.")
-
-        def floordiv(left, right):
-            return F.when(F.lit(right is np.nan), np.nan).otherwise(
-                F.when(
-                    F.lit(right != 0) | F.lit(right).isNull(), F.floor(left.__div__(right))
-                ).otherwise(
-                    F.lit(np.inf).__div__(left)
-                )
-            )
-
-        return numpy_column_op(floordiv)(left, right)
-
-    def __rtruediv__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("division can not be applied on string series or literals.")
-        if not isinstance(right, numbers.Number):
-            raise TypeError("division can not be applied to given types.")
-
-        def rtruediv(left, right):
-            return F.when(left == 0, F.lit(np.inf).__div__(right)).otherwise(
-                F.lit(right).__truediv__(left)
-            )
-
-        return numpy_column_op(rtruediv)(left, right)
-
-    def __rfloordiv__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("division can not be applied on string series or literals.")
-        if not isinstance(right, numbers.Number):
-            raise TypeError("division can not be applied to given types.")
-
-        def rfloordiv(left, right):
-            return F.when(F.lit(left == 0), F.lit(np.inf).__div__(right)).otherwise(
-                F.floor(F.lit(right).__div__(left))
-            )
-
-        return numpy_column_op(rfloordiv)(left, right)
-
-
-class FractionalOps(NumericOps):
-    """
-    The class for binary operations of pandas-on-Spark objects with spark types:
-    FloatType, DoubleType and DecimalType.
-    """
-
-    @property
-    def pretty_name(self) -> str:
-        return 'fractions'
-
-    def __mul__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("multiplication can not be applied to a string literal.")
-
-        if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, TimestampType):
-            raise TypeError("multiplication can not be applied to date times.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or not isinstance(right.spark.data_type, NumericType)
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("multiplication can not be applied to given types.")
-
-        return column_op(Column.__mul__)(left, right)
-
-    def __truediv__(self, left, right) -> Union["Series", "Index"]:
-        if (
-            isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
-        ) or isinstance(right, str):
-            raise TypeError("division can not be applied on string series or literals.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or (not isinstance(right.spark.data_type, NumericType))
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("division can not be applied to given types.")
-
-        def truediv(left, right):
-            return F.when(F.lit(right != 0) | F.lit(right).isNull(), left.__div__(right)).otherwise(
-                F.when(F.lit(left == np.inf) | F.lit(left == -np.inf), left).otherwise(
-                    F.lit(np.inf).__div__(left)
-                )
-            )
-
-        return numpy_column_op(truediv)(left, right)
-
-    def __floordiv__(self, left, right) -> Union["Series", "Index"]:
-        if (
-            isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
-        ) or isinstance(right, str):
-            raise TypeError("division can not be applied on string series or literals.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and (
-                isinstance(right.dtype, CategoricalDtype)
-                or (not isinstance(right.spark.data_type, NumericType))
-            )
-        ) and not isinstance(right, numbers.Number):
-            raise TypeError("division can not be applied to given types.")
-
-        def floordiv(left, right):
-            return F.when(F.lit(right is np.nan), np.nan).otherwise(
-                F.when(
-                    F.lit(right != 0) | F.lit(right).isNull(), F.floor(left.__div__(right))
-                ).otherwise(
-                    F.when(F.lit(left == np.inf) | F.lit(left == -np.inf), left).otherwise(
-                        F.lit(np.inf).__div__(left)
-                    )
-                )
-            )
-
-        return numpy_column_op(floordiv)(left, right)
-
-    def __rtruediv__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("division can not be applied on string series or literals.")
-        if not isinstance(right, numbers.Number):
-            raise TypeError("division can not be applied to given types.")
-
-        def rtruediv(left, right):
-            return F.when(left == 0, F.lit(np.inf).__div__(right)).otherwise(
-                F.lit(right).__truediv__(left)
-            )
-
-        return numpy_column_op(rtruediv)(left, right)
-
-    def __rfloordiv__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("division can not be applied on string series or literals.")
-        if not isinstance(right, numbers.Number):
-            raise TypeError("division can not be applied to given types.")
-
-        def rfloordiv(left, right):
-            return F.when(F.lit(left == 0), F.lit(np.inf).__div__(right)).otherwise(
-                F.when(F.lit(left) == np.nan, np.nan).otherwise(F.floor(F.lit(right).__div__(left)))
-            )
-
-        return numpy_column_op(rfloordiv)(left, right)
--- a/python/pyspark/pandas/data_type_ops/string_ops.py
+++ b/python/pyspark/pandas/data_type_ops/string_ops.py
@ -1,104 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from typing import TYPE_CHECKING, Union
-
-from pandas.api.types import CategoricalDtype
-
-from pyspark.sql import functions as F
-from pyspark.sql.types import IntegralType, StringType
-
-from pyspark.pandas.base import column_op, IndexOpsMixin
-from pyspark.pandas.data_type_ops.base import DataTypeOps
-from pyspark.pandas.spark import functions as SF
-
-if TYPE_CHECKING:
-    from pyspark.pandas.indexes import Index  # noqa: F401 (SPARK-34943)
-    from pyspark.pandas.series import Series  # noqa: F401 (SPARK-34943)
-
-
-class StringOps(DataTypeOps):
-    """
-    The class for binary operations of pandas-on-Spark objects with spark type: StringType.
-    """
-
-    @property
-    def pretty_name(self) -> str:
-        return 'strings'
-
-    def __add__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType):
-            return column_op(F.concat)(left, right)
-        elif isinstance(right, str):
-            return column_op(F.concat)(left, F.lit(right))
-        else:
-            raise TypeError("string addition can only be applied to string series or literals.")
-
-    def __sub__(self, left, right):
-        raise TypeError("subtraction can not be applied to string series or literals.")
-
-    def __mul__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            raise TypeError("multiplication can not be applied to a string literal.")
-
-        if (
-            isinstance(right, IndexOpsMixin)
-            and isinstance(right.spark.data_type, IntegralType)
-            and not isinstance(right.dtype, CategoricalDtype)
-        ) or isinstance(right, int):
-            return column_op(SF.repeat)(left, right)
-        else:
-            raise TypeError("a string series can only be multiplied to an int series or literal")
-
-    def __truediv__(self, left, right):
-        raise TypeError("division can not be applied on string series or literals.")
-
-    def __floordiv__(self, left, right):
-        raise TypeError("division can not be applied on string series or literals.")
-
-    def __mod__(self, left, right):
-        raise TypeError("modulo can not be applied on string series or literals.")
-
-    def __pow__(self, left, right):
-        raise TypeError("exponentiation can not be applied on string series or literals.")
-
-    def __radd__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, str):
-            return left._with_new_scol(F.concat(F.lit(right), left.spark.column))  # TODO: dtype?
-        else:
-            raise TypeError("string addition can only be applied to string series or literals.")
-
-    def __rsub__(self, left, right):
-        raise TypeError("subtraction can not be applied to string series or literals.")
-
-    def __rmul__(self, left, right) -> Union["Series", "Index"]:
-        if isinstance(right, int):
-            return column_op(SF.repeat)(left, right)
-        else:
-            raise TypeError("a string series can only be multiplied to an int series or literal")
-
-    def __rtruediv__(self, left, right):
-        raise TypeError("division can not be applied on string series or literals.")
-
-    def __rfloordiv__(self, left, right):
-        raise TypeError("division can not be applied on string series or literals.")
-
-    def __rpow__(self, left, right):
-        raise TypeError("exponentiation can not be applied on string series or literals.")
-
-    def __rmod__(self, left, right):
-        raise TypeError("modulo can not be applied on string series or literals.")
--- a/python/pyspark/pandas/tests/data_type_ops/init.py
+++ b/python/pyspark/pandas/tests/data_type_ops/init.py
@ -1,16 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
--- a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
@ -1,150 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import datetime
-import pandas as pd
-
-from pyspark import pandas as ps
-from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
-
-
-class BooleanOpsTest(PandasOnSparkTestCase, TestCasesUtils):
-    @property
-    def pser(self):
-        return pd.Series([True, True, False])
-
-    @property
-    def kser(self):
-        return ps.from_pandas(self.pser)
-
-    def test_add(self):
-        self.assertRaises(TypeError, lambda: self.kser + 1)
-        self.assertRaises(TypeError, lambda: self.kser + 0.1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser + kser)
-
-    def test_sub(self):
-        self.assertRaises(TypeError, lambda: self.kser - 1)
-        self.assertRaises(TypeError, lambda: self.kser - 0.1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser - kser)
-
-    def test_mul(self):
-        self.assertRaises(TypeError, lambda: self.kser * 1)
-        self.assertRaises(TypeError, lambda: self.kser * 0.1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser * kser)
-
-    def test_truediv(self):
-        self.assertRaises(TypeError, lambda: self.kser / 1)
-        self.assertRaises(TypeError, lambda: self.kser / 0.1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser / kser)
-
-    def test_floordiv(self):
-        self.assertRaises(TypeError, lambda: self.kser // 1)
-        self.assertRaises(TypeError, lambda: self.kser // 0.1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser // kser)
-
-    def test_mod(self):
-        self.assertRaises(TypeError, lambda: self.kser % 1)
-        self.assertRaises(TypeError, lambda: self.kser % 0.1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser % kser)
-
-    def test_pow(self):
-        self.assertRaises(TypeError, lambda: self.kser ** 1)
-        self.assertRaises(TypeError, lambda: self.kser ** 0.1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser ** kser)
-
-    def test_radd(self):
-        self.assertRaises(TypeError, lambda: 1 + self.kser)
-        self.assertRaises(TypeError, lambda: 0.1 + self.kser)
-        self.assertRaises(TypeError, lambda: "x" + self.kser)
-        self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) + self.kser)
-        self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) + self.kser)
-
-    def test_rsub(self):
-        self.assertRaises(TypeError, lambda: 1 - self.kser)
-        self.assertRaises(TypeError, lambda: 0.1 - self.kser)
-        self.assertRaises(TypeError, lambda: "x" - self.kser)
-        self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) - self.kser)
-        self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) - self.kser)
-
-    def test_rmul(self):
-        self.assertRaises(TypeError, lambda: 1 * self.kser)
-        self.assertRaises(TypeError, lambda: 0.1 * self.kser)
-        self.assertRaises(TypeError, lambda: "x" * self.kser)
-        self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) * self.kser)
-        self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) * self.kser)
-
-    def test_rtruediv(self):
-        self.assertRaises(TypeError, lambda: 1 / self.kser)
-        self.assertRaises(TypeError, lambda: 0.1 / self.kser)
-        self.assertRaises(TypeError, lambda: "x" / self.kser)
-        self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) / self.kser)
-        self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) / self.kser)
-
-    def test_rfloordiv(self):
-        self.assertRaises(TypeError, lambda: 1 // self.kser)
-        self.assertRaises(TypeError, lambda: 0.1 // self.kser)
-        self.assertRaises(TypeError, lambda: "x" + self.kser)
-        self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) // self.kser)
-        self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) // self.kser)
-
-    def test_rpow(self):
-        self.assertRaises(TypeError, lambda: 1 ** self.kser)
-        self.assertRaises(TypeError, lambda: 0.1 ** self.kser)
-        self.assertRaises(TypeError, lambda: "x" ** self.kser)
-        self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) ** self.kser)
-        self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) ** self.kser)
-
-    def test_rmod(self):
-        self.assertRaises(TypeError, lambda: 1 % self.kser)
-        self.assertRaises(TypeError, lambda: 0.1 % self.kser)
-        self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) % self.kser)
-        self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) % self.kser)
-
-
-if __name__ == "__main__":
-    import unittest
-    from pyspark.pandas.tests.data_type_ops.test_boolean_ops import *  # noqa: F401
-
-    try:
-        import xmlrunner  # type: ignore[import]
-        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
-    except ImportError:
-        testRunner = None
-    unittest.main(testRunner=testRunner, verbosity=2)
--- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
@ -1,128 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import pandas as pd
-
-from pyspark import pandas as ps
-from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
-
-
-class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
-    @property
-    def pser(self):
-        return pd.Series([1, "x", "y"], dtype="category")
-
-    @property
-    def kser(self):
-        return ps.from_pandas(self.pser)
-
-    def test_add(self):
-        self.assertRaises(TypeError, lambda: self.kser + "x")
-        self.assertRaises(TypeError, lambda: self.kser + 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser + kser)
-
-    def test_sub(self):
-        self.assertRaises(TypeError, lambda: self.kser - "x")
-        self.assertRaises(TypeError, lambda: self.kser - 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser - kser)
-
-    def test_mul(self):
-        self.assertRaises(TypeError, lambda: self.kser * "x")
-        self.assertRaises(TypeError, lambda: self.kser * 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser * kser)
-
-    def test_truediv(self):
-        self.assertRaises(TypeError, lambda: self.kser / "x")
-        self.assertRaises(TypeError, lambda: self.kser / 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser / kser)
-
-    def test_floordiv(self):
-        self.assertRaises(TypeError, lambda: self.kser // "x")
-        self.assertRaises(TypeError, lambda: self.kser // 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser // kser)
-
-    def test_mod(self):
-        self.assertRaises(TypeError, lambda: self.kser % "x")
-        self.assertRaises(TypeError, lambda: self.kser % 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser % kser)
-
-    def test_pow(self):
-        self.assertRaises(TypeError, lambda: self.kser ** "x")
-        self.assertRaises(TypeError, lambda: self.kser ** 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser ** kser)
-
-    def test_radd(self):
-        self.assertRaises(TypeError, lambda: "x" + self.kser)
-        self.assertRaises(TypeError, lambda: 1 + self.kser)
-
-    def test_rsub(self):
-        self.assertRaises(TypeError, lambda: "x" - self.kser)
-        self.assertRaises(TypeError, lambda: 1 - self.kser)
-
-    def test_rmul(self):
-        self.assertRaises(TypeError, lambda: "x" * self.kser)
-        self.assertRaises(TypeError, lambda: 2 * self.kser)
-
-    def test_rtruediv(self):
-        self.assertRaises(TypeError, lambda: "x" / self.kser)
-        self.assertRaises(TypeError, lambda: 1 / self.kser)
-
-    def test_rfloordiv(self):
-        self.assertRaises(TypeError, lambda: "x" // self.kser)
-        self.assertRaises(TypeError, lambda: 1 // self.kser)
-
-    def test_rmod(self):
-        self.assertRaises(TypeError, lambda: 1 % self.kser)
-
-    def test_rpow(self):
-        self.assertRaises(TypeError, lambda: "x" ** self.kser)
-        self.assertRaises(TypeError, lambda: 1 ** self.kser)
-
-
-if __name__ == "__main__":
-    import unittest
-    from pyspark.pandas.tests.data_type_ops.test_categorical_ops import *  # noqa: F401
-
-    try:
-        import xmlrunner  # type: ignore[import]
-        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
-    except ImportError:
-        testRunner = None
-    unittest.main(testRunner=testRunner, verbosity=2)
--- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
@ -1,158 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import datetime
-
-import pandas as pd
-
-from pyspark.sql.types import DateType
-
-from pyspark import pandas as ps
-from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
-
-
-class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
-    @property
-    def pser(self):
-        return pd.Series(
-            [datetime.date(1994, 1, 31), datetime.date(1994, 2, 1), datetime.date(1994, 2, 2)]
-        )
-
-    @property
-    def kser(self):
-        return ps.from_pandas(self.pser)
-
-    @property
-    def some_date(self):
-        return datetime.date(1994, 1, 1)
-
-    def test_add(self):
-        self.assertRaises(TypeError, lambda: self.kser + "x")
-        self.assertRaises(TypeError, lambda: self.kser + 1)
-        self.assertRaises(TypeError, lambda: self.kser + self.some_date)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser + kser)
-
-    def test_sub(self):
-        self.assertRaises(TypeError, lambda: self.kser - "x")
-        self.assertRaises(TypeError, lambda: self.kser - 1)
-        self.assert_eq(
-            (self.pser - self.some_date).dt.days, self.kser - self.some_date,
-        )
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.pser_kser_pairs:
-                if isinstance(kser.spark.data_type, DateType):
-                    self.assert_eq((self.pser - pser).dt.days, (self.kser - kser).sort_index())
-                else:
-                    self.assertRaises(TypeError, lambda: self.kser - kser)
-
-    def test_mul(self):
-        self.assertRaises(TypeError, lambda: self.kser * "x")
-        self.assertRaises(TypeError, lambda: self.kser * 1)
-        self.assertRaises(TypeError, lambda: self.kser * self.some_date)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser * kser)
-
-    def test_truediv(self):
-        self.assertRaises(TypeError, lambda: self.kser / "x")
-        self.assertRaises(TypeError, lambda: self.kser / 1)
-        self.assertRaises(TypeError, lambda: self.kser / self.some_date)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser / kser)
-
-    def test_floordiv(self):
-        self.assertRaises(TypeError, lambda: self.kser // "x")
-        self.assertRaises(TypeError, lambda: self.kser // 1)
-        self.assertRaises(TypeError, lambda: self.kser // self.some_date)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser // kser)
-
-    def test_mod(self):
-        self.assertRaises(TypeError, lambda: self.kser % "x")
-        self.assertRaises(TypeError, lambda: self.kser % 1)
-        self.assertRaises(TypeError, lambda: self.kser % self.some_date)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser % kser)
-
-    def test_pow(self):
-        self.assertRaises(TypeError, lambda: self.kser ** "x")
-        self.assertRaises(TypeError, lambda: self.kser ** 1)
-        self.assertRaises(TypeError, lambda: self.kser ** self.some_date)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser ** kser)
-
-    def test_radd(self):
-        self.assertRaises(TypeError, lambda: "x" + self.kser)
-        self.assertRaises(TypeError, lambda: 1 + self.kser)
-        self.assertRaises(TypeError, lambda: self.some_date + self.kser)
-
-    def test_rsub(self):
-        self.assertRaises(TypeError, lambda: "x" - self.kser)
-        self.assertRaises(TypeError, lambda: 1 - self.kser)
-        self.assert_eq(
-            (self.some_date - self.pser).dt.days, self.some_date - self.kser,
-        )
-
-    def test_rmul(self):
-        self.assertRaises(TypeError, lambda: "x" * self.kser)
-        self.assertRaises(TypeError, lambda: 1 * self.kser)
-        self.assertRaises(TypeError, lambda: self.some_date * self.kser)
-
-    def test_rtruediv(self):
-        self.assertRaises(TypeError, lambda: "x" / self.kser)
-        self.assertRaises(TypeError, lambda: 1 / self.kser)
-        self.assertRaises(TypeError, lambda: self.some_date / self.kser)
-
-    def test_rfloordiv(self):
-        self.assertRaises(TypeError, lambda: "x" // self.kser)
-        self.assertRaises(TypeError, lambda: 1 // self.kser)
-        self.assertRaises(TypeError, lambda: self.some_date // self.kser)
-
-    def test_rmod(self):
-        self.assertRaises(TypeError, lambda: 1 % self.kser)
-        self.assertRaises(TypeError, lambda: self.some_date % self.kser)
-
-    def test_rpow(self):
-        self.assertRaises(TypeError, lambda: "x" ** self.kser)
-        self.assertRaises(TypeError, lambda: 1 ** self.kser)
-        self.assertRaises(TypeError, lambda: self.some_date ** self.kser)
-
-
-if __name__ == "__main__":
-    import unittest
-    from pyspark.pandas.tests.data_type_ops.test_date_ops import *  # noqa: F401
-
-    try:
-        import xmlrunner  # type: ignore[import]
-        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
-    except ImportError:
-        testRunner = None
-    unittest.main(testRunner=testRunner, verbosity=2)
--- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
@ -1,160 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import datetime
-
-import numpy as np
-import pandas as pd
-
-from pyspark import pandas as ps
-from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
-
-
-class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
-    @property
-    def pser(self):
-        return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="M"))
-
-    @property
-    def kser(self):
-        return ps.from_pandas(self.pser)
-
-    @property
-    def some_datetime(self):
-        return datetime.datetime(1994, 1, 31, 10, 30, 00)
-
-    def test_add(self):
-        self.assertRaises(TypeError, lambda: self.kser + "x")
-        self.assertRaises(TypeError, lambda: self.kser + 1)
-        self.assertRaises(TypeError, lambda: self.kser + self.some_datetime)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser + kser)
-
-    def test_sub(self):
-        self.assertRaises(TypeError, lambda: self.kser - "x")
-        self.assertRaises(TypeError, lambda: self.kser - 1)
-        self.assert_eq(
-            (self.pser - self.some_datetime).dt.total_seconds().astype("int"),
-            self.kser - self.some_datetime,
-        )
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.pser_kser_pairs:
-                if pser.dtype == np.dtype("<M8[ns]"):
-                    self.assert_eq(
-                        (self.pser - pser).dt.total_seconds().astype("int"),
-                        (self.kser - kser).sort_index(),
-                    )
-                else:
-                    self.assertRaises(TypeError, lambda: self.kser - kser)
-
-    def test_mul(self):
-        self.assertRaises(TypeError, lambda: self.kser * "x")
-        self.assertRaises(TypeError, lambda: self.kser * 1)
-        self.assertRaises(TypeError, lambda: self.kser * self.some_datetime)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser * kser)
-
-    def test_truediv(self):
-        self.assertRaises(TypeError, lambda: self.kser / "x")
-        self.assertRaises(TypeError, lambda: self.kser / 1)
-        self.assertRaises(TypeError, lambda: self.kser / self.some_datetime)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser / kser)
-
-    def test_floordiv(self):
-        self.assertRaises(TypeError, lambda: self.kser // "x")
-        self.assertRaises(TypeError, lambda: self.kser // 1)
-        self.assertRaises(TypeError, lambda: self.kser // self.some_datetime)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser // kser)
-
-    def test_mod(self):
-        self.assertRaises(TypeError, lambda: self.kser % "x")
-        self.assertRaises(TypeError, lambda: self.kser % 1)
-        self.assertRaises(TypeError, lambda: self.kser % self.some_datetime)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser % kser)
-
-    def test_pow(self):
-        self.assertRaises(TypeError, lambda: self.kser ** "x")
-        self.assertRaises(TypeError, lambda: self.kser ** 1)
-        self.assertRaises(TypeError, lambda: self.kser ** self.some_datetime)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser ** kser)
-
-    def test_radd(self):
-        self.assertRaises(TypeError, lambda: "x" + self.kser)
-        self.assertRaises(TypeError, lambda: 1 + self.kser)
-        self.assertRaises(TypeError, lambda: self.some_datetime + self.kser)
-
-    def test_rsub(self):
-        self.assertRaises(TypeError, lambda: "x" - self.kser)
-        self.assertRaises(TypeError, lambda: 1 - self.kser)
-        self.assert_eq(
-            (self.some_datetime - self.pser).dt.total_seconds().astype("int"),
-            self.some_datetime - self.kser,
-        )
-
-    def test_rmul(self):
-        self.assertRaises(TypeError, lambda: "x" * self.kser)
-        self.assertRaises(TypeError, lambda: 1 * self.kser)
-        self.assertRaises(TypeError, lambda: self.some_datetime * self.kser)
-
-    def test_rtruediv(self):
-        self.assertRaises(TypeError, lambda: "x" / self.kser)
-        self.assertRaises(TypeError, lambda: 1 / self.kser)
-        self.assertRaises(TypeError, lambda: self.some_datetime / self.kser)
-
-    def test_rfloordiv(self):
-        self.assertRaises(TypeError, lambda: "x" // self.kser)
-        self.assertRaises(TypeError, lambda: 1 // self.kser)
-        self.assertRaises(TypeError, lambda: self.some_datetime // self.kser)
-
-    def test_rmod(self):
-        self.assertRaises(TypeError, lambda: 1 % self.kser)
-        self.assertRaises(TypeError, lambda: self.some_datetime % self.kser)
-
-    def test_rpow(self):
-        self.assertRaises(TypeError, lambda: "x" ** self.kser)
-        self.assertRaises(TypeError, lambda: 1 ** self.kser)
-        self.assertRaises(TypeError, lambda: self.some_datetime ** self.kser)
-
-
-if __name__ == "__main__":
-    import unittest
-    from pyspark.pandas.tests.data_type_ops.test_datetime_ops import *  # noqa: F401
-
-    try:
-        import xmlrunner  # type: ignore[import]
-        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
-    except ImportError:
-        testRunner = None
-    unittest.main(testRunner=testRunner, verbosity=2)
--- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
@ -1,195 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import datetime
-import numpy as np
-
-from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
-
-
-class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
-    """Unit tests for arithmetic operations of numeric data types.
-
-    A few test cases are disabled because pandas-on-Spark returns float64 whereas pandas
-    returns float32.
-    The underlying reason is the respective Spark operations return DoubleType always.
-    """
-    def test_add(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            self.assert_eq(pser + pser, kser + kser)
-            self.assert_eq(pser + 1, kser + 1)
-            # self.assert_eq(pser + 0.1, kser + 0.1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.numeric_pser_kser_pairs:
-                self.assertRaises(TypeError, lambda: kser + self.non_numeric_ksers["string"])
-                self.assertRaises(TypeError, lambda: kser + self.non_numeric_ksers["datetime"])
-                self.assertRaises(TypeError, lambda: kser + self.non_numeric_ksers["date"])
-                self.assertRaises(TypeError, lambda: kser + self.non_numeric_ksers["categorical"])
-                self.assertRaises(TypeError, lambda: kser + self.non_numeric_ksers["bool"])
-
-    def test_sub(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            self.assert_eq(pser - pser, kser - kser)
-            self.assert_eq(pser - 1, kser - 1)
-            # self.assert_eq(pser - 0.1, kser - 0.1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.numeric_pser_kser_pairs:
-                self.assertRaises(TypeError, lambda: kser - self.non_numeric_ksers["string"])
-                self.assertRaises(TypeError, lambda: kser - self.non_numeric_ksers["datetime"])
-                self.assertRaises(TypeError, lambda: kser - self.non_numeric_ksers["date"])
-                self.assertRaises(TypeError, lambda: kser - self.non_numeric_ksers["categorical"])
-                self.assertRaises(TypeError, lambda: kser - self.non_numeric_ksers["bool"])
-
-    def test_mul(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            self.assert_eq(pser * pser, kser * kser)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.numeric_pser_kser_pairs:
-                if kser.dtype in [int, np.int32]:
-                    self.assert_eq(
-                        (kser * self.non_numeric_ksers["string"]).sort_index(),
-                        pser * self.non_numeric_psers["string"],
-                    )
-                else:
-                    self.assertRaises(TypeError, lambda: kser * self.non_numeric_ksers["string"])
-                self.assertRaises(TypeError, lambda: kser * self.non_numeric_ksers["datetime"])
-                self.assertRaises(TypeError, lambda: kser * self.non_numeric_ksers["date"])
-                self.assertRaises(TypeError, lambda: kser * self.non_numeric_ksers["categorical"])
-                self.assertRaises(TypeError, lambda: kser * self.non_numeric_ksers["bool"])
-
-    def test_truediv(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            if kser.dtype in [float, int, np.int32]:
-                self.assert_eq(pser / pser, kser / kser)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.numeric_pser_kser_pairs:
-                self.assertRaises(TypeError, lambda: kser / self.non_numeric_ksers["string"])
-                self.assertRaises(TypeError, lambda: kser / self.non_numeric_ksers["datetime"])
-                self.assertRaises(TypeError, lambda: kser / self.non_numeric_ksers["date"])
-                self.assertRaises(TypeError, lambda: kser / self.non_numeric_ksers["categorical"])
-                self.assertRaises(TypeError, lambda: kser / self.non_numeric_ksers["bool"])
-
-    def test_floordiv(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            if kser.dtype == float:
-                self.assert_eq(pser // pser, kser // kser)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.numeric_pser_kser_pairs:
-                self.assertRaises(TypeError, lambda: kser // self.non_numeric_ksers["string"])
-                self.assertRaises(TypeError, lambda: kser // self.non_numeric_ksers["datetime"])
-                self.assertRaises(TypeError, lambda: kser // self.non_numeric_ksers["date"])
-                self.assertRaises(TypeError, lambda: kser // self.non_numeric_ksers["categorical"])
-                self.assertRaises(TypeError, lambda: kser // self.non_numeric_ksers["bool"])
-
-    def test_mod(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            self.assert_eq(pser % pser, kser % kser)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.numeric_pser_kser_pairs:
-                self.assertRaises(TypeError, lambda: kser % self.non_numeric_ksers["string"])
-                self.assertRaises(TypeError, lambda: kser % self.non_numeric_ksers["datetime"])
-                self.assertRaises(TypeError, lambda: kser % self.non_numeric_ksers["date"])
-                self.assertRaises(TypeError, lambda: kser % self.non_numeric_ksers["categorical"])
-                self.assertRaises(TypeError, lambda: kser % self.non_numeric_ksers["bool"])
-
-    def test_pow(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            if kser.dtype == float:
-                self.assert_eq(pser ** pser, kser ** kser)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.numeric_pser_kser_pairs:
-                self.assertRaises(TypeError, lambda: kser ** self.non_numeric_ksers["string"])
-                self.assertRaises(TypeError, lambda: kser ** self.non_numeric_ksers["datetime"])
-                self.assertRaises(TypeError, lambda: kser ** self.non_numeric_ksers["date"])
-                self.assertRaises(TypeError, lambda: kser ** self.non_numeric_ksers["categorical"])
-                self.assertRaises(TypeError, lambda: kser ** self.non_numeric_ksers["bool"])
-
-    def test_radd(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            self.assert_eq(1 + pser, 1 + kser)
-            # self.assert_eq(0.1 + pser, 0.1 + kser)
-            self.assertRaises(TypeError, lambda: "x" + kser)
-            self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) + kser)
-            self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) + kser)
-
-    def test_rsub(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            self.assert_eq(1 - pser, 1 - kser)
-            # self.assert_eq(0.1 - pser, 0.1 - kser)
-            self.assertRaises(TypeError, lambda: "x" - kser)
-            self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) - kser)
-            self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) - kser)
-
-    def test_rmul(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            self.assert_eq(1 * pser, 1 * kser)
-            # self.assert_eq(0.1 * pser, 0.1 * kser)
-            self.assertRaises(TypeError, lambda: "x" * kser)
-            self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) * kser)
-            self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) * kser)
-
-    def test_rtruediv(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            # self.assert_eq(5 / pser, 5 / kser)
-            # self.assert_eq(0.1 / pser, 0.1 / kser)
-            self.assertRaises(TypeError, lambda: "x" + kser)
-            self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) / kser)
-            self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) / kser)
-
-    def test_rfloordiv(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            # self.assert_eq(5 // pser, 5 // kser)
-            # self.assert_eq(0.1 // pser, 0.1 // kser)
-            self.assertRaises(TypeError, lambda: "x" // kser)
-            self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) // kser)
-            self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) // kser)
-
-    def test_rpow(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            # self.assert_eq(1 ** pser, 1 ** kser)
-            # self.assert_eq(0.1 ** pser, 0.1 ** kser)
-            self.assertRaises(TypeError, lambda: "x" ** kser)
-            self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) ** kser)
-            self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) ** kser)
-
-    def test_rmod(self):
-        for pser, kser in self.numeric_pser_kser_pairs:
-            self.assert_eq(1 % pser, 1 % kser)
-            # self.assert_eq(0.1 % pser, 0.1 % kser)
-            self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) % kser)
-            self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) % kser)
-
-
-if __name__ == "__main__":
-    import unittest
-    from pyspark.pandas.tests.data_type_ops.test_string_ops import *  # noqa: F401
-
-    try:
-        import xmlrunner  # type: ignore[import]
-        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
-    except ImportError:
-        testRunner = None
-    unittest.main(testRunner=testRunner, verbosity=2)
--- a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py
@ -1,140 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import numpy as np
-import pandas as pd
-
-from pyspark import pandas as ps
-from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
-
-
-class StringOpsTest(PandasOnSparkTestCase, TestCasesUtils):
-    @property
-    def pser(self):
-        return pd.Series(["x", "y", "z"])
-
-    @property
-    def kser(self):
-        return ps.from_pandas(self.pser)
-
-    def test_add(self):
-        self.assert_eq(self.pser + "x", self.kser + "x")
-        self.assertRaises(TypeError, lambda: self.kser + 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            self.assert_eq(
-                self.pser + self.non_numeric_psers["string"],
-                (self.kser + self.non_numeric_ksers["string"]).sort_index(),
-            )
-            self.assertRaises(TypeError, lambda: self.kser + self.non_numeric_ksers["datetime"])
-            self.assertRaises(TypeError, lambda: self.kser + self.non_numeric_ksers["date"])
-            self.assertRaises(TypeError, lambda: self.kser + self.non_numeric_ksers["categorical"])
-            self.assertRaises(TypeError, lambda: self.kser + self.non_numeric_ksers["bool"])
-            for kser in self.numeric_ksers:
-                self.assertRaises(TypeError, lambda: self.kser + kser)
-
-    def test_sub(self):
-        self.assertRaises(TypeError, lambda: self.kser - "x")
-        self.assertRaises(TypeError, lambda: self.kser - 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser - kser)
-
-    def test_mul(self):
-        self.assertRaises(TypeError, lambda: self.kser * "x")
-        self.assert_eq(self.pser * 1, self.kser * 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for pser, kser in self.pser_kser_pairs:
-                if kser.dtype in [np.int64, np.int32]:
-                    self.assert_eq(self.pser * pser, (self.kser * kser).sort_index())
-                else:
-                    self.assertRaises(TypeError, lambda: self.kser * kser)
-
-    def test_truediv(self):
-        self.assertRaises(TypeError, lambda: self.kser / "x")
-        self.assertRaises(TypeError, lambda: self.kser / 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser / kser)
-
-    def test_floordiv(self):
-        self.assertRaises(TypeError, lambda: self.kser // "x")
-        self.assertRaises(TypeError, lambda: self.kser // 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser // kser)
-
-    def test_mod(self):
-        self.assertRaises(TypeError, lambda: self.kser % "x")
-        self.assertRaises(TypeError, lambda: self.kser % 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser % kser)
-
-    def test_pow(self):
-        self.assertRaises(TypeError, lambda: self.kser ** "x")
-        self.assertRaises(TypeError, lambda: self.kser ** 1)
-
-        with option_context("compute.ops_on_diff_frames", True):
-            for kser in self.ksers:
-                self.assertRaises(TypeError, lambda: self.kser ** kser)
-
-    def test_radd(self):
-        self.assert_eq("x" + self.pser, "x" + self.kser)
-        self.assertRaises(TypeError, lambda: 1 + self.kser)
-
-    def test_rsub(self):
-        self.assertRaises(TypeError, lambda: "x" - self.kser)
-        self.assertRaises(TypeError, lambda: 1 - self.kser)
-
-    def test_rmul(self):
-        self.assertRaises(TypeError, lambda: "x" * self.kser)
-        self.assert_eq(1 * self.pser, 1 * self.kser)
-
-    def test_rtruediv(self):
-        self.assertRaises(TypeError, lambda: "x" / self.kser)
-        self.assertRaises(TypeError, lambda: 1 / self.kser)
-
-    def test_rfloordiv(self):
-        self.assertRaises(TypeError, lambda: "x" // self.kser)
-        self.assertRaises(TypeError, lambda: 1 // self.kser)
-
-    def test_rmod(self):
-        self.assertRaises(TypeError, lambda: 1 % self.kser)
-
-    def test_rpow(self):
-        self.assertRaises(TypeError, lambda: "x" ** self.kser)
-        self.assertRaises(TypeError, lambda: 1 ** self.kser)
-
-
-if __name__ == "__main__":
-    import unittest
-    from pyspark.pandas.tests.data_type_ops.test_num_ops import *  # noqa: F401
-
-    try:
-        import xmlrunner  # type: ignore[import]
-        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
-    except ImportError:
-        testRunner = None
-    unittest.main(testRunner=testRunner, verbosity=2)
--- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
+++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
@ -1,75 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import datetime
-import decimal
-
-import numpy as np
-import pandas as pd
-
-import pyspark.pandas as ps
-
-
-class TestCasesUtils(object):
-    """A utility holding common test cases for arithmetic operations of different data types."""
-    @property
-    def numeric_psers(self):
-        dtypes = [np.float32, float, int, np.int32]
-        sers = [pd.Series([1, 2, 3], dtype=dtype) for dtype in dtypes]
-        sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)]))
-        return sers
-
-    @property
-    def numeric_ksers(self):
-        return [ps.from_pandas(pser) for pser in self.numeric_psers]
-
-    @property
-    def numeric_pser_kser_pairs(self):
-        return zip(self.numeric_psers, self.numeric_ksers)
-
-    @property
-    def non_numeric_psers(self):
-        psers = {
-            "string": pd.Series(["x", "y", "z"]),
-            "datetime": pd.to_datetime(pd.Series([1, 2, 3])),
-            "bool": pd.Series([True, True, False]),
-            "date": pd.Series(
-                [datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)]
-            ),
-            "categorical": pd.Series(["a", "b", "a"], dtype="category"),
-        }
-        return psers
-
-    @property
-    def non_numeric_ksers(self):
-        ksers = {}
-
-        for k, v in self.non_numeric_psers.items():
-            ksers[k] = ps.from_pandas(v)
-        return ksers
-
-    @property
-    def ksers(self):
-        return self.numeric_ksers + list(self.non_numeric_ksers.values())
-
-    @property
-    def psers(self):
-        return self.numeric_psers + list(self.non_numeric_psers.values())
-
-    @property
-    def pser_kser_pairs(self):
-        return zip(self.psers, self.ksers)
--- a/python/pyspark/pandas/tests/indexes/test_datetime.py
+++ b/python/pyspark/pandas/tests/indexes/test_datetime.py
@ -192,23 +192,21 @@ class DatetimeIndexTest(PandasOnSparkTestCase, TestUtils):
        for kidx, pidx in self.idx_pairs:
            py_datetime = pidx.to_pydatetime()
            for other in [1, 0.1, kidx, kidx.to_series().reset_index(drop=True), py_datetime]:
-                expected_err_msg = "Addition can not be applied to datetimes."
+                expected_err_msg = "addition can not be applied to date times."
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx + other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other + kidx)

-                expected_err_msg = "Multiplication can not be applied to datetimes."
+                expected_err_msg = "multiplication can not be applied to date times."
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx * other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other * kidx)

-                expected_err_msg = "True division can not be applied to datetimes."
+                expected_err_msg = "division can not be applied to date times."
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx / other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other / kidx)
-
-                expected_err_msg = "Floor division can not be applied to datetimes."
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx // other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other // kidx)

-                expected_err_msg = "Modulo can not be applied to datetimes."
+                expected_err_msg = "modulo can not be applied to date times."
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx % other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other % kidx)

--- a/python/pyspark/pandas/tests/test_dataframe.py
+++ b/python/pyspark/pandas/tests/test_dataframe.py
@ -2355,7 +2355,7 @@ class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils):

        # Negative
        kdf = ps.DataFrame({"a": ["x"], "b": [1]})
-        ks_err_msg = "subtraction can not be applied to string series or literals"
+        ks_err_msg = "substraction can not be applied to string series or literals"

        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: kdf["a"] - kdf["b"])
        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: kdf["b"] - kdf["a"])
@ -2430,12 +2430,12 @@ class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils):
        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: kdf["b"] * "literal")
        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: "literal" * kdf["b"])
        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: kdf["a"] * "literal")
+        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: "literal" * kdf["a"])

        ks_err_msg = "a string series can only be multiplied to an int series or literal"
        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: kdf["a"] * kdf["a"])
        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: kdf["a"] * 0.1)
        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: 0.1 * kdf["a"])
-        self.assertRaisesRegex(TypeError, ks_err_msg, lambda: "literal" * kdf["a"])

    def test_sample(self):
        pdf = pd.DataFrame({"A": [0, 2, 4]})
--- a/python/pyspark/pandas/tests/test_series_datetime.py
+++ b/python/pyspark/pandas/tests/test_series_datetime.py
@ -84,23 +84,21 @@ class SeriesDateTimeTest(PandasOnSparkTestCase, SQLTestUtils):
        datetime_index = ps.Index(self.pd_start_date)

        for other in [1, 0.1, kser, datetime_index, py_datetime]:
-            expected_err_msg = "Addition can not be applied to datetimes."
+            expected_err_msg = "addition can not be applied to date times."
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kser + other)
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other + kser)

-            expected_err_msg = "Multiplication can not be applied to datetimes."
+            expected_err_msg = "multiplication can not be applied to date times."
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kser * other)
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other * kser)

-            expected_err_msg = "True division can not be applied to datetimes."
+            expected_err_msg = "division can not be applied to date times."
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kser / other)
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other / kser)
-
-            expected_err_msg = "Floor division can not be applied to datetimes."
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kser // other)
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other // kser)

-            expected_err_msg = "Modulo can not be applied to datetimes."
+            expected_err_msg = "modulo can not be applied to date times."
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kser % other)
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other % kser)

--- a/python/pyspark/testing/pandasutils.py
+++ b/python/pyspark/testing/pandasutils.py
@ -70,7 +70,7 @@ class PandasOnSparkTestCase(unittest.TestCase, SQLTestUtils):
    def tearDownClass(cls):
        # We don't stop Spark session to reuse across all tests.
        # The Spark session will be started and stopped at PyTest session level.
-        # Please see pyspark/pandas/conftest.py.
+        # Please see databricks/koalas/conftest.py.
        pass

    def assertPandasEqual(self, left, right, check_exact=True):
--- a/python/setup.py
+++ b/python/setup.py
@ -221,7 +221,6 @@ try:
                  'pyspark.sbin',
                  'pyspark.jars',
                  'pyspark.pandas',
-                  'pyspark.pandas.data_type_ops',
                  'pyspark.pandas.indexes',
                  'pyspark.pandas.missing',
                  'pyspark.pandas.plot',