[SPARK-35032][PYTHON] Port Koalas Index unit tests into PySpark
### What changes were proposed in this pull request?
Now that the Koalas main code has been merged into the PySpark code base (#32036), we should port the Koalas Index unit tests to PySpark.
### Why are the changes needed?
Currently, the pandas-on-Spark modules are not fully tested. We should enable the Index unit tests.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
By enabling the Index unit tests.
Closes #32139 from xinrong-databricks/port.indexes_tests.
Authored-by: Xinrong Meng <xinrong.meng@databricks.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
ba92de0ae5
commit
4aee19efb4
|
@ -611,43 +611,47 @@ pyspark_pandas = Module(
|
|||
"pyspark.pandas.spark.utils",
|
||||
"pyspark.pandas.typedef.typehints",
|
||||
# unittests
|
||||
"pyspark.pandas.tests.test_dataframe",
|
||||
"pyspark.pandas.tests.test_config",
|
||||
"pyspark.pandas.tests.test_default_index",
|
||||
"pyspark.pandas.tests.test_extension",
|
||||
"pyspark.pandas.tests.test_internal",
|
||||
"pyspark.pandas.tests.test_numpy_compat",
|
||||
"pyspark.pandas.tests.test_typedef",
|
||||
"pyspark.pandas.tests.test_utils",
|
||||
"pyspark.pandas.tests.test_dataframe_conversion",
|
||||
"pyspark.pandas.tests.test_dataframe_spark_io",
|
||||
"pyspark.pandas.tests.test_frame_spark",
|
||||
"pyspark.pandas.tests.test_ops_on_diff_frames",
|
||||
"pyspark.pandas.tests.test_ops_on_diff_frames_groupby",
|
||||
"pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding",
|
||||
"pyspark.pandas.tests.test_ops_on_diff_frames_groupby_rolling",
|
||||
"pyspark.pandas.tests.test_series",
|
||||
"pyspark.pandas.tests.test_series_conversion",
|
||||
"pyspark.pandas.tests.test_series_datetime",
|
||||
"pyspark.pandas.tests.test_series_string",
|
||||
"pyspark.pandas.tests.test_categorical",
|
||||
"pyspark.pandas.tests.test_csv",
|
||||
"pyspark.pandas.tests.test_groupby",
|
||||
"pyspark.pandas.tests.test_expanding",
|
||||
"pyspark.pandas.tests.test_indexing",
|
||||
"pyspark.pandas.tests.test_namespace",
|
||||
"pyspark.pandas.tests.test_repr",
|
||||
"pyspark.pandas.tests.test_reshape",
|
||||
"pyspark.pandas.tests.test_rolling",
|
||||
"pyspark.pandas.tests.test_sql",
|
||||
"pyspark.pandas.tests.test_stats",
|
||||
"pyspark.pandas.tests.test_window",
|
||||
"pyspark.pandas.tests.indexes.test_base",
|
||||
"pyspark.pandas.tests.indexes.test_category",
|
||||
"pyspark.pandas.tests.indexes.test_datetime",
|
||||
"pyspark.pandas.tests.plot.test_frame_plot",
|
||||
"pyspark.pandas.tests.plot.test_frame_plot_matplotlib",
|
||||
"pyspark.pandas.tests.plot.test_frame_plot_plotly",
|
||||
"pyspark.pandas.tests.plot.test_series_plot",
|
||||
"pyspark.pandas.tests.plot.test_series_plot_matplotlib",
|
||||
"pyspark.pandas.tests.plot.test_series_plot_plotly",
|
||||
"pyspark.pandas.tests.test_categorical",
|
||||
"pyspark.pandas.tests.test_config",
|
||||
"pyspark.pandas.tests.test_csv",
|
||||
"pyspark.pandas.tests.test_dataframe",
|
||||
"pyspark.pandas.tests.test_dataframe_conversion",
|
||||
"pyspark.pandas.tests.test_dataframe_spark_io",
|
||||
"pyspark.pandas.tests.test_default_index",
|
||||
"pyspark.pandas.tests.test_expanding",
|
||||
"pyspark.pandas.tests.test_extension",
|
||||
"pyspark.pandas.tests.test_frame_spark",
|
||||
"pyspark.pandas.tests.test_groupby",
|
||||
"pyspark.pandas.tests.test_indexing",
|
||||
"pyspark.pandas.tests.test_indexops_spark",
|
||||
"pyspark.pandas.tests.test_internal",
|
||||
"pyspark.pandas.tests.test_namespace",
|
||||
"pyspark.pandas.tests.test_numpy_compat",
|
||||
"pyspark.pandas.tests.test_ops_on_diff_frames",
|
||||
"pyspark.pandas.tests.test_ops_on_diff_frames_groupby",
|
||||
"pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding",
|
||||
"pyspark.pandas.tests.test_ops_on_diff_frames_groupby_rolling",
|
||||
"pyspark.pandas.tests.test_repr",
|
||||
"pyspark.pandas.tests.test_reshape",
|
||||
"pyspark.pandas.tests.test_rolling",
|
||||
"pyspark.pandas.tests.test_series",
|
||||
"pyspark.pandas.tests.test_series_conversion",
|
||||
"pyspark.pandas.tests.test_series_datetime",
|
||||
"pyspark.pandas.tests.test_series_string",
|
||||
"pyspark.pandas.tests.test_sql",
|
||||
"pyspark.pandas.tests.test_stats",
|
||||
"pyspark.pandas.tests.test_typedef",
|
||||
"pyspark.pandas.tests.test_utils",
|
||||
"pyspark.pandas.tests.test_window",
|
||||
],
|
||||
excluded_python_implementations=[
|
||||
"PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
|
||||
|
|
16
python/pyspark/pandas/tests/indexes/__init__.py
Normal file
16
python/pyspark/pandas/tests/indexes/__init__.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
2308
python/pyspark/pandas/tests/indexes/test_base.py
Normal file
2308
python/pyspark/pandas/tests/indexes/test_base.py
Normal file
File diff suppressed because it is too large
Load diff
124
python/pyspark/pandas/tests/indexes/test_category.py
Normal file
124
python/pyspark/pandas/tests/indexes/test_category.py
Normal file
|
@ -0,0 +1,124 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from distutils.version import LooseVersion
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.types import CategoricalDtype
|
||||
|
||||
import pyspark.pandas as ps
|
||||
from pyspark.pandas.testing.utils import ReusedSQLTestCase, TestUtils
|
||||
|
||||
|
||||
class CategoricalIndexTest(ReusedSQLTestCase, TestUtils):
    """Checks that pandas-on-Spark categorical indexes agree with pandas."""

    def _check_categorical(self, actual, expected):
        """Assert that the index, its categories, codes and ordered flag all match.

        ``actual`` is the pandas-on-Spark index and ``expected`` the pandas one.
        """
        self.assert_eq(actual, expected)
        self.assert_eq(actual.categories, expected.categories)
        # The pandas-side codes are wrapped in pd.Index before being compared.
        self.assert_eq(actual.codes, pd.Index(expected.codes))
        self.assert_eq(actual.ordered, expected.ordered)

    def test_categorical_index(self):
        """Build categorical indexes in several ways and compare them with pandas."""
        # Direct CategoricalIndex construction.
        expected = pd.CategoricalIndex([1, 2, 3])
        actual = ps.CategoricalIndex([1, 2, 3])
        self._check_categorical(actual, expected)

        # Generic Index construction with a "category" dtype.
        expected = pd.Index([1, 2, 3], dtype="category")
        actual = ps.Index([1, 2, 3], dtype="category")
        self._check_categorical(actual, expected)

        # Frame with categorical columns and an ordered categorical index.
        frame_index = pd.Categorical(
            [10, 20, 30, 20, 30, 10], categories=[30, 10, 20], ordered=True
        )
        pdf = pd.DataFrame(
            {
                "a": pd.Categorical([1, 2, 3, 1, 2, 3]),
                "b": pd.Categorical(["a", "b", "c", "a", "b", "c"], categories=["c", "b", "a"]),
            },
            index=frame_index,
        )
        kdf = ps.from_pandas(pdf)

        # Index obtained by promoting a categorical column.
        expected = pdf.set_index("b").index
        actual = kdf.set_index("b").index
        self._check_categorical(actual, expected)

        # First level extracted from a two-level index of categorical columns.
        expected = pdf.set_index(["a", "b"]).index.get_level_values(0)
        actual = kdf.set_index(["a", "b"]).index.get_level_values(0)
        self._check_categorical(actual, expected)

    def test_astype(self):
        """Cast to and between categorical dtypes and compare with pandas."""
        plain_pidx = pd.Index(["a", "b", "c"])
        plain_kidx = ps.from_pandas(plain_pidx)

        self.assert_eq(plain_kidx.astype("category"), plain_pidx.astype("category"))
        self.assert_eq(
            plain_kidx.astype(CategoricalDtype(["c", "a", "b"])),
            plain_pidx.astype(CategoricalDtype(["c", "a", "b"])),
        )

        cat_pidx = plain_pidx.astype(CategoricalDtype(["c", "a", "b"]))
        cat_kidx = plain_kidx.astype(CategoricalDtype(["c", "a", "b"]))

        self.assert_eq(cat_kidx.astype("category"), cat_pidx.astype("category"))

        # From pandas 1.2 the categorical pandas index is the reference; for older
        # versions the expectation is taken from the plain (non-categorical) index.
        if LooseVersion(pd.__version__) >= LooseVersion("1.2"):
            reference = cat_pidx
        else:
            reference = plain_pidx
        self.assert_eq(
            cat_kidx.astype(CategoricalDtype(["b", "c", "a"])),
            reference.astype(CategoricalDtype(["b", "c", "a"])),
        )

        self.assert_eq(cat_kidx.astype(str), cat_pidx.astype(str))

    def test_factorize(self):
        """Compare ``factorize`` output with the default and a custom ``na_sentinel``."""
        pidx = pd.CategoricalIndex([1, 2, 3, None])
        kidx = ps.from_pandas(pidx)

        for options in ({}, {"na_sentinel": -2}):
            expected_codes, expected_uniques = pidx.factorize(**options)
            actual_codes, actual_uniques = kidx.factorize(**options)

            self.assert_eq(actual_codes.tolist(), expected_codes.tolist())
            self.assert_eq(actual_uniques, expected_uniques)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run this module's tests, emitting XML reports when
    # the optional xmlrunner package can be imported.
    import unittest
    from pyspark.pandas.tests.indexes.test_category import *  # noqa: F401

    testRunner = None  # unittest.main receives None when xmlrunner is unavailable
    try:
        import xmlrunner  # type: ignore[import]

        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
    except ImportError:
        pass
    unittest.main(testRunner=testRunner, verbosity=2)
|
232
python/pyspark/pandas/tests/indexes/test_datetime.py
Normal file
232
python/pyspark/pandas/tests/indexes/test_datetime.py
Normal file
|
@ -0,0 +1,232 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import datetime
|
||||
|
||||
from distutils.version import LooseVersion
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import pyspark.pandas as ps
|
||||
from pyspark.pandas.testing.utils import ReusedSQLTestCase, TestUtils
|
||||
|
||||
|
||||
class DatetimeIndexTest(ReusedSQLTestCase, TestUtils):
    """Compare pandas-on-Spark ``DatetimeIndex`` behavior with pandas."""

    @property
    def fixed_freqs(self):
        """Frequency aliases used for ceil/floor/round and for building date ranges."""
        return [
            "D",
            "H",
            "T",  # min
            "S",
            "L",  # ms
            "U",  # us
            # 'N' not supported
        ]

    @property
    def non_fixed_freqs(self):
        """Frequency aliases that are only used to build date ranges."""
        return ["W", "Q"]

    @property
    def pidxs(self):
        """pandas fixtures: two literal indexes plus one date range per frequency."""
        return [
            pd.DatetimeIndex([0]),
            pd.DatetimeIndex(["2004-01-01", "2002-12-31", "2000-04-01"]),
        ] + [
            pd.date_range("2000-01-01", periods=3, freq=freq)
            for freq in (self.fixed_freqs + self.non_fixed_freqs)
        ]

    @property
    def kidxs(self):
        """pandas-on-Spark counterparts of ``pidxs``, in the same order."""
        return [ps.from_pandas(pidx) for pidx in self.pidxs]

    @property
    def idx_pairs(self):
        """List of ``(pandas-on-Spark index, pandas index)`` pairs."""
        return list(zip(self.kidxs, self.pidxs))

    def _disallow_nanoseconds(self, f):
        """Assert that ``f`` raises ``ValueError`` for both nanosecond aliases."""
        self.assertRaises(ValueError, lambda: f(freq="ns"))
        self.assertRaises(ValueError, lambda: f(freq="N"))

    def test_properties(self):
        """Every datetime component/flag property must match pandas."""
        for kidx, pidx in self.idx_pairs:
            self.assert_eq(kidx.year, pidx.year)
            self.assert_eq(kidx.month, pidx.month)
            self.assert_eq(kidx.day, pidx.day)
            self.assert_eq(kidx.hour, pidx.hour)
            self.assert_eq(kidx.minute, pidx.minute)
            self.assert_eq(kidx.second, pidx.second)
            self.assert_eq(kidx.microsecond, pidx.microsecond)
            self.assert_eq(kidx.week, pidx.week)
            self.assert_eq(kidx.weekofyear, pidx.weekofyear)
            self.assert_eq(kidx.dayofweek, pidx.dayofweek)
            self.assert_eq(kidx.weekday, pidx.weekday)
            self.assert_eq(kidx.dayofyear, pidx.dayofyear)
            self.assert_eq(kidx.quarter, pidx.quarter)
            self.assert_eq(kidx.daysinmonth, pidx.daysinmonth)
            self.assert_eq(kidx.days_in_month, pidx.days_in_month)
            # The pandas-side boolean flags are wrapped in pd.Index before comparison.
            self.assert_eq(kidx.is_month_start, pd.Index(pidx.is_month_start))
            self.assert_eq(kidx.is_month_end, pd.Index(pidx.is_month_end))
            self.assert_eq(kidx.is_quarter_start, pd.Index(pidx.is_quarter_start))
            self.assert_eq(kidx.is_quarter_end, pd.Index(pidx.is_quarter_end))
            self.assert_eq(kidx.is_year_start, pd.Index(pidx.is_year_start))
            self.assert_eq(kidx.is_year_end, pd.Index(pidx.is_year_end))
            self.assert_eq(kidx.is_leap_year, pd.Index(pidx.is_leap_year))

            # These two aliases are only compared on pandas 1.2.0 and later.
            if LooseVersion(pd.__version__) >= LooseVersion("1.2.0"):
                self.assert_eq(kidx.day_of_year, pidx.day_of_year)
                self.assert_eq(kidx.day_of_week, pidx.day_of_week)

    def test_ceil(self):
        """``ceil`` matches pandas for each fixed frequency and rejects nanoseconds."""
        for kidx, pidx in self.idx_pairs:
            for freq in self.fixed_freqs:
                self.assert_eq(kidx.ceil(freq), pidx.ceil(freq))

        self._disallow_nanoseconds(self.kidxs[0].ceil)

    def test_floor(self):
        """``floor`` matches pandas for each fixed frequency and rejects nanoseconds."""
        for kidx, pidx in self.idx_pairs:
            for freq in self.fixed_freqs:
                self.assert_eq(kidx.floor(freq), pidx.floor(freq))

        self._disallow_nanoseconds(self.kidxs[0].floor)

    def test_round(self):
        """``round`` matches pandas for each fixed frequency and rejects nanoseconds."""
        for kidx, pidx in self.idx_pairs:
            for freq in self.fixed_freqs:
                self.assert_eq(kidx.round(freq), pidx.round(freq))

        self._disallow_nanoseconds(self.kidxs[0].round)

    def test_day_name(self):
        """``day_name`` matches pandas."""
        for kidx, pidx in self.idx_pairs:
            self.assert_eq(kidx.day_name(), pidx.day_name())

    def test_month_name(self):
        """``month_name`` matches pandas."""
        for kidx, pidx in self.idx_pairs:
            # Compare month_name() itself; calling day_name() here would leave
            # month_name() completely untested.
            self.assert_eq(kidx.month_name(), pidx.month_name())

    def test_normalize(self):
        """``normalize`` matches pandas."""
        for kidx, pidx in self.idx_pairs:
            self.assert_eq(kidx.normalize(), pidx.normalize())

    def test_strftime(self):
        """``strftime`` with an explicit ``date_format`` matches pandas."""
        for kidx, pidx in self.idx_pairs:
            self.assert_eq(
                kidx.strftime(date_format="%B %d, %Y"), pidx.strftime(date_format="%B %d, %Y")
            )

    def test_indexer_between_time(self):
        """``indexer_between_time`` matches pandas for string/time bounds and all
        four combinations of the two positional boolean arguments.

        The pandas-on-Spark result is sorted before it is compared.
        """
        for kidx, pidx in self.idx_pairs:
            self.assert_eq(
                kidx.indexer_between_time("00:00:00", "00:01:00").sort_values(),
                pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00")),
            )

            self.assert_eq(
                kidx.indexer_between_time(
                    datetime.time(0, 0, 0), datetime.time(0, 1, 0)
                ).sort_values(),
                pd.Index(pidx.indexer_between_time(datetime.time(0, 0, 0), datetime.time(0, 1, 0))),
            )

            self.assert_eq(
                kidx.indexer_between_time("00:00:00", "00:01:00", True, False).sort_values(),
                pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", True, False)),
            )

            self.assert_eq(
                kidx.indexer_between_time("00:00:00", "00:01:00", False, True).sort_values(),
                pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", False, True)),
            )

            self.assert_eq(
                kidx.indexer_between_time("00:00:00", "00:01:00", False, False).sort_values(),
                pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", False, False)),
            )

            self.assert_eq(
                kidx.indexer_between_time("00:00:00", "00:01:00", True, True).sort_values(),
                pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", True, True)),
            )

    def test_indexer_at_time(self):
        """``indexer_at_time`` matches pandas; ``asof=True`` raises ``NotImplementedError``."""
        for kidx, pidx in self.idx_pairs:
            self.assert_eq(
                kidx.indexer_at_time("00:00:00").sort_values(),
                pd.Index(pidx.indexer_at_time("00:00:00")),
            )

            self.assert_eq(
                kidx.indexer_at_time(datetime.time(0, 1, 0)).sort_values(),
                pd.Index(pidx.indexer_at_time(datetime.time(0, 1, 0))),
            )

            self.assert_eq(
                kidx.indexer_at_time("00:00:01").sort_values(),
                pd.Index(pidx.indexer_at_time("00:00:01")),
            )

        self.assertRaises(
            NotImplementedError,
            lambda: ps.DatetimeIndex([0]).indexer_at_time("00:00:00", asof=True),
        )

    def test_arithmetic_op_exceptions(self):
        """Arithmetic on a datetime index raises the expected errors and messages."""
        for kidx, pidx in self.idx_pairs:
            py_datetime = pidx.to_pydatetime()
            for other in [1, 0.1, kidx, kidx.to_series().reset_index(drop=True), py_datetime]:
                expected_err_msg = "addition can not be applied to date times."
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx + other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other + kidx)

                expected_err_msg = "multiplication can not be applied to date times."
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx * other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other * kidx)

                expected_err_msg = "division can not be applied to date times."
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx / other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other / kidx)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx // other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other // kidx)

                expected_err_msg = "modulo can not be applied to date times."
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx % other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other % kidx)

            expected_err_msg = "datetime subtraction can only be applied to datetime series."

            for other in [1, 0.1]:
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx - other)
                self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other - kidx)

            # NOTE(review): `other` still holds the last loop value (0.1) here, so this
            # assertion repeats one already made inside the loop -- confirm whether a
            # different operand was intended.
            self.assertRaisesRegex(TypeError, expected_err_msg, lambda: kidx - other)
            self.assertRaises(NotImplementedError, lambda: py_datetime - kidx)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run this module's tests, emitting XML reports when
    # the optional xmlrunner package can be imported.
    import unittest
    from pyspark.pandas.tests.indexes.test_datetime import *  # noqa: F401

    testRunner = None  # unittest.main receives None when xmlrunner is unavailable
    try:
        import xmlrunner  # type: ignore[import]

        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
    except ImportError:
        pass
    unittest.main(testRunner=testRunner, verbosity=2)
|
74
python/pyspark/pandas/tests/test_indexops_spark.py
Normal file
74
python/pyspark/pandas/tests/test_indexops_spark.py
Normal file
|
@ -0,0 +1,74 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import pandas as pd
|
||||
from pyspark.sql.utils import AnalysisException
|
||||
from pyspark.sql import functions as F
|
||||
|
||||
from pyspark import pandas as ps
|
||||
from pyspark.pandas.testing.utils import ReusedSQLTestCase, SQLTestUtils
|
||||
|
||||
|
||||
class SparkIndexOpsMethodsTest(ReusedSQLTestCase, SQLTestUtils):
    """Negative tests for the ``spark.transform`` / ``spark.apply`` accessors."""

    @property
    def pser(self):
        """pandas Series fixture named ``x``."""
        return pd.Series([1, 2, 3, 4, 5, 6, 7], name="x")

    @property
    def kser(self):
        """pandas-on-Spark counterpart of ``pser``."""
        return ps.from_pandas(self.pser)

    def test_series_transform_negative(self):
        """``spark.transform`` rejects a non-Column result and an unresolved column."""
        with self.assertRaisesRegex(
            ValueError, "The output of the function.* pyspark.sql.Column.*int"
        ):
            self.kser.spark.transform(lambda scol: 1)

        with self.assertRaisesRegex(AnalysisException, "cannot resolve.*non-existent.*"):
            self.kser.spark.transform(lambda scol: F.col("non-existent"))

    def test_multiindex_transform_negative(self):
        """``spark.transform`` on a MultiIndex raises ``NotImplementedError``."""
        # Build the fixture outside the assertion context so that only the call
        # under test is allowed to raise the expected error.
        midx = pd.MultiIndex(
            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [1, 1, 1, 1, 1, 2, 1, 2, 2]],
        )
        s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)

        with self.assertRaisesRegex(
            NotImplementedError, "MultiIndex does not support spark.transform yet"
        ):
            s.index.spark.transform(lambda scol: scol)

    def test_series_apply_negative(self):
        """``spark.apply`` rejects a non-Column result and an unresolved column."""
        with self.assertRaisesRegex(
            ValueError, "The output of the function.* pyspark.sql.Column.*int"
        ):
            self.kser.spark.apply(lambda scol: 1)

        with self.assertRaisesRegex(AnalysisException, "cannot resolve.*non-existent.*"):
            # Exercise spark.apply here; spark.transform is already covered by
            # test_series_transform_negative.
            self.kser.spark.apply(lambda scol: F.col("non-existent"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run this module's tests, emitting XML reports when
    # the optional xmlrunner package can be imported.
    import unittest
    from pyspark.pandas.tests.test_indexops_spark import *  # noqa: F401

    testRunner = None  # unittest.main receives None when xmlrunner is unavailable
    try:
        import xmlrunner  # type: ignore[import]

        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
    except ImportError:
        pass
    unittest.main(testRunner=testRunner, verbosity=2)
|
Loading…
Reference in a new issue