spark-instrumented-optimizer/python/pyspark/pandas/tests/test_utils.py
Xinrong Meng 47d62af2a9 [SPARK-35035][PYTHON] Port Koalas internal implementation unit tests into PySpark
### What changes were proposed in this pull request?
Now that we merged the Koalas main code into the PySpark code base (#32036), we should port the Koalas internal implementation unit tests to PySpark.

### Why are the changes needed?
Currently, the pandas-on-Spark modules are not tested fully. We should enable the internal implementation unit tests.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Enable internal implementation unit tests.

Closes #32137 from xinrong-databricks/port.test_internal_impl.

Lead-authored-by: Xinrong Meng <xinrong.meng@databricks.com>
Co-authored-by: xinrong-databricks <47337188+xinrong-databricks@users.noreply.github.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
2021-04-14 13:59:33 +09:00

105 lines
3.6 KiB
Python

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pandas as pd
from pyspark.pandas.testing.utils import ReusedSQLTestCase, SQLTestUtils
from pyspark.pandas.utils import (
lazy_property,
validate_arguments_and_invoke_function,
validate_bool_kwarg,
)
some_global_variable = 0
class UtilsTest(ReusedSQLTestCase, SQLTestUtils):
# a dummy to_html version with an extra parameter that pandas does not support
# used in test_validate_arguments_and_invoke_function
def to_html(self, max_rows=None, unsupported_param=None):
args = locals()
pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[0, 1, 3])
validate_arguments_and_invoke_function(pdf, self.to_html, pd.DataFrame.to_html, args)
def to_clipboard(self, sep=",", **kwargs):
args = locals()
pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[0, 1, 3])
validate_arguments_and_invoke_function(
pdf, self.to_clipboard, pd.DataFrame.to_clipboard, args
)
# Support for **kwargs
self.to_clipboard(sep=",", index=False)
def test_validate_arguments_and_invoke_function(self):
# This should pass and run fine
self.to_html()
self.to_html(unsupported_param=None)
self.to_html(max_rows=5)
# This should fail because we are explicitly setting an unsupported param
# to a non-default value
with self.assertRaises(TypeError):
self.to_html(unsupported_param=1)
def test_lazy_property(self):
obj = TestClassForLazyProp()
# If lazy prop is not working, the second test would fail (because it'd be 2)
self.assert_eq(obj.lazy_prop, 1)
self.assert_eq(obj.lazy_prop, 1)
def test_validate_bool_kwarg(self):
# This should pass and run fine
koalas = True
self.assert_eq(validate_bool_kwarg(koalas, "koalas"), True)
koalas = False
self.assert_eq(validate_bool_kwarg(koalas, "koalas"), False)
koalas = None
self.assert_eq(validate_bool_kwarg(koalas, "koalas"), None)
# This should fail because we are explicitly setting a non-boolean value
koalas = "true"
with self.assertRaisesRegex(
ValueError, 'For argument "koalas" expected type bool, received type str.'
):
validate_bool_kwarg(koalas, "koalas")
class TestClassForLazyProp:
def __init__(self):
self.some_variable = 0
@lazy_property
def lazy_prop(self):
self.some_variable += 1
return self.some_variable
if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.test_utils import * # noqa: F401
try:
import xmlrunner # type: ignore[import]
testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
except ImportError:
testRunner = None
unittest.main(testRunner=testRunner, verbosity=2)