#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import sys
import tempfile
import threading
import time
import unittest

has_resource_module = True
try:
    import resource
except ImportError:
    has_resource_module = False

from py4j.protocol import Py4JJavaError

from pyspark.testing.utils import ReusedPySparkTestCase, PySparkTestCase, QuietTest

if sys.version_info[0] >= 3:
    xrange = range


class WorkerTests(ReusedPySparkTestCase):
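    # These tests exercise the Python worker lifecycle: task cancellation,
    # recovery after Python and JVM errors, worker reuse, and version checks.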
    def test_cancel_task(self):
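        # A long-running task writes its daemon/worker PIDs to a temp file;
        # after cancelAllJobs() the worker process must die within 5 seconds
        # while the daemon process stays alive and can serve new jobs.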
        temp = tempfile.NamedTemporaryFile(delete=True)
        temp.close()
        path = temp.name

        def sleep(x):
            import os
            import time
            with open(path, 'w') as f:
                f.write("%d %d" % (os.getppid(), os.getpid()))
            time.sleep(100)

        # start job in background thread
        def run():
            try:
                self.sc.parallelize(range(1), 1).foreach(sleep)
            except Exception:
                pass
        import threading
        t = threading.Thread(target=run)
        t.daemon = True
        t.start()

        daemon_pid, worker_pid = 0, 0
        while True:
            if os.path.exists(path):
                with open(path) as f:
                    data = f.read().split(' ')
                daemon_pid, worker_pid = map(int, data)
                break
            time.sleep(0.1)

        # cancel jobs
        self.sc.cancelAllJobs()
        t.join()

        for i in range(50):
            try:
                os.kill(worker_pid, 0)
                time.sleep(0.1)
            except OSError:
                break  # worker was killed
        else:
            self.fail("worker has not been killed after 5 seconds")

        try:
            os.kill(daemon_pid, 0)
        except OSError:
            self.fail("daemon had been killed")

        # run a normal job
        rdd = self.sc.parallelize(xrange(100), 1)
        self.assertEqual(100, rdd.map(str).count())

    def test_after_exception(self):
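        # A Python exception raised in one action must not poison the worker
        # for subsequent jobs on the same RDD.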
        def raise_exception(_):
            raise Exception()
        rdd = self.sc.parallelize(xrange(100), 1)
        with QuietTest(self.sc):
            self.assertRaises(Exception, lambda: rdd.foreach(raise_exception))
        self.assertEqual(100, rdd.map(str).count())

    def test_after_jvm_exception(self):
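        # Deleting the input file makes the JVM side fail on the next count;
        # the SparkContext must still be able to run fresh jobs afterwards.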
        tempFile = tempfile.NamedTemporaryFile(delete=False)
        tempFile.write(b"Hello World!")
        tempFile.close()
        data = self.sc.textFile(tempFile.name, 1)
        filtered_data = data.filter(lambda x: True)
        self.assertEqual(1, filtered_data.count())
        os.unlink(tempFile.name)
        with QuietTest(self.sc):
            self.assertRaises(Exception, lambda: filtered_data.count())

        rdd = self.sc.parallelize(xrange(100), 1)
        self.assertEqual(100, rdd.map(str).count())

    def test_accumulator_when_reuse_worker(self):
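        # Accumulator updates must not leak between jobs when workers are
        # reused: each accumulator sees exactly one sum of range(100).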
        from pyspark.accumulators import INT_ACCUMULATOR_PARAM
        acc1 = self.sc.accumulator(0, INT_ACCUMULATOR_PARAM)
        self.sc.parallelize(xrange(100), 20).foreach(lambda x: acc1.add(x))
        self.assertEqual(sum(range(100)), acc1.value)

        acc2 = self.sc.accumulator(0, INT_ACCUMULATOR_PARAM)
        self.sc.parallelize(xrange(100), 20).foreach(lambda x: acc2.add(x))
        self.assertEqual(sum(range(100)), acc2.value)
        self.assertEqual(sum(range(100)), acc1.value)

    def test_reuse_worker_after_take(self):
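        # first()/take() leaves a worker mid-task; a later full count() must
        # still finish promptly (within the 5-second join) on reused workers.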
        rdd = self.sc.parallelize(xrange(100000), 1)
        self.assertEqual(0, rdd.first())

        def count():
            try:
                rdd.count()
            except Exception:
                pass

        t = threading.Thread(target=count)
        t.daemon = True
        t.start()
        t.join(5)
        # Thread.isAlive() was removed in Python 3.9; is_alive() works on
        # both Python 2.6+ and Python 3.
        self.assertTrue(not t.is_alive())
        self.assertEqual(100000, rdd.count())

    def test_with_different_versions_of_python(self):
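        # A driver/worker Python version mismatch must surface as a
        # Py4JJavaError rather than a hang or a silent failure.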
        rdd = self.sc.parallelize(range(10))
        rdd.count()
        version = self.sc.pythonVer
        self.sc.pythonVer = "2.0"
        try:
            with QuietTest(self.sc):
                self.assertRaises(Py4JJavaError, lambda: rdd.count())
        finally:
            self.sc.pythonVer = version


class WorkerReuseTest(PySparkTestCase):

    def test_reuse_worker_of_parallelize_xrange(self):
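        # With worker reuse enabled, a second job over the same partitions
        # should be served by the same worker processes (same PIDs).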
        rdd = self.sc.parallelize(xrange(20), 8)
        previous_pids = rdd.map(lambda x: os.getpid()).collect()
        current_pids = rdd.map(lambda x: os.getpid()).collect()
        for pid in current_pids:
            self.assertIn(pid, previous_pids)


@unittest.skipIf(
    not has_resource_module,
    "Memory limit feature in Python worker depends on "
    "Python's 'resource' module, which was not found.")
class WorkerMemoryTest(PySparkTestCase):

    def test_memory_limit(self):
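        # spark.executor.pyspark.memory=1m should translate into a 1 MiB
        # RLIMIT_AS (address space) soft and hard limit inside each worker.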
        self.sc._conf.set("spark.executor.pyspark.memory", "1m")
        rdd = self.sc.parallelize(xrange(1), 1)

        def getrlimit():
            import resource
            return resource.getrlimit(resource.RLIMIT_AS)

        actual = rdd.map(lambda _: getrlimit()).collect()
        self.assertEqual(1, len(actual))
        self.assertEqual(2, len(actual[0]))

        [(soft_limit, hard_limit)] = actual
        self.assertEqual(soft_limit, 1024 * 1024)
        self.assertEqual(hard_limit, 1024 * 1024)


if __name__ == "__main__":
    import unittest
    from pyspark.tests.test_worker import *

    try:
        import xmlrunner
        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
    except ImportError:
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)