spark-instrumented-optimizer/python/pyspark/tests/test_worker.py
Hyukjin Kwon 310cd8eef1 [SPARK-36092][INFRA][BUILD][PYTHON] Migrate to GitHub Actions with Codecov from Jenkins
This PR proposes to migrate the coverage report from Jenkins to GitHub Actions by setting up a daily cron job.

For background: PySpark code coverage is currently reported in this specific Jenkins job: https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7/

Because of the security issue between the [Codecov service](https://app.codecov.io/gh/) and the Jenkins machines, we had to work around it by manually hosting a coverage site via GitHub Pages (see https://spark-test.github.io/pyspark-coverage-site/) under the spark-test account, which is shared with only a subset of PMC members.

Since we now run the build via GitHub Actions, we can leverage the [Codecov plugin](https://github.com/codecov/codecov-action) and remove the workaround we used.
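
To illustrate what such an upload step ultimately consumes, here is a minimal local sketch (not the actual Spark CI script) that produces an XML coverage report for part of this module with the standard `coverage` package; the test class, output file name, and environment assumptions are illustrative, and covering the Spark worker subprocesses themselves needs extra setup not shown here:

```python
import unittest

import coverage  # assumes the 'coverage' package is installed

# Measure coverage of the pyspark package while running one test class from this file.
cov = coverage.Coverage(source=["pyspark"])
cov.start()

from pyspark.tests.test_worker import WorkerReuseTest  # assumes a local PySpark build is importable

suite = unittest.defaultTestLoader.loadTestsFromTestCase(WorkerReuseTest)
unittest.TextTestRunner(verbosity=2).run(suite)

cov.stop()
cov.save()
# 'coverage.xml' is the kind of report an upload step such as codecov-action picks up.
cov.xml_report(outfile="coverage.xml")
```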

Virtually no user-facing change: the coverage site (UI) might change, but the information it holds should be essentially the same.

I manually tested:
- Scheduled run: https://github.com/HyukjinKwon/spark/actions/runs/1082261484
- Coverage report: 73f0291a7d/python/pyspark
- Run against a PR: https://github.com/HyukjinKwon/spark/actions/runs/1082367175

Closes #33591 from HyukjinKwon/SPARK-36092.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
(cherry picked from commit c0d1860f25)
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
2021-08-01 21:38:39 +09:00


# -*- encoding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import tempfile
import threading
import time
import unittest

# The 'resource' module is POSIX-only; when it is missing, WorkerMemoryTest below is skipped.
has_resource_module = True
try:
    import resource  # noqa: F401
except ImportError:
    has_resource_module = False

from py4j.protocol import Py4JJavaError

from pyspark import SparkConf, SparkContext
from pyspark.testing.utils import ReusedPySparkTestCase, PySparkTestCase, QuietTest


class WorkerTests(ReusedPySparkTestCase):
    def test_cancel_task(self):
        temp = tempfile.NamedTemporaryFile(delete=True)
        temp.close()
        path = temp.name

        def sleep(x):
            import os
            import time
            # Record the daemon (parent) and worker PIDs so the driver side can probe them.
            with open(path, 'w') as f:
                f.write("%d %d" % (os.getppid(), os.getpid()))
            time.sleep(100)

        # start job in background thread
        def run():
            try:
                self.sc.parallelize(range(1), 1).foreach(sleep)
            except Exception:
                pass

        import threading
        t = threading.Thread(target=run)
        t.daemon = True
        t.start()

        # Wait until the worker has written its PIDs into the temp file.
        daemon_pid, worker_pid = 0, 0
        cnt = 0
        while True:
            if os.path.exists(path):
                with open(path) as f:
                    data = f.read().split(' ')
                try:
                    daemon_pid, worker_pid = map(int, data)
                except ValueError:
                    pass
                    # In case the value is not written yet.
                    cnt += 1
                    if cnt == 10:
                        raise
                else:
                    break
            time.sleep(1)

        # cancel jobs
        self.sc.cancelAllJobs()
        t.join()

        for i in range(50):
            try:
                # Signal 0 only checks whether the process is still alive.
                os.kill(worker_pid, 0)
                time.sleep(0.1)
            except OSError:
                break  # worker was killed
        else:
            self.fail("worker has not been killed after 5 seconds")

        try:
            os.kill(daemon_pid, 0)
        except OSError:
            self.fail("daemon had been killed")

        # run a normal job
        rdd = self.sc.parallelize(range(100), 1)
        self.assertEqual(100, rdd.map(str).count())

    def test_after_exception(self):
        def raise_exception(_):
            raise RuntimeError()

        rdd = self.sc.parallelize(range(100), 1)
        with QuietTest(self.sc):
            self.assertRaises(Py4JJavaError, lambda: rdd.foreach(raise_exception))
        # The worker should still be usable for a normal job afterwards.
        self.assertEqual(100, rdd.map(str).count())

    def test_after_non_exception_error(self):
        # SPARK-33339: a PySpark application used to hang when the worker raised
        # a non-Exception error such as SystemExit.
        def raise_system_exit(_):
            raise SystemExit()

        rdd = self.sc.parallelize(range(100), 1)
        with QuietTest(self.sc):
            self.assertRaises(Exception, lambda: rdd.foreach(raise_system_exit))
        self.assertEqual(100, rdd.map(str).count())

    def test_after_jvm_exception(self):
        tempFile = tempfile.NamedTemporaryFile(delete=False)
        tempFile.write(b"Hello World!")
        tempFile.close()
        data = self.sc.textFile(tempFile.name, 1)
        filtered_data = data.filter(lambda x: True)
        self.assertEqual(1, filtered_data.count())

        # Deleting the input file makes the next count() fail inside the JVM.
        os.unlink(tempFile.name)
        with QuietTest(self.sc):
            self.assertRaises(Exception, lambda: filtered_data.count())

        rdd = self.sc.parallelize(range(100), 1)
        self.assertEqual(100, rdd.map(str).count())

    def test_accumulator_when_reuse_worker(self):
        from pyspark.accumulators import INT_ACCUMULATOR_PARAM

        acc1 = self.sc.accumulator(0, INT_ACCUMULATOR_PARAM)
        self.sc.parallelize(range(100), 20).foreach(lambda x: acc1.add(x))
        self.assertEqual(sum(range(100)), acc1.value)

        acc2 = self.sc.accumulator(0, INT_ACCUMULATOR_PARAM)
        self.sc.parallelize(range(100), 20).foreach(lambda x: acc2.add(x))
        self.assertEqual(sum(range(100)), acc2.value)
        # acc1 must not pick up values from the second job even though workers are reused.
        self.assertEqual(sum(range(100)), acc1.value)

    def test_reuse_worker_after_take(self):
        rdd = self.sc.parallelize(range(100000), 1)
        self.assertEqual(0, rdd.first())

        def count():
            try:
                rdd.count()
            except Exception:
                pass

        t = threading.Thread(target=count)
        t.daemon = True
        t.start()
        t.join(5)
        self.assertTrue(not t.is_alive())
        self.assertEqual(100000, rdd.count())

    def test_with_different_versions_of_python(self):
        rdd = self.sc.parallelize(range(10))
        rdd.count()
        version = self.sc.pythonVer
        # A bogus driver Python version must be rejected by the worker.
        self.sc.pythonVer = "2.0"
        try:
            with QuietTest(self.sc):
                self.assertRaises(Py4JJavaError, lambda: rdd.count())
        finally:
            self.sc.pythonVer = version

    def test_python_exception_non_hanging(self):
        # SPARK-21045: exceptions containing non-ASCII characters must not hang PySpark.
        try:
            def f():
                raise RuntimeError("exception with 中 and \xd6\xd0")

            self.sc.parallelize([1]).map(lambda x: f()).count()
        except Py4JJavaError as e:
            self.assertRegex(str(e), "exception with 中")


class WorkerReuseTest(PySparkTestCase):
    def test_reuse_worker_of_parallelize_range(self):
        rdd = self.sc.parallelize(range(20), 8)
        previous_pids = rdd.map(lambda x: os.getpid()).collect()
        current_pids = rdd.map(lambda x: os.getpid()).collect()
        # Every worker PID from the second run should come from the pool used by the first run.
        for pid in current_pids:
            self.assertTrue(pid in previous_pids)


@unittest.skipIf(
    not has_resource_module,
    "Memory limit feature in Python worker is dependent on "
    "Python's 'resource' module; however, not found.")
class WorkerMemoryTest(unittest.TestCase):
    def setUp(self):
        class_name = self.__class__.__name__
        conf = SparkConf().set("spark.executor.pyspark.memory", "2g")
        self.sc = SparkContext('local[4]', class_name, conf=conf)

    def test_memory_limit(self):
        rdd = self.sc.parallelize(range(1), 1)

        def getrlimit():
            import resource
            return resource.getrlimit(resource.RLIMIT_AS)

        # spark.executor.pyspark.memory=2g should be applied to each worker
        # as an address-space (RLIMIT_AS) limit of 2 GiB.
        actual = rdd.map(lambda _: getrlimit()).collect()
        self.assertTrue(len(actual) == 1)
        self.assertTrue(len(actual[0]) == 2)

        [(soft_limit, hard_limit)] = actual
        self.assertEqual(soft_limit, 2 * 1024 * 1024 * 1024)
        self.assertEqual(hard_limit, 2 * 1024 * 1024 * 1024)

    def tearDown(self):
        self.sc.stop()


class WorkerSegfaultTest(ReusedPySparkTestCase):
    @classmethod
    def conf(cls):
        _conf = super(WorkerSegfaultTest, cls).conf()
        _conf.set("spark.python.worker.faulthandler.enabled", "true")
        return _conf

    def test_python_segfault(self):
        try:
            def f():
                import ctypes
                # Dereferencing a null pointer crashes the worker process.
                ctypes.string_at(0)

            self.sc.parallelize([1]).map(lambda x: f()).count()
        except Py4JJavaError as e:
            # With faulthandler enabled, the error surfaced to the driver mentions the segfault.
            self.assertRegex(str(e), "Segmentation fault")


@unittest.skipIf(
    "COVERAGE_PROCESS_START" in os.environ,
    "Flaky with coverage enabled, skipping for now."
)
class WorkerSegfaultNonDaemonTest(WorkerSegfaultTest):
    @classmethod
    def conf(cls):
        _conf = super(WorkerSegfaultNonDaemonTest, cls).conf()
        # Run the same segfault test with workers launched directly instead of via the daemon.
        _conf.set("spark.python.use.daemon", "false")
        return _conf


if __name__ == "__main__":
    import unittest
    from pyspark.tests.test_worker import *  # noqa: F401

    try:
        import xmlrunner  # type: ignore[import]
        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
    except ImportError:
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)
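# Note: within a Spark checkout these tests are normally run through the dev scripts,
# e.g. `python/run-tests --testnames pyspark.tests.test_worker` (or `python/run-tests-with-coverage`
# for a coverage run), which configure the Spark environment before the unittest entry point
# above is reached.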