[SPARK-11295][PYSPARK] Add packages to JUnit output for Python tests
This is #9263 from gliptak (improving the grouping/display of test case results), with a small fix to the bisecting k-means unit test. Author: Gábor Lipták <gliptak@gmail.com> Author: Xiangrui Meng <meng@databricks.com> Closes #10850 from mengxr/SPARK-11295.
This commit is contained in:
parent
9376ae723e
commit
9bb35c5b59
|
@ -394,6 +394,7 @@ class CrossValidatorTests(PySparkTestCase):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pyspark.ml.tests import *
|
||||
if xmlrunner:
|
||||
unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'))
|
||||
else:
|
||||
|
|
|
@ -77,21 +77,24 @@ except:
|
|||
pass
|
||||
|
||||
ser = PickleSerializer()
|
||||
sc = SparkContext('local[4]', "MLlib tests")
|
||||
|
||||
|
||||
class MLlibTestCase(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.sc = sc
|
||||
self.sc = SparkContext('local[4]', "MLlib tests")
|
||||
|
||||
def tearDown(self):
|
||||
self.sc.stop()
|
||||
|
||||
|
||||
class MLLibStreamingTestCase(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.sc = sc
|
||||
self.sc = SparkContext('local[4]', "MLlib tests")
|
||||
self.ssc = StreamingContext(self.sc, 1.0)
|
||||
|
||||
def tearDown(self):
|
||||
self.ssc.stop(False)
|
||||
self.sc.stop()
|
||||
|
||||
@staticmethod
|
||||
def _eventually(condition, timeout=30.0, catch_assertions=False):
|
||||
|
@ -423,7 +426,7 @@ class ListTests(MLlibTestCase):
|
|||
from pyspark.mllib.clustering import BisectingKMeans
|
||||
data = array([0.0, 0.0, 1.0, 1.0, 9.0, 8.0, 8.0, 9.0]).reshape(4, 2)
|
||||
bskm = BisectingKMeans()
|
||||
model = bskm.train(sc.parallelize(data, 2), k=4)
|
||||
model = bskm.train(self.sc.parallelize(data, 2), k=4)
|
||||
p = array([0.0, 0.0])
|
||||
rdd_p = self.sc.parallelize([p])
|
||||
self.assertEqual(model.predict(p), model.predict(rdd_p).first())
|
||||
|
@ -1166,7 +1169,7 @@ class StreamingKMeansTest(MLLibStreamingTestCase):
|
|||
clusterWeights=[1.0, 1.0, 1.0, 1.0])
|
||||
|
||||
predict_data = [[[1.5, 1.5]], [[-1.5, 1.5]], [[-1.5, -1.5]], [[1.5, -1.5]]]
|
||||
predict_data = [sc.parallelize(batch, 1) for batch in predict_data]
|
||||
predict_data = [self.sc.parallelize(batch, 1) for batch in predict_data]
|
||||
predict_stream = self.ssc.queueStream(predict_data)
|
||||
predict_val = stkm.predictOn(predict_stream)
|
||||
|
||||
|
@ -1197,7 +1200,7 @@ class StreamingKMeansTest(MLLibStreamingTestCase):
|
|||
# classification based in the initial model would have been 0
|
||||
# proving that the model is updated.
|
||||
batches = [[[-0.5], [0.6], [0.8]], [[0.2], [-0.1], [0.3]]]
|
||||
batches = [sc.parallelize(batch) for batch in batches]
|
||||
batches = [self.sc.parallelize(batch) for batch in batches]
|
||||
input_stream = self.ssc.queueStream(batches)
|
||||
predict_results = []
|
||||
|
||||
|
@ -1230,7 +1233,7 @@ class LinearDataGeneratorTests(MLlibTestCase):
|
|||
self.assertEqual(len(point.features), 3)
|
||||
|
||||
linear_data = LinearDataGenerator.generateLinearRDD(
|
||||
sc=sc, nexamples=6, nfeatures=2, eps=0.1,
|
||||
sc=self.sc, nexamples=6, nfeatures=2, eps=0.1,
|
||||
nParts=2, intercept=0.0).collect()
|
||||
self.assertEqual(len(linear_data), 6)
|
||||
for point in linear_data:
|
||||
|
@ -1406,7 +1409,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):
|
|||
for i in range(10):
|
||||
batch = LinearDataGenerator.generateLinearInput(
|
||||
0.0, [10.0, 10.0], xMean, xVariance, 100, 42 + i, 0.1)
|
||||
batches.append(sc.parallelize(batch))
|
||||
batches.append(self.sc.parallelize(batch))
|
||||
|
||||
input_stream = self.ssc.queueStream(batches)
|
||||
slr.trainOn(input_stream)
|
||||
|
@ -1430,7 +1433,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):
|
|||
for i in range(10):
|
||||
batch = LinearDataGenerator.generateLinearInput(
|
||||
0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1)
|
||||
batches.append(sc.parallelize(batch))
|
||||
batches.append(self.sc.parallelize(batch))
|
||||
|
||||
model_weights = []
|
||||
input_stream = self.ssc.queueStream(batches)
|
||||
|
@ -1463,7 +1466,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):
|
|||
0.0, [10.0, 10.0], [0.0, 0.0], [1.0 / 3.0, 1.0 / 3.0],
|
||||
100, 42 + i, 0.1)
|
||||
batches.append(
|
||||
sc.parallelize(batch).map(lambda lp: (lp.label, lp.features)))
|
||||
self.sc.parallelize(batch).map(lambda lp: (lp.label, lp.features)))
|
||||
|
||||
input_stream = self.ssc.queueStream(batches)
|
||||
output_stream = slr.predictOnValues(input_stream)
|
||||
|
@ -1494,7 +1497,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):
|
|||
for i in range(10):
|
||||
batch = LinearDataGenerator.generateLinearInput(
|
||||
0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1)
|
||||
batches.append(sc.parallelize(batch))
|
||||
batches.append(self.sc.parallelize(batch))
|
||||
|
||||
predict_batches = [
|
||||
b.map(lambda lp: (lp.label, lp.features)) for b in batches]
|
||||
|
@ -1580,6 +1583,7 @@ class ALSTests(MLlibTestCase):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pyspark.mllib.tests import *
|
||||
if not _have_scipy:
|
||||
print("NOTE: Skipping SciPy tests as it does not seem to be installed")
|
||||
if xmlrunner:
|
||||
|
|
|
@ -1259,6 +1259,7 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pyspark.sql.tests import *
|
||||
if xmlrunner:
|
||||
unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'))
|
||||
else:
|
||||
|
|
|
@ -1635,6 +1635,7 @@ kinesis_test_environ_var = "ENABLE_KINESIS_TESTS"
|
|||
are_kinesis_tests_enabled = os.environ.get(kinesis_test_environ_var) == '1'
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pyspark.streaming.tests import *
|
||||
kafka_assembly_jar = search_kafka_assembly_jar()
|
||||
flume_assembly_jar = search_flume_assembly_jar()
|
||||
mqtt_assembly_jar = search_mqtt_assembly_jar()
|
||||
|
|
|
@ -2008,6 +2008,7 @@ class NumPyTests(PySparkTestCase):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pyspark.tests import *
|
||||
if not _have_scipy:
|
||||
print("NOTE: Skipping SciPy tests as it does not seem to be installed")
|
||||
if not _have_numpy:
|
||||
|
|
Loading…
Reference in a new issue