From 4a33cd928df4739e69ae9530aae23964e470d2f8 Mon Sep 17 00:00:00 2001
From: Alessandro Patti
Date: Wed, 21 Oct 2020 18:14:21 -0700
Subject: [PATCH] [SPARK-33203][PYTHON][TEST] Fix tests failing with rounding
 errors

### What changes were proposed in this pull request?

Increase the tolerance for two tests that fail in some environments but pass in others (flaky? Pass/fail is constant within the same environment).

### Why are the changes needed?

The tests `pyspark.ml.recommendation` and `pyspark.ml.tests.test_algorithms` fail with
```
  File "/home/jenkins/python/pyspark/ml/tests/test_algorithms.py", line 96, in test_raw_and_probability_prediction
    self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1))
AssertionError: False is not true
```
```
File "/home/jenkins/python/pyspark/ml/recommendation.py", line 256, in __main__.ALS
Failed example:
    predictions[0]
Expected:
    Row(user=0, item=2, newPrediction=0.6929101347923279)
Got:
    Row(user=0, item=2, newPrediction=0.6929104924201965)
...
```

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

This patch changes a test target. Just executed the tests to verify they pass.

Closes #30104 from AlessandroPatti/apatti/rounding-errors.

Authored-by: Alessandro Patti
Signed-off-by: Dongjoon Hyun
---
 python/pyspark/ml/recommendation.py        | 6 +++---
 python/pyspark/ml/tests/test_algorithms.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index 4f39c5abec..4ef3853444 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -254,11 +254,11 @@ class ALS(JavaEstimator, _ALSParams, JavaMLWritable, JavaMLReadable):
     >>> test = spark.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", "item"])
     >>> predictions = sorted(model.transform(test).collect(), key=lambda r: r[0])
     >>> predictions[0]
-    Row(user=0, item=2, newPrediction=0.6929101347923279)
+    Row(user=0, item=2, newPrediction=0.692910...)
     >>> predictions[1]
-    Row(user=1, item=0, newPrediction=3.47356915473938)
+    Row(user=1, item=0, newPrediction=3.473569...)
     >>> predictions[2]
-    Row(user=2, item=0, newPrediction=-0.8991986513137817)
+    Row(user=2, item=0, newPrediction=-0.899198...)
     >>> user_recs = model.recommendForAllUsers(3)
     >>> user_recs.where(user_recs.user == 0)\
             .select("recommendations.item", "recommendations.rating").collect()
diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py
index 03653c25b4..f8b61b7c57 100644
--- a/python/pyspark/ml/tests/test_algorithms.py
+++ b/python/pyspark/ml/tests/test_algorithms.py
@@ -86,7 +86,7 @@ class MultilayerPerceptronClassifierTest(SparkSessionTestCase):
         expected_rawPrediction = [-11.6081922998, -8.15827998691, 22.17757045]
         self.assertTrue(result.prediction, expected_prediction)
         self.assertTrue(np.allclose(result.probability, expected_probability, atol=1E-4))
-        self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1))
+        self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, rtol=0.1))
 
 
 class OneVsRestTests(SparkSessionTestCase):
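
As context for the `atol=1` to `rtol=0.1` switch, here is a minimal sketch (not part of the patch; the 5% drift below is a hypothetical stand-in for the environment-dependent rounding error) showing why an absolute tolerance rejects magnitude-proportional error on these raw predictions while a relative tolerance accepts it:

```python
import numpy as np

# Expected raw predictions from the test; the magnitudes range up to ~22.
expected = np.array([-11.6081922998, -8.15827998691, 22.17757045])

# Hypothetical 5% drift, proportional to magnitude, standing in for the
# environment-dependent rounding differences the commit message describes.
observed = expected * 1.05

# np.allclose checks |a - b| <= atol + rtol * |b| elementwise.
# With atol=1 (and the default rtol=1e-05), the largest component drifts
# by ~1.11, which exceeds the ~1.0 allowance, so the check fails.
print(np.allclose(observed, expected, atol=1))    # False

# With rtol=0.1 (and the default atol=1e-08), the allowed error scales
# with |expected| (~2.2 for the largest component), so the check passes.
print(np.allclose(observed, expected, rtol=0.1))  # True
```

The `newPrediction=0.692910...` doctest outputs work the same way on the docstring side: they rely on doctest's ELLIPSIS matching, which PySpark's doctest runners enable, so only the leading digits are pinned and trailing digits may vary by environment.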