From 4a33cd928df4739e69ae9530aae23964e470d2f8 Mon Sep 17 00:00:00 2001
From: Alessandro Patti
Date: Wed, 21 Oct 2020 18:14:21 -0700
Subject: [PATCH] [SPARK-33203][PYTHON][TEST] Fix tests failing with rounding
 errors

### What changes were proposed in this pull request?

Increase the tolerance for two tests that fail in some environments but pass in others (flaky? Pass/fail is constant within the same environment).

### Why are the changes needed?

The tests `pyspark.ml.recommendation` and `pyspark.ml.tests.test_algorithms` fail with
```
  File "/home/jenkins/python/pyspark/ml/tests/test_algorithms.py", line 96, in test_raw_and_probability_prediction
    self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1))
AssertionError: False is not true
```
```
File "/home/jenkins/python/pyspark/ml/recommendation.py", line 256, in __main__.ALS
Failed example:
    predictions[0]
Expected:
    Row(user=0, item=2, newPrediction=0.6929101347923279)
Got:
    Row(user=0, item=2, newPrediction=0.6929104924201965)
...
```

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

This patch changes a test target. Just executed the tests to verify they pass.

Closes #30104 from AlessandroPatti/apatti/rounding-errors.

Authored-by: Alessandro Patti
Signed-off-by: Dongjoon Hyun
---
 python/pyspark/ml/recommendation.py        | 6 +++---
 python/pyspark/ml/tests/test_algorithms.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index 4f39c5abec..4ef3853444 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -254,11 +254,11 @@ class ALS(JavaEstimator, _ALSParams, JavaMLWritable, JavaMLReadable):
     >>> test = spark.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", "item"])
     >>> predictions = sorted(model.transform(test).collect(), key=lambda r: r[0])
     >>> predictions[0]
-    Row(user=0, item=2, newPrediction=0.6929101347923279)
+    Row(user=0, item=2, newPrediction=0.692910...)
     >>> predictions[1]
-    Row(user=1, item=0, newPrediction=3.47356915473938)
+    Row(user=1, item=0, newPrediction=3.473569...)
     >>> predictions[2]
-    Row(user=2, item=0, newPrediction=-0.8991986513137817)
+    Row(user=2, item=0, newPrediction=-0.899198...)
     >>> user_recs = model.recommendForAllUsers(3)
     >>> user_recs.where(user_recs.user == 0)\
             .select("recommendations.item", "recommendations.rating").collect()
diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py
index 03653c25b4..f8b61b7c57 100644
--- a/python/pyspark/ml/tests/test_algorithms.py
+++ b/python/pyspark/ml/tests/test_algorithms.py
@@ -86,7 +86,7 @@ class MultilayerPerceptronClassifierTest(SparkSessionTestCase):
         expected_rawPrediction = [-11.6081922998, -8.15827998691, 22.17757045]
         self.assertTrue(result.prediction, expected_prediction)
         self.assertTrue(np.allclose(result.probability, expected_probability, atol=1E-4))
-        self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1))
+        self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, rtol=0.1))
 
 
 class OneVsRestTests(SparkSessionTestCase):
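
As context for the `atol=1` to `rtol=0.1` switch, here is a minimal sketch (not part of the patch; the 5% drift below is a hypothetical stand-in for the environment-dependent rounding error) showing why an absolute tolerance rejects magnitude-proportional error on these raw predictions while a relative tolerance accepts it:

```python
import numpy as np

# Expected raw predictions from the test; the magnitudes range up to ~22.
expected = np.array([-11.6081922998, -8.15827998691, 22.17757045])

# Hypothetical 5% drift, proportional to magnitude, standing in for the
# environment-dependent rounding differences the commit message describes.
observed = expected * 1.05

# np.allclose checks |a - b| <= atol + rtol * |b| elementwise.
# With atol=1 (and the default rtol=1e-05), the largest component drifts
# by ~1.11, which exceeds the ~1.0 allowance, so the check fails.
print(np.allclose(observed, expected, atol=1))    # False

# With rtol=0.1 (and the default atol=1e-08), the allowed error scales
# with |expected| (~2.2 for the largest component), so the check passes.
print(np.allclose(observed, expected, rtol=0.1))  # True
```

The `newPrediction=0.692910...` doctest outputs work the same way on the docstring side: they rely on doctest's ELLIPSIS matching, which PySpark's doctest runners enable, so only the leading digits are pinned and trailing digits may vary by environment.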