From 5892bbf447f195d73f89b9ec64eb8abf671672f9 Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Thu, 5 Dec 2019 11:54:45 -0600 Subject: [PATCH] [SPARK-30124][MLLIB] unnecessary persist in PythonMLLibAPI.scala ### What changes were proposed in this pull request? Removed unnecessary persist. ### Why are the changes needed? The persist in `PythonMLLibAPI.scala` is unnecessary because `run()` of `gmmAlg` already caches the data later on. https://github.com/apache/spark/blob/710ddab39e20f49e917311c3e27d142b5a2bcc71/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala#L167-L171 ### Does this PR introduce any user-facing change? No ### How was this patch tested? Manually Closes #26758 from amanomer/improperPersist. Authored-by: Aman Omer Signed-off-by: Sean Owen --- .../org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 6 +----- .../org/apache/spark/mllib/clustering/GaussianMixture.scala | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index bafaafb720..259ecb3a17 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -407,11 +407,7 @@ private[python] class PythonMLLibAPI extends Serializable { if (seed != null) gmmAlg.setSeed(seed) - try { - new GaussianMixtureModelWrapper(gmmAlg.run(data.rdd.persist(StorageLevel.MEMORY_AND_DISK))) - } finally { - data.rdd.unpersist() - } + new GaussianMixtureModelWrapper(gmmAlg.run(data.rdd)) } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala index a9ed36e909..cde23096b4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala +++ 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala @@ -234,6 +234,7 @@ class GaussianMixture private ( iter += 1 compute.destroy() } + breezeData.unpersist() new GaussianMixtureModel(weights, gaussians) }