From 99a0085790a06daa7f7498e65fa43deb7c202707 Mon Sep 17 00:00:00 2001 From: Tim Armstrong Date: Fri, 13 Aug 2021 12:04:42 +0900 Subject: [PATCH] [SPARK-36501][ML] Fix random col names in LSHModel.approxSimilarityJoin ### What changes were proposed in this pull request? Random.nextString() can include characters that are not valid in identifiers or that are likely to trigger bugs, e.g. non-printing characters, ".", "`". Instead, use a utility that always generates valid alphanumeric identifiers. ### Why are the changes needed? To deflake BucketedRandomProjectionLSHSuite and avoid similar failures that could be encountered by users. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Ran org.apache.spark.ml.feature.BucketedRandomProjectionLSHSuite Closes #33730 from timarmstrong/flaky-lsb. Authored-by: Tim Armstrong Signed-off-by: Hyukjin Kwon (cherry picked from commit 886dbe01cdd9082f3a82bb31598e22fd4c9a7e5a) Signed-off-by: Hyukjin Kwon --- mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala index c3304047fc..7963fc8869 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml.feature -import scala.util.Random - import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param.{IntParam, ParamValidators} @@ -280,7 +278,7 @@ private[ml] abstract class LSHModel[T <: LSHModel[T]] val explodedB = if (datasetA != datasetB) { processDataset(datasetB, rightColName, explodeCols) } else { - val recreatedB = recreateCol(datasetB, $(inputCol), s"${$(inputCol)}#${Random.nextString(5)}") + val recreatedB = recreateCol(datasetB, $(inputCol), 
Identifiable.randomUID(inputCol.name)) processDataset(recreatedB, rightColName, explodeCols) }