diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index c2994ec7ea..19de6d88bb 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -5,7 +5,7 @@ RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.19//aircompressor-0.19.jar -algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar +algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8//antlr4-runtime-4.8.jar @@ -28,9 +28,9 @@ avro-mapred/1.10.2//avro-mapred-1.10.2.jar avro/1.10.2//avro-1.10.2.jar blas/2.2.0//blas-2.2.0.jar bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar -breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar -breeze_2.12/1.0//breeze_2.12-1.0.jar -cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar +breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar +breeze_2.12/1.2//breeze_2.12-1.2.jar +cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar @@ -182,7 +182,6 @@ libthrift/0.12.0//libthrift-0.12.0.jar log4j/1.2.17//log4j-1.2.17.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.7.1//lz4-java-1.7.1.jar -machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar metrics-core/4.2.0//metrics-core-4.2.0.jar @@ -224,10 +223,10 @@ slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.27//snakeyaml-1.27.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar -spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar -spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar -spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar -spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar +spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar +spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar +spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar +spire_2.12/0.17.0//spire_2.12-0.17.0.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index f574770d7d..d59496a70d 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -5,7 +5,7 @@ RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.19//aircompressor-0.19.jar -algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar +algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8//antlr4-runtime-4.8.jar @@ -23,9 +23,9 @@ avro-mapred/1.10.2//avro-mapred-1.10.2.jar avro/1.10.2//avro-1.10.2.jar blas/2.2.0//blas-2.2.0.jar bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar -breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar -breeze_2.12/1.0//breeze_2.12-1.0.jar -cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar +breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar +breeze_2.12/1.2//breeze_2.12-1.2.jar +cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-cli/1.2//commons-cli-1.2.jar @@ -153,7 +153,6 @@ libthrift/0.12.0//libthrift-0.12.0.jar log4j/1.2.17//log4j-1.2.17.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.7.1//lz4-java-1.7.1.jar -machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar metrics-core/4.2.0//metrics-core-4.2.0.jar @@ -195,10 +194,10 @@ slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.27//snakeyaml-1.27.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar -spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar -spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar -spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar -spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar +spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar +spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar +spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar +spire_2.12/0.17.0//spire_2.12-0.17.0.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar diff --git a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala index 093d02ea7a..4dbd224a38 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala @@ -142,7 +142,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext solverType = WeightedLeastSquares.Cholesky) val wlsModelWithIntercept = wlsWithIntercept.fit(instances) val wls = new WeightedLeastSquares(false, 0.0, 0.0, true, true, - solverType = WeightedLeastSquares.Cholesky) + solverType = WeightedLeastSquares.Cholesky, tol = 1e-14, maxIter = 100000) val wlsModel = wls.fit(instances) assert(expectedWithIntercept ~== wlsModelWithIntercept.diagInvAtWA relTol 1e-4) @@ -169,7 +169,8 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext solver <- Seq(WeightedLeastSquares.Auto, WeightedLeastSquares.QuasiNewton)) { val singularModel = new WeightedLeastSquares(fitIntercept, regParam = 0.0, elasticNetParam = 0.0, standardizeFeatures = standardization, - standardizeLabel = standardization, solverType = solver).fit(collinearInstances) + standardizeLabel = standardization, solverType = solver, + tol = 1e-14, maxIter = 100000).fit(collinearInstances) collinearInstances.collect().foreach { case Instance(l, w, f) => val pred = BLAS.dot(singularModel.coefficients, f) + singularModel.intercept @@ -202,6 +203,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext for (solver <- WeightedLeastSquares.supportedSolvers) { val wls = new WeightedLeastSquares(fitIntercept, regParam = 0.0, elasticNetParam = 0.0, standardizeFeatures = standardization, standardizeLabel = standardization, + tol = 1e-14, maxIter = 100000, solverType = solver).fit(instances) val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1)) assert(actual ~== expected(idx) absTol 1e-4) @@ -305,7 +307,8 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext for (fitIntercept <- Seq(false, true)) { val wls = new WeightedLeastSquares(fitIntercept = fitIntercept, regParam = 0.5, elasticNetParam = 0.0, standardizeFeatures = true, - standardizeLabel = true, solverType = WeightedLeastSquares.Cholesky) + standardizeLabel = true, solverType = WeightedLeastSquares.Cholesky, + tol = 1e-14, maxIter = 100000) .fit(constantFeaturesInstances) val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1)) assert(actual ~== expectedCholesky(idx) absTol 1e-6) @@ -363,7 +366,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext (lambda, alpha) <- Seq((0.0, 0.0), (0.5, 0.0), (0.5, 0.5), (0.5, 1.0))) { val wls = new WeightedLeastSquares(fitIntercept, regParam = lambda, elasticNetParam = alpha, standardizeFeatures = standardization, standardizeLabel = true, - solverType = WeightedLeastSquares.QuasiNewton) + solverType = WeightedLeastSquares.QuasiNewton, tol = 1e-14, maxIter = 100000) val model = wls.fit(constantFeaturesInstances) val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) assert(actual ~== expectedQuasiNewton(idx) absTol 1e-6) @@ -473,7 +476,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext elasticNetParam <- Seq(0.1, 0.5, 1.0)) { val wls = new WeightedLeastSquares(fitIntercept, regParam, elasticNetParam, standardizeFeatures = standardization, standardizeLabel = true, - solverType = WeightedLeastSquares.Auto) + solverType = WeightedLeastSquares.Auto, tol = 1e-14, maxIter = 100000) .fit(instances) val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1)) assert(actual ~== expected(idx) absTol 1e-4) @@ -531,7 +534,8 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext standardization <- Seq(false, true)) { for (solver <- WeightedLeastSquares.supportedSolvers) { val wls = new WeightedLeastSquares(fitIntercept, regParam, elasticNetParam = 0.0, - standardizeFeatures = standardization, standardizeLabel = true, solverType = solver) + standardizeFeatures = standardization, standardizeLabel = true, solverType = solver, + tol = 1e-14, maxIter = 100000) .fit(instances) val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1)) assert(actual ~== expected(idx) absTol 1e-4) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index baac015a1c..70ba4d3049 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -21,7 +21,7 @@ import scala.collection.mutable.ArrayBuilder import scala.reflect.ClassTag import scala.util.Random -import breeze.linalg.{squaredDistance => breezeSquaredDistance, DenseMatrix => BDM} +import breeze.linalg.{DenseMatrix => BDM} import org.json4s.jackson.JsonMethods.{parse => parseJson} import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} @@ -295,7 +295,9 @@ class VectorsSuite extends SparkFunSuite with Logging { val denseVector1 = Vectors.dense(sparseVector1.toArray) val denseVector2 = Vectors.dense(sparseVector2.toArray) - val squaredDist = breezeSquaredDistance(sparseVector1.asBreeze, sparseVector2.asBreeze) + val squaredDist = sparseVector1.toArray.zip(sparseVector2.toArray).map { + case (a, b) => (a - b) * (a - b) + }.sum // SparseVector vs. SparseVector assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala index fb3bc9f798..69ce683d93 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala @@ -22,11 +22,10 @@ import java.nio.charset.StandardCharsets import scala.io.Source -import breeze.linalg.{squaredDistance => breezeSquaredDistance} import com.google.common.io.Files import org.apache.spark.{SparkException, SparkFunSuite} -import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vectors} +import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.MLUtils._ import org.apache.spark.mllib.util.TestingUtils._ @@ -50,6 +49,12 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext { val v1 = Vectors.dense(a) val norm1 = Vectors.norm(v1, 2.0) val precision = 1e-6 + + def squaredDistance(v1: Vector, v2: Vector): Double = + v1.toArray.zip(v2.toArray).map { + case (a, b) => (a - b) * (a - b) + }.sum + for (m <- 0 until n) { val indices = (0 to m).toArray val values = indices.map(i => a(i)) @@ -57,13 +62,13 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext { val norm2 = Vectors.norm(v2, 2.0) val v3 = Vectors.sparse(n, indices, indices.map(i => a(i) + 0.5)) val norm3 = Vectors.norm(v3, 2.0) - val squaredDist = breezeSquaredDistance(v1.asBreeze, v2.asBreeze) + val squaredDist = squaredDistance(v1, v2) val fastSquaredDist1 = fastSquaredDistance(v1, norm1, v2, norm2, precision) assert((fastSquaredDist1 - squaredDist) <= precision * squaredDist, s"failed with m = $m") val fastSquaredDist2 = fastSquaredDistance(v1, norm1, Vectors.dense(v2.toArray), norm2, precision) assert((fastSquaredDist2 - squaredDist) <= precision * squaredDist, s"failed with m = $m") - val squaredDist2 = breezeSquaredDistance(v2.asBreeze, v3.asBreeze) + val squaredDist2 = squaredDistance(v2, v3) val fastSquaredDist3 = fastSquaredDistance(v2, norm2, v3, norm3, precision) assert((fastSquaredDist3 - squaredDist2) <= precision * squaredDist2, s"failed with m = $m") @@ -71,7 +76,7 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext { val v4 = Vectors.sparse(n, indices.slice(0, m - 10), indices.map(i => a(i) + 0.5).slice(0, m - 10)) val norm4 = Vectors.norm(v4, 2.0) - val squaredDist = breezeSquaredDistance(v2.asBreeze, v4.asBreeze) + val squaredDist = squaredDistance(v2, v4) val fastSquaredDist = fastSquaredDistance(v2, norm2, v4, norm4, precision) assert((fastSquaredDist - squaredDist) <= precision * squaredDist, s"failed with m = $m") diff --git a/pom.xml b/pom.xml index 0f8e32b3f7..3d2548f1d7 100644 --- a/pom.xml +++ b/pom.xml @@ -880,7 +880,7 @@ org.scalanlp breeze_${scala.binary.version} - 1.0 + 1.2 org.apache.commons