[SPARK-35310][MLLIB] Update to breeze 1.2

### What changes were proposed in this pull request?

Update to the latest breeze 1.2.

### Why are the changes needed?

Minor bug fixes.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing tests.

Closes #33449 from srowen/SPARK-35310.

Authored-by: Sean Owen <srowen@gmail.com>
Signed-off-by: Sean Owen <srowen@gmail.com>
Sean Owen 2021-07-22 13:58:01 -05:00
parent 96944ac17d
commit c7d246ba4e
6 changed files with 41 additions and 32 deletions

dev/deps/spark-deps-hadoop-2.7-hive-2.3

@@ -5,7 +5,7 @@ RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
 ST4/4.0.4//ST4-4.0.4.jar
 activation/1.1.1//activation-1.1.1.jar
 aircompressor/0.19//aircompressor-0.19.jar
-algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
+algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar
 annotations/17.0.0//annotations-17.0.0.jar
 antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
 antlr4-runtime/4.8//antlr4-runtime-4.8.jar
@@ -28,9 +28,9 @@ avro-mapred/1.10.2//avro-mapred-1.10.2.jar
 avro/1.10.2//avro-1.10.2.jar
 blas/2.2.0//blas-2.2.0.jar
 bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar
-breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar
-breeze_2.12/1.0//breeze_2.12-1.0.jar
-cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar
+breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar
+breeze_2.12/1.2//breeze_2.12-1.2.jar
+cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar
 chill-java/0.10.0//chill-java-0.10.0.jar
 chill_2.12/0.10.0//chill_2.12-0.10.0.jar
 commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar
@@ -182,7 +182,6 @@ libthrift/0.12.0//libthrift-0.12.0.jar
 log4j/1.2.17//log4j-1.2.17.jar
 logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar
 lz4-java/1.7.1//lz4-java-1.7.1.jar
-machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar
 macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar
 mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar
 metrics-core/4.2.0//metrics-core-4.2.0.jar
@@ -224,10 +223,10 @@ slf4j-api/1.7.30//slf4j-api-1.7.30.jar
 slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar
 snakeyaml/1.27//snakeyaml-1.27.jar
 snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar
-spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar
-spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
-spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
-spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
+spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar
+spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar
+spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar
+spire_2.12/0.17.0//spire_2.12-0.17.0.jar
 stax-api/1.0.1//stax-api-1.0.1.jar
 stream/2.9.6//stream-2.9.6.jar
 super-csv/2.2.0//super-csv-2.2.0.jar

dev/deps/spark-deps-hadoop-3.2-hive-2.3

@@ -5,7 +5,7 @@ RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
 ST4/4.0.4//ST4-4.0.4.jar
 activation/1.1.1//activation-1.1.1.jar
 aircompressor/0.19//aircompressor-0.19.jar
-algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
+algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar
 annotations/17.0.0//annotations-17.0.0.jar
 antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
 antlr4-runtime/4.8//antlr4-runtime-4.8.jar
@@ -23,9 +23,9 @@ avro-mapred/1.10.2//avro-mapred-1.10.2.jar
 avro/1.10.2//avro-1.10.2.jar
 blas/2.2.0//blas-2.2.0.jar
 bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar
-breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar
-breeze_2.12/1.0//breeze_2.12-1.0.jar
-cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar
+breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar
+breeze_2.12/1.2//breeze_2.12-1.2.jar
+cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar
 chill-java/0.10.0//chill-java-0.10.0.jar
 chill_2.12/0.10.0//chill_2.12-0.10.0.jar
 commons-cli/1.2//commons-cli-1.2.jar
@@ -153,7 +153,6 @@ libthrift/0.12.0//libthrift-0.12.0.jar
 log4j/1.2.17//log4j-1.2.17.jar
 logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar
 lz4-java/1.7.1//lz4-java-1.7.1.jar
-machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar
 macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar
 mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar
 metrics-core/4.2.0//metrics-core-4.2.0.jar
@@ -195,10 +194,10 @@ slf4j-api/1.7.30//slf4j-api-1.7.30.jar
 slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar
 snakeyaml/1.27//snakeyaml-1.27.jar
 snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar
-spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar
-spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
-spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
-spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
+spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar
+spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar
+spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar
+spire_2.12/0.17.0//spire_2.12-0.17.0.jar
 stax-api/1.0.1//stax-api-1.0.1.jar
 stream/2.9.6//stream-2.9.6.jar
 super-csv/2.2.0//super-csv-2.2.0.jar
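
Taken together, the two manifest diffs show the transitive fallout of the breeze bump: algebra_2.12, cats-kernel_2.12, and the spire artifacts move from milestone builds (2.0.0-M2, 2.0.0-M4, 0.17.0-M1) to final releases (2.0.1, 2.1.1, 0.17.0), and machinist_2.12 drops off the classpath entirely. For a downstream build, depending on breeze 1.2 pulls in the same updated stack; a minimal illustrative sbt line (hypothetical project, not part of this commit):

```scala
// build.sbt (sketch): breeze 1.2 brings spire 0.17.0, cats-kernel 2.1.1
// and algebra 2.0.1 transitively, and no longer depends on machinist.
libraryDependencies += "org.scalanlp" %% "breeze" % "1.2"
```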

mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala

@@ -142,7 +142,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
       solverType = WeightedLeastSquares.Cholesky)
     val wlsModelWithIntercept = wlsWithIntercept.fit(instances)
     val wls = new WeightedLeastSquares(false, 0.0, 0.0, true, true,
-      solverType = WeightedLeastSquares.Cholesky)
+      solverType = WeightedLeastSquares.Cholesky, tol = 1e-14, maxIter = 100000)
     val wlsModel = wls.fit(instances)

     assert(expectedWithIntercept ~== wlsModelWithIntercept.diagInvAtWA relTol 1e-4)
@@ -169,7 +169,8 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
         solver <- Seq(WeightedLeastSquares.Auto, WeightedLeastSquares.QuasiNewton)) {
       val singularModel = new WeightedLeastSquares(fitIntercept, regParam = 0.0,
         elasticNetParam = 0.0, standardizeFeatures = standardization,
-        standardizeLabel = standardization, solverType = solver).fit(collinearInstances)
+        standardizeLabel = standardization, solverType = solver,
+        tol = 1e-14, maxIter = 100000).fit(collinearInstances)

       collinearInstances.collect().foreach { case Instance(l, w, f) =>
         val pred = BLAS.dot(singularModel.coefficients, f) + singularModel.intercept
@@ -202,6 +203,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
     for (solver <- WeightedLeastSquares.supportedSolvers) {
       val wls = new WeightedLeastSquares(fitIntercept, regParam = 0.0, elasticNetParam = 0.0,
         standardizeFeatures = standardization, standardizeLabel = standardization,
+        tol = 1e-14, maxIter = 100000,
         solverType = solver).fit(instances)
       val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1))
       assert(actual ~== expected(idx) absTol 1e-4)
@@ -305,7 +307,8 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
     for (fitIntercept <- Seq(false, true)) {
       val wls = new WeightedLeastSquares(fitIntercept = fitIntercept, regParam = 0.5,
         elasticNetParam = 0.0, standardizeFeatures = true,
-        standardizeLabel = true, solverType = WeightedLeastSquares.Cholesky)
+        standardizeLabel = true, solverType = WeightedLeastSquares.Cholesky,
+        tol = 1e-14, maxIter = 100000)
         .fit(constantFeaturesInstances)
       val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1))
       assert(actual ~== expectedCholesky(idx) absTol 1e-6)
@@ -363,7 +366,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
         (lambda, alpha) <- Seq((0.0, 0.0), (0.5, 0.0), (0.5, 0.5), (0.5, 1.0))) {
       val wls = new WeightedLeastSquares(fitIntercept, regParam = lambda, elasticNetParam = alpha,
         standardizeFeatures = standardization, standardizeLabel = true,
-        solverType = WeightedLeastSquares.QuasiNewton)
+        solverType = WeightedLeastSquares.QuasiNewton, tol = 1e-14, maxIter = 100000)
       val model = wls.fit(constantFeaturesInstances)
       val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
       assert(actual ~== expectedQuasiNewton(idx) absTol 1e-6)
@@ -473,7 +476,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
         elasticNetParam <- Seq(0.1, 0.5, 1.0)) {
       val wls = new WeightedLeastSquares(fitIntercept, regParam, elasticNetParam,
         standardizeFeatures = standardization, standardizeLabel = true,
-        solverType = WeightedLeastSquares.Auto)
+        solverType = WeightedLeastSquares.Auto, tol = 1e-14, maxIter = 100000)
         .fit(instances)
       val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1))
       assert(actual ~== expected(idx) absTol 1e-4)
@@ -531,7 +534,8 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
         standardization <- Seq(false, true)) {
       for (solver <- WeightedLeastSquares.supportedSolvers) {
         val wls = new WeightedLeastSquares(fitIntercept, regParam, elasticNetParam = 0.0,
-          standardizeFeatures = standardization, standardizeLabel = true, solverType = solver)
+          standardizeFeatures = standardization, standardizeLabel = true, solverType = solver,
+          tol = 1e-14, maxIter = 100000)
           .fit(instances)
         val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1))
         assert(actual ~== expected(idx) absTol 1e-4)
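
Every WeightedLeastSquares construction in this suite now pins tol = 1e-14 and maxIter = 100000, presumably so the quasi-Newton path still converges tightly enough under breeze 1.2's optimizers to reproduce the suite's hard-coded expected coefficients. A minimal sketch of the tightened call, with parameter names taken from the diff (WeightedLeastSquares is a private[ml] Spark helper, so this compiles only inside Spark's own ml sources):

```scala
// Illustrative only; mirrors the constructions above. `instances`
// (an RDD of Instance rows, as in the suite) is assumed to be in scope.
val wls = new WeightedLeastSquares(
  fitIntercept = true, regParam = 0.0, elasticNetParam = 0.0,
  standardizeFeatures = true, standardizeLabel = true,
  solverType = WeightedLeastSquares.Auto,
  tol = 1e-14, maxIter = 100000)
val model = wls.fit(instances)
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
```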

mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala

@@ -21,7 +21,7 @@ import scala.collection.mutable.ArrayBuilder
 import scala.reflect.ClassTag
 import scala.util.Random

-import breeze.linalg.{squaredDistance => breezeSquaredDistance, DenseMatrix => BDM}
+import breeze.linalg.{DenseMatrix => BDM}
 import org.json4s.jackson.JsonMethods.{parse => parseJson}

 import org.apache.spark.{SparkConf, SparkException, SparkFunSuite}
@@ -295,7 +295,9 @@ class VectorsSuite extends SparkFunSuite with Logging {
     val denseVector1 = Vectors.dense(sparseVector1.toArray)
     val denseVector2 = Vectors.dense(sparseVector2.toArray)

-    val squaredDist = breezeSquaredDistance(sparseVector1.asBreeze, sparseVector2.asBreeze)
+    val squaredDist = sparseVector1.toArray.zip(sparseVector2.toArray).map {
+      case (a, b) => (a - b) * (a - b)
+    }.sum

     // SparseVector vs. SparseVector
     assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8)
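
The breeze squaredDistance reference value is replaced with a direct computation over the raw arrays, so the check of Vectors.sqdist no longer routes through breeze. A self-contained sketch of the same computation (hypothetical standalone helper, not the suite's exact code):

```scala
// Squared Euclidean distance computed directly, as the test now does.
def squaredDistance(xs: Array[Double], ys: Array[Double]): Double =
  xs.zip(ys).map { case (a, b) => (a - b) * (a - b) }.sum

// (1-1)^2 + (2-0)^2 + (3-0)^2 = 13.0
assert(squaredDistance(Array(1.0, 2.0, 3.0), Array(1.0, 0.0, 0.0)) == 13.0)
```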

mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala

@@ -22,11 +22,10 @@ import java.nio.charset.StandardCharsets

 import scala.io.Source

-import breeze.linalg.{squaredDistance => breezeSquaredDistance}
 import com.google.common.io.Files

 import org.apache.spark.{SparkException, SparkFunSuite}
-import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vectors}
+import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vector, Vectors}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.MLUtils._
 import org.apache.spark.mllib.util.TestingUtils._
@@ -50,6 +49,12 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
     val v1 = Vectors.dense(a)
     val norm1 = Vectors.norm(v1, 2.0)
     val precision = 1e-6
+
+    def squaredDistance(v1: Vector, v2: Vector): Double =
+      v1.toArray.zip(v2.toArray).map {
+        case (a, b) => (a - b) * (a - b)
+      }.sum
+
     for (m <- 0 until n) {
       val indices = (0 to m).toArray
       val values = indices.map(i => a(i))
@@ -57,13 +62,13 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
       val norm2 = Vectors.norm(v2, 2.0)
       val v3 = Vectors.sparse(n, indices, indices.map(i => a(i) + 0.5))
       val norm3 = Vectors.norm(v3, 2.0)
-      val squaredDist = breezeSquaredDistance(v1.asBreeze, v2.asBreeze)
+      val squaredDist = squaredDistance(v1, v2)
       val fastSquaredDist1 = fastSquaredDistance(v1, norm1, v2, norm2, precision)
       assert((fastSquaredDist1 - squaredDist) <= precision * squaredDist, s"failed with m = $m")
       val fastSquaredDist2 =
         fastSquaredDistance(v1, norm1, Vectors.dense(v2.toArray), norm2, precision)
       assert((fastSquaredDist2 - squaredDist) <= precision * squaredDist, s"failed with m = $m")
-      val squaredDist2 = breezeSquaredDistance(v2.asBreeze, v3.asBreeze)
+      val squaredDist2 = squaredDistance(v2, v3)
       val fastSquaredDist3 =
         fastSquaredDistance(v2, norm2, v3, norm3, precision)
       assert((fastSquaredDist3 - squaredDist2) <= precision * squaredDist2, s"failed with m = $m")
@@ -71,7 +76,7 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
       val v4 = Vectors.sparse(n, indices.slice(0, m - 10),
         indices.map(i => a(i) + 0.5).slice(0, m - 10))
       val norm4 = Vectors.norm(v4, 2.0)
-      val squaredDist = breezeSquaredDistance(v2.asBreeze, v4.asBreeze)
+      val squaredDist = squaredDistance(v2, v4)
       val fastSquaredDist =
         fastSquaredDistance(v2, norm2, v4, norm4, precision)
       assert((fastSquaredDist - squaredDist) <= precision * squaredDist, s"failed with m = $m")
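
The same substitution as in VectorsSuite, except the reference computation is declared once as a local def squaredDistance(v1: Vector, v2: Vector) over mllib vectors, which is why Vector joins the org.apache.spark.mllib.linalg import; the fastSquaredDistance accuracy bounds under test are unchanged.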

pom.xml

@@ -880,7 +880,7 @@
       <dependency>
         <groupId>org.scalanlp</groupId>
         <artifactId>breeze_${scala.binary.version}</artifactId>
-        <version>1.0</version>
+        <version>1.2</version>
         <exclusions>
           <exclusion>
             <groupId>org.apache.commons</groupId>
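
This root-pom version bump is the single source of the upgrade: the surrounding <exclusions> block (truncated above) is unchanged context, and the dependency manifests earlier in the diff simply reflect re-resolving the build against breeze 1.2.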