[SPARK-14549][ML] Copy the Vector and Matrix classes from mllib to ml in mllib-local

## What changes were proposed in this pull request?

This task will copy the Vector and Matrix classes from mllib to ml package in mllib-local jar. The UDTs and `since` annotation in ml vector and matrix will be removed from now. UDTs will be achieved by #SPARK-14487, and `since` will be replaced by /*  since 1.2.0 */

The BLAS implementation will be copied, and some of the test utilities will be copies as well.

Summary of changes:

1. In mllib-local/src/main/scala/org/apache/spark/**ml**/linalg/BLAS.scala
  - Copied from mllib/src/main/scala/org/apache/spark/**mllib**/linalg/BLAS.scala
  - logDebug("gemm: alpha is equal to 0 and beta is equal to 1. Returning C.") is removed in ml version.
2. In  mllib-local/src/main/scala/org/apache/spark/**ml**/linalg/Matrices.scala
  - Copied from mllib/src/main/scala/org/apache/spark/**mllib**/linalg/Matrices.scala
  - `Since` was removed, and we'll use standard `/* Since /*` Java doc. Will be in another PR.
  - `UDT` related code was removed, and will use `SPARK-13944` https://github.com/apache/spark/pull/12259  to replace the annotation.
3. In mllib-local/src/main/scala/org/apache/spark/**ml**/linalg/Vectors.scala
  - Copied from mllib/src/main/scala/org/apache/spark/**mllib**/linalg/Vectors.scala
  - `Since` was removed.
  - `UDT` related code was removed.
  - In `def parseNumeric`, it was throwing `throw new SparkException(s"Cannot parse $other.")`, and now it's throwing `throw new IllegalArgumentException(s"Cannot parse $other.")`
4. In mllib/src/main/scala/org/apache/spark/**mllib**/linalg/Vectors.scala
  - For consistency with ML version of vector, `def parseNumeric` is now throwing `throw new IllegalArgumentException(s"Cannot parse $other.")`
5. mllib/src/main/scala/org/apache/spark/**mllib**/util/NumericParser.scala is moved to mllib-local/src/main/scala/org/apache/spark/**ml**/util/NumericParser.scala
  - All the `throw new SparkException` were replaced by `throw new IllegalArgumentException`

## How was this patch tested?

unit tests

Author: DB Tsai <dbt@netflix.com>

Closes #12317 from dbtsai/dbtsai-ml-vector.
This commit is contained in:
DB Tsai 2016-04-15 01:17:03 -07:00 committed by Xiangrui Meng
parent a9324a06ef
commit 96534aa47c
14 changed files with 4339 additions and 33 deletions

View file

@ -48,6 +48,10 @@
<artifactId>scalacheck_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.json4s</groupId>
<artifactId>json4s-jackson_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>

View file

@ -0,0 +1,723 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml.linalg
import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS}
import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS}
/**
* BLAS routines for MLlib's vectors and matrices.
*/
private[spark] object BLAS extends Serializable {
@transient private var _f2jBLAS: NetlibBLAS = _
@transient private var _nativeBLAS: NetlibBLAS = _
// For level-1 routines, we use Java implementation.
private def f2jBLAS: NetlibBLAS = {
if (_f2jBLAS == null) {
_f2jBLAS = new F2jBLAS
}
_f2jBLAS
}
/**
* y += a * x
*/
def axpy(a: Double, x: Vector, y: Vector): Unit = {
require(x.size == y.size)
y match {
case dy: DenseVector =>
x match {
case sx: SparseVector =>
axpy(a, sx, dy)
case dx: DenseVector =>
axpy(a, dx, dy)
case _ =>
throw new UnsupportedOperationException(
s"axpy doesn't support x type ${x.getClass}.")
}
case _ =>
throw new IllegalArgumentException(
s"axpy only supports adding to a dense vector but got type ${y.getClass}.")
}
}
/**
* y += a * x
*/
private def axpy(a: Double, x: DenseVector, y: DenseVector): Unit = {
val n = x.size
f2jBLAS.daxpy(n, a, x.values, 1, y.values, 1)
}
/**
* y += a * x
*/
private def axpy(a: Double, x: SparseVector, y: DenseVector): Unit = {
val xValues = x.values
val xIndices = x.indices
val yValues = y.values
val nnz = xIndices.length
if (a == 1.0) {
var k = 0
while (k < nnz) {
yValues(xIndices(k)) += xValues(k)
k += 1
}
} else {
var k = 0
while (k < nnz) {
yValues(xIndices(k)) += a * xValues(k)
k += 1
}
}
}
/** Y += a * x */
private[spark] def axpy(a: Double, X: DenseMatrix, Y: DenseMatrix): Unit = {
require(X.numRows == Y.numRows && X.numCols == Y.numCols, "Dimension mismatch: " +
s"size(X) = ${(X.numRows, X.numCols)} but size(Y) = ${(Y.numRows, Y.numCols)}.")
f2jBLAS.daxpy(X.numRows * X.numCols, a, X.values, 1, Y.values, 1)
}
/**
* dot(x, y)
*/
def dot(x: Vector, y: Vector): Double = {
require(x.size == y.size,
"BLAS.dot(x: Vector, y:Vector) was given Vectors with non-matching sizes:" +
" x.size = " + x.size + ", y.size = " + y.size)
(x, y) match {
case (dx: DenseVector, dy: DenseVector) =>
dot(dx, dy)
case (sx: SparseVector, dy: DenseVector) =>
dot(sx, dy)
case (dx: DenseVector, sy: SparseVector) =>
dot(sy, dx)
case (sx: SparseVector, sy: SparseVector) =>
dot(sx, sy)
case _ =>
throw new IllegalArgumentException(s"dot doesn't support (${x.getClass}, ${y.getClass}).")
}
}
/**
* dot(x, y)
*/
private def dot(x: DenseVector, y: DenseVector): Double = {
val n = x.size
f2jBLAS.ddot(n, x.values, 1, y.values, 1)
}
/**
* dot(x, y)
*/
private def dot(x: SparseVector, y: DenseVector): Double = {
val xValues = x.values
val xIndices = x.indices
val yValues = y.values
val nnz = xIndices.length
var sum = 0.0
var k = 0
while (k < nnz) {
sum += xValues(k) * yValues(xIndices(k))
k += 1
}
sum
}
/**
* dot(x, y)
*/
private def dot(x: SparseVector, y: SparseVector): Double = {
val xValues = x.values
val xIndices = x.indices
val yValues = y.values
val yIndices = y.indices
val nnzx = xIndices.length
val nnzy = yIndices.length
var kx = 0
var ky = 0
var sum = 0.0
// y catching x
while (kx < nnzx && ky < nnzy) {
val ix = xIndices(kx)
while (ky < nnzy && yIndices(ky) < ix) {
ky += 1
}
if (ky < nnzy && yIndices(ky) == ix) {
sum += xValues(kx) * yValues(ky)
ky += 1
}
kx += 1
}
sum
}
/**
* y = x
*/
def copy(x: Vector, y: Vector): Unit = {
val n = y.size
require(x.size == n)
y match {
case dy: DenseVector =>
x match {
case sx: SparseVector =>
val sxIndices = sx.indices
val sxValues = sx.values
val dyValues = dy.values
val nnz = sxIndices.length
var i = 0
var k = 0
while (k < nnz) {
val j = sxIndices(k)
while (i < j) {
dyValues(i) = 0.0
i += 1
}
dyValues(i) = sxValues(k)
i += 1
k += 1
}
while (i < n) {
dyValues(i) = 0.0
i += 1
}
case dx: DenseVector =>
Array.copy(dx.values, 0, dy.values, 0, n)
}
case _ =>
throw new IllegalArgumentException(s"y must be dense in copy but got ${y.getClass}")
}
}
/**
* x = a * x
*/
def scal(a: Double, x: Vector): Unit = {
x match {
case sx: SparseVector =>
f2jBLAS.dscal(sx.values.length, a, sx.values, 1)
case dx: DenseVector =>
f2jBLAS.dscal(dx.values.length, a, dx.values, 1)
case _ =>
throw new IllegalArgumentException(s"scal doesn't support vector type ${x.getClass}.")
}
}
// For level-3 routines, we use the native BLAS.
private def nativeBLAS: NetlibBLAS = {
if (_nativeBLAS == null) {
_nativeBLAS = NativeBLAS
}
_nativeBLAS
}
/**
* Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR.
*
* @param U the upper triangular part of the matrix in a [[DenseVector]](column major)
*/
def spr(alpha: Double, v: Vector, U: DenseVector): Unit = {
spr(alpha, v, U.values)
}
/**
* Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR.
*
* @param U the upper triangular part of the matrix packed in an array (column major)
*/
def spr(alpha: Double, v: Vector, U: Array[Double]): Unit = {
val n = v.size
v match {
case DenseVector(values) =>
NativeBLAS.dspr("U", n, alpha, values, 1, U)
case SparseVector(size, indices, values) =>
val nnz = indices.length
var colStartIdx = 0
var prevCol = 0
var col = 0
var j = 0
var i = 0
var av = 0.0
while (j < nnz) {
col = indices(j)
// Skip empty columns.
colStartIdx += (col - prevCol) * (col + prevCol + 1) / 2
col = indices(j)
av = alpha * values(j)
i = 0
while (i <= j) {
U(colStartIdx + indices(i)) += av * values(i)
i += 1
}
j += 1
prevCol = col
}
}
}
/**
* A := alpha * x * x^T^ + A
* @param alpha a real scalar that will be multiplied to x * x^T^.
* @param x the vector x that contains the n elements.
* @param A the symmetric matrix A. Size of n x n.
*/
def syr(alpha: Double, x: Vector, A: DenseMatrix) {
val mA = A.numRows
val nA = A.numCols
require(mA == nA, s"A is not a square matrix (and hence is not symmetric). A: $mA x $nA")
require(mA == x.size, s"The size of x doesn't match the rank of A. A: $mA x $nA, x: ${x.size}")
x match {
case dv: DenseVector => syr(alpha, dv, A)
case sv: SparseVector => syr(alpha, sv, A)
case _ =>
throw new IllegalArgumentException(s"syr doesn't support vector type ${x.getClass}.")
}
}
private def syr(alpha: Double, x: DenseVector, A: DenseMatrix) {
val nA = A.numRows
val mA = A.numCols
nativeBLAS.dsyr("U", x.size, alpha, x.values, 1, A.values, nA)
// Fill lower triangular part of A
var i = 0
while (i < mA) {
var j = i + 1
while (j < nA) {
A(j, i) = A(i, j)
j += 1
}
i += 1
}
}
private def syr(alpha: Double, x: SparseVector, A: DenseMatrix) {
val mA = A.numCols
val xIndices = x.indices
val xValues = x.values
val nnz = xValues.length
val Avalues = A.values
var i = 0
while (i < nnz) {
val multiplier = alpha * xValues(i)
val offset = xIndices(i) * mA
var j = 0
while (j < nnz) {
Avalues(xIndices(j) + offset) += multiplier * xValues(j)
j += 1
}
i += 1
}
}
/**
* C := alpha * A * B + beta * C
* @param alpha a scalar to scale the multiplication A * B.
* @param A the matrix A that will be left multiplied to B. Size of m x k.
* @param B the matrix B that will be left multiplied by A. Size of k x n.
* @param beta a scalar that can be used to scale matrix C.
* @param C the resulting matrix C. Size of m x n. C.isTransposed must be false.
*/
def gemm(
alpha: Double,
A: Matrix,
B: DenseMatrix,
beta: Double,
C: DenseMatrix): Unit = {
require(!C.isTransposed,
"The matrix C cannot be the product of a transpose() call. C.isTransposed must be false.")
if (alpha == 0.0 && beta == 1.0) {
// gemm: alpha is equal to 0 and beta is equal to 1. Returning C.
return
} else if (alpha == 0.0) {
f2jBLAS.dscal(C.values.length, beta, C.values, 1)
} else {
A match {
case sparse: SparseMatrix => gemm(alpha, sparse, B, beta, C)
case dense: DenseMatrix => gemm(alpha, dense, B, beta, C)
case _ =>
throw new IllegalArgumentException(s"gemm doesn't support matrix type ${A.getClass}.")
}
}
}
/**
* C := alpha * A * B + beta * C
* For `DenseMatrix` A.
*/
private def gemm(
alpha: Double,
A: DenseMatrix,
B: DenseMatrix,
beta: Double,
C: DenseMatrix): Unit = {
val tAstr = if (A.isTransposed) "T" else "N"
val tBstr = if (B.isTransposed) "T" else "N"
val lda = if (!A.isTransposed) A.numRows else A.numCols
val ldb = if (!B.isTransposed) B.numRows else B.numCols
require(A.numCols == B.numRows,
s"The columns of A don't match the rows of B. A: ${A.numCols}, B: ${B.numRows}")
require(A.numRows == C.numRows,
s"The rows of C don't match the rows of A. C: ${C.numRows}, A: ${A.numRows}")
require(B.numCols == C.numCols,
s"The columns of C don't match the columns of B. C: ${C.numCols}, A: ${B.numCols}")
nativeBLAS.dgemm(tAstr, tBstr, A.numRows, B.numCols, A.numCols, alpha, A.values, lda,
B.values, ldb, beta, C.values, C.numRows)
}
/**
* C := alpha * A * B + beta * C
* For `SparseMatrix` A.
*/
private def gemm(
alpha: Double,
A: SparseMatrix,
B: DenseMatrix,
beta: Double,
C: DenseMatrix): Unit = {
val mA: Int = A.numRows
val nB: Int = B.numCols
val kA: Int = A.numCols
val kB: Int = B.numRows
require(kA == kB, s"The columns of A don't match the rows of B. A: $kA, B: $kB")
require(mA == C.numRows, s"The rows of C don't match the rows of A. C: ${C.numRows}, A: $mA")
require(nB == C.numCols,
s"The columns of C don't match the columns of B. C: ${C.numCols}, A: $nB")
val Avals = A.values
val Bvals = B.values
val Cvals = C.values
val ArowIndices = A.rowIndices
val AcolPtrs = A.colPtrs
// Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices
if (A.isTransposed) {
var colCounterForB = 0
if (!B.isTransposed) { // Expensive to put the check inside the loop
while (colCounterForB < nB) {
var rowCounterForA = 0
val Cstart = colCounterForB * mA
val Bstart = colCounterForB * kA
while (rowCounterForA < mA) {
var i = AcolPtrs(rowCounterForA)
val indEnd = AcolPtrs(rowCounterForA + 1)
var sum = 0.0
while (i < indEnd) {
sum += Avals(i) * Bvals(Bstart + ArowIndices(i))
i += 1
}
val Cindex = Cstart + rowCounterForA
Cvals(Cindex) = beta * Cvals(Cindex) + sum * alpha
rowCounterForA += 1
}
colCounterForB += 1
}
} else {
while (colCounterForB < nB) {
var rowCounterForA = 0
val Cstart = colCounterForB * mA
while (rowCounterForA < mA) {
var i = AcolPtrs(rowCounterForA)
val indEnd = AcolPtrs(rowCounterForA + 1)
var sum = 0.0
while (i < indEnd) {
sum += Avals(i) * B(ArowIndices(i), colCounterForB)
i += 1
}
val Cindex = Cstart + rowCounterForA
Cvals(Cindex) = beta * Cvals(Cindex) + sum * alpha
rowCounterForA += 1
}
colCounterForB += 1
}
}
} else {
// Scale matrix first if `beta` is not equal to 1.0
if (beta != 1.0) {
f2jBLAS.dscal(C.values.length, beta, C.values, 1)
}
// Perform matrix multiplication and add to C. The rows of A are multiplied by the columns of
// B, and added to C.
var colCounterForB = 0 // the column to be updated in C
if (!B.isTransposed) { // Expensive to put the check inside the loop
while (colCounterForB < nB) {
var colCounterForA = 0 // The column of A to multiply with the row of B
val Bstart = colCounterForB * kB
val Cstart = colCounterForB * mA
while (colCounterForA < kA) {
var i = AcolPtrs(colCounterForA)
val indEnd = AcolPtrs(colCounterForA + 1)
val Bval = Bvals(Bstart + colCounterForA) * alpha
while (i < indEnd) {
Cvals(Cstart + ArowIndices(i)) += Avals(i) * Bval
i += 1
}
colCounterForA += 1
}
colCounterForB += 1
}
} else {
while (colCounterForB < nB) {
var colCounterForA = 0 // The column of A to multiply with the row of B
val Cstart = colCounterForB * mA
while (colCounterForA < kA) {
var i = AcolPtrs(colCounterForA)
val indEnd = AcolPtrs(colCounterForA + 1)
val Bval = B(colCounterForA, colCounterForB) * alpha
while (i < indEnd) {
Cvals(Cstart + ArowIndices(i)) += Avals(i) * Bval
i += 1
}
colCounterForA += 1
}
colCounterForB += 1
}
}
}
}
/**
* y := alpha * A * x + beta * y
* @param alpha a scalar to scale the multiplication A * x.
* @param A the matrix A that will be left multiplied to x. Size of m x n.
* @param x the vector x that will be left multiplied by A. Size of n x 1.
* @param beta a scalar that can be used to scale vector y.
* @param y the resulting vector y. Size of m x 1.
*/
def gemv(
alpha: Double,
A: Matrix,
x: Vector,
beta: Double,
y: DenseVector): Unit = {
require(A.numCols == x.size,
s"The columns of A don't match the number of elements of x. A: ${A.numCols}, x: ${x.size}")
require(A.numRows == y.size,
s"The rows of A don't match the number of elements of y. A: ${A.numRows}, y:${y.size}")
if (alpha == 0.0 && beta == 1.0) {
// gemv: alpha is equal to 0 and beta is equal to 1. Returning y.
return
} else if (alpha == 0.0) {
scal(beta, y)
} else {
(A, x) match {
case (smA: SparseMatrix, dvx: DenseVector) =>
gemv(alpha, smA, dvx, beta, y)
case (smA: SparseMatrix, svx: SparseVector) =>
gemv(alpha, smA, svx, beta, y)
case (dmA: DenseMatrix, dvx: DenseVector) =>
gemv(alpha, dmA, dvx, beta, y)
case (dmA: DenseMatrix, svx: SparseVector) =>
gemv(alpha, dmA, svx, beta, y)
case _ =>
throw new IllegalArgumentException(s"gemv doesn't support running on matrix type " +
s"${A.getClass} and vector type ${x.getClass}.")
}
}
}
/**
* y := alpha * A * x + beta * y
* For `DenseMatrix` A and `DenseVector` x.
*/
private def gemv(
alpha: Double,
A: DenseMatrix,
x: DenseVector,
beta: Double,
y: DenseVector): Unit = {
val tStrA = if (A.isTransposed) "T" else "N"
val mA = if (!A.isTransposed) A.numRows else A.numCols
val nA = if (!A.isTransposed) A.numCols else A.numRows
nativeBLAS.dgemv(tStrA, mA, nA, alpha, A.values, mA, x.values, 1, beta,
y.values, 1)
}
/**
* y := alpha * A * x + beta * y
* For `DenseMatrix` A and `SparseVector` x.
*/
private def gemv(
alpha: Double,
A: DenseMatrix,
x: SparseVector,
beta: Double,
y: DenseVector): Unit = {
val mA: Int = A.numRows
val nA: Int = A.numCols
val Avals = A.values
val xIndices = x.indices
val xNnz = xIndices.length
val xValues = x.values
val yValues = y.values
if (A.isTransposed) {
var rowCounterForA = 0
while (rowCounterForA < mA) {
var sum = 0.0
var k = 0
while (k < xNnz) {
sum += xValues(k) * Avals(xIndices(k) + rowCounterForA * nA)
k += 1
}
yValues(rowCounterForA) = sum * alpha + beta * yValues(rowCounterForA)
rowCounterForA += 1
}
} else {
var rowCounterForA = 0
while (rowCounterForA < mA) {
var sum = 0.0
var k = 0
while (k < xNnz) {
sum += xValues(k) * Avals(xIndices(k) * mA + rowCounterForA)
k += 1
}
yValues(rowCounterForA) = sum * alpha + beta * yValues(rowCounterForA)
rowCounterForA += 1
}
}
}
/**
* y := alpha * A * x + beta * y
* For `SparseMatrix` A and `SparseVector` x.
*/
private def gemv(
alpha: Double,
A: SparseMatrix,
x: SparseVector,
beta: Double,
y: DenseVector): Unit = {
val xValues = x.values
val xIndices = x.indices
val xNnz = xIndices.length
val yValues = y.values
val mA: Int = A.numRows
val nA: Int = A.numCols
val Avals = A.values
val Arows = if (!A.isTransposed) A.rowIndices else A.colPtrs
val Acols = if (!A.isTransposed) A.colPtrs else A.rowIndices
if (A.isTransposed) {
var rowCounter = 0
while (rowCounter < mA) {
var i = Arows(rowCounter)
val indEnd = Arows(rowCounter + 1)
var sum = 0.0
var k = 0
while (k < xNnz && i < indEnd) {
if (xIndices(k) == Acols(i)) {
sum += Avals(i) * xValues(k)
i += 1
}
k += 1
}
yValues(rowCounter) = sum * alpha + beta * yValues(rowCounter)
rowCounter += 1
}
} else {
if (beta != 1.0) scal(beta, y)
var colCounterForA = 0
var k = 0
while (colCounterForA < nA && k < xNnz) {
if (xIndices(k) == colCounterForA) {
var i = Acols(colCounterForA)
val indEnd = Acols(colCounterForA + 1)
val xTemp = xValues(k) * alpha
while (i < indEnd) {
val rowIndex = Arows(i)
yValues(Arows(i)) += Avals(i) * xTemp
i += 1
}
k += 1
}
colCounterForA += 1
}
}
}
/**
* y := alpha * A * x + beta * y
* For `SparseMatrix` A and `DenseVector` x.
*/
private def gemv(
alpha: Double,
A: SparseMatrix,
x: DenseVector,
beta: Double,
y: DenseVector): Unit = {
val xValues = x.values
val yValues = y.values
val mA: Int = A.numRows
val nA: Int = A.numCols
val Avals = A.values
val Arows = if (!A.isTransposed) A.rowIndices else A.colPtrs
val Acols = if (!A.isTransposed) A.colPtrs else A.rowIndices
// Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices
if (A.isTransposed) {
var rowCounter = 0
while (rowCounter < mA) {
var i = Arows(rowCounter)
val indEnd = Arows(rowCounter + 1)
var sum = 0.0
while (i < indEnd) {
sum += Avals(i) * xValues(Acols(i))
i += 1
}
yValues(rowCounter) = beta * yValues(rowCounter) + sum * alpha
rowCounter += 1
}
} else {
if (beta != 1.0) scal(beta, y)
// Perform matrix-vector multiplication and add to y
var colCounterForA = 0
while (colCounterForA < nA) {
var i = Acols(colCounterForA)
val indEnd = Acols(colCounterForA + 1)
val xVal = xValues(colCounterForA) * alpha
while (i < indEnd) {
val rowIndex = Arows(i)
yValues(rowIndex) += Avals(i) * xVal
i += 1
}
colCounterForA += 1
}
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,736 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml.linalg
import java.lang.{Double => JavaDouble, Integer => JavaInteger, Iterable => JavaIterable}
import java.util
import scala.annotation.varargs
import scala.collection.JavaConverters._
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
import org.json4s.DefaultFormats
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}
/**
* Represents a numeric vector, whose index type is Int and value type is Double.
*
* Note: Users should not implement this interface.
*/
sealed trait Vector extends Serializable {
/**
* Size of the vector.
*/
def size: Int
/**
* Converts the instance to a double array.
*/
def toArray: Array[Double]
override def equals(other: Any): Boolean = {
other match {
case v2: Vector =>
if (this.size != v2.size) return false
(this, v2) match {
case (s1: SparseVector, s2: SparseVector) =>
Vectors.equals(s1.indices, s1.values, s2.indices, s2.values)
case (s1: SparseVector, d1: DenseVector) =>
Vectors.equals(s1.indices, s1.values, 0 until d1.size, d1.values)
case (d1: DenseVector, s1: SparseVector) =>
Vectors.equals(0 until d1.size, d1.values, s1.indices, s1.values)
case (_, _) => util.Arrays.equals(this.toArray, v2.toArray)
}
case _ => false
}
}
/**
* Returns a hash code value for the vector. The hash code is based on its size and its first 128
* nonzero entries, using a hash algorithm similar to [[java.util.Arrays.hashCode]].
*/
override def hashCode(): Int = {
// This is a reference implementation. It calls return in foreachActive, which is slow.
// Subclasses should override it with optimized implementation.
var result: Int = 31 + size
var nnz = 0
this.foreachActive { (index, value) =>
if (nnz < Vectors.MAX_HASH_NNZ) {
// ignore explicit 0 for comparison between sparse and dense
if (value != 0) {
result = 31 * result + index
val bits = java.lang.Double.doubleToLongBits(value)
result = 31 * result + (bits ^ (bits >>> 32)).toInt
nnz += 1
}
} else {
return result
}
}
result
}
/**
* Converts the instance to a breeze vector.
*/
private[spark] def toBreeze: BV[Double]
/**
* Gets the value of the ith element.
* @param i index
*/
def apply(i: Int): Double = toBreeze(i)
/**
* Makes a deep copy of this vector.
*/
def copy: Vector = {
throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.")
}
/**
* Applies a function `f` to all the active elements of dense and sparse vector.
*
* @param f the function takes two parameters where the first parameter is the index of
* the vector with type `Int`, and the second parameter is the corresponding value
* with type `Double`.
*/
def foreachActive(f: (Int, Double) => Unit): Unit
/**
* Number of active entries. An "active entry" is an element which is explicitly stored,
* regardless of its value. Note that inactive entries have value 0.
*/
def numActives: Int
/**
* Number of nonzero elements. This scans all active values and count nonzeros.
*/
def numNonzeros: Int
/**
* Converts this vector to a sparse vector with all explicit zeros removed.
*/
def toSparse: SparseVector
/**
* Converts this vector to a dense vector.
*/
def toDense: DenseVector = new DenseVector(this.toArray)
/**
* Returns a vector in either dense or sparse format, whichever uses less storage.
*/
def compressed: Vector = {
val nnz = numNonzeros
// A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
if (1.5 * (nnz + 1.0) < size) {
toSparse
} else {
toDense
}
}
/**
* Find the index of a maximal element. Returns the first maximal element in case of a tie.
* Returns -1 if vector has length 0.
*/
def argmax: Int
/**
* Converts the vector to a JSON string.
*/
def toJson: String
}
/**
* Factory methods for [[org.apache.spark.ml.linalg.Vector]].
* We don't use the name `Vector` because Scala imports
* [[scala.collection.immutable.Vector]] by default.
*/
object Vectors {
/**
* Creates a dense vector from its values.
*/
@varargs
def dense(firstValue: Double, otherValues: Double*): Vector =
new DenseVector((firstValue +: otherValues).toArray)
// A dummy implicit is used to avoid signature collision with the one generated by @varargs.
/**
* Creates a dense vector from a double array.
*/
def dense(values: Array[Double]): Vector = new DenseVector(values)
/**
* Creates a sparse vector providing its index array and value array.
*
* @param size vector size.
* @param indices index array, must be strictly increasing.
* @param values value array, must have the same length as indices.
*/
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector =
new SparseVector(size, indices, values)
/**
* Creates a sparse vector using unordered (index, value) pairs.
*
* @param size vector size.
* @param elements vector elements in (index, value) pairs.
*/
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
require(size > 0, "The size of the requested sparse vector must be greater than 0.")
val (indices, values) = elements.sortBy(_._1).unzip
var prev = -1
indices.foreach { i =>
require(prev < i, s"Found duplicate indices: $i.")
prev = i
}
require(prev < size, s"You may not write an element to index $prev because the declared " +
s"size of your vector is $size")
new SparseVector(size, indices.toArray, values.toArray)
}
/**
* Creates a sparse vector using unordered (index, value) pairs in a Java friendly way.
*
* @param size vector size.
* @param elements vector elements in (index, value) pairs.
*/
def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = {
sparse(size, elements.asScala.map { case (i, x) =>
(i.intValue(), x.doubleValue())
}.toSeq)
}
/**
* Creates a vector of all zeros.
*
* @param size vector size
* @return a zero vector
*/
def zeros(size: Int): Vector = {
new DenseVector(new Array[Double](size))
}
/**
* Parses the JSON representation of a vector into a [[Vector]].
*/
def fromJson(json: String): Vector = {
implicit val formats = DefaultFormats
val jValue = parseJson(json)
(jValue \ "type").extract[Int] match {
case 0 => // sparse
val size = (jValue \ "size").extract[Int]
val indices = (jValue \ "indices").extract[Seq[Int]].toArray
val values = (jValue \ "values").extract[Seq[Double]].toArray
sparse(size, indices, values)
case 1 => // dense
val values = (jValue \ "values").extract[Seq[Double]].toArray
dense(values)
case _ =>
throw new IllegalArgumentException(s"Cannot parse $json into a vector.")
}
}
/**
* Creates a vector instance from a breeze vector.
*/
private[spark] def fromBreeze(breezeVector: BV[Double]): Vector = {
breezeVector match {
case v: BDV[Double] =>
if (v.offset == 0 && v.stride == 1 && v.length == v.data.length) {
new DenseVector(v.data)
} else {
new DenseVector(v.toArray) // Can't use underlying array directly, so make a new one
}
case v: BSV[Double] =>
if (v.index.length == v.used) {
new SparseVector(v.length, v.index, v.data)
} else {
new SparseVector(v.length, v.index.slice(0, v.used), v.data.slice(0, v.used))
}
case v: BV[_] =>
sys.error("Unsupported Breeze vector type: " + v.getClass.getName)
}
}
/**
* Returns the p-norm of this vector.
* @param vector input vector.
* @param p norm.
* @return norm in L^p^ space.
*/
def norm(vector: Vector, p: Double): Double = {
require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
s"You specified p=$p.")
val values = vector match {
case DenseVector(vs) => vs
case SparseVector(n, ids, vs) => vs
case v => throw new IllegalArgumentException("Do not support vector type " + v.getClass)
}
val size = values.length
if (p == 1) {
var sum = 0.0
var i = 0
while (i < size) {
sum += math.abs(values(i))
i += 1
}
sum
} else if (p == 2) {
var sum = 0.0
var i = 0
while (i < size) {
sum += values(i) * values(i)
i += 1
}
math.sqrt(sum)
} else if (p == Double.PositiveInfinity) {
var max = 0.0
var i = 0
while (i < size) {
val value = math.abs(values(i))
if (value > max) max = value
i += 1
}
max
} else {
var sum = 0.0
var i = 0
while (i < size) {
sum += math.pow(math.abs(values(i)), p)
i += 1
}
math.pow(sum, 1.0 / p)
}
}
/**
* Returns the squared distance between two Vectors.
* @param v1 first Vector.
* @param v2 second Vector.
* @return squared distance between two Vectors.
*/
def sqdist(v1: Vector, v2: Vector): Double = {
require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" +
s"=${v2.size}.")
var squaredDistance = 0.0
(v1, v2) match {
case (v1: SparseVector, v2: SparseVector) =>
val v1Values = v1.values
val v1Indices = v1.indices
val v2Values = v2.values
val v2Indices = v2.indices
val nnzv1 = v1Indices.length
val nnzv2 = v2Indices.length
var kv1 = 0
var kv2 = 0
while (kv1 < nnzv1 || kv2 < nnzv2) {
var score = 0.0
if (kv2 >= nnzv2 || (kv1 < nnzv1 && v1Indices(kv1) < v2Indices(kv2))) {
score = v1Values(kv1)
kv1 += 1
} else if (kv1 >= nnzv1 || (kv2 < nnzv2 && v2Indices(kv2) < v1Indices(kv1))) {
score = v2Values(kv2)
kv2 += 1
} else {
score = v1Values(kv1) - v2Values(kv2)
kv1 += 1
kv2 += 1
}
squaredDistance += score * score
}
case (v1: SparseVector, v2: DenseVector) =>
squaredDistance = sqdist(v1, v2)
case (v1: DenseVector, v2: SparseVector) =>
squaredDistance = sqdist(v2, v1)
case (DenseVector(vv1), DenseVector(vv2)) =>
var kv = 0
val sz = vv1.length
while (kv < sz) {
val score = vv1(kv) - vv2(kv)
squaredDistance += score * score
kv += 1
}
case _ =>
throw new IllegalArgumentException("Do not support vector type " + v1.getClass +
" and " + v2.getClass)
}
squaredDistance
}
/**
* Returns the squared distance between DenseVector and SparseVector.
*/
private[ml] def sqdist(v1: SparseVector, v2: DenseVector): Double = {
var kv1 = 0
var kv2 = 0
val indices = v1.indices
var squaredDistance = 0.0
val nnzv1 = indices.length
val nnzv2 = v2.size
var iv1 = if (nnzv1 > 0) indices(kv1) else -1
while (kv2 < nnzv2) {
var score = 0.0
if (kv2 != iv1) {
score = v2(kv2)
} else {
score = v1.values(kv1) - v2(kv2)
if (kv1 < nnzv1 - 1) {
kv1 += 1
iv1 = indices(kv1)
}
}
squaredDistance += score * score
kv2 += 1
}
squaredDistance
}
/**
* Check equality between sparse/dense vectors
*/
private[ml] def equals(
v1Indices: IndexedSeq[Int],
v1Values: Array[Double],
v2Indices: IndexedSeq[Int],
v2Values: Array[Double]): Boolean = {
val v1Size = v1Values.length
val v2Size = v2Values.length
var k1 = 0
var k2 = 0
var allEqual = true
while (allEqual) {
while (k1 < v1Size && v1Values(k1) == 0) k1 += 1
while (k2 < v2Size && v2Values(k2) == 0) k2 += 1
if (k1 >= v1Size || k2 >= v2Size) {
return k1 >= v1Size && k2 >= v2Size // check end alignment
}
allEqual = v1Indices(k1) == v2Indices(k2) && v1Values(k1) == v2Values(k2)
k1 += 1
k2 += 1
}
allEqual
}
/** Max number of nonzero entries used in computing hash code. */
private[linalg] val MAX_HASH_NNZ = 128
}
/**
* A dense vector represented by a value array.
*/
class DenseVector (val values: Array[Double]) extends Vector {
override def size: Int = values.length
override def toString: String = values.mkString("[", ",", "]")
override def toArray: Array[Double] = values
private[spark] override def toBreeze: BV[Double] = new BDV[Double](values)
override def apply(i: Int): Double = values(i)
override def copy: DenseVector = {
new DenseVector(values.clone())
}
override def foreachActive(f: (Int, Double) => Unit): Unit = {
var i = 0
val localValuesSize = values.length
val localValues = values
while (i < localValuesSize) {
f(i, localValues(i))
i += 1
}
}
override def hashCode(): Int = {
var result: Int = 31 + size
var i = 0
val end = values.length
var nnz = 0
while (i < end && nnz < Vectors.MAX_HASH_NNZ) {
val v = values(i)
if (v != 0.0) {
result = 31 * result + i
val bits = java.lang.Double.doubleToLongBits(values(i))
result = 31 * result + (bits ^ (bits >>> 32)).toInt
nnz += 1
}
i += 1
}
result
}
override def numActives: Int = size
override def numNonzeros: Int = {
// same as values.count(_ != 0.0) but faster
var nnz = 0
values.foreach { v =>
if (v != 0.0) {
nnz += 1
}
}
nnz
}
override def toSparse: SparseVector = {
val nnz = numNonzeros
val ii = new Array[Int](nnz)
val vv = new Array[Double](nnz)
var k = 0
foreachActive { (i, v) =>
if (v != 0) {
ii(k) = i
vv(k) = v
k += 1
}
}
new SparseVector(size, ii, vv)
}
override def argmax: Int = {
if (size == 0) {
-1
} else {
var maxIdx = 0
var maxValue = values(0)
var i = 1
while (i < size) {
if (values(i) > maxValue) {
maxIdx = i
maxValue = values(i)
}
i += 1
}
maxIdx
}
}
override def toJson: String = {
val jValue = ("type" -> 1) ~ ("values" -> values.toSeq)
compact(render(jValue))
}
}
object DenseVector {
/** Extracts the value array from a dense vector. */
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
}
/**
* A sparse vector represented by an index array and an value array.
*
* @param size size of the vector.
* @param indices index array, assume to be strictly increasing.
* @param values value array, must have the same length as the index array.
*/
class SparseVector (
override val size: Int,
val indices: Array[Int],
val values: Array[Double]) extends Vector {
require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
s" indices match the dimension of the values. You provided ${indices.length} indices and " +
s" ${values.length} values.")
require(indices.length <= size, s"You provided ${indices.length} indices and values, " +
s"which exceeds the specified vector size ${size}.")
override def toString: String =
s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})"
override def toArray: Array[Double] = {
val data = new Array[Double](size)
var i = 0
val nnz = indices.length
while (i < nnz) {
data(indices(i)) = values(i)
i += 1
}
data
}
override def copy: SparseVector = {
new SparseVector(size, indices.clone(), values.clone())
}
private[spark] override def toBreeze: BV[Double] = new BSV[Double](indices, values, size)
override def foreachActive(f: (Int, Double) => Unit): Unit = {
var i = 0
val localValuesSize = values.length
val localIndices = indices
val localValues = values
while (i < localValuesSize) {
f(localIndices(i), localValues(i))
i += 1
}
}
override def hashCode(): Int = {
var result: Int = 31 + size
val end = values.length
var k = 0
var nnz = 0
while (k < end && nnz < Vectors.MAX_HASH_NNZ) {
val v = values(k)
if (v != 0.0) {
val i = indices(k)
result = 31 * result + i
val bits = java.lang.Double.doubleToLongBits(v)
result = 31 * result + (bits ^ (bits >>> 32)).toInt
nnz += 1
}
k += 1
}
result
}
override def numActives: Int = values.length
override def numNonzeros: Int = {
var nnz = 0
values.foreach { v =>
if (v != 0.0) {
nnz += 1
}
}
nnz
}
override def toSparse: SparseVector = {
val nnz = numNonzeros
if (nnz == numActives) {
this
} else {
val ii = new Array[Int](nnz)
val vv = new Array[Double](nnz)
var k = 0
foreachActive { (i, v) =>
if (v != 0.0) {
ii(k) = i
vv(k) = v
k += 1
}
}
new SparseVector(size, ii, vv)
}
}
override def argmax: Int = {
if (size == 0) {
-1
} else {
// Find the max active entry.
var maxIdx = indices(0)
var maxValue = values(0)
var maxJ = 0
var j = 1
val na = numActives
while (j < na) {
val v = values(j)
if (v > maxValue) {
maxValue = v
maxIdx = indices(j)
maxJ = j
}
j += 1
}
// If the max active entry is nonpositive and there exists inactive ones, find the first zero.
if (maxValue <= 0.0 && na < size) {
if (maxValue == 0.0) {
// If there exists an inactive entry before maxIdx, find it and return its index.
if (maxJ < maxIdx) {
var k = 0
while (k < maxJ && indices(k) == k) {
k += 1
}
maxIdx = k
}
} else {
// If the max active value is negative, find and return the first inactive index.
var k = 0
while (k < na && indices(k) == k) {
k += 1
}
maxIdx = k
}
}
maxIdx
}
}
/**
* Create a slice of this vector based on the given indices.
* @param selectedIndices Unsorted list of indices into the vector.
* This does NOT do bound checking.
* @return New SparseVector with values in the order specified by the given indices.
*
* NOTE: The API needs to be discussed before making this public.
* Also, if we have a version assuming indices are sorted, we should optimize it.
*/
private[spark] def slice(selectedIndices: Array[Int]): SparseVector = {
var currentIdx = 0
val (sliceInds, sliceVals) = selectedIndices.flatMap { origIdx =>
val iIdx = java.util.Arrays.binarySearch(this.indices, origIdx)
val i_v = if (iIdx >= 0) {
Iterator((currentIdx, this.values(iIdx)))
} else {
Iterator()
}
currentIdx += 1
i_v
}.unzip
new SparseVector(selectedIndices.length, sliceInds.toArray, sliceVals.toArray)
}
override def toJson: String = {
val jValue = ("type" -> 0) ~
("size" -> size) ~
("indices" -> indices.toSeq) ~
("values" -> values.toSeq)
compact(render(jValue))
}
}
object SparseVector {
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
Some((sv.size, sv.indices, sv.values))
}

View file

@ -1,28 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml
import org.scalatest.FunSuite // scalastyle:ignore funsuite
// This is testing if the new build works. To be removed soon.
class DummyTestingSuite extends FunSuite { // scalastyle:ignore funsuite
test("This is testing if the new build works.") {
assert(DummyTesting.add10(15) === 25)
}
}

View file

@ -17,7 +17,14 @@
package org.apache.spark.ml
// This is a private class testing if the new build works. To be removed soon.
private[ml] object DummyTesting {
private[ml] def add10(input: Double): Double = input + 10
// scalastyle:off
import org.scalatest.{BeforeAndAfterAll, FunSuite}
/**
* Base abstract class for all unit tests in Spark for handling common functionality.
*/
private[spark] abstract class SparkMLFunSuite
extends FunSuite
with BeforeAndAfterAll {
// scalastyle:on
}

View file

@ -0,0 +1,408 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml.linalg
import org.apache.spark.ml.SparkMLFunSuite
import org.apache.spark.ml.linalg.BLAS._
import org.apache.spark.ml.util.TestingUtils._
class BLASSuite extends SparkMLFunSuite {
test("copy") {
val sx = Vectors.sparse(4, Array(0, 2), Array(1.0, -2.0))
val dx = Vectors.dense(1.0, 0.0, -2.0, 0.0)
val sy = Vectors.sparse(4, Array(0, 1, 3), Array(2.0, 1.0, 1.0))
val dy = Array(2.0, 1.0, 0.0, 1.0)
val dy1 = Vectors.dense(dy.clone())
copy(sx, dy1)
assert(dy1 ~== dx absTol 1e-15)
val dy2 = Vectors.dense(dy.clone())
copy(dx, dy2)
assert(dy2 ~== dx absTol 1e-15)
intercept[IllegalArgumentException] {
copy(sx, sy)
}
intercept[IllegalArgumentException] {
copy(dx, sy)
}
withClue("vector sizes must match") {
intercept[Exception] {
copy(sx, Vectors.dense(0.0, 1.0, 2.0))
}
}
}
test("scal") {
val a = 0.1
val sx = Vectors.sparse(3, Array(0, 2), Array(1.0, -2.0))
val dx = Vectors.dense(1.0, 0.0, -2.0)
scal(a, sx)
assert(sx ~== Vectors.sparse(3, Array(0, 2), Array(0.1, -0.2)) absTol 1e-15)
scal(a, dx)
assert(dx ~== Vectors.dense(0.1, 0.0, -0.2) absTol 1e-15)
}
test("axpy") {
val alpha = 0.1
val sx = Vectors.sparse(3, Array(0, 2), Array(1.0, -2.0))
val dx = Vectors.dense(1.0, 0.0, -2.0)
val dy = Array(2.0, 1.0, 0.0)
val expected = Vectors.dense(2.1, 1.0, -0.2)
val dy1 = Vectors.dense(dy.clone())
axpy(alpha, sx, dy1)
assert(dy1 ~== expected absTol 1e-15)
val dy2 = Vectors.dense(dy.clone())
axpy(alpha, dx, dy2)
assert(dy2 ~== expected absTol 1e-15)
val sy = Vectors.sparse(4, Array(0, 1), Array(2.0, 1.0))
intercept[IllegalArgumentException] {
axpy(alpha, sx, sy)
}
intercept[IllegalArgumentException] {
axpy(alpha, dx, sy)
}
withClue("vector sizes must match") {
intercept[Exception] {
axpy(alpha, sx, Vectors.dense(1.0, 2.0))
}
}
}
test("dot") {
val sx = Vectors.sparse(3, Array(0, 2), Array(1.0, -2.0))
val dx = Vectors.dense(1.0, 0.0, -2.0)
val sy = Vectors.sparse(3, Array(0, 1), Array(2.0, 1.0))
val dy = Vectors.dense(2.0, 1.0, 0.0)
assert(dot(sx, sy) ~== 2.0 absTol 1e-15)
assert(dot(sy, sx) ~== 2.0 absTol 1e-15)
assert(dot(sx, dy) ~== 2.0 absTol 1e-15)
assert(dot(dy, sx) ~== 2.0 absTol 1e-15)
assert(dot(dx, dy) ~== 2.0 absTol 1e-15)
assert(dot(dy, dx) ~== 2.0 absTol 1e-15)
assert(dot(sx, sx) ~== 5.0 absTol 1e-15)
assert(dot(dx, dx) ~== 5.0 absTol 1e-15)
assert(dot(sx, dx) ~== 5.0 absTol 1e-15)
assert(dot(dx, sx) ~== 5.0 absTol 1e-15)
val sx1 = Vectors.sparse(10, Array(0, 3, 5, 7, 8), Array(1.0, 2.0, 3.0, 4.0, 5.0))
val sx2 = Vectors.sparse(10, Array(1, 3, 6, 7, 9), Array(1.0, 2.0, 3.0, 4.0, 5.0))
assert(dot(sx1, sx2) ~== 20.0 absTol 1e-15)
assert(dot(sx2, sx1) ~== 20.0 absTol 1e-15)
withClue("vector sizes must match") {
intercept[Exception] {
dot(sx, Vectors.dense(2.0, 1.0))
}
}
}
test("spr") {
// test dense vector
val alpha = 0.1
val x = new DenseVector(Array(1.0, 2, 2.1, 4))
val U = new DenseVector(Array(1.0, 2, 2, 3, 3, 3, 4, 4, 4, 4))
val expected = new DenseVector(Array(1.1, 2.2, 2.4, 3.21, 3.42, 3.441, 4.4, 4.8, 4.84, 5.6))
spr(alpha, x, U)
assert(U ~== expected absTol 1e-9)
val matrix33 = new DenseVector(Array(1.0, 2, 3, 4, 5))
withClue("Size of vector must match the rank of matrix") {
intercept[Exception] {
spr(alpha, x, matrix33)
}
}
// test sparse vector
val sv = new SparseVector(4, Array(0, 3), Array(1.0, 2))
val U2 = new DenseVector(Array(1.0, 2, 2, 3, 3, 3, 4, 4, 4, 4))
spr(0.1, sv, U2)
val expectedSparse = new DenseVector(Array(1.1, 2.0, 2.0, 3.0, 3.0, 3.0, 4.2, 4.0, 4.0, 4.4))
assert(U2 ~== expectedSparse absTol 1e-15)
}
test("syr") {
val dA = new DenseMatrix(4, 4,
Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0, 3.1, 4.6, 3.0, 0.8))
val x = new DenseVector(Array(0.0, 2.7, 3.5, 2.1))
val alpha = 0.15
val expected = new DenseMatrix(4, 4,
Array(0.0, 1.2, 2.2, 3.1, 1.2, 4.2935, 6.7175, 5.4505, 2.2, 6.7175, 3.6375, 4.1025, 3.1,
5.4505, 4.1025, 1.4615))
syr(alpha, x, dA)
assert(dA ~== expected absTol 1e-15)
val dB =
new DenseMatrix(3, 4, Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0))
withClue("Matrix A must be a symmetric Matrix") {
intercept[Exception] {
syr(alpha, x, dB)
}
}
val dC =
new DenseMatrix(3, 3, Array(0.0, 1.2, 2.2, 1.2, 3.2, 5.3, 2.2, 5.3, 1.8))
withClue("Size of vector must match the rank of matrix") {
intercept[Exception] {
syr(alpha, x, dC)
}
}
val y = new DenseVector(Array(0.0, 2.7, 3.5, 2.1, 1.5))
withClue("Size of vector must match the rank of matrix") {
intercept[Exception] {
syr(alpha, y, dA)
}
}
val xSparse = new SparseVector(4, Array(0, 2, 3), Array(1.0, 3.0, 4.0))
val dD = new DenseMatrix(4, 4,
Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0, 3.1, 4.6, 3.0, 0.8))
syr(0.1, xSparse, dD)
val expectedSparse = new DenseMatrix(4, 4,
Array(0.1, 1.2, 2.5, 3.5, 1.2, 3.2, 5.3, 4.6, 2.5, 5.3, 2.7, 4.2, 3.5, 4.6, 4.2, 2.4))
assert(dD ~== expectedSparse absTol 1e-15)
}
test("gemm") {
val dA =
new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))
val B = new DenseMatrix(3, 2, Array(1.0, 0.0, 0.0, 0.0, 2.0, 1.0))
val expected = new DenseMatrix(4, 2, Array(0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 3.0))
val BTman = new DenseMatrix(2, 3, Array(1.0, 0.0, 0.0, 2.0, 0.0, 1.0))
val BT = B.transpose
assert(dA.multiply(B) ~== expected absTol 1e-15)
assert(sA.multiply(B) ~== expected absTol 1e-15)
val C1 = new DenseMatrix(4, 2, Array(1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0))
val C2 = C1.copy
val C3 = C1.copy
val C4 = C1.copy
val C5 = C1.copy
val C6 = C1.copy
val C7 = C1.copy
val C8 = C1.copy
val C9 = C1.copy
val C10 = C1.copy
val C11 = C1.copy
val C12 = C1.copy
val C13 = C1.copy
val C14 = C1.copy
val C15 = C1.copy
val C16 = C1.copy
val C17 = C1.copy
val expected2 = new DenseMatrix(4, 2, Array(2.0, 1.0, 4.0, 2.0, 4.0, 0.0, 4.0, 3.0))
val expected3 = new DenseMatrix(4, 2, Array(2.0, 2.0, 4.0, 2.0, 8.0, 0.0, 6.0, 6.0))
val expected4 = new DenseMatrix(4, 2, Array(5.0, 0.0, 10.0, 5.0, 0.0, 0.0, 5.0, 0.0))
val expected5 = C1.copy
gemm(1.0, dA, B, 2.0, C1)
gemm(1.0, sA, B, 2.0, C2)
gemm(2.0, dA, B, 2.0, C3)
gemm(2.0, sA, B, 2.0, C4)
assert(C1 ~== expected2 absTol 1e-15)
assert(C2 ~== expected2 absTol 1e-15)
assert(C3 ~== expected3 absTol 1e-15)
assert(C4 ~== expected3 absTol 1e-15)
gemm(1.0, dA, B, 0.0, C17)
assert(C17 ~== expected absTol 1e-15)
gemm(1.0, sA, B, 0.0, C17)
assert(C17 ~== expected absTol 1e-15)
withClue("columns of A don't match the rows of B") {
intercept[Exception] {
gemm(1.0, dA.transpose, B, 2.0, C1)
}
}
val dATman =
new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
val sATman =
new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))
val dATT = dATman.transpose
val sATT = sATman.transpose
val BTT = BTman.transpose.asInstanceOf[DenseMatrix]
assert(dATT.multiply(B) ~== expected absTol 1e-15)
assert(sATT.multiply(B) ~== expected absTol 1e-15)
assert(dATT.multiply(BTT) ~== expected absTol 1e-15)
assert(sATT.multiply(BTT) ~== expected absTol 1e-15)
gemm(1.0, dATT, BTT, 2.0, C5)
gemm(1.0, sATT, BTT, 2.0, C6)
gemm(2.0, dATT, BTT, 2.0, C7)
gemm(2.0, sATT, BTT, 2.0, C8)
gemm(1.0, dA, BTT, 2.0, C9)
gemm(1.0, sA, BTT, 2.0, C10)
gemm(2.0, dA, BTT, 2.0, C11)
gemm(2.0, sA, BTT, 2.0, C12)
assert(C5 ~== expected2 absTol 1e-15)
assert(C6 ~== expected2 absTol 1e-15)
assert(C7 ~== expected3 absTol 1e-15)
assert(C8 ~== expected3 absTol 1e-15)
assert(C9 ~== expected2 absTol 1e-15)
assert(C10 ~== expected2 absTol 1e-15)
assert(C11 ~== expected3 absTol 1e-15)
assert(C12 ~== expected3 absTol 1e-15)
gemm(0, dA, B, 5, C13)
gemm(0, sA, B, 5, C14)
gemm(0, dA, B, 1, C15)
gemm(0, sA, B, 1, C16)
assert(C13 ~== expected4 absTol 1e-15)
assert(C14 ~== expected4 absTol 1e-15)
assert(C15 ~== expected5 absTol 1e-15)
assert(C16 ~== expected5 absTol 1e-15)
}
test("gemv") {
val dA =
new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))
val dA2 =
new DenseMatrix(4, 3, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0), true)
val sA2 =
new SparseMatrix(4, 3, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0),
true)
val dx = new DenseVector(Array(1.0, 2.0, 3.0))
val sx = dx.toSparse
val expected = new DenseVector(Array(4.0, 1.0, 2.0, 9.0))
assert(dA.multiply(dx) ~== expected absTol 1e-15)
assert(sA.multiply(dx) ~== expected absTol 1e-15)
assert(dA.multiply(sx) ~== expected absTol 1e-15)
assert(sA.multiply(sx) ~== expected absTol 1e-15)
val y1 = new DenseVector(Array(1.0, 3.0, 1.0, 0.0))
val y2 = y1.copy
val y3 = y1.copy
val y4 = y1.copy
val y5 = y1.copy
val y6 = y1.copy
val y7 = y1.copy
val y8 = y1.copy
val y9 = y1.copy
val y10 = y1.copy
val y11 = y1.copy
val y12 = y1.copy
val y13 = y1.copy
val y14 = y1.copy
val y15 = y1.copy
val y16 = y1.copy
val expected2 = new DenseVector(Array(6.0, 7.0, 4.0, 9.0))
val expected3 = new DenseVector(Array(10.0, 8.0, 6.0, 18.0))
gemv(1.0, dA, dx, 2.0, y1)
gemv(1.0, sA, dx, 2.0, y2)
gemv(1.0, dA, sx, 2.0, y3)
gemv(1.0, sA, sx, 2.0, y4)
gemv(1.0, dA2, dx, 2.0, y5)
gemv(1.0, sA2, dx, 2.0, y6)
gemv(1.0, dA2, sx, 2.0, y7)
gemv(1.0, sA2, sx, 2.0, y8)
gemv(2.0, dA, dx, 2.0, y9)
gemv(2.0, sA, dx, 2.0, y10)
gemv(2.0, dA, sx, 2.0, y11)
gemv(2.0, sA, sx, 2.0, y12)
gemv(2.0, dA2, dx, 2.0, y13)
gemv(2.0, sA2, dx, 2.0, y14)
gemv(2.0, dA2, sx, 2.0, y15)
gemv(2.0, sA2, sx, 2.0, y16)
assert(y1 ~== expected2 absTol 1e-15)
assert(y2 ~== expected2 absTol 1e-15)
assert(y3 ~== expected2 absTol 1e-15)
assert(y4 ~== expected2 absTol 1e-15)
assert(y5 ~== expected2 absTol 1e-15)
assert(y6 ~== expected2 absTol 1e-15)
assert(y7 ~== expected2 absTol 1e-15)
assert(y8 ~== expected2 absTol 1e-15)
assert(y9 ~== expected3 absTol 1e-15)
assert(y10 ~== expected3 absTol 1e-15)
assert(y11 ~== expected3 absTol 1e-15)
assert(y12 ~== expected3 absTol 1e-15)
assert(y13 ~== expected3 absTol 1e-15)
assert(y14 ~== expected3 absTol 1e-15)
assert(y15 ~== expected3 absTol 1e-15)
assert(y16 ~== expected3 absTol 1e-15)
withClue("columns of A don't match the rows of B") {
intercept[Exception] {
gemv(1.0, dA.transpose, dx, 2.0, y1)
}
intercept[Exception] {
gemv(1.0, sA.transpose, dx, 2.0, y1)
}
intercept[Exception] {
gemv(1.0, dA.transpose, sx, 2.0, y1)
}
intercept[Exception] {
gemv(1.0, sA.transpose, sx, 2.0, y1)
}
}
val dAT =
new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
val sAT =
new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))
val dATT = dAT.transpose
val sATT = sAT.transpose
assert(dATT.multiply(dx) ~== expected absTol 1e-15)
assert(sATT.multiply(dx) ~== expected absTol 1e-15)
assert(dATT.multiply(sx) ~== expected absTol 1e-15)
assert(sATT.multiply(sx) ~== expected absTol 1e-15)
}
}

View file

@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml.linalg
import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM}
import org.apache.spark.ml.SparkMLFunSuite
class BreezeMatrixConversionSuite extends SparkMLFunSuite {
test("dense matrix to breeze") {
val mat = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
val breeze = mat.toBreeze.asInstanceOf[BDM[Double]]
assert(breeze.rows === mat.numRows)
assert(breeze.cols === mat.numCols)
assert(breeze.data.eq(mat.asInstanceOf[DenseMatrix].values), "should not copy data")
}
test("dense breeze matrix to matrix") {
val breeze = new BDM[Double](3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
val mat = Matrices.fromBreeze(breeze).asInstanceOf[DenseMatrix]
assert(mat.numRows === breeze.rows)
assert(mat.numCols === breeze.cols)
assert(mat.values.eq(breeze.data), "should not copy data")
// transposed matrix
val matTransposed = Matrices.fromBreeze(breeze.t).asInstanceOf[DenseMatrix]
assert(matTransposed.numRows === breeze.cols)
assert(matTransposed.numCols === breeze.rows)
assert(matTransposed.values.eq(breeze.data), "should not copy data")
}
test("sparse matrix to breeze") {
val values = Array(1.0, 2.0, 4.0, 5.0)
val colPtrs = Array(0, 2, 4)
val rowIndices = Array(1, 2, 1, 2)
val mat = Matrices.sparse(3, 2, colPtrs, rowIndices, values)
val breeze = mat.toBreeze.asInstanceOf[BSM[Double]]
assert(breeze.rows === mat.numRows)
assert(breeze.cols === mat.numCols)
assert(breeze.data.eq(mat.asInstanceOf[SparseMatrix].values), "should not copy data")
}
test("sparse breeze matrix to sparse matrix") {
val values = Array(1.0, 2.0, 4.0, 5.0)
val colPtrs = Array(0, 2, 4)
val rowIndices = Array(1, 2, 1, 2)
val breeze = new BSM[Double](values, 3, 2, colPtrs, rowIndices)
val mat = Matrices.fromBreeze(breeze).asInstanceOf[SparseMatrix]
assert(mat.numRows === breeze.rows)
assert(mat.numCols === breeze.cols)
assert(mat.values.eq(breeze.data), "should not copy data")
val matTransposed = Matrices.fromBreeze(breeze.t).asInstanceOf[SparseMatrix]
assert(matTransposed.numRows === breeze.cols)
assert(matTransposed.numCols === breeze.rows)
assert(!matTransposed.values.eq(breeze.data), "has to copy data")
}
}

View file

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml.linalg
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}
import org.apache.spark.ml.SparkMLFunSuite
/**
* Test Breeze vector conversions.
*/
class BreezeVectorConversionSuite extends SparkMLFunSuite {
val arr = Array(0.1, 0.2, 0.3, 0.4)
val n = 20
val indices = Array(0, 3, 5, 10, 13)
val values = Array(0.1, 0.5, 0.3, -0.8, -1.0)
test("dense to breeze") {
val vec = Vectors.dense(arr)
assert(vec.toBreeze === new BDV[Double](arr))
}
test("sparse to breeze") {
val vec = Vectors.sparse(n, indices, values)
assert(vec.toBreeze === new BSV[Double](indices, values, n))
}
test("dense breeze to vector") {
val breeze = new BDV[Double](arr)
val vec = Vectors.fromBreeze(breeze).asInstanceOf[DenseVector]
assert(vec.size === arr.length)
assert(vec.values.eq(arr), "should not copy data")
}
test("sparse breeze to vector") {
val breeze = new BSV[Double](indices, values, n)
val vec = Vectors.fromBreeze(breeze).asInstanceOf[SparseVector]
assert(vec.size === n)
assert(vec.indices.eq(indices), "should not copy data")
assert(vec.values.eq(values), "should not copy data")
}
test("sparse breeze with partially-used arrays to vector") {
val activeSize = 3
val breeze = new BSV[Double](indices, values, activeSize, n)
val vec = Vectors.fromBreeze(breeze).asInstanceOf[SparseVector]
assert(vec.size === n)
assert(vec.indices === indices.slice(0, activeSize))
assert(vec.values === values.slice(0, activeSize))
}
}

View file

@ -0,0 +1,511 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml.linalg
import java.util.Random
import breeze.linalg.{CSCMatrix, Matrix => BM}
import org.mockito.Mockito.when
import org.scalatest.mock.MockitoSugar._
import scala.collection.mutable.{Map => MutableMap}
import org.apache.spark.ml.SparkMLFunSuite
import org.apache.spark.ml.util.TestingUtils._
class MatricesSuite extends SparkMLFunSuite {
test("dense matrix construction") {
val m = 3
val n = 2
val values = Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)
val mat = Matrices.dense(m, n, values).asInstanceOf[DenseMatrix]
assert(mat.numRows === m)
assert(mat.numCols === n)
assert(mat.values.eq(values), "should not copy data")
}
test("dense matrix construction with wrong dimension") {
intercept[RuntimeException] {
Matrices.dense(3, 2, Array(0.0, 1.0, 2.0))
}
}
test("sparse matrix construction") {
val m = 3
val n = 4
val values = Array(1.0, 2.0, 4.0, 5.0)
val colPtrs = Array(0, 2, 2, 4, 4)
val rowIndices = Array(1, 2, 1, 2)
val mat = Matrices.sparse(m, n, colPtrs, rowIndices, values).asInstanceOf[SparseMatrix]
assert(mat.numRows === m)
assert(mat.numCols === n)
assert(mat.values.eq(values), "should not copy data")
assert(mat.colPtrs.eq(colPtrs), "should not copy data")
assert(mat.rowIndices.eq(rowIndices), "should not copy data")
val entries: Array[(Int, Int, Double)] = Array((2, 2, 3.0), (1, 0, 1.0), (2, 0, 2.0),
(1, 2, 2.0), (2, 2, 2.0), (1, 2, 2.0), (0, 0, 0.0))
val mat2 = SparseMatrix.fromCOO(m, n, entries)
assert(mat.toBreeze === mat2.toBreeze)
assert(mat2.values.length == 4)
}
test("sparse matrix construction with wrong number of elements") {
intercept[IllegalArgumentException] {
Matrices.sparse(3, 2, Array(0, 1), Array(1, 2, 1), Array(0.0, 1.0, 2.0))
}
intercept[IllegalArgumentException] {
Matrices.sparse(3, 2, Array(0, 1, 2), Array(1, 2), Array(0.0, 1.0, 2.0))
}
}
test("index in matrices incorrect input") {
val sm = Matrices.sparse(3, 2, Array(0, 2, 3), Array(1, 2, 1), Array(0.0, 1.0, 2.0))
val dm = Matrices.dense(3, 2, Array(0.0, 2.3, 1.4, 3.2, 1.0, 9.1))
Array(sm, dm).foreach { mat =>
intercept[IllegalArgumentException] { mat.index(4, 1) }
intercept[IllegalArgumentException] { mat.index(1, 4) }
intercept[IllegalArgumentException] { mat.index(-1, 2) }
intercept[IllegalArgumentException] { mat.index(1, -2) }
}
}
test("equals") {
val dm1 = Matrices.dense(2, 2, Array(0.0, 1.0, 2.0, 3.0))
assert(dm1 === dm1)
assert(dm1 !== dm1.transpose)
val dm2 = Matrices.dense(2, 2, Array(0.0, 2.0, 1.0, 3.0))
assert(dm1 === dm2.transpose)
val sm1 = dm1.asInstanceOf[DenseMatrix].toSparse
assert(sm1 === sm1)
assert(sm1 === dm1)
assert(sm1 !== sm1.transpose)
val sm2 = dm2.asInstanceOf[DenseMatrix].toSparse
assert(sm1 === sm2.transpose)
assert(sm1 === dm2.transpose)
}
test("matrix copies are deep copies") {
val m = 3
val n = 2
val denseMat = Matrices.dense(m, n, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
val denseCopy = denseMat.copy
assert(!denseMat.toArray.eq(denseCopy.toArray))
val values = Array(1.0, 2.0, 4.0, 5.0)
val colPtrs = Array(0, 2, 4)
val rowIndices = Array(1, 2, 1, 2)
val sparseMat = Matrices.sparse(m, n, colPtrs, rowIndices, values)
val sparseCopy = sparseMat.copy
assert(!sparseMat.toArray.eq(sparseCopy.toArray))
}
test("matrix indexing and updating") {
val m = 3
val n = 2
val allValues = Array(0.0, 1.0, 2.0, 3.0, 4.0, 0.0)
val denseMat = new DenseMatrix(m, n, allValues)
assert(denseMat(0, 1) === 3.0)
assert(denseMat(0, 1) === denseMat.values(3))
assert(denseMat(0, 1) === denseMat(3))
assert(denseMat(0, 0) === 0.0)
denseMat.update(0, 0, 10.0)
assert(denseMat(0, 0) === 10.0)
assert(denseMat.values(0) === 10.0)
val sparseValues = Array(1.0, 2.0, 3.0, 4.0)
val colPtrs = Array(0, 2, 4)
val rowIndices = Array(1, 2, 0, 1)
val sparseMat = new SparseMatrix(m, n, colPtrs, rowIndices, sparseValues)
assert(sparseMat(0, 1) === 3.0)
assert(sparseMat(0, 1) === sparseMat.values(2))
assert(sparseMat(0, 0) === 0.0)
intercept[NoSuchElementException] {
sparseMat.update(0, 0, 10.0)
}
intercept[NoSuchElementException] {
sparseMat.update(2, 1, 10.0)
}
sparseMat.update(0, 1, 10.0)
assert(sparseMat(0, 1) === 10.0)
assert(sparseMat.values(2) === 10.0)
}
test("toSparse, toDense") {
val m = 3
val n = 2
val values = Array(1.0, 2.0, 4.0, 5.0)
val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
val colPtrs = Array(0, 2, 4)
val rowIndices = Array(0, 1, 1, 2)
val spMat1 = new SparseMatrix(m, n, colPtrs, rowIndices, values)
val deMat1 = new DenseMatrix(m, n, allValues)
val spMat2 = deMat1.toSparse
val deMat2 = spMat1.toDense
assert(spMat1.toBreeze === spMat2.toBreeze)
assert(deMat1.toBreeze === deMat2.toBreeze)
}
test("map, update") {
val m = 3
val n = 2
val values = Array(1.0, 2.0, 4.0, 5.0)
val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
val colPtrs = Array(0, 2, 4)
val rowIndices = Array(0, 1, 1, 2)
val spMat1 = new SparseMatrix(m, n, colPtrs, rowIndices, values)
val deMat1 = new DenseMatrix(m, n, allValues)
val deMat2 = deMat1.map(_ * 2)
val spMat2 = spMat1.map(_ * 2)
deMat1.update(_ * 2)
spMat1.update(_ * 2)
assert(spMat1.toArray === spMat2.toArray)
assert(deMat1.toArray === deMat2.toArray)
}
test("transpose") {
val dA =
new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))
val dAT = dA.transpose.asInstanceOf[DenseMatrix]
val sAT = sA.transpose.asInstanceOf[SparseMatrix]
val dATexpected =
new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
val sATexpected =
new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))
assert(dAT.toBreeze === dATexpected.toBreeze)
assert(sAT.toBreeze === sATexpected.toBreeze)
assert(dA(1, 0) === dAT(0, 1))
assert(dA(2, 1) === dAT(1, 2))
assert(sA(1, 0) === sAT(0, 1))
assert(sA(2, 1) === sAT(1, 2))
assert(!dA.toArray.eq(dAT.toArray), "has to have a new array")
assert(dA.values.eq(dAT.transpose.asInstanceOf[DenseMatrix].values), "should not copy array")
assert(dAT.toSparse.toBreeze === sATexpected.toBreeze)
assert(sAT.toDense.toBreeze === dATexpected.toBreeze)
}
test("foreachActive") {
val m = 3
val n = 2
val values = Array(1.0, 2.0, 4.0, 5.0)
val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
val colPtrs = Array(0, 2, 4)
val rowIndices = Array(0, 1, 1, 2)
val sp = new SparseMatrix(m, n, colPtrs, rowIndices, values)
val dn = new DenseMatrix(m, n, allValues)
val dnMap = MutableMap[(Int, Int), Double]()
dn.foreachActive { (i, j, value) =>
dnMap.put((i, j), value)
}
assert(dnMap.size === 6)
assert(dnMap(0, 0) === 1.0)
assert(dnMap(1, 0) === 2.0)
assert(dnMap(2, 0) === 0.0)
assert(dnMap(0, 1) === 0.0)
assert(dnMap(1, 1) === 4.0)
assert(dnMap(2, 1) === 5.0)
val spMap = MutableMap[(Int, Int), Double]()
sp.foreachActive { (i, j, value) =>
spMap.put((i, j), value)
}
assert(spMap.size === 4)
assert(spMap(0, 0) === 1.0)
assert(spMap(1, 0) === 2.0)
assert(spMap(1, 1) === 4.0)
assert(spMap(2, 1) === 5.0)
}
test("horzcat, vertcat, eye, speye") {
val m = 3
val n = 2
val values = Array(1.0, 2.0, 4.0, 5.0)
val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
val colPtrs = Array(0, 2, 4)
val rowIndices = Array(0, 1, 1, 2)
// transposed versions
val allValuesT = Array(1.0, 0.0, 2.0, 4.0, 0.0, 5.0)
val colPtrsT = Array(0, 1, 3, 4)
val rowIndicesT = Array(0, 0, 1, 1)
val spMat1 = new SparseMatrix(m, n, colPtrs, rowIndices, values)
val deMat1 = new DenseMatrix(m, n, allValues)
val spMat1T = new SparseMatrix(n, m, colPtrsT, rowIndicesT, values)
val deMat1T = new DenseMatrix(n, m, allValuesT)
// should equal spMat1 & deMat1 respectively
val spMat1TT = spMat1T.transpose
val deMat1TT = deMat1T.transpose
val deMat2 = Matrices.eye(3)
val spMat2 = Matrices.speye(3)
val deMat3 = Matrices.eye(2)
val spMat3 = Matrices.speye(2)
val spHorz = Matrices.horzcat(Array(spMat1, spMat2))
val spHorz2 = Matrices.horzcat(Array(spMat1, deMat2))
val spHorz3 = Matrices.horzcat(Array(deMat1, spMat2))
val deHorz1 = Matrices.horzcat(Array(deMat1, deMat2))
val deHorz2 = Matrices.horzcat(Array[Matrix]())
assert(deHorz1.numRows === 3)
assert(spHorz2.numRows === 3)
assert(spHorz3.numRows === 3)
assert(spHorz.numRows === 3)
assert(deHorz1.numCols === 5)
assert(spHorz2.numCols === 5)
assert(spHorz3.numCols === 5)
assert(spHorz.numCols === 5)
assert(deHorz2.numRows === 0)
assert(deHorz2.numCols === 0)
assert(deHorz2.toArray.length === 0)
assert(deHorz1 ~== spHorz2.asInstanceOf[SparseMatrix].toDense absTol 1e-15)
assert(spHorz2 ~== spHorz3 absTol 1e-15)
assert(spHorz(0, 0) === 1.0)
assert(spHorz(2, 1) === 5.0)
assert(spHorz(0, 2) === 1.0)
assert(spHorz(1, 2) === 0.0)
assert(spHorz(1, 3) === 1.0)
assert(spHorz(2, 4) === 1.0)
assert(spHorz(1, 4) === 0.0)
assert(deHorz1(0, 0) === 1.0)
assert(deHorz1(2, 1) === 5.0)
assert(deHorz1(0, 2) === 1.0)
assert(deHorz1(1, 2) == 0.0)
assert(deHorz1(1, 3) === 1.0)
assert(deHorz1(2, 4) === 1.0)
assert(deHorz1(1, 4) === 0.0)
// containing transposed matrices
val spHorzT = Matrices.horzcat(Array(spMat1TT, spMat2))
val spHorz2T = Matrices.horzcat(Array(spMat1TT, deMat2))
val spHorz3T = Matrices.horzcat(Array(deMat1TT, spMat2))
val deHorz1T = Matrices.horzcat(Array(deMat1TT, deMat2))
assert(deHorz1T ~== deHorz1 absTol 1e-15)
assert(spHorzT ~== spHorz absTol 1e-15)
assert(spHorz2T ~== spHorz2 absTol 1e-15)
assert(spHorz3T ~== spHorz3 absTol 1e-15)
intercept[IllegalArgumentException] {
Matrices.horzcat(Array(spMat1, spMat3))
}
intercept[IllegalArgumentException] {
Matrices.horzcat(Array(deMat1, spMat3))
}
val spVert = Matrices.vertcat(Array(spMat1, spMat3))
val deVert1 = Matrices.vertcat(Array(deMat1, deMat3))
val spVert2 = Matrices.vertcat(Array(spMat1, deMat3))
val spVert3 = Matrices.vertcat(Array(deMat1, spMat3))
val deVert2 = Matrices.vertcat(Array[Matrix]())
assert(deVert1.numRows === 5)
assert(spVert2.numRows === 5)
assert(spVert3.numRows === 5)
assert(spVert.numRows === 5)
assert(deVert1.numCols === 2)
assert(spVert2.numCols === 2)
assert(spVert3.numCols === 2)
assert(spVert.numCols === 2)
assert(deVert2.numRows === 0)
assert(deVert2.numCols === 0)
assert(deVert2.toArray.length === 0)
assert(deVert1 ~== spVert2.asInstanceOf[SparseMatrix].toDense absTol 1e-15)
assert(spVert2 ~== spVert3 absTol 1e-15)
assert(spVert(0, 0) === 1.0)
assert(spVert(2, 1) === 5.0)
assert(spVert(3, 0) === 1.0)
assert(spVert(3, 1) === 0.0)
assert(spVert(4, 1) === 1.0)
assert(deVert1(0, 0) === 1.0)
assert(deVert1(2, 1) === 5.0)
assert(deVert1(3, 0) === 1.0)
assert(deVert1(3, 1) === 0.0)
assert(deVert1(4, 1) === 1.0)
// containing transposed matrices
val spVertT = Matrices.vertcat(Array(spMat1TT, spMat3))
val deVert1T = Matrices.vertcat(Array(deMat1TT, deMat3))
val spVert2T = Matrices.vertcat(Array(spMat1TT, deMat3))
val spVert3T = Matrices.vertcat(Array(deMat1TT, spMat3))
assert(deVert1T ~== deVert1 absTol 1e-15)
assert(spVertT ~== spVert absTol 1e-15)
assert(spVert2T ~== spVert2 absTol 1e-15)
assert(spVert3T ~== spVert3 absTol 1e-15)
intercept[IllegalArgumentException] {
Matrices.vertcat(Array(spMat1, spMat2))
}
intercept[IllegalArgumentException] {
Matrices.vertcat(Array(deMat1, spMat2))
}
}
test("zeros") {
val mat = Matrices.zeros(2, 3).asInstanceOf[DenseMatrix]
assert(mat.numRows === 2)
assert(mat.numCols === 3)
assert(mat.values.forall(_ == 0.0))
}
test("ones") {
val mat = Matrices.ones(2, 3).asInstanceOf[DenseMatrix]
assert(mat.numRows === 2)
assert(mat.numCols === 3)
assert(mat.values.forall(_ == 1.0))
}
test("eye") {
val mat = Matrices.eye(2).asInstanceOf[DenseMatrix]
assert(mat.numCols === 2)
assert(mat.numCols === 2)
assert(mat.values.toSeq === Seq(1.0, 0.0, 0.0, 1.0))
}
test("rand") {
val rng = mock[Random]
when(rng.nextDouble()).thenReturn(1.0, 2.0, 3.0, 4.0)
val mat = Matrices.rand(2, 2, rng).asInstanceOf[DenseMatrix]
assert(mat.numRows === 2)
assert(mat.numCols === 2)
assert(mat.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
}
test("randn") {
val rng = mock[Random]
when(rng.nextGaussian()).thenReturn(1.0, 2.0, 3.0, 4.0)
val mat = Matrices.randn(2, 2, rng).asInstanceOf[DenseMatrix]
assert(mat.numRows === 2)
assert(mat.numCols === 2)
assert(mat.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
}
test("diag") {
val mat = Matrices.diag(Vectors.dense(1.0, 2.0)).asInstanceOf[DenseMatrix]
assert(mat.numRows === 2)
assert(mat.numCols === 2)
assert(mat.values.toSeq === Seq(1.0, 0.0, 0.0, 2.0))
}
test("sprand") {
val rng = mock[Random]
when(rng.nextInt(4)).thenReturn(0, 1, 1, 3, 2, 2, 0, 1, 3, 0)
when(rng.nextDouble()).thenReturn(1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
val mat = SparseMatrix.sprand(4, 4, 0.25, rng)
assert(mat.numRows === 4)
assert(mat.numCols === 4)
assert(mat.rowIndices.toSeq === Seq(3, 0, 2, 1))
assert(mat.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
val mat2 = SparseMatrix.sprand(2, 3, 1.0, rng)
assert(mat2.rowIndices.toSeq === Seq(0, 1, 0, 1, 0, 1))
assert(mat2.colPtrs.toSeq === Seq(0, 2, 4, 6))
}
test("sprandn") {
val rng = mock[Random]
when(rng.nextInt(4)).thenReturn(0, 1, 1, 3, 2, 2, 0, 1, 3, 0)
when(rng.nextGaussian()).thenReturn(1.0, 2.0, 3.0, 4.0)
val mat = SparseMatrix.sprandn(4, 4, 0.25, rng)
assert(mat.numRows === 4)
assert(mat.numCols === 4)
assert(mat.rowIndices.toSeq === Seq(3, 0, 2, 1))
assert(mat.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
}
test("toString") {
val empty = Matrices.ones(0, 0)
empty.toString(0, 0)
val mat = Matrices.rand(5, 10, new Random())
mat.toString(-1, -5)
mat.toString(0, 0)
mat.toString(Int.MinValue, Int.MinValue)
mat.toString(Int.MaxValue, Int.MaxValue)
var lines = mat.toString(6, 50).lines.toArray
assert(lines.size == 5 && lines.forall(_.size <= 50))
lines = mat.toString(5, 100).lines.toArray
assert(lines.size == 5 && lines.forall(_.size <= 100))
}
test("numNonzeros and numActives") {
val dm1 = Matrices.dense(3, 2, Array(0, 0, -1, 1, 0, 1))
assert(dm1.numNonzeros === 3)
assert(dm1.numActives === 6)
val sm1 = Matrices.sparse(3, 2, Array(0, 2, 3), Array(0, 2, 1), Array(0.0, -1.2, 0.0))
assert(sm1.numNonzeros === 1)
assert(sm1.numActives === 3)
}
test("fromBreeze with sparse matrix") {
// colPtr.last does NOT always equal to values.length in breeze SCSMatrix and
// invocation of compact() may be necessary. Refer to SPARK-11507
val bm1: BM[Double] = new CSCMatrix[Double](
Array(1.0, 1, 1), 3, 3, Array(0, 1, 2, 3), Array(0, 1, 2))
val bm2: BM[Double] = new CSCMatrix[Double](
Array(1.0, 2, 2, 4), 3, 3, Array(0, 0, 2, 4), Array(1, 2, 1, 2))
val sum = bm1 + bm2
Matrices.fromBreeze(sum)
}
test("row/col iterator") {
val dm = new DenseMatrix(3, 2, Array(0, 1, 2, 3, 4, 0))
val sm = dm.toSparse
val rows = Seq(Vectors.dense(0, 3), Vectors.dense(1, 4), Vectors.dense(2, 0))
val cols = Seq(Vectors.dense(0, 1, 2), Vectors.dense(3, 4, 0))
for (m <- Seq(dm, sm)) {
assert(m.rowIter.toSeq === rows)
assert(m.colIter.toSeq === cols)
assert(m.transpose.rowIter.toSeq === cols)
assert(m.transpose.colIter.toSeq === rows)
}
}
}

View file

@ -0,0 +1,358 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml.linalg
import scala.util.Random
import breeze.linalg.{squaredDistance => breezeSquaredDistance, DenseMatrix => BDM}
import org.json4s.jackson.JsonMethods.{parse => parseJson}
import org.apache.spark.ml.SparkMLFunSuite
import org.apache.spark.ml.util.TestingUtils._
class VectorsSuite extends SparkMLFunSuite {
val arr = Array(0.1, 0.0, 0.3, 0.4)
val n = 4
val indices = Array(0, 2, 3)
val values = Array(0.1, 0.3, 0.4)
test("dense vector construction with varargs") {
val vec = Vectors.dense(arr).asInstanceOf[DenseVector]
assert(vec.size === arr.length)
assert(vec.values.eq(arr))
}
test("dense vector construction from a double array") {
val vec = Vectors.dense(arr).asInstanceOf[DenseVector]
assert(vec.size === arr.length)
assert(vec.values.eq(arr))
}
test("sparse vector construction") {
val vec = Vectors.sparse(n, indices, values).asInstanceOf[SparseVector]
assert(vec.size === n)
assert(vec.indices.eq(indices))
assert(vec.values.eq(values))
}
test("sparse vector construction with unordered elements") {
val vec = Vectors.sparse(n, indices.zip(values).reverse).asInstanceOf[SparseVector]
assert(vec.size === n)
assert(vec.indices === indices)
assert(vec.values === values)
}
test("sparse vector construction with mismatched indices/values array") {
intercept[IllegalArgumentException] {
Vectors.sparse(4, Array(1, 2, 3), Array(3.0, 5.0, 7.0, 9.0))
}
intercept[IllegalArgumentException] {
Vectors.sparse(4, Array(1, 2, 3), Array(3.0, 5.0))
}
}
test("sparse vector construction with too many indices vs size") {
intercept[IllegalArgumentException] {
Vectors.sparse(3, Array(1, 2, 3, 4), Array(3.0, 5.0, 7.0, 9.0))
}
}
test("dense to array") {
val vec = Vectors.dense(arr).asInstanceOf[DenseVector]
assert(vec.toArray.eq(arr))
}
test("dense argmax") {
val vec = Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector]
assert(vec.argmax === -1)
val vec2 = Vectors.dense(arr).asInstanceOf[DenseVector]
assert(vec2.argmax === 3)
val vec3 = Vectors.dense(Array(-1.0, 0.0, -2.0, 1.0)).asInstanceOf[DenseVector]
assert(vec3.argmax === 3)
}
test("sparse to array") {
val vec = Vectors.sparse(n, indices, values).asInstanceOf[SparseVector]
assert(vec.toArray === arr)
}
test("sparse argmax") {
val vec = Vectors.sparse(0, Array.empty[Int], Array.empty[Double]).asInstanceOf[SparseVector]
assert(vec.argmax === -1)
val vec2 = Vectors.sparse(n, indices, values).asInstanceOf[SparseVector]
assert(vec2.argmax === 3)
val vec3 = Vectors.sparse(5, Array(2, 3, 4), Array(1.0, 0.0, -.7))
assert(vec3.argmax === 2)
// check for case that sparse vector is created with
// only negative values {0.0, 0.0,-1.0, -0.7, 0.0}
val vec4 = Vectors.sparse(5, Array(2, 3), Array(-1.0, -.7))
assert(vec4.argmax === 0)
val vec5 = Vectors.sparse(11, Array(0, 3, 10), Array(-1.0, -.7, 0.0))
assert(vec5.argmax === 1)
val vec6 = Vectors.sparse(11, Array(0, 1, 2), Array(-1.0, -.7, 0.0))
assert(vec6.argmax === 2)
val vec7 = Vectors.sparse(5, Array(0, 1, 3), Array(-1.0, 0.0, -.7))
assert(vec7.argmax === 1)
val vec8 = Vectors.sparse(5, Array(1, 2), Array(0.0, -1.0))
assert(vec8.argmax === 0)
}
test("vector equals") {
val dv1 = Vectors.dense(arr.clone())
val dv2 = Vectors.dense(arr.clone())
val sv1 = Vectors.sparse(n, indices.clone(), values.clone())
val sv2 = Vectors.sparse(n, indices.clone(), values.clone())
val vectors = Seq(dv1, dv2, sv1, sv2)
for (v <- vectors; u <- vectors) {
assert(v === u)
assert(v.## === u.##)
}
val another = Vectors.dense(0.1, 0.2, 0.3, 0.4)
for (v <- vectors) {
assert(v != another)
assert(v.## != another.##)
}
}
test("vectors equals with explicit 0") {
val dv1 = Vectors.dense(Array(0, 0.9, 0, 0.8, 0))
val sv1 = Vectors.sparse(5, Array(1, 3), Array(0.9, 0.8))
val sv2 = Vectors.sparse(5, Array(0, 1, 2, 3, 4), Array(0, 0.9, 0, 0.8, 0))
val vectors = Seq(dv1, sv1, sv2)
for (v <- vectors; u <- vectors) {
assert(v === u)
assert(v.## === u.##)
}
val another = Vectors.sparse(5, Array(0, 1, 3), Array(0, 0.9, 0.2))
for (v <- vectors) {
assert(v != another)
assert(v.## != another.##)
}
}
test("indexing dense vectors") {
val vec = Vectors.dense(1.0, 2.0, 3.0, 4.0)
assert(vec(0) === 1.0)
assert(vec(3) === 4.0)
}
test("indexing sparse vectors") {
val vec = Vectors.sparse(7, Array(0, 2, 4, 6), Array(1.0, 2.0, 3.0, 4.0))
assert(vec(0) === 1.0)
assert(vec(1) === 0.0)
assert(vec(2) === 2.0)
assert(vec(3) === 0.0)
assert(vec(6) === 4.0)
val vec2 = Vectors.sparse(8, Array(0, 2, 4, 6), Array(1.0, 2.0, 3.0, 4.0))
assert(vec2(6) === 4.0)
assert(vec2(7) === 0.0)
}
test("zeros") {
assert(Vectors.zeros(3) === Vectors.dense(0.0, 0.0, 0.0))
}
test("Vector.copy") {
val sv = Vectors.sparse(4, Array(0, 2), Array(1.0, 2.0))
val svCopy = sv.copy
(sv, svCopy) match {
case (sv: SparseVector, svCopy: SparseVector) =>
assert(sv.size === svCopy.size)
assert(sv.indices === svCopy.indices)
assert(sv.values === svCopy.values)
assert(!sv.indices.eq(svCopy.indices))
assert(!sv.values.eq(svCopy.values))
case _ =>
throw new RuntimeException(s"copy returned ${svCopy.getClass} on ${sv.getClass}.")
}
val dv = Vectors.dense(1.0, 0.0, 2.0)
val dvCopy = dv.copy
(dv, dvCopy) match {
case (dv: DenseVector, dvCopy: DenseVector) =>
assert(dv.size === dvCopy.size)
assert(dv.values === dvCopy.values)
assert(!dv.values.eq(dvCopy.values))
case _ =>
throw new RuntimeException(s"copy returned ${dvCopy.getClass} on ${dv.getClass}.")
}
}
test("fromBreeze") {
val x = BDM.zeros[Double](10, 10)
val v = Vectors.fromBreeze(x(::, 0))
assert(v.size === x.rows)
}
test("sqdist") {
val random = new Random()
for (m <- 1 until 1000 by 100) {
val nnz = random.nextInt(m)
val indices1 = random.shuffle(0 to m - 1).slice(0, nnz).sorted.toArray
val values1 = Array.fill(nnz)(random.nextDouble)
val sparseVector1 = Vectors.sparse(m, indices1, values1)
val indices2 = random.shuffle(0 to m - 1).slice(0, nnz).sorted.toArray
val values2 = Array.fill(nnz)(random.nextDouble)
val sparseVector2 = Vectors.sparse(m, indices2, values2)
val denseVector1 = Vectors.dense(sparseVector1.toArray)
val denseVector2 = Vectors.dense(sparseVector2.toArray)
val squaredDist = breezeSquaredDistance(sparseVector1.toBreeze, sparseVector2.toBreeze)
// SparseVector vs. SparseVector
assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8)
// DenseVector vs. SparseVector
assert(Vectors.sqdist(denseVector1, sparseVector2) ~== squaredDist relTol 1E-8)
// DenseVector vs. DenseVector
assert(Vectors.sqdist(denseVector1, denseVector2) ~== squaredDist relTol 1E-8)
}
}
test("foreachActive") {
val dv = Vectors.dense(0.0, 1.2, 3.1, 0.0)
val sv = Vectors.sparse(4, Seq((1, 1.2), (2, 3.1), (3, 0.0)))
val dvMap = scala.collection.mutable.Map[Int, Double]()
dv.foreachActive { (index, value) =>
dvMap.put(index, value)
}
assert(dvMap.size === 4)
assert(dvMap.get(0) === Some(0.0))
assert(dvMap.get(1) === Some(1.2))
assert(dvMap.get(2) === Some(3.1))
assert(dvMap.get(3) === Some(0.0))
val svMap = scala.collection.mutable.Map[Int, Double]()
sv.foreachActive { (index, value) =>
svMap.put(index, value)
}
assert(svMap.size === 3)
assert(svMap.get(1) === Some(1.2))
assert(svMap.get(2) === Some(3.1))
assert(svMap.get(3) === Some(0.0))
}
test("vector p-norm") {
val dv = Vectors.dense(0.0, -1.2, 3.1, 0.0, -4.5, 1.9)
val sv = Vectors.sparse(6, Seq((1, -1.2), (2, 3.1), (3, 0.0), (4, -4.5), (5, 1.9)))
assert(Vectors.norm(dv, 1.0) ~== dv.toArray.foldLeft(0.0)((a, v) =>
a + math.abs(v)) relTol 1E-8)
assert(Vectors.norm(sv, 1.0) ~== sv.toArray.foldLeft(0.0)((a, v) =>
a + math.abs(v)) relTol 1E-8)
assert(Vectors.norm(dv, 2.0) ~== math.sqrt(dv.toArray.foldLeft(0.0)((a, v) =>
a + v * v)) relTol 1E-8)
assert(Vectors.norm(sv, 2.0) ~== math.sqrt(sv.toArray.foldLeft(0.0)((a, v) =>
a + v * v)) relTol 1E-8)
assert(Vectors.norm(dv, Double.PositiveInfinity) ~== dv.toArray.map(math.abs).max relTol 1E-8)
assert(Vectors.norm(sv, Double.PositiveInfinity) ~== sv.toArray.map(math.abs).max relTol 1E-8)
assert(Vectors.norm(dv, 3.7) ~== math.pow(dv.toArray.foldLeft(0.0)((a, v) =>
a + math.pow(math.abs(v), 3.7)), 1.0 / 3.7) relTol 1E-8)
assert(Vectors.norm(sv, 3.7) ~== math.pow(sv.toArray.foldLeft(0.0)((a, v) =>
a + math.pow(math.abs(v), 3.7)), 1.0 / 3.7) relTol 1E-8)
}
test("Vector numActive and numNonzeros") {
val dv = Vectors.dense(0.0, 2.0, 3.0, 0.0)
assert(dv.numActives === 4)
assert(dv.numNonzeros === 2)
val sv = Vectors.sparse(4, Array(0, 1, 2), Array(0.0, 2.0, 3.0))
assert(sv.numActives === 3)
assert(sv.numNonzeros === 2)
}
test("Vector toSparse and toDense") {
val dv0 = Vectors.dense(0.0, 2.0, 3.0, 0.0)
assert(dv0.toDense === dv0)
val dv0s = dv0.toSparse
assert(dv0s.numActives === 2)
assert(dv0s === dv0)
val sv0 = Vectors.sparse(4, Array(0, 1, 2), Array(0.0, 2.0, 3.0))
assert(sv0.toDense === sv0)
val sv0s = sv0.toSparse
assert(sv0s.numActives === 2)
assert(sv0s === sv0)
}
test("Vector.compressed") {
val dv0 = Vectors.dense(1.0, 2.0, 3.0, 0.0)
val dv0c = dv0.compressed.asInstanceOf[DenseVector]
assert(dv0c === dv0)
val dv1 = Vectors.dense(0.0, 2.0, 0.0, 0.0)
val dv1c = dv1.compressed.asInstanceOf[SparseVector]
assert(dv1 === dv1c)
assert(dv1c.numActives === 1)
val sv0 = Vectors.sparse(4, Array(1, 2), Array(2.0, 0.0))
val sv0c = sv0.compressed.asInstanceOf[SparseVector]
assert(sv0 === sv0c)
assert(sv0c.numActives === 1)
val sv1 = Vectors.sparse(4, Array(0, 1, 2), Array(1.0, 2.0, 3.0))
val sv1c = sv1.compressed.asInstanceOf[DenseVector]
assert(sv1 === sv1c)
}
test("SparseVector.slice") {
val v = new SparseVector(5, Array(1, 2, 4), Array(1.1, 2.2, 4.4))
assert(v.slice(Array(0, 2)) === new SparseVector(2, Array(1), Array(2.2)))
assert(v.slice(Array(2, 0)) === new SparseVector(2, Array(0), Array(2.2)))
assert(v.slice(Array(2, 0, 3, 4)) === new SparseVector(4, Array(0, 3), Array(2.2, 4.4)))
}
test("toJson/fromJson") {
val sv0 = Vectors.sparse(0, Array.empty, Array.empty)
val sv1 = Vectors.sparse(1, Array.empty, Array.empty)
val sv2 = Vectors.sparse(2, Array(1), Array(2.0))
val dv0 = Vectors.dense(Array.empty[Double])
val dv1 = Vectors.dense(1.0)
val dv2 = Vectors.dense(0.0, 2.0)
for (v <- Seq(sv0, sv1, sv2, dv0, dv1, dv2)) {
val json = v.toJson
parseJson(json) // `json` should be a valid JSON string
val u = Vectors.fromJson(json)
assert(u.getClass === v.getClass, "toJson/fromJson should preserve vector types.")
assert(u === v, "toJson/fromJson should preserve vector values.")
}
}
}

View file

@ -0,0 +1,236 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml.util
import org.scalatest.exceptions.TestFailedException
import org.apache.spark.ml.linalg.{Matrix, Vector}
object TestingUtils {
val ABS_TOL_MSG = " using absolute tolerance"
val REL_TOL_MSG = " using relative tolerance"
/**
* Private helper function for comparing two values using relative tolerance.
* Note that if x or y is extremely close to zero, i.e., smaller than Double.MinPositiveValue,
* the relative tolerance is meaningless, so the exception will be raised to warn users.
*/
private def RelativeErrorComparison(x: Double, y: Double, eps: Double): Boolean = {
val absX = math.abs(x)
val absY = math.abs(y)
val diff = math.abs(x - y)
if (x == y) {
true
} else if (absX < Double.MinPositiveValue || absY < Double.MinPositiveValue) {
throw new TestFailedException(
s"$x or $y is extremely close to zero, so the relative tolerance is meaningless.", 0)
} else {
diff < eps * math.min(absX, absY)
}
}
/**
* Private helper function for comparing two values using absolute tolerance.
*/
private def AbsoluteErrorComparison(x: Double, y: Double, eps: Double): Boolean = {
math.abs(x - y) < eps
}
case class CompareDoubleRightSide(
fun: (Double, Double, Double) => Boolean, y: Double, eps: Double, method: String)
/**
* Implicit class for comparing two double values using relative tolerance or absolute tolerance.
*/
implicit class DoubleWithAlmostEquals(val x: Double) {
/**
* When the difference of two values are within eps, returns true; otherwise, returns false.
*/
def ~=(r: CompareDoubleRightSide): Boolean = r.fun(x, r.y, r.eps)
/**
* When the difference of two values are within eps, returns false; otherwise, returns true.
*/
def !~=(r: CompareDoubleRightSide): Boolean = !r.fun(x, r.y, r.eps)
/**
* Throws exception when the difference of two values are NOT within eps;
* otherwise, returns true.
*/
def ~==(r: CompareDoubleRightSide): Boolean = {
if (!r.fun(x, r.y, r.eps)) {
throw new TestFailedException(
s"Expected $x and ${r.y} to be within ${r.eps}${r.method}.", 0)
}
true
}
/**
* Throws exception when the difference of two values are within eps; otherwise, returns true.
*/
def !~==(r: CompareDoubleRightSide): Boolean = {
if (r.fun(x, r.y, r.eps)) {
throw new TestFailedException(
s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method}.", 0)
}
true
}
/**
* Comparison using absolute tolerance.
*/
def absTol(eps: Double): CompareDoubleRightSide =
CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG)
/**
* Comparison using relative tolerance.
*/
def relTol(eps: Double): CompareDoubleRightSide =
CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG)
override def toString: String = x.toString
}
case class CompareVectorRightSide(
fun: (Vector, Vector, Double) => Boolean, y: Vector, eps: Double, method: String)
/**
* Implicit class for comparing two vectors using relative tolerance or absolute tolerance.
*/
implicit class VectorWithAlmostEquals(val x: Vector) {
/**
* When the difference of two vectors are within eps, returns true; otherwise, returns false.
*/
def ~=(r: CompareVectorRightSide): Boolean = r.fun(x, r.y, r.eps)
/**
* When the difference of two vectors are within eps, returns false; otherwise, returns true.
*/
def !~=(r: CompareVectorRightSide): Boolean = !r.fun(x, r.y, r.eps)
/**
* Throws exception when the difference of two vectors are NOT within eps;
* otherwise, returns true.
*/
def ~==(r: CompareVectorRightSide): Boolean = {
if (!r.fun(x, r.y, r.eps)) {
throw new TestFailedException(
s"Expected $x and ${r.y} to be within ${r.eps}${r.method} for all elements.", 0)
}
true
}
/**
* Throws exception when the difference of two vectors are within eps; otherwise, returns true.
*/
def !~==(r: CompareVectorRightSide): Boolean = {
if (r.fun(x, r.y, r.eps)) {
throw new TestFailedException(
s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method} for all elements.", 0)
}
true
}
/**
* Comparison using absolute tolerance.
*/
def absTol(eps: Double): CompareVectorRightSide = CompareVectorRightSide(
(x: Vector, y: Vector, eps: Double) => {
x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 absTol eps)
}, x, eps, ABS_TOL_MSG)
/**
* Comparison using relative tolerance. Note that comparing against sparse vector
* with elements having value of zero will raise exception because it involves with
* comparing against zero.
*/
def relTol(eps: Double): CompareVectorRightSide = CompareVectorRightSide(
(x: Vector, y: Vector, eps: Double) => {
x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
}, x, eps, REL_TOL_MSG)
override def toString: String = x.toString
}
case class CompareMatrixRightSide(
fun: (Matrix, Matrix, Double) => Boolean, y: Matrix, eps: Double, method: String)
/**
* Implicit class for comparing two matrices using relative tolerance or absolute tolerance.
*/
implicit class MatrixWithAlmostEquals(val x: Matrix) {
/**
* When the difference of two matrices are within eps, returns true; otherwise, returns false.
*/
def ~=(r: CompareMatrixRightSide): Boolean = r.fun(x, r.y, r.eps)
/**
* When the difference of two matrices are within eps, returns false; otherwise, returns true.
*/
def !~=(r: CompareMatrixRightSide): Boolean = !r.fun(x, r.y, r.eps)
/**
* Throws exception when the difference of two matrices are NOT within eps;
* otherwise, returns true.
*/
def ~==(r: CompareMatrixRightSide): Boolean = {
if (!r.fun(x, r.y, r.eps)) {
throw new TestFailedException(
s"Expected \n$x\n and \n${r.y}\n to be within ${r.eps}${r.method} for all elements.", 0)
}
true
}
/**
* Throws exception when the difference of two matrices are within eps; otherwise, returns true.
*/
def !~==(r: CompareMatrixRightSide): Boolean = {
if (r.fun(x, r.y, r.eps)) {
throw new TestFailedException(
s"Did not expect \n$x\n and \n${r.y}\n to be within " +
"${r.eps}${r.method} for all elements.", 0)
}
true
}
/**
* Comparison using absolute tolerance.
*/
def absTol(eps: Double): CompareMatrixRightSide = CompareMatrixRightSide(
(x: Matrix, y: Matrix, eps: Double) => {
x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 absTol eps)
}, x, eps, ABS_TOL_MSG)
/**
* Comparison using relative tolerance. Note that comparing against sparse vector
* with elements having value of zero will raise exception because it involves with
* comparing against zero.
*/
def relTol(eps: Double): CompareMatrixRightSide = CompareMatrixRightSide(
(x: Matrix, y: Matrix, eps: Double) => {
x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
}, x, eps, REL_TOL_MSG)
override def toString: String = x.toString
}
}

View file

@ -0,0 +1,187 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ml.util
import org.scalatest.exceptions.TestFailedException
import org.apache.spark.ml.SparkMLFunSuite
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.util.TestingUtils._
class TestingUtilsSuite extends SparkMLFunSuite {
test("Comparing doubles using relative error.") {
assert(23.1 ~== 23.52 relTol 0.02)
assert(23.1 ~== 22.74 relTol 0.02)
assert(23.1 ~= 23.52 relTol 0.02)
assert(23.1 ~= 22.74 relTol 0.02)
assert(!(23.1 !~= 23.52 relTol 0.02))
assert(!(23.1 !~= 22.74 relTol 0.02))
// Should throw exception with message when test fails.
intercept[TestFailedException](23.1 !~== 23.52 relTol 0.02)
intercept[TestFailedException](23.1 !~== 22.74 relTol 0.02)
intercept[TestFailedException](23.1 ~== 23.63 relTol 0.02)
intercept[TestFailedException](23.1 ~== 22.34 relTol 0.02)
assert(23.1 !~== 23.63 relTol 0.02)
assert(23.1 !~== 22.34 relTol 0.02)
assert(23.1 !~= 23.63 relTol 0.02)
assert(23.1 !~= 22.34 relTol 0.02)
assert(!(23.1 ~= 23.63 relTol 0.02))
assert(!(23.1 ~= 22.34 relTol 0.02))
// Comparing against zero should fail the test and throw exception with message
// saying that the relative error is meaningless in this situation.
intercept[TestFailedException](0.1 ~== 0.0 relTol 0.032)
intercept[TestFailedException](0.1 ~= 0.0 relTol 0.032)
intercept[TestFailedException](0.1 !~== 0.0 relTol 0.032)
intercept[TestFailedException](0.1 !~= 0.0 relTol 0.032)
intercept[TestFailedException](0.0 ~== 0.1 relTol 0.032)
intercept[TestFailedException](0.0 ~= 0.1 relTol 0.032)
intercept[TestFailedException](0.0 !~== 0.1 relTol 0.032)
intercept[TestFailedException](0.0 !~= 0.1 relTol 0.032)
// Comparisons of numbers very close to zero.
assert(10 * Double.MinPositiveValue ~== 9.5 * Double.MinPositiveValue relTol 0.01)
assert(10 * Double.MinPositiveValue !~== 11 * Double.MinPositiveValue relTol 0.01)
assert(-Double.MinPositiveValue ~== 1.18 * -Double.MinPositiveValue relTol 0.012)
assert(-Double.MinPositiveValue ~== 1.38 * -Double.MinPositiveValue relTol 0.012)
}
test("Comparing doubles using absolute error.") {
assert(17.8 ~== 17.99 absTol 0.2)
assert(17.8 ~== 17.61 absTol 0.2)
assert(17.8 ~= 17.99 absTol 0.2)
assert(17.8 ~= 17.61 absTol 0.2)
assert(!(17.8 !~= 17.99 absTol 0.2))
assert(!(17.8 !~= 17.61 absTol 0.2))
// Should throw exception with message when test fails.
intercept[TestFailedException](17.8 !~== 17.99 absTol 0.2)
intercept[TestFailedException](17.8 !~== 17.61 absTol 0.2)
intercept[TestFailedException](17.8 ~== 18.01 absTol 0.2)
intercept[TestFailedException](17.8 ~== 17.59 absTol 0.2)
assert(17.8 !~== 18.01 absTol 0.2)
assert(17.8 !~== 17.59 absTol 0.2)
assert(17.8 !~= 18.01 absTol 0.2)
assert(17.8 !~= 17.59 absTol 0.2)
assert(!(17.8 ~= 18.01 absTol 0.2))
assert(!(17.8 ~= 17.59 absTol 0.2))
// Comparisons of numbers very close to zero, and both side of zeros
assert(
Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
assert(
Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
assert(
-Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
assert(
Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
}
test("Comparing vectors using relative error.") {
// Comparisons of two dense vectors
assert(Vectors.dense(Array(3.1, 3.5)) ~== Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
assert(Vectors.dense(Array(3.1, 3.5)) !~== Vectors.dense(Array(3.135, 3.534)) relTol 0.01)
assert(Vectors.dense(Array(3.1, 3.5)) ~= Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
assert(Vectors.dense(Array(3.1, 3.5)) !~= Vectors.dense(Array(3.135, 3.534)) relTol 0.01)
assert(!(Vectors.dense(Array(3.1, 3.5)) !~= Vectors.dense(Array(3.130, 3.534)) relTol 0.01))
assert(!(Vectors.dense(Array(3.1, 3.5)) ~= Vectors.dense(Array(3.135, 3.534)) relTol 0.01))
// Should throw exception with message when test fails.
intercept[TestFailedException](
Vectors.dense(Array(3.1, 3.5)) !~== Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
intercept[TestFailedException](
Vectors.dense(Array(3.1, 3.5)) ~== Vectors.dense(Array(3.135, 3.534)) relTol 0.01)
// Comparing against zero should fail the test and throw exception with message
// saying that the relative error is meaningless in this situation.
intercept[TestFailedException](
Vectors.dense(Array(3.1, 0.01)) ~== Vectors.dense(Array(3.13, 0.0)) relTol 0.01)
intercept[TestFailedException](
Vectors.dense(Array(3.1, 0.01)) ~== Vectors.sparse(2, Array(0), Array(3.13)) relTol 0.01)
// Comparisons of two sparse vectors
assert(Vectors.dense(Array(3.1, 3.5)) ~==
Vectors.sparse(2, Array(0, 1), Array(3.130, 3.534)) relTol 0.01)
assert(Vectors.dense(Array(3.1, 3.5)) !~==
Vectors.sparse(2, Array(0, 1), Array(3.135, 3.534)) relTol 0.01)
}
test("Comparing vectors using absolute error.") {
// Comparisons of two dense vectors
assert(Vectors.dense(Array(3.1, 3.5, 0.0)) ~==
Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6)
assert(Vectors.dense(Array(3.1, 3.5, 0.0)) !~==
Vectors.dense(Array(3.1 + 1E-5, 3.5 + 2E-7, 1 + 1E-3)) absTol 1E-6)
assert(Vectors.dense(Array(3.1, 3.5, 0.0)) ~=
Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6)
assert(Vectors.dense(Array(3.1, 3.5, 0.0)) !~=
Vectors.dense(Array(3.1 + 1E-5, 3.5 + 2E-7, 1 + 1E-3)) absTol 1E-6)
assert(!(Vectors.dense(Array(3.1, 3.5, 0.0)) !~=
Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6))
assert(!(Vectors.dense(Array(3.1, 3.5, 0.0)) ~=
Vectors.dense(Array(3.1 + 1E-5, 3.5 + 2E-7, 1 + 1E-3)) absTol 1E-6))
// Should throw exception with message when test fails.
intercept[TestFailedException](Vectors.dense(Array(3.1, 3.5, 0.0)) !~==
Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6)
intercept[TestFailedException](Vectors.dense(Array(3.1, 3.5, 0.0)) ~==
Vectors.dense(Array(3.1 + 1E-5, 3.5 + 2E-7, 1 + 1E-3)) absTol 1E-6)
// Comparisons of two sparse vectors
assert(Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) ~==
Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-8, 2.4 + 1E-7)) absTol 1E-6)
assert(Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-8, 2.4 + 1E-7)) ~==
Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) absTol 1E-6)
assert(Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) !~==
Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-3, 2.4)) absTol 1E-6)
assert(Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-3, 2.4)) !~==
Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) absTol 1E-6)
// Comparisons of a dense vector and a sparse vector
assert(Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) ~==
Vectors.dense(Array(3.1 + 1E-8, 0, 2.4 + 1E-7)) absTol 1E-6)
assert(Vectors.dense(Array(3.1 + 1E-8, 0, 2.4 + 1E-7)) ~==
Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) absTol 1E-6)
assert(Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) !~==
Vectors.dense(Array(3.1, 1E-3, 2.4)) absTol 1E-6)
}
}

View file

@ -182,8 +182,8 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
for (folds <- 2 to 10) {
for (seed <- 1 to 5) {
val foldedRdds = kFold(data, folds, seed)
assert(foldedRdds.size === folds)
foldedRdds.map { case (training, validation) =>
assert(foldedRdds.length === folds)
foldedRdds.foreach { case (training, validation) =>
val result = validation.union(training).collect().sorted
val validationSize = validation.collect().size.toFloat
assert(validationSize > 0, "empty validation data")