[SPARK-14549][ML] Copy the Vector and Matrix classes from mllib to ml in mllib-local
## What changes were proposed in this pull request? This task will copy the Vector and Matrix classes from mllib to the ml package in the mllib-local jar. The UDTs and `since` annotation in ml vector and matrix will be removed for now. UDTs will be achieved by #SPARK-14487, and `since` will be replaced by `/* since 1.2.0 */` comments. The BLAS implementation will be copied, and some of the test utilities will be copied as well. Summary of changes: 1. In mllib-local/src/main/scala/org/apache/spark/**ml**/linalg/BLAS.scala - Copied from mllib/src/main/scala/org/apache/spark/**mllib**/linalg/BLAS.scala - logDebug("gemm: alpha is equal to 0 and beta is equal to 1. Returning C.") is removed in ml version. 2. In mllib-local/src/main/scala/org/apache/spark/**ml**/linalg/Matrices.scala - Copied from mllib/src/main/scala/org/apache/spark/**mllib**/linalg/Matrices.scala - `Since` was removed, and we'll use the standard `/* Since */` Javadoc comment instead. Will be in another PR. - `UDT` related code was removed, and will use `SPARK-13944` https://github.com/apache/spark/pull/12259 to replace the annotation. 3. In mllib-local/src/main/scala/org/apache/spark/**ml**/linalg/Vectors.scala - Copied from mllib/src/main/scala/org/apache/spark/**mllib**/linalg/Vectors.scala - `Since` was removed. - `UDT` related code was removed. - In `def parseNumeric`, it was throwing `throw new SparkException(s"Cannot parse $other.")`, and now it's throwing `throw new IllegalArgumentException(s"Cannot parse $other.")` 4. In mllib/src/main/scala/org/apache/spark/**mllib**/linalg/Vectors.scala - For consistency with ML version of vector, `def parseNumeric` is now throwing `throw new IllegalArgumentException(s"Cannot parse $other.")` 5. mllib/src/main/scala/org/apache/spark/**mllib**/util/NumericParser.scala is moved to mllib-local/src/main/scala/org/apache/spark/**ml**/util/NumericParser.scala - All the `throw new SparkException` were replaced by `throw new IllegalArgumentException` ## How was this patch tested? 
unit tests Author: DB Tsai <dbt@netflix.com> Closes #12317 from dbtsai/dbtsai-ml-vector.
This commit is contained in:
parent
a9324a06ef
commit
96534aa47c
|
@ -48,6 +48,10 @@
|
|||
<artifactId>scalacheck_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.json4s</groupId>
|
||||
<artifactId>json4s-jackson_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
|
|
723
mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala
Normal file
723
mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala
Normal file
|
@ -0,0 +1,723 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml.linalg
|
||||
|
||||
import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS}
|
||||
import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS}
|
||||
|
||||
/**
|
||||
* BLAS routines for MLlib's vectors and matrices.
|
||||
*/
|
||||
private[spark] object BLAS extends Serializable {
|
||||
|
||||
@transient private var _f2jBLAS: NetlibBLAS = _
|
||||
@transient private var _nativeBLAS: NetlibBLAS = _
|
||||
|
||||
// Level-1 routines go through the Java (F2J) implementation. The instance is created on
// first use and cached in the transient `_f2jBLAS` field.
private def f2jBLAS: NetlibBLAS = {
  val cached = _f2jBLAS
  if (cached != null) {
    cached
  } else {
    val created = new F2jBLAS
    _f2jBLAS = created
    created
  }
}
|
||||
|
||||
/**
 * y += a * x
 *
 * Dispatches on the runtime types of both vectors; `y` is updated in place.
 *
 * @param a scalar multiplier applied to `x`.
 * @param x the vector to add, dense or sparse. Must have the same size as `y`.
 * @param y the vector being updated. Must be a [[DenseVector]].
 */
def axpy(a: Double, x: Vector, y: Vector): Unit = {
  require(x.size == y.size)
  (y, x) match {
    case (target: DenseVector, source: SparseVector) =>
      axpy(a, source, target)
    case (target: DenseVector, source: DenseVector) =>
      axpy(a, source, target)
    case (_: DenseVector, _) =>
      // y is fine, but x is neither of the two supported vector types.
      throw new UnsupportedOperationException(
        s"axpy doesn't support x type ${x.getClass}.")
    case _ =>
      throw new IllegalArgumentException(
        s"axpy only supports adding to a dense vector but got type ${y.getClass}.")
  }
}
|
||||
|
||||
/**
 * y += a * x
 *
 * Dense-dense case: forwards to `daxpy` of the F2J BLAS with unit strides, using `x.size`
 * as the element count.
 */
private def axpy(a: Double, x: DenseVector, y: DenseVector): Unit =
  f2jBLAS.daxpy(x.size, a, x.values, 1, y.values, 1)
|
||||
|
||||
/**
 * y += a * x
 *
 * Sparse-dense case: only the entries of `y` at the active indices of `x` are touched.
 */
private def axpy(a: Double, x: SparseVector, y: DenseVector): Unit = {
  val srcValues = x.values
  val srcIndices = x.indices
  val dstValues = y.values
  val numActive = srcIndices.length

  var pos = 0
  if (a == 1.0) {
    // Unit multiplier: add the values directly and skip the multiplication.
    while (pos < numActive) {
      dstValues(srcIndices(pos)) += srcValues(pos)
      pos += 1
    }
  } else {
    while (pos < numActive) {
      dstValues(srcIndices(pos)) += a * srcValues(pos)
      pos += 1
    }
  }
}
|
||||
|
||||
/**
 * Y += a * X
 *
 * Operates directly on the backing value arrays of both matrices after checking that their
 * dimensions agree.
 * NOTE(review): `isTransposed` is not consulted here, so this presumably expects X and Y to
 * share the same storage layout - confirm against callers.
 */
private[spark] def axpy(a: Double, X: DenseMatrix, Y: DenseMatrix): Unit = {
  val sameShape = X.numRows == Y.numRows && X.numCols == Y.numCols
  require(sameShape, "Dimension mismatch: " +
    s"size(X) = ${(X.numRows, X.numCols)} but size(Y) = ${(Y.numRows, Y.numCols)}.")
  val numEntries = X.numRows * X.numCols
  f2jBLAS.daxpy(numEntries, a, X.values, 1, Y.values, 1)
}
|
||||
|
||||
/**
 * dot(x, y)
 *
 * Dispatches to the specialised implementation for the runtime types of `x` and `y`.
 *
 * @param x first vector, dense or sparse.
 * @param y second vector, dense or sparse. Must have the same size as `x`.
 * @return the inner product of `x` and `y`.
 */
def dot(x: Vector, y: Vector): Double = {
  require(x.size == y.size,
    "BLAS.dot(x: Vector, y:Vector) was given Vectors with non-matching sizes:" +
    s" x.size = ${x.size}, y.size = ${y.size}")

  // Shared failure path for any combination of vector types that is not handled below.
  def unsupported: Nothing =
    throw new IllegalArgumentException(s"dot doesn't support (${x.getClass}, ${y.getClass}).")

  x match {
    case dx: DenseVector =>
      y match {
        case dy: DenseVector => dot(dx, dy)
        // The sparse-dense kernel takes the sparse operand first.
        case sy: SparseVector => dot(sy, dx)
        case _ => unsupported
      }
    case sx: SparseVector =>
      y match {
        case dy: DenseVector => dot(sx, dy)
        case sy: SparseVector => dot(sx, sy)
        case _ => unsupported
      }
    case _ => unsupported
  }
}
|
||||
|
||||
/**
 * dot(x, y)
 *
 * Dense-dense case: forwards to `ddot` of the F2J BLAS with unit strides, using `x.size`
 * as the element count.
 */
private def dot(x: DenseVector, y: DenseVector): Double =
  f2jBLAS.ddot(x.size, x.values, 1, y.values, 1)
|
||||
|
||||
/**
 * dot(x, y)
 *
 * Sparse-dense case: sums the products of each active entry of `x` with the entry of `y`
 * at the same index.
 */
private def dot(x: SparseVector, y: DenseVector): Double = {
  val activeValues = x.values
  val activeIndices = x.indices
  val denseValues = y.values
  val numActive = activeIndices.length

  var acc = 0.0
  var pos = 0
  while (pos < numActive) {
    acc += activeValues(pos) * denseValues(activeIndices(pos))
    pos += 1
  }
  acc
}
|
||||
|
||||
/**
 * dot(x, y)
 *
 * Sparse-sparse case: walks the active indices of `x` once while a second cursor over `y`
 * catches up, accumulating a product whenever both cursors point at the same index.
 */
private def dot(x: SparseVector, y: SparseVector): Double = {
  val xVals = x.values
  val xIdx = x.indices
  val yVals = y.values
  val yIdx = y.indices
  val xLen = xIdx.length
  val yLen = yIdx.length

  var px = 0
  var py = 0
  var acc = 0.0
  while (px < xLen && py < yLen) {
    val target = xIdx(px)
    // Move y's cursor forward until it reaches or passes x's current index.
    while (py < yLen && yIdx(py) < target) {
      py += 1
    }
    if (py < yLen && yIdx(py) == target) {
      acc += xVals(px) * yVals(py)
      py += 1
    }
    px += 1
  }
  acc
}
|
||||
|
||||
/**
 * y = x
 *
 * Overwrites every entry of the dense vector `y` with the corresponding entry of `x`.
 *
 * @param x source vector, dense or sparse. Must have the same size as `y`.
 * @param y destination vector. Must be a [[DenseVector]].
 */
def copy(x: Vector, y: Vector): Unit = {
  val n = y.size
  require(x.size == n)

  // Only a dense destination is supported; resolve its backing array up front.
  val target = y match {
    case dy: DenseVector => dy.values
    case _ =>
      throw new IllegalArgumentException(s"y must be dense in copy but got ${y.getClass}")
  }

  x match {
    case sx: SparseVector =>
      val srcIndices = sx.indices
      val srcValues = sx.values
      val numActive = srcIndices.length

      var next = 0 // next position of `target` that still has to be written
      var k = 0
      while (k < numActive) {
        val activeIndex = srcIndices(k)
        // Zero out the gap before the next active entry.
        while (next < activeIndex) {
          target(next) = 0.0
          next += 1
        }
        target(next) = srcValues(k)
        next += 1
        k += 1
      }
      // Zero out everything after the last active entry.
      while (next < n) {
        target(next) = 0.0
        next += 1
      }
    case dx: DenseVector =>
      Array.copy(dx.values, 0, target, 0, n)
  }
}
|
||||
|
||||
/**
 * x = a * x
 *
 * Scales the stored values of `x` in place. For a sparse vector only the explicitly stored
 * values are scaled.
 *
 * @param a the scalar multiplier.
 * @param x the vector to scale, dense or sparse.
 */
def scal(a: Double, x: Vector): Unit = {
  // Both supported vector types expose their stored entries through a `values` array.
  val stored = x match {
    case sx: SparseVector => sx.values
    case dx: DenseVector => dx.values
    case _ =>
      throw new IllegalArgumentException(s"scal doesn't support vector type ${x.getClass}.")
  }
  f2jBLAS.dscal(stored.length, a, stored, 1)
}
|
||||
|
||||
// Level-3 routines go through the instance returned by netlib's `BLAS.getInstance`
// (imported here as `NativeBLAS`). It is looked up on first use and cached in the transient
// `_nativeBLAS` field.
private def nativeBLAS: NetlibBLAS = {
  val cached = _nativeBLAS
  if (cached != null) {
    cached
  } else {
    val resolved = NativeBLAS
    _nativeBLAS = resolved
    resolved
  }
}
|
||||
|
||||
/**
 * Adds alpha * v * v.t to a matrix in-place. This is the same as BLAS's ?SPR.
 *
 * Convenience overload that unwraps `U` and forwards to the array-based `spr`.
 *
 * @param alpha scalar multiplier of the rank-1 update.
 * @param v the vector whose outer product is added.
 * @param U the upper triangular part of the matrix in a [[DenseVector]] (column major)
 */
def spr(alpha: Double, v: Vector, U: DenseVector): Unit =
  spr(alpha, v, U.values)
|
||||
|
||||
/**
 * Adds alpha * v * v.t to a matrix in-place. This is the same as BLAS's ?SPR.
 *
 * For a dense `v` the update is delegated to `dspr`. For a sparse `v` only the entries of
 * `U` addressed by pairs of active indices are updated.
 *
 * @param alpha scalar multiplier of the rank-1 update.
 * @param v the vector whose outer product is added; its size gives the matrix order.
 * @param U the upper triangular part of the matrix packed in an array (column major)
 */
def spr(alpha: Double, v: Vector, U: Array[Double]): Unit = {
  val n = v.size
  v match {
    case DenseVector(values) =>
      NativeBLAS.dspr("U", n, alpha, values, 1, U)
    case SparseVector(_, indices, values) =>
      val nnz = indices.length
      var colStartIdx = 0
      var prevCol = 0
      var j = 0
      while (j < nnz) {
        val col = indices(j)
        // Skip empty columns. Column c of the packed upper triangle starts at
        // c * (c + 1) / 2, so moving from prevCol to col advances the offset by
        // (col - prevCol) * (col + prevCol + 1) / 2. The product is formed in Long because
        // it can exceed Int.MaxValue even when the resulting offset still fits in an Int.
        colStartIdx += ((col - prevCol).toLong * (col.toLong + prevCol + 1L) / 2).toInt
        val av = alpha * values(j)
        // Only rows up to and including the diagonal belong to the upper triangle.
        var i = 0
        while (i <= j) {
          U(colStartIdx + indices(i)) += av * values(i)
          i += 1
        }
        j += 1
        prevCol = col
      }
  }
}
|
||||
|
||||
/**
 * A := alpha * x * x^T^ + A
 *
 * Validates the shapes and dispatches on the runtime type of `x`.
 *
 * @param alpha a real scalar that will be multiplied to x * x^T^.
 * @param x the vector x that contains the n elements.
 * @param A the symmetric matrix A. Size of n x n.
 */
def syr(alpha: Double, x: Vector, A: DenseMatrix): Unit = {
  val rows = A.numRows
  val cols = A.numCols
  require(rows == cols,
    s"A is not a square matrix (and hence is not symmetric). A: $rows x $cols")
  require(rows == x.size,
    s"The size of x doesn't match the rank of A. A: $rows x $cols, x: ${x.size}")

  x match {
    case dense: DenseVector =>
      syr(alpha, dense, A)
    case sparse: SparseVector =>
      syr(alpha, sparse, A)
    case _ =>
      throw new IllegalArgumentException(s"syr doesn't support vector type ${x.getClass}.")
  }
}
|
||||
|
||||
/**
 * A := alpha * x * x^T^ + A
 * For `DenseVector` x.
 *
 * `dsyr` is called with "U", so it updates the upper triangle only; the lower triangle is
 * then filled in by mirroring so that `A` stays symmetric.
 */
private def syr(alpha: Double, x: DenseVector, A: DenseMatrix): Unit = {
  val rowCount = A.numRows
  val colCount = A.numCols

  nativeBLAS.dsyr("U", x.size, alpha, x.values, 1, A.values, rowCount)

  // Copy each upper-triangle entry (c, r) into its mirrored position (r, c), r > c.
  var c = 0
  while (c < colCount) {
    var r = c + 1
    while (r < rowCount) {
      A(r, c) = A(c, r)
      r += 1
    }
    c += 1
  }
}
|
||||
|
||||
/**
 * A := alpha * x * x^T^ + A
 * For `SparseVector` x.
 *
 * Updates the backing array of `A` directly, touching only the entries addressed by pairs
 * of active indices of `x`.
 */
private def syr(alpha: Double, x: SparseVector, A: DenseMatrix): Unit = {
  val stride = A.numCols
  val activeIndices = x.indices
  val activeValues = x.values
  val numActive = activeValues.length
  val entries = A.values

  var outer = 0
  while (outer < numActive) {
    val scaled = alpha * activeValues(outer)
    val base = activeIndices(outer) * stride
    var inner = 0
    while (inner < numActive) {
      entries(activeIndices(inner) + base) += scaled * activeValues(inner)
      inner += 1
    }
    outer += 1
  }
}
|
||||
|
||||
/**
 * C := alpha * A * B + beta * C
 *
 * Entry point that handles the `alpha == 0` shortcuts and otherwise dispatches on the
 * runtime type of `A`.
 *
 * @param alpha a scalar to scale the multiplication A * B.
 * @param A the matrix A that will be left multiplied to B. Size of m x k.
 * @param B the matrix B that will be left multiplied by A. Size of k x n.
 * @param beta a scalar that can be used to scale matrix C.
 * @param C the resulting matrix C. Size of m x n. C.isTransposed must be false.
 */
def gemm(
    alpha: Double,
    A: Matrix,
    B: DenseMatrix,
    beta: Double,
    C: DenseMatrix): Unit = {
  require(!C.isTransposed,
    "The matrix C cannot be the product of a transpose() call. C.isTransposed must be false.")
  if (alpha == 0.0) {
    // A * B contributes nothing, so C only needs to be scaled by beta; when beta is 1 as
    // well, C is returned untouched.
    if (beta != 1.0) {
      f2jBLAS.dscal(C.values.length, beta, C.values, 1)
    }
  } else {
    A match {
      case sparseA: SparseMatrix =>
        gemm(alpha, sparseA, B, beta, C)
      case denseA: DenseMatrix =>
        gemm(alpha, denseA, B, beta, C)
      case _ =>
        throw new IllegalArgumentException(s"gemm doesn't support matrix type ${A.getClass}.")
    }
  }
}
|
||||
|
||||
/**
 * C := alpha * A * B + beta * C
 * For `DenseMatrix` A.
 *
 * Validates the dimensions and delegates to `dgemm`. A transposed operand is passed with
 * the "T" flag and with its column count as leading dimension.
 */
private def gemm(
    alpha: Double,
    A: DenseMatrix,
    B: DenseMatrix,
    beta: Double,
    C: DenseMatrix): Unit = {
  val tAstr = if (A.isTransposed) "T" else "N"
  val tBstr = if (B.isTransposed) "T" else "N"
  val lda = if (!A.isTransposed) A.numRows else A.numCols
  val ldb = if (!B.isTransposed) B.numRows else B.numCols

  require(A.numCols == B.numRows,
    s"The columns of A don't match the rows of B. A: ${A.numCols}, B: ${B.numRows}")
  require(A.numRows == C.numRows,
    s"The rows of C don't match the rows of A. C: ${C.numRows}, A: ${A.numRows}")
  // The second number reported here is B's column count, so it is labelled "B".
  require(B.numCols == C.numCols,
    s"The columns of C don't match the columns of B. C: ${C.numCols}, B: ${B.numCols}")
  nativeBLAS.dgemm(tAstr, tBstr, A.numRows, B.numCols, A.numCols, alpha, A.values, lda,
    B.values, ldb, beta, C.values, C.numRows)
}
|
||||
|
||||
/**
 * C := alpha * A * B + beta * C
 * For `SparseMatrix` A.
 *
 * Four hand-written loops cover the combinations of `A.isTransposed` and `B.isTransposed`.
 * When `A` is transposed, each entry of C is computed as one sum and written once. When it
 * is not, C is first scaled by `beta` and the products are then accumulated into it.
 */
private def gemm(
    alpha: Double,
    A: SparseMatrix,
    B: DenseMatrix,
    beta: Double,
    C: DenseMatrix): Unit = {
  val mA: Int = A.numRows
  val nB: Int = B.numCols
  val kA: Int = A.numCols
  val kB: Int = B.numRows

  require(kA == kB, s"The columns of A don't match the rows of B. A: $kA, B: $kB")
  require(mA == C.numRows, s"The rows of C don't match the rows of A. C: ${C.numRows}, A: $mA")
  // The second number reported here is B's column count, so it is labelled "B".
  require(nB == C.numCols,
    s"The columns of C don't match the columns of B. C: ${C.numCols}, B: $nB")

  val Avals = A.values
  val Bvals = B.values
  val Cvals = C.values
  val ArowIndices = A.rowIndices
  val AcolPtrs = A.colPtrs

  // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices
  if (A.isTransposed) {
    // Here `AcolPtrs` is indexed by row of A and `ArowIndices` supplies the position within
    // the matching column of B.
    var colCounterForB = 0
    if (!B.isTransposed) { // Expensive to put the check inside the loop
      while (colCounterForB < nB) {
        var rowCounterForA = 0
        val Cstart = colCounterForB * mA
        val Bstart = colCounterForB * kA
        while (rowCounterForA < mA) {
          var i = AcolPtrs(rowCounterForA)
          val indEnd = AcolPtrs(rowCounterForA + 1)
          var sum = 0.0
          while (i < indEnd) {
            sum += Avals(i) * Bvals(Bstart + ArowIndices(i))
            i += 1
          }
          val Cindex = Cstart + rowCounterForA
          Cvals(Cindex) = beta * Cvals(Cindex) + sum * alpha
          rowCounterForA += 1
        }
        colCounterForB += 1
      }
    } else {
      // B is transposed: go through B's accessor instead of indexing its value array.
      while (colCounterForB < nB) {
        var rowCounterForA = 0
        val Cstart = colCounterForB * mA
        while (rowCounterForA < mA) {
          var i = AcolPtrs(rowCounterForA)
          val indEnd = AcolPtrs(rowCounterForA + 1)
          var sum = 0.0
          while (i < indEnd) {
            sum += Avals(i) * B(ArowIndices(i), colCounterForB)
            i += 1
          }
          val Cindex = Cstart + rowCounterForA
          Cvals(Cindex) = beta * Cvals(Cindex) + sum * alpha
          rowCounterForA += 1
        }
        colCounterForB += 1
      }
    }
  } else {
    // Scale matrix first if `beta` is not equal to 1.0
    if (beta != 1.0) {
      f2jBLAS.dscal(C.values.length, beta, C.values, 1)
    }
    // Perform matrix multiplication and add to C. The rows of A are multiplied by the columns of
    // B, and added to C.
    var colCounterForB = 0 // the column to be updated in C
    if (!B.isTransposed) { // Expensive to put the check inside the loop
      while (colCounterForB < nB) {
        var colCounterForA = 0 // The column of A to multiply with the row of B
        val Bstart = colCounterForB * kB
        val Cstart = colCounterForB * mA
        while (colCounterForA < kA) {
          var i = AcolPtrs(colCounterForA)
          val indEnd = AcolPtrs(colCounterForA + 1)
          val Bval = Bvals(Bstart + colCounterForA) * alpha
          while (i < indEnd) {
            Cvals(Cstart + ArowIndices(i)) += Avals(i) * Bval
            i += 1
          }
          colCounterForA += 1
        }
        colCounterForB += 1
      }
    } else {
      // B is transposed: go through B's accessor instead of indexing its value array.
      while (colCounterForB < nB) {
        var colCounterForA = 0 // The column of A to multiply with the row of B
        val Cstart = colCounterForB * mA
        while (colCounterForA < kA) {
          var i = AcolPtrs(colCounterForA)
          val indEnd = AcolPtrs(colCounterForA + 1)
          val Bval = B(colCounterForA, colCounterForB) * alpha
          while (i < indEnd) {
            Cvals(Cstart + ArowIndices(i)) += Avals(i) * Bval
            i += 1
          }
          colCounterForA += 1
        }
        colCounterForB += 1
      }
    }
  }
}
|
||||
|
||||
/**
 * y := alpha * A * x + beta * y
 *
 * Entry point that validates the dimensions, handles the `alpha == 0` shortcuts and
 * otherwise dispatches on the runtime types of `A` and `x`.
 *
 * @param alpha a scalar to scale the multiplication A * x.
 * @param A the matrix A that will be left multiplied to x. Size of m x n.
 * @param x the vector x that will be left multiplied by A. Size of n x 1.
 * @param beta a scalar that can be used to scale vector y.
 * @param y the resulting vector y. Size of m x 1.
 */
def gemv(
    alpha: Double,
    A: Matrix,
    x: Vector,
    beta: Double,
    y: DenseVector): Unit = {
  require(A.numCols == x.size,
    s"The columns of A don't match the number of elements of x. A: ${A.numCols}, x: ${x.size}")
  require(A.numRows == y.size,
    s"The rows of A don't match the number of elements of y. A: ${A.numRows}, y:${y.size}")

  // Shared failure path for any matrix/vector type combination not handled below.
  def unsupported: Nothing =
    throw new IllegalArgumentException(s"gemv doesn't support running on matrix type " +
      s"${A.getClass} and vector type ${x.getClass}.")

  if (alpha == 0.0) {
    // A * x contributes nothing, so y only needs to be scaled by beta; when beta is 1 as
    // well, y is returned untouched.
    if (beta != 1.0) {
      scal(beta, y)
    }
  } else {
    A match {
      case sparseA: SparseMatrix =>
        x match {
          case denseX: DenseVector => gemv(alpha, sparseA, denseX, beta, y)
          case sparseX: SparseVector => gemv(alpha, sparseA, sparseX, beta, y)
          case _ => unsupported
        }
      case denseA: DenseMatrix =>
        x match {
          case denseX: DenseVector => gemv(alpha, denseA, denseX, beta, y)
          case sparseX: SparseVector => gemv(alpha, denseA, sparseX, beta, y)
          case _ => unsupported
        }
      case _ => unsupported
    }
  }
}
|
||||
|
||||
/**
 * y := alpha * A * x + beta * y
 * For `DenseMatrix` A and `DenseVector` x.
 *
 * Delegates to `dgemv`. For a transposed `A` the "T" flag is passed and the row and column
 * counts handed to BLAS are swapped.
 */
private def gemv(
    alpha: Double,
    A: DenseMatrix,
    x: DenseVector,
    beta: Double,
    y: DenseVector): Unit = {
  val (transFlag, storedRows, storedCols) =
    if (A.isTransposed) ("T", A.numCols, A.numRows) else ("N", A.numRows, A.numCols)
  // `storedRows` is also passed as the leading dimension of A.
  nativeBLAS.dgemv(transFlag, storedRows, storedCols, alpha, A.values, storedRows,
    x.values, 1, beta, y.values, 1)
}
|
||||
|
||||
/**
 * y := alpha * A * x + beta * y
 * For `DenseMatrix` A and `SparseVector` x.
 *
 * For every row of A, sums the products of the active entries of `x` with the matching
 * entries of that row, then combines the sum with the previous value of `y`.
 */
private def gemv(
    alpha: Double,
    A: DenseMatrix,
    x: SparseVector,
    beta: Double,
    y: DenseVector): Unit = {
  val numRows: Int = A.numRows
  val numCols: Int = A.numCols
  val entries = A.values

  val activeIndices = x.indices
  val numActive = activeIndices.length
  val activeValues = x.values
  val out = y.values

  // Entry (row, col) lives at row * rowStride + col * colStride in `entries`: a transposed
  // matrix is addressed as col + row * numCols, a regular one as col * numRows + row.
  val (rowStride, colStride) = if (A.isTransposed) (numCols, 1) else (1, numRows)

  var row = 0
  while (row < numRows) {
    val rowBase = row * rowStride
    var acc = 0.0
    var k = 0
    while (k < numActive) {
      acc += activeValues(k) * entries(activeIndices(k) * colStride + rowBase)
      k += 1
    }
    out(row) = acc * alpha + beta * out(row)
    row += 1
  }
}
|
||||
|
||||
/**
 * y := alpha * A * x + beta * y
 * For `SparseMatrix` A and `SparseVector` x.
 *
 * When `A` is transposed, each entry of y is computed from a merge of one row of A with
 * the active entries of `x`. Otherwise y is first scaled by `beta` and the columns of A
 * selected by the active indices of `x` are accumulated into it.
 */
private def gemv(
    alpha: Double,
    A: SparseMatrix,
    x: SparseVector,
    beta: Double,
    y: DenseVector): Unit = {
  val xValues = x.values
  val xIndices = x.indices
  val xNnz = xIndices.length

  val yValues = y.values

  val mA: Int = A.numRows
  val nA: Int = A.numCols

  val Avals = A.values
  // For a transposed matrix the roles of the two index arrays are swapped: `colPtrs` is
  // indexed by row and `rowIndices` holds column positions.
  val Arows = if (!A.isTransposed) A.rowIndices else A.colPtrs
  val Acols = if (!A.isTransposed) A.colPtrs else A.rowIndices

  if (A.isTransposed) {
    var rowCounter = 0
    while (rowCounter < mA) {
      var i = Arows(rowCounter)
      val indEnd = Arows(rowCounter + 1)
      var sum = 0.0
      var k = 0
      // Two-cursor merge of the row's column indices with x's active indices. Whichever
      // cursor points at the smaller index is advanced, so an entry of A without a matching
      // entry in x no longer blocks later matches in the same row.
      // NOTE(review): assumes the column indices within a row are ascending, like the
      // strictly increasing indices of a sparse vector - confirm for SparseMatrix.
      while (k < xNnz && i < indEnd) {
        val xIndex = xIndices(k)
        val aCol = Acols(i)
        if (xIndex == aCol) {
          sum += Avals(i) * xValues(k)
          k += 1
          i += 1
        } else if (xIndex < aCol) {
          k += 1
        } else {
          i += 1
        }
      }
      yValues(rowCounter) = sum * alpha + beta * yValues(rowCounter)
      rowCounter += 1
    }
  } else {
    if (beta != 1.0) scal(beta, y)

    var colCounterForA = 0
    var k = 0
    while (colCounterForA < nA && k < xNnz) {
      // Only columns of A that line up with an active entry of x contribute.
      if (xIndices(k) == colCounterForA) {
        var i = Acols(colCounterForA)
        val indEnd = Acols(colCounterForA + 1)

        val xTemp = xValues(k) * alpha
        while (i < indEnd) {
          yValues(Arows(i)) += Avals(i) * xTemp
          i += 1
        }
        k += 1
      }
      colCounterForA += 1
    }
  }
}
|
||||
|
||||
/**
 * y := alpha * A * x + beta * y
 * For `SparseMatrix` A and `DenseVector` x.
 *
 * When `A` is transposed, each entry of y is computed as one sum over a row of A and
 * written once. Otherwise y is first scaled by `beta` and every column of A, weighted by
 * the matching entry of `x`, is accumulated into it.
 */
private def gemv(
    alpha: Double,
    A: SparseMatrix,
    x: DenseVector,
    beta: Double,
    y: DenseVector): Unit = {
  val xVals = x.values
  val yVals = y.values
  val entries = A.values

  if (A.isTransposed) {
    // For a transposed matrix `colPtrs` is indexed by row and `rowIndices` holds the column
    // position of each stored entry.
    val rowPtrs = A.colPtrs
    val colIdx = A.rowIndices
    val numRows: Int = A.numRows
    var row = 0
    while (row < numRows) {
      var p = rowPtrs(row)
      val end = rowPtrs(row + 1)
      var acc = 0.0
      while (p < end) {
        acc += entries(p) * xVals(colIdx(p))
        p += 1
      }
      yVals(row) = beta * yVals(row) + acc * alpha
      row += 1
    }
  } else {
    if (beta != 1.0) scal(beta, y)
    // Accumulate alpha * x(col) times each stored entry of the column into y.
    val colPtrs = A.colPtrs
    val rowIdx = A.rowIndices
    val numCols: Int = A.numCols
    var col = 0
    while (col < numCols) {
      var p = colPtrs(col)
      val end = colPtrs(col + 1)
      val scaledX = xVals(col) * alpha
      while (p < end) {
        yVals(rowIdx(p)) += entries(p) * scaledX
        p += 1
      }
      col += 1
    }
  }
}
|
||||
}
|
1026
mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
Normal file
1026
mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,736 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml.linalg
|
||||
|
||||
import java.lang.{Double => JavaDouble, Integer => JavaInteger, Iterable => JavaIterable}
|
||||
import java.util
|
||||
|
||||
import scala.annotation.varargs
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.JsonDSL._
|
||||
import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}
|
||||
|
||||
/**
 * Represents a numeric vector, whose index type is Int and value type is Double.
 *
 * Note: Users should not implement this interface.
 */
sealed trait Vector extends Serializable {

  /**
   * Size of the vector.
   */
  def size: Int

  /**
   * Converts the instance to a double array.
   */
  def toArray: Array[Double]

  /**
   * Two vectors are equal when they have the same size and the same values, regardless of
   * whether they are stored as dense or sparse. Anything that is not a [[Vector]] is never
   * equal to a vector.
   */
  override def equals(other: Any): Boolean = {
    other match {
      case v2: Vector if this.size == v2.size =>
        (this, v2) match {
          case (s1: SparseVector, s2: SparseVector) =>
            Vectors.equals(s1.indices, s1.values, s2.indices, s2.values)
          case (s1: SparseVector, d1: DenseVector) =>
            Vectors.equals(s1.indices, s1.values, 0 until d1.size, d1.values)
          case (d1: DenseVector, s1: SparseVector) =>
            Vectors.equals(0 until d1.size, d1.values, s1.indices, s1.values)
          case (_, _) => util.Arrays.equals(this.toArray, v2.toArray)
        }
      // Either not a vector at all, or a vector of a different size.
      case _ => false
    }
  }

  /**
   * Returns a hash code value for the vector. The hash code is based on its size and its first 128
   * nonzero entries, using a hash algorithm similar to [[java.util.Arrays.hashCode]].
   */
  override def hashCode(): Int = {
    // This is a reference implementation. Once the limit of hashed entries is reached the
    // remaining active entries are still visited but ignored, so no non-local return out
    // of the closure is needed. Subclasses should override it with optimized implementation.
    var result: Int = 31 + size
    var nnz = 0
    this.foreachActive { (index, value) =>
      // ignore explicit 0 for comparison between sparse and dense
      if (nnz < Vectors.MAX_HASH_NNZ && value != 0) {
        result = 31 * result + index
        val bits = java.lang.Double.doubleToLongBits(value)
        result = 31 * result + (bits ^ (bits >>> 32)).toInt
        nnz += 1
      }
    }
    result
  }

  /**
   * Converts the instance to a breeze vector.
   */
  private[spark] def toBreeze: BV[Double]

  /**
   * Gets the value of the ith element.
   * @param i index
   */
  def apply(i: Int): Double = toBreeze(i)

  /**
   * Makes a deep copy of this vector.
   *
   * This default implementation always throws `NotImplementedError`.
   */
  def copy: Vector = {
    throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.")
  }

  /**
   * Applies a function `f` to all the active elements of dense and sparse vector.
   *
   * @param f the function takes two parameters where the first parameter is the index of
   *          the vector with type `Int`, and the second parameter is the corresponding value
   *          with type `Double`.
   */
  def foreachActive(f: (Int, Double) => Unit): Unit

  /**
   * Number of active entries.  An "active entry" is an element which is explicitly stored,
   * regardless of its value.  Note that inactive entries have value 0.
   */
  def numActives: Int

  /**
   * Number of nonzero elements. This scans all active values and count nonzeros.
   */
  def numNonzeros: Int

  /**
   * Converts this vector to a sparse vector with all explicit zeros removed.
   */
  def toSparse: SparseVector

  /**
   * Converts this vector to a dense vector.
   */
  def toDense: DenseVector = new DenseVector(this.toArray)

  /**
   * Returns a vector in either dense or sparse format, whichever uses less storage.
   */
  def compressed: Vector = {
    val nnz = numNonzeros
    // A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
    if (1.5 * (nnz + 1.0) < size) {
      toSparse
    } else {
      toDense
    }
  }

  /**
   * Find the index of a maximal element.  Returns the first maximal element in case of a tie.
   * Returns -1 if vector has length 0.
   */
  def argmax: Int

  /**
   * Converts the vector to a JSON string.
   */
  def toJson: String
}
|
||||
|
||||
/**
|
||||
* Factory methods for [[org.apache.spark.ml.linalg.Vector]].
|
||||
* We don't use the name `Vector` because Scala imports
|
||||
* [[scala.collection.immutable.Vector]] by default.
|
||||
*/
|
||||
object Vectors {
|
||||
|
||||
/**
|
||||
* Creates a dense vector from its values.
|
||||
*/
|
||||
@varargs
|
||||
def dense(firstValue: Double, otherValues: Double*): Vector =
|
||||
new DenseVector((firstValue +: otherValues).toArray)
|
||||
|
||||
// A dummy implicit is used to avoid signature collision with the one generated by @varargs.
|
||||
/**
|
||||
* Creates a dense vector from a double array.
|
||||
*/
|
||||
def dense(values: Array[Double]): Vector = new DenseVector(values)
|
||||
|
||||
/**
|
||||
* Creates a sparse vector providing its index array and value array.
|
||||
*
|
||||
* @param size vector size.
|
||||
* @param indices index array, must be strictly increasing.
|
||||
* @param values value array, must have the same length as indices.
|
||||
*/
|
||||
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector = {
  // Both arrays are passed straight to the SparseVector constructor, which validates lengths.
  new SparseVector(size, indices, values)
}
|
||||
|
||||
/**
|
||||
* Creates a sparse vector using unordered (index, value) pairs.
|
||||
*
|
||||
* @param size vector size.
|
||||
* @param elements vector elements in (index, value) pairs.
|
||||
*/
|
||||
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
  require(size > 0, "The size of the requested sparse vector must be greater than 0.")

  // Order the pairs by index, then split them into parallel index / value sequences.
  val ordered = elements.sortBy { case (index, _) => index }
  val (indices, values) = ordered.unzip

  // Walk the sorted indices: each one must be strictly greater than its predecessor.
  var last = -1
  for (index <- indices) {
    require(last < index, s"Found duplicate indices: $index.")
    last = index
  }
  // `last` is now the largest index seen (or -1 when `elements` is empty).
  require(last < size, s"You may not write an element to index $last because the declared " +
    s"size of your vector is $size")

  new SparseVector(size, indices.toArray, values.toArray)
}
|
||||
|
||||
/**
|
||||
* Creates a sparse vector using unordered (index, value) pairs in a Java friendly way.
|
||||
*
|
||||
* @param size vector size.
|
||||
* @param elements vector elements in (index, value) pairs.
|
||||
*/
|
||||
def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = {
  // Unbox every (Integer, Double) pair, then delegate to the Seq-based overload.
  val unboxed = elements.asScala.map { pair =>
    (pair._1.intValue(), pair._2.doubleValue())
  }.toSeq
  sparse(size, unboxed)
}
|
||||
|
||||
/**
|
||||
* Creates a vector of all zeros.
|
||||
*
|
||||
* @param size vector size
|
||||
* @return a zero vector
|
||||
*/
|
||||
def zeros(size: Int): Vector = {
  // A newly allocated Array[Double] starts out filled with 0.0.
  val zeroed = new Array[Double](size)
  new DenseVector(zeroed)
}
|
||||
|
||||
/**
|
||||
* Parses the JSON representation of a vector into a [[Vector]].
|
||||
*/
|
||||
def fromJson(json: String): Vector = {
  implicit val formats = DefaultFormats
  val parsed = parseJson(json)
  // The integer "type" field selects the encoding: 0 is sparse, 1 is dense.
  val typeTag = (parsed \ "type").extract[Int]
  if (typeTag == 0) {
    val size = (parsed \ "size").extract[Int]
    val indices = (parsed \ "indices").extract[Seq[Int]].toArray
    val values = (parsed \ "values").extract[Seq[Double]].toArray
    sparse(size, indices, values)
  } else if (typeTag == 1) {
    val values = (parsed \ "values").extract[Seq[Double]].toArray
    dense(values)
  } else {
    throw new IllegalArgumentException(s"Cannot parse $json into a vector.")
  }
}
|
||||
|
||||
/**
|
||||
* Creates a vector instance from a breeze vector.
|
||||
*/
|
||||
private[spark] def fromBreeze(breezeVector: BV[Double]): Vector = breezeVector match {
  case bdv: BDV[Double] =>
    // The backing array is reused only when the Breeze vector spans all of it contiguously;
    // otherwise the elements are materialized into a new array.
    val spansWholeArray = bdv.offset == 0 && bdv.stride == 1 && bdv.length == bdv.data.length
    val data = if (spansWholeArray) bdv.data else bdv.toArray
    new DenseVector(data)

  case bsv: BSV[Double] =>
    if (bsv.index.length == bsv.used) {
      // Every slot of the Breeze arrays is in use, so they can be shared as they are.
      new SparseVector(bsv.length, bsv.index, bsv.data)
    } else {
      // Keep only the first `used` slots of each array.
      val usedIndices = bsv.index.slice(0, bsv.used)
      val usedValues = bsv.data.slice(0, bsv.used)
      new SparseVector(bsv.length, usedIndices, usedValues)
    }

  case other: BV[_] =>
    sys.error("Unsupported Breeze vector type: " + other.getClass.getName)
}
|
||||
|
||||
/**
|
||||
* Returns the p-norm of this vector.
|
||||
* @param vector input vector.
|
||||
* @param p norm.
|
||||
* @return norm in L^p^ space.
|
||||
*/
|
||||
def norm(vector: Vector, p: Double): Double = {
  require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
    s"You specified p=$p.")

  // Only stored entries are visited; entries a sparse vector does not store are zero and
  // therefore add nothing to any of the norms computed below.
  val stored: Array[Double] = vector match {
    case dv: DenseVector => dv.values
    case sv: SparseVector => sv.values
    case other =>
      throw new IllegalArgumentException("Do not support vector type " + other.getClass)
  }
  val count = stored.length

  // Adds up term(x) over the stored entries, first to last.
  def sumOver(term: Double => Double): Double = {
    var total = 0.0
    var k = 0
    while (k < count) {
      total += term(stored(k))
      k += 1
    }
    total
  }

  if (p == 1) {
    sumOver(x => math.abs(x))
  } else if (p == 2) {
    math.sqrt(sumOver(x => x * x))
  } else if (p == Double.PositiveInfinity) {
    // Infinity norm: the largest absolute value, or 0.0 when nothing is stored.
    var largest = 0.0
    var k = 0
    while (k < count) {
      val magnitude = math.abs(stored(k))
      if (magnitude > largest) largest = magnitude
      k += 1
    }
    largest
  } else {
    math.pow(sumOver(x => math.pow(math.abs(x), p)), 1.0 / p)
  }
}
|
||||
|
||||
/**
|
||||
* Returns the squared distance between two Vectors.
|
||||
* @param v1 first Vector.
|
||||
* @param v2 second Vector.
|
||||
* @return squared distance between two Vectors.
|
||||
*/
|
||||
def sqdist(v1: Vector, v2: Vector): Double = {
  require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" +
    s"=${v2.size}.")
  (v1, v2) match {
    case (left: SparseVector, right: SparseVector) =>
      val leftIdx = left.indices
      val leftVal = left.values
      val rightIdx = right.indices
      val rightVal = right.values
      val leftCount = leftIdx.length
      val rightCount = rightIdx.length

      // Merge-walk over the two index arrays (SparseVector assumes they are strictly
      // increasing): an index stored on one side only contributes that value squared,
      // an index stored on both sides contributes the squared difference.
      var total = 0.0
      var i = 0
      var j = 0
      while (i < leftCount || j < rightCount) {
        val diff =
          if (j >= rightCount || (i < leftCount && leftIdx(i) < rightIdx(j))) {
            val onlyLeft = leftVal(i)
            i += 1
            onlyLeft
          } else if (i >= leftCount || (j < rightCount && rightIdx(j) < leftIdx(i))) {
            val onlyRight = rightVal(j)
            j += 1
            onlyRight
          } else {
            val both = leftVal(i) - rightVal(j)
            i += 1
            j += 1
            both
          }
        total += diff * diff
      }
      total

    // Mixed representations go to the (SparseVector, DenseVector) overload.
    case (sv: SparseVector, dv: DenseVector) =>
      sqdist(sv, dv)

    case (dv: DenseVector, sv: SparseVector) =>
      sqdist(sv, dv)

    case (a: DenseVector, b: DenseVector) =>
      val aValues = a.values
      val bValues = b.values
      val n = aValues.length
      var total = 0.0
      var k = 0
      while (k < n) {
        val diff = aValues(k) - bValues(k)
        total += diff * diff
        k += 1
      }
      total

    case _ =>
      throw new IllegalArgumentException("Do not support vector type " + v1.getClass +
        " and " + v2.getClass)
  }
}
|
||||
|
||||
/**
|
||||
* Returns the squared distance between DenseVector and SparseVector.
|
||||
*/
|
||||
private[ml] def sqdist(v1: SparseVector, v2: DenseVector): Double = {
  val sparseIndices = v1.indices
  val storedCount = sparseIndices.length
  val denseSize = v2.size

  var sparsePos = 0
  // Index of the next sparse entry still to be matched; -1 (never a dense position) if none.
  var nextSparseIndex = if (storedCount > 0) sparseIndices(sparsePos) else -1

  var total = 0.0
  var densePos = 0
  while (densePos < denseSize) {
    val diff =
      if (densePos == nextSparseIndex) {
        // Both vectors have a value at this position.
        val d = v1.values(sparsePos) - v2(densePos)
        // Advance to the next stored entry unless this was the last one.
        if (sparsePos < storedCount - 1) {
          sparsePos += 1
          nextSparseIndex = sparseIndices(sparsePos)
        }
        d
      } else {
        // The sparse vector has no stored value here, so only the dense value counts.
        v2(densePos)
      }
    total += diff * diff
    densePos += 1
  }
  total
}
|
||||
|
||||
/**
|
||||
* Check equality between sparse/dense vectors
|
||||
*/
|
||||
private[ml] def equals(
    v1Indices: IndexedSeq[Int],
    v1Values: Array[Double],
    v2Indices: IndexedSeq[Int],
    v2Values: Array[Double]): Boolean = {
  val len1 = v1Values.length
  val len2 = v2Values.length
  var pos1 = 0
  var pos2 = 0
  var mismatch = false
  var exhausted = false
  while (!mismatch && !exhausted) {
    // Explicit zeros are skipped on both sides: they equal entries that are not stored.
    while (pos1 < len1 && v1Values(pos1) == 0) pos1 += 1
    while (pos2 < len2 && v2Values(pos2) == 0) pos2 += 1

    if (pos1 >= len1 || pos2 >= len2) {
      exhausted = true
    } else {
      // The next nonzero entries must agree in both index and value.
      mismatch =
        !(v1Indices(pos1) == v2Indices(pos2) && v1Values(pos1) == v2Values(pos2))
      pos1 += 1
      pos2 += 1
    }
  }
  // Equal only if no pair differed and both sides ran out of nonzeros together.
  !mismatch && pos1 >= len1 && pos2 >= len2
}
|
||||
|
||||
/** Max number of nonzero entries used in computing hash code. */
|
||||
private[linalg] val MAX_HASH_NNZ = 128
|
||||
}
|
||||
|
||||
/**
|
||||
* A dense vector represented by a value array.
|
||||
*/
|
||||
class DenseVector (val values: Array[Double]) extends Vector {

  /** Number of entries, i.e. the length of the backing array. */
  override def size: Int = values.length

  /** Formats the entries as `[v0,v1,...]`. */
  override def toString: String = values.mkString("[", ",", "]")

  /** Returns the backing array itself; no copy is made. */
  override def toArray: Array[Double] = values

  /** Builds a Breeze dense vector from the backing array. */
  private[spark] override def toBreeze: BV[Double] = new BDV[Double](values)

  /** Reads the entry at position `i` of the backing array. */
  override def apply(i: Int): Double = values(i)

  /** Returns a new vector backed by a clone of the value array. */
  override def copy: DenseVector = new DenseVector(values.clone())

  /** Calls `f(index, value)` for every entry, zeros included, in index order. */
  override def foreachActive(f: (Int, Double) => Unit): Unit = {
    val data = values
    val n = data.length
    var idx = 0
    while (idx < n) {
      f(idx, data(idx))
      idx += 1
    }
  }

  /**
   * Hashes the size together with the (index, value) pairs of the nonzero entries, looking at
   * no more than `Vectors.MAX_HASH_NNZ` of them.
   */
  override def hashCode(): Int = {
    var hash: Int = 31 + size
    val limit = values.length
    var pos = 0
    var nonzerosSeen = 0
    while (pos < limit && nonzerosSeen < Vectors.MAX_HASH_NNZ) {
      val x = values(pos)
      if (x != 0.0) {
        hash = 31 * hash + pos
        // Fold the 64-bit pattern of the double into 32 bits.
        val bits = java.lang.Double.doubleToLongBits(x)
        hash = 31 * hash + (bits ^ (bits >>> 32)).toInt
        nonzerosSeen += 1
      }
      pos += 1
    }
    hash
  }

  /** Every entry of a dense vector is stored, so this equals `size`. */
  override def numActives: Int = size

  /** Counts the entries that differ from 0.0. */
  override def numNonzeros: Int = {
    var count = 0
    var pos = 0
    val limit = values.length
    while (pos < limit) {
      if (values(pos) != 0.0) count += 1
      pos += 1
    }
    count
  }

  /** Builds a sparse vector holding only the nonzero entries of this vector. */
  override def toSparse: SparseVector = {
    val kept = numNonzeros
    val keptIndices = new Array[Int](kept)
    val keptValues = new Array[Double](kept)
    var out = 0
    var in = 0
    val limit = values.length
    while (in < limit) {
      val x = values(in)
      if (x != 0) {
        keptIndices(out) = in
        keptValues(out) = x
        out += 1
      }
      in += 1
    }
    new SparseVector(size, keptIndices, keptValues)
  }

  /** Index of the first largest entry, or -1 for an empty vector. */
  override def argmax: Int = {
    if (size == 0) {
      -1
    } else {
      var bestIdx = 0
      var best = values(0)
      var pos = 1
      while (pos < size) {
        val candidate = values(pos)
        // Strict comparison keeps the earliest index when values tie.
        if (candidate > best) {
          bestIdx = pos
          best = candidate
        }
        pos += 1
      }
      bestIdx
    }
  }

  /** Serializes as a compact JSON object with "type" -> 1 and the "values" array. */
  override def toJson: String =
    compact(render(("type" -> 1) ~ ("values" -> values.toSeq)))
}
|
||||
|
||||
object DenseVector {

  /** Extractor returning the backing value array of a dense vector (always a `Some`). */
  def unapply(dv: DenseVector): Option[Array[Double]] = {
    val backing = dv.values
    Some(backing)
  }
}
|
||||
|
||||
/**
|
||||
* A sparse vector represented by an index array and an value array.
|
||||
*
|
||||
* @param size size of the vector.
|
||||
* @param indices index array, assume to be strictly increasing.
|
||||
* @param values value array, must have the same length as the index array.
|
||||
*/
|
||||
class SparseVector (
    override val size: Int,
    val indices: Array[Int],
    val values: Array[Double]) extends Vector {

  // Only the array lengths are validated here; the ordering and range of `indices` are not.
  require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
    s" indices match the dimension of the values. You provided ${indices.length} indices and " +
    s" ${values.length} values.")
  require(indices.length <= size, s"You provided ${indices.length} indices and values, " +
    s"which exceeds the specified vector size ${size}.")

  /** Formats the vector as `(size,[i0,i1,...],[v0,v1,...])`. */
  override def toString: String =
    s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})"

  /** Materializes a new array of length `size`; positions that are not stored stay 0.0. */
  override def toArray: Array[Double] = {
    val data = new Array[Double](size)
    var i = 0
    val nnz = indices.length
    while (i < nnz) {
      data(indices(i)) = values(i)
      i += 1
    }
    data
  }

  /** Returns a new vector backed by clones of both the index and the value array. */
  override def copy: SparseVector = {
    new SparseVector(size, indices.clone(), values.clone())
  }

  /** Builds a Breeze sparse vector from the index array, the value array and the size. */
  private[spark] override def toBreeze: BV[Double] = new BSV[Double](indices, values, size)

  /** Calls `f(index, value)` for every stored entry (explicit zeros included), in order. */
  override def foreachActive(f: (Int, Double) => Unit): Unit = {
    var i = 0
    val localValuesSize = values.length
    val localIndices = indices
    val localValues = values

    while (i < localValuesSize) {
      f(localIndices(i), localValues(i))
      i += 1
    }
  }

  /**
   * Hashes the size together with the (index, value) pairs of the nonzero stored entries,
   * looking at no more than `Vectors.MAX_HASH_NNZ` of them. Explicit zeros are skipped.
   */
  override def hashCode(): Int = {
    var result: Int = 31 + size
    val end = values.length
    var k = 0
    var nnz = 0
    while (k < end && nnz < Vectors.MAX_HASH_NNZ) {
      val v = values(k)
      if (v != 0.0) {
        val i = indices(k)
        result = 31 * result + i
        // Fold the 64-bit pattern of the double into 32 bits.
        val bits = java.lang.Double.doubleToLongBits(v)
        result = 31 * result + (bits ^ (bits >>> 32)).toInt
        nnz += 1
      }
      k += 1
    }
    result
  }

  /** Number of stored entries, explicit zeros included. */
  override def numActives: Int = values.length

  /** Number of stored entries that differ from 0.0. */
  override def numNonzeros: Int = {
    var nnz = 0
    values.foreach { v =>
      if (v != 0.0) {
        nnz += 1
      }
    }
    nnz
  }

  /**
   * Returns a sparse vector without explicit zeros. When there are none to remove, `this`
   * itself is returned rather than a copy.
   */
  override def toSparse: SparseVector = {
    val nnz = numNonzeros
    if (nnz == numActives) {
      this
    } else {
      val ii = new Array[Int](nnz)
      val vv = new Array[Double](nnz)
      var k = 0
      foreachActive { (i, v) =>
        if (v != 0.0) {
          ii(k) = i
          vv(k) = v
          k += 1
        }
      }
      new SparseVector(size, ii, vv)
    }
  }

  /**
   * Index of the first maximal element, taking entries that are not stored (implicit zeros)
   * into account. Returns -1 when the vector has size 0.
   */
  override def argmax: Int = {
    if (size == 0) {
      -1
    } else if (numActives == 0) {
      // Nothing is stored, so every element is an implicit zero and the first maximum is at
      // index 0. Without this branch the reads of indices(0) / values(0) below would throw.
      0
    } else {
      // Find the max active entry.
      var maxIdx = indices(0)
      var maxValue = values(0)
      var maxJ = 0
      var j = 1
      val na = numActives
      while (j < na) {
        val v = values(j)
        if (v > maxValue) {
          maxValue = v
          maxIdx = indices(j)
          maxJ = j
        }
        j += 1
      }

      // If the max active entry is nonpositive and there exists inactive ones, find the first zero.
      if (maxValue <= 0.0 && na < size) {
        if (maxValue == 0.0) {
          // If there exists an inactive entry before maxIdx, find it and return its index.
          // (maxJ < maxIdx means fewer than maxIdx entries are stored ahead of maxIdx.)
          if (maxJ < maxIdx) {
            var k = 0
            while (k < maxJ && indices(k) == k) {
              k += 1
            }
            maxIdx = k
          }
        } else {
          // If the max active value is negative, find and return the first inactive index.
          var k = 0
          while (k < na && indices(k) == k) {
            k += 1
          }
          maxIdx = k
        }
      }

      maxIdx
    }
  }

  /**
   * Create a slice of this vector based on the given indices.
   * @param selectedIndices Unsorted list of indices into the vector.
   *                        This does NOT do bound checking.
   * @return New SparseVector with values in the order specified by the given indices.
   *
   * NOTE: The API needs to be discussed before making this public.
   *       Also, if we have a version assuming indices are sorted, we should optimize it.
   */
  private[spark] def slice(selectedIndices: Array[Int]): SparseVector = {
    // Position in the output vector of the selected index currently being processed.
    var currentIdx = 0
    val (sliceInds, sliceVals) = selectedIndices.flatMap { origIdx =>
      // Binary search relies on `indices` being sorted; a negative result means "not stored".
      val iIdx = java.util.Arrays.binarySearch(this.indices, origIdx)
      val i_v = if (iIdx >= 0) {
        Iterator((currentIdx, this.values(iIdx)))
      } else {
        Iterator()
      }
      currentIdx += 1
      i_v
    }.unzip
    new SparseVector(selectedIndices.length, sliceInds.toArray, sliceVals.toArray)
  }

  /** Serializes as a compact JSON object: "type" -> 0, then "size", "indices", "values". */
  override def toJson: String = {
    val jValue = ("type" -> 0) ~
      ("size" -> size) ~
      ("indices" -> indices.toSeq) ~
      ("values" -> values.toSeq)
    compact(render(jValue))
  }
}
|
||||
|
||||
object SparseVector {

  /** Extractor returning `(size, indices, values)` of a sparse vector (always a `Some`). */
  def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] = {
    val parts = (sv.size, sv.indices, sv.values)
    Some(parts)
  }
}
|
|
@ -1,28 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml
|
||||
|
||||
import org.scalatest.FunSuite // scalastyle:ignore funsuite
|
||||
|
||||
// This is testing if the new build works. To be removed soon.
|
||||
class DummyTestingSuite extends FunSuite { // scalastyle:ignore funsuite

  // Smoke test: passes only if the module compiles and DummyTesting.add10 can be called.
  test("This is testing if the new build works.") {
    val result = DummyTesting.add10(15)
    assert(result === 25)
  }
}
|
|
@ -17,7 +17,14 @@
|
|||
|
||||
package org.apache.spark.ml
|
||||
|
||||
// This is a private class testing if the new build works. To be removed soon.
|
||||
private[ml] object DummyTesting {
|
||||
// Returns the input increased by ten.
private[ml] def add10(input: Double): Double = 10 + input
|
||||
// scalastyle:off
|
||||
import org.scalatest.{BeforeAndAfterAll, FunSuite}
|
||||
|
||||
/**
|
||||
* Base abstract class for all unit tests in Spark for handling common functionality.
|
||||
*/
|
||||
private[spark] abstract class SparkMLFunSuite extends FunSuite with BeforeAndAfterAll {
  // scalastyle:on
}
|
|
@ -0,0 +1,408 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml.linalg
|
||||
|
||||
import org.apache.spark.ml.SparkMLFunSuite
|
||||
import org.apache.spark.ml.linalg.BLAS._
|
||||
import org.apache.spark.ml.util.TestingUtils._
|
||||
|
||||
class BLASSuite extends SparkMLFunSuite {
|
||||
|
||||
test("copy") {
  val sparseSrc = Vectors.sparse(4, Array(0, 2), Array(1.0, -2.0))
  val denseSrc = Vectors.dense(1.0, 0.0, -2.0, 0.0)
  val sparseDst = Vectors.sparse(4, Array(0, 1, 3), Array(2.0, 1.0, 1.0))
  val initialDst = Array(2.0, 1.0, 0.0, 1.0)

  // Copying either representation into a dense target must give the dense source contents.
  for (source <- Seq(sparseSrc, denseSrc)) {
    val target = Vectors.dense(initialDst.clone())
    copy(source, target)
    assert(target ~== denseSrc absTol 1e-15)
  }

  // A sparse destination is rejected whatever the source representation is.
  for (source <- Seq(sparseSrc, denseSrc)) {
    intercept[IllegalArgumentException] {
      copy(source, sparseDst)
    }
  }

  withClue("vector sizes must match") {
    intercept[Exception] {
      copy(sparseSrc, Vectors.dense(0.0, 1.0, 2.0))
    }
  }
}
|
||||
|
||||
test("scal") {
  val factor = 0.1
  val sparseVec = Vectors.sparse(3, Array(0, 2), Array(1.0, -2.0))
  val denseVec = Vectors.dense(1.0, 0.0, -2.0)

  // The assertions inspect the argument itself after the call, i.e. scal works in place.
  scal(factor, sparseVec)
  val scaledSparse = Vectors.sparse(3, Array(0, 2), Array(0.1, -0.2))
  assert(sparseVec ~== scaledSparse absTol 1e-15)

  scal(factor, denseVec)
  val scaledDense = Vectors.dense(0.1, 0.0, -0.2)
  assert(denseVec ~== scaledDense absTol 1e-15)
}
|
||||
|
||||
test("axpy") {
  val alpha = 0.1
  val sparseX = Vectors.sparse(3, Array(0, 2), Array(1.0, -2.0))
  val denseX = Vectors.dense(1.0, 0.0, -2.0)
  val initialY = Array(2.0, 1.0, 0.0)
  val expected = Vectors.dense(2.1, 1.0, -0.2)

  // y += alpha * x must give the same dense result for a sparse and a dense x.
  for (x <- Seq(sparseX, denseX)) {
    val y = Vectors.dense(initialY.clone())
    axpy(alpha, x, y)
    assert(y ~== expected absTol 1e-15)
  }

  val sparseY = Vectors.sparse(4, Array(0, 1), Array(2.0, 1.0))

  // A sparse y is rejected whatever the representation of x is.
  for (x <- Seq(sparseX, denseX)) {
    intercept[IllegalArgumentException] {
      axpy(alpha, x, sparseY)
    }
  }

  withClue("vector sizes must match") {
    intercept[Exception] {
      axpy(alpha, sparseX, Vectors.dense(1.0, 2.0))
    }
  }
}
|
||||
|
||||
test("dot") {
  val sx = Vectors.sparse(3, Array(0, 2), Array(1.0, -2.0))
  val dx = Vectors.dense(1.0, 0.0, -2.0)
  val sy = Vectors.sparse(3, Array(0, 1), Array(2.0, 1.0))
  val dy = Vectors.dense(2.0, 1.0, 0.0)

  // (left, right, expected dot product) for every sparse / dense pairing, in both orders.
  val cases = Seq(
    (sx, sy, 2.0), (sy, sx, 2.0), (sx, dy, 2.0), (dy, sx, 2.0), (dx, dy, 2.0), (dy, dx, 2.0),
    (sx, sx, 5.0), (dx, dx, 5.0), (sx, dx, 5.0), (dx, sx, 5.0))
  for ((left, right, expected) <- cases) {
    assert(dot(left, right) ~== expected absTol 1e-15)
  }

  // Two longer sparse vectors whose index sets overlap only at 3 and 7.
  val longSparse1 = Vectors.sparse(10, Array(0, 3, 5, 7, 8), Array(1.0, 2.0, 3.0, 4.0, 5.0))
  val longSparse2 = Vectors.sparse(10, Array(1, 3, 6, 7, 9), Array(1.0, 2.0, 3.0, 4.0, 5.0))
  assert(dot(longSparse1, longSparse2) ~== 20.0 absTol 1e-15)
  assert(dot(longSparse2, longSparse1) ~== 20.0 absTol 1e-15)

  withClue("vector sizes must match") {
    intercept[Exception] {
      dot(sx, Vectors.dense(2.0, 1.0))
    }
  }
}
|
||||
|
||||
test("spr") {
  // Dense input vector.
  val alpha = 0.1
  val denseX = new DenseVector(Array(1.0, 2, 2.1, 4))
  val packedU = new DenseVector(Array(1.0, 2, 2, 3, 3, 3, 4, 4, 4, 4))
  val expectedDense =
    new DenseVector(Array(1.1, 2.2, 2.4, 3.21, 3.42, 3.441, 4.4, 4.8, 4.84, 5.6))

  spr(alpha, denseX, packedU)
  assert(packedU ~== expectedDense absTol 1e-9)

  // A packed array of length 5 does not fit a vector of length 4.
  val wrongSizeU = new DenseVector(Array(1.0, 2, 3, 4, 5))
  withClue("Size of vector must match the rank of matrix") {
    intercept[Exception] {
      spr(alpha, denseX, wrongSizeU)
    }
  }

  // Sparse input vector.
  val sparseX = new SparseVector(4, Array(0, 3), Array(1.0, 2))
  val packedU2 = new DenseVector(Array(1.0, 2, 2, 3, 3, 3, 4, 4, 4, 4))
  spr(0.1, sparseX, packedU2)
  val expectedSparse =
    new DenseVector(Array(1.1, 2.0, 2.0, 3.0, 3.0, 3.0, 4.2, 4.0, 4.0, 4.4))
  assert(packedU2 ~== expectedSparse absTol 1e-15)
}
|
||||
|
||||
test("syr") {
  val symmetricA = new DenseMatrix(4, 4,
    Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0, 3.1, 4.6, 3.0, 0.8))
  val denseX = new DenseVector(Array(0.0, 2.7, 3.5, 2.1))
  val alpha = 0.15

  val expectedDense = new DenseMatrix(4, 4,
    Array(0.0, 1.2, 2.2, 3.1, 1.2, 4.2935, 6.7175, 5.4505, 2.2, 6.7175, 3.6375, 4.1025, 3.1,
      5.4505, 4.1025, 1.4615))

  // syr updates the matrix argument in place.
  syr(alpha, denseX, symmetricA)
  assert(symmetricA ~== expectedDense absTol 1e-15)

  // A 3 x 4 matrix is not square and must be rejected.
  val nonSquare =
    new DenseMatrix(3, 4, Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0))
  withClue("Matrix A must be a symmetric Matrix") {
    intercept[Exception] {
      syr(alpha, denseX, nonSquare)
    }
  }

  // A 3 x 3 matrix does not fit a vector of length 4.
  val tooSmall =
    new DenseMatrix(3, 3, Array(0.0, 1.2, 2.2, 1.2, 3.2, 5.3, 2.2, 5.3, 1.8))
  withClue("Size of vector must match the rank of matrix") {
    intercept[Exception] {
      syr(alpha, denseX, tooSmall)
    }
  }

  // A vector of length 5 does not fit the 4 x 4 matrix.
  val tooLong = new DenseVector(Array(0.0, 2.7, 3.5, 2.1, 1.5))
  withClue("Size of vector must match the rank of matrix") {
    intercept[Exception] {
      syr(alpha, tooLong, symmetricA)
    }
  }

  // Sparse input vector against a fresh copy of the original matrix values.
  val sparseX = new SparseVector(4, Array(0, 2, 3), Array(1.0, 3.0, 4.0))
  val symmetricD = new DenseMatrix(4, 4,
    Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0, 3.1, 4.6, 3.0, 0.8))
  syr(0.1, sparseX, symmetricD)
  val expectedSparse = new DenseMatrix(4, 4,
    Array(0.1, 1.2, 2.5, 3.5, 1.2, 3.2, 5.3, 4.6, 2.5, 5.3, 2.7, 4.2, 3.5, 4.6, 4.2, 2.4))
  assert(symmetricD ~== expectedSparse absTol 1e-15)
}
|
||||
|
||||
test("gemm") {
  val denseA =
    new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
  val sparseA =
    new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))

  val B = new DenseMatrix(3, 2, Array(1.0, 0.0, 0.0, 0.0, 2.0, 1.0))
  val product = new DenseMatrix(4, 2, Array(0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 3.0))
  val BTman = new DenseMatrix(2, 3, Array(1.0, 0.0, 0.0, 2.0, 0.0, 1.0))
  val BT = B.transpose

  assert(denseA.multiply(B) ~== product absTol 1e-15)
  assert(sparseA.multiply(B) ~== product absTol 1e-15)

  // Every gemm case below starts from its own copy of this matrix; it is never mutated.
  val initialC = new DenseMatrix(4, 2, Array(1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0))
  // Expected C for 1 * A * B + 2 * C, 2 * A * B + 2 * C and 0 * A * B + 5 * C respectively.
  val expected2 = new DenseMatrix(4, 2, Array(2.0, 1.0, 4.0, 2.0, 4.0, 0.0, 4.0, 3.0))
  val expected3 = new DenseMatrix(4, 2, Array(2.0, 2.0, 4.0, 2.0, 8.0, 0.0, 6.0, 6.0))
  val expected4 = new DenseMatrix(4, 2, Array(5.0, 0.0, 10.0, 5.0, 0.0, 0.0, 5.0, 0.0))
  val expected5 = initialC.copy

  // Dense A with alpha = 1 is kept in a named result: it is reused by the intercept below.
  val firstResult = initialC.copy
  gemm(1.0, denseA, B, 2.0, firstResult)
  assert(firstResult ~== expected2 absTol 1e-15)

  val plainCases = Seq((1.0, sparseA, expected2), (2.0, denseA, expected3),
    (2.0, sparseA, expected3))
  for ((alpha, a, wanted) <- plainCases) {
    val c = initialC.copy
    gemm(alpha, a, B, 2.0, c)
    assert(c ~== wanted absTol 1e-15)
  }

  // beta = 0: the same output matrix is written twice in a row.
  val overwritten = initialC.copy
  gemm(1.0, denseA, B, 0.0, overwritten)
  assert(overwritten ~== product absTol 1e-15)
  gemm(1.0, sparseA, B, 0.0, overwritten)
  assert(overwritten ~== product absTol 1e-15)

  withClue("columns of A don't match the rows of B") {
    intercept[Exception] {
      gemm(1.0, denseA.transpose, B, 2.0, firstResult)
    }
  }

  val dATman =
    new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
  val sATman =
    new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))

  val dATT = dATman.transpose
  val sATT = sATman.transpose
  val BTT = BTman.transpose.asInstanceOf[DenseMatrix]

  assert(dATT.multiply(B) ~== product absTol 1e-15)
  assert(sATT.multiply(B) ~== product absTol 1e-15)
  assert(dATT.multiply(BTT) ~== product absTol 1e-15)
  assert(sATT.multiply(BTT) ~== product absTol 1e-15)

  // Transposed B combined first with the transposed and then with the plain A matrices.
  val scaledExpectations = Seq((1.0, expected2), (2.0, expected3))
  for (group <- Seq(Seq(dATT, sATT), Seq(denseA, sparseA));
       (alpha, wanted) <- scaledExpectations;
       a <- group) {
    val c = initialC.copy
    gemm(alpha, a, BTT, 2.0, c)
    assert(c ~== wanted absTol 1e-15)
  }

  // alpha = 0: only beta * C remains, for beta = 5 and then beta = 1.
  for ((beta, wanted) <- Seq((5.0, expected4), (1.0, expected5));
       a <- Seq(denseA, sparseA)) {
    val c = initialC.copy
    gemm(0.0, a, B, beta, c)
    assert(c ~== wanted absTol 1e-15)
  }
}
|
||||
|
||||
test("gemv") {
  val denseA =
    new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
  val sparseA =
    new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))

  // The same 4 x 3 matrices again, this time constructed with the trailing `true` flag.
  val denseA2 =
    new DenseMatrix(4, 3, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0), true)
  val sparseA2 =
    new SparseMatrix(4, 3, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0),
      true)

  val denseX = new DenseVector(Array(1.0, 2.0, 3.0))
  val sparseX = denseX.toSparse
  val product = new DenseVector(Array(4.0, 1.0, 2.0, 9.0))

  for (a <- Seq(denseA, sparseA)) {
    assert(a.multiply(denseX) ~== product absTol 1e-15)
  }
  for (a <- Seq(denseA, sparseA)) {
    assert(a.multiply(sparseX) ~== product absTol 1e-15)
  }

  // Every gemv case below starts from its own copy of this vector; it is never mutated.
  val initialY = new DenseVector(Array(1.0, 3.0, 1.0, 0.0))
  // Expected y for 1 * A * x + 2 * y and for 2 * A * x + 2 * y.
  val expected2 = new DenseVector(Array(6.0, 7.0, 4.0, 9.0))
  val expected3 = new DenseVector(Array(10.0, 8.0, 6.0, 18.0))

  // Dense A, dense x, alpha = 1 is kept in a named result: the intercepts below reuse it.
  val firstResult = initialY.copy
  gemv(1.0, denseA, denseX, 2.0, firstResult)
  assert(firstResult ~== expected2 absTol 1e-15)

  // Remaining alpha = 1 combinations for the first pair of matrices.
  for ((a, x) <- Seq((sparseA, denseX), (denseA, sparseX), (sparseA, sparseX))) {
    val y = initialY.copy
    gemv(1.0, a, x, 2.0, y)
    assert(y ~== expected2 absTol 1e-15)
  }

  // alpha = 1 for the second pair of matrices.
  for (x <- Seq(denseX, sparseX); a <- Seq(denseA2, sparseA2)) {
    val y = initialY.copy
    gemv(1.0, a, x, 2.0, y)
    assert(y ~== expected2 absTol 1e-15)
  }

  // alpha = 2 for both pairs of matrices.
  for (group <- Seq(Seq(denseA, sparseA), Seq(denseA2, sparseA2));
       x <- Seq(denseX, sparseX);
       a <- group) {
    val y = initialY.copy
    gemv(2.0, a, x, 2.0, y)
    assert(y ~== expected3 absTol 1e-15)
  }

  withClue("columns of A don't match the rows of B") {
    intercept[Exception] {
      gemv(1.0, denseA.transpose, denseX, 2.0, firstResult)
    }
    intercept[Exception] {
      gemv(1.0, sparseA.transpose, denseX, 2.0, firstResult)
    }
    intercept[Exception] {
      gemv(1.0, denseA.transpose, sparseX, 2.0, firstResult)
    }
    intercept[Exception] {
      gemv(1.0, sparseA.transpose, sparseX, 2.0, firstResult)
    }
  }

  val dAT =
    new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
  val sAT =
    new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))

  val dATT = dAT.transpose
  val sATT = sAT.transpose

  // Transposing the 3 x 4 matrices must give the same products as the 4 x 3 originals.
  assert(dATT.multiply(denseX) ~== product absTol 1e-15)
  assert(sATT.multiply(denseX) ~== product absTol 1e-15)
  assert(dATT.multiply(sparseX) ~== product absTol 1e-15)
  assert(sATT.multiply(sparseX) ~== product absTol 1e-15)
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml.linalg
|
||||
|
||||
import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM}
|
||||
|
||||
import org.apache.spark.ml.SparkMLFunSuite
|
||||
|
||||
/**
 * Verifies conversions between ml matrices and their Breeze counterparts, including whether
 * the underlying value arrays are shared or copied.
 */
class BreezeMatrixConversionSuite extends SparkMLFunSuite {

  test("dense matrix to breeze") {
    val denseMat = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
    val converted = denseMat.toBreeze.asInstanceOf[BDM[Double]]
    assert(converted.rows === denseMat.numRows)
    assert(converted.cols === denseMat.numCols)
    // Reference equality: the Breeze matrix is expected to reuse the original value array.
    val backing = denseMat.asInstanceOf[DenseMatrix].values
    assert(converted.data.eq(backing), "should not copy data")
  }

  test("dense breeze matrix to matrix") {
    val source = new BDM[Double](3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))

    val direct = Matrices.fromBreeze(source).asInstanceOf[DenseMatrix]
    assert(direct.numRows === source.rows)
    assert(direct.numCols === source.cols)
    assert(direct.values.eq(source.data), "should not copy data")

    // transposed matrix
    val flipped = Matrices.fromBreeze(source.t).asInstanceOf[DenseMatrix]
    assert(flipped.numRows === source.cols)
    assert(flipped.numCols === source.rows)
    assert(flipped.values.eq(source.data), "should not copy data")
  }

  test("sparse matrix to breeze") {
    val nonZeros = Array(1.0, 2.0, 4.0, 5.0)
    val colPointers = Array(0, 2, 4)
    val rowIdx = Array(1, 2, 1, 2)
    val sparseMat = Matrices.sparse(3, 2, colPointers, rowIdx, nonZeros)

    val converted = sparseMat.toBreeze.asInstanceOf[BSM[Double]]
    assert(converted.rows === sparseMat.numRows)
    assert(converted.cols === sparseMat.numCols)
    val backing = sparseMat.asInstanceOf[SparseMatrix].values
    assert(converted.data.eq(backing), "should not copy data")
  }

  test("sparse breeze matrix to sparse matrix") {
    val nonZeros = Array(1.0, 2.0, 4.0, 5.0)
    val colPointers = Array(0, 2, 4)
    val rowIdx = Array(1, 2, 1, 2)
    val source = new BSM[Double](nonZeros, 3, 2, colPointers, rowIdx)

    val direct = Matrices.fromBreeze(source).asInstanceOf[SparseMatrix]
    assert(direct.numRows === source.rows)
    assert(direct.numCols === source.cols)
    assert(direct.values.eq(source.data), "should not copy data")

    // Unlike the dense case, the transposed sparse conversion is expected to copy the values.
    val flipped = Matrices.fromBreeze(source.t).asInstanceOf[SparseMatrix]
    assert(flipped.numRows === source.cols)
    assert(flipped.numCols === source.rows)
    assert(!flipped.values.eq(source.data), "has to copy data")
  }
}
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml.linalg
|
||||
|
||||
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}
|
||||
|
||||
import org.apache.spark.ml.SparkMLFunSuite
|
||||
|
||||
/**
 * Exercises conversions between ml vectors and Breeze vectors in both directions,
 * checking sizes, contents and (where asserted) that arrays are shared rather than copied.
 */
class BreezeVectorConversionSuite extends SparkMLFunSuite {

  // Dense fixture values.
  val arr = Array(0.1, 0.2, 0.3, 0.4)
  // Sparse fixture: logical size plus parallel index/value arrays.
  val n = 20
  val indices = Array(0, 3, 5, 10, 13)
  val values = Array(0.1, 0.5, 0.3, -0.8, -1.0)

  test("dense to breeze") {
    val denseVec = Vectors.dense(arr)
    val expected = new BDV[Double](arr)
    assert(denseVec.toBreeze === expected)
  }

  test("sparse to breeze") {
    val sparseVec = Vectors.sparse(n, indices, values)
    val expected = new BSV[Double](indices, values, n)
    assert(sparseVec.toBreeze === expected)
  }

  test("dense breeze to vector") {
    val source = new BDV[Double](arr)
    val converted = Vectors.fromBreeze(source).asInstanceOf[DenseVector]
    assert(converted.size === arr.length)
    assert(converted.values.eq(arr), "should not copy data")
  }

  test("sparse breeze to vector") {
    val source = new BSV[Double](indices, values, n)
    val converted = Vectors.fromBreeze(source).asInstanceOf[SparseVector]
    assert(converted.size === n)
    assert(converted.indices.eq(indices), "should not copy data")
    assert(converted.values.eq(values), "should not copy data")
  }

  test("sparse breeze with partially-used arrays to vector") {
    // Only the first `used` slots of the index/value arrays are active in the Breeze vector.
    val used = 3
    val source = new BSV[Double](indices, values, used, n)
    val converted = Vectors.fromBreeze(source).asInstanceOf[SparseVector]
    assert(converted.size === n)
    assert(converted.indices === indices.slice(0, used))
    assert(converted.values === values.slice(0, used))
  }
}
|
|
@ -0,0 +1,511 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml.linalg
|
||||
|
||||
import java.util.Random
|
||||
|
||||
import breeze.linalg.{CSCMatrix, Matrix => BM}
|
||||
import org.mockito.Mockito.when
|
||||
import org.scalatest.mock.MockitoSugar._
|
||||
import scala.collection.mutable.{Map => MutableMap}
|
||||
|
||||
import org.apache.spark.ml.SparkMLFunSuite
|
||||
import org.apache.spark.ml.util.TestingUtils._
|
||||
|
||||
class MatricesSuite extends SparkMLFunSuite {
|
||||
test("dense matrix construction") {
  val (rowCount, colCount) = (3, 2)
  val data = Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)
  val dense = Matrices.dense(rowCount, colCount, data).asInstanceOf[DenseMatrix]
  assert(dense.numRows === rowCount)
  assert(dense.numCols === colCount)
  // The factory is expected to wrap the caller's array rather than clone it.
  assert(dense.values.eq(data), "should not copy data")
}
|
||||
|
||||
test("dense matrix construction with wrong dimension") {
  // Three values cannot fill a 3 x 2 matrix, so construction must be rejected.
  val tooFewValues = Array(0.0, 1.0, 2.0)
  intercept[RuntimeException] {
    Matrices.dense(3, 2, tooFewValues)
  }
}
|
||||
|
||||
test("sparse matrix construction") {
  val rowCount = 3
  val colCount = 4
  val nonZeros = Array(1.0, 2.0, 4.0, 5.0)
  val colPointers = Array(0, 2, 2, 4, 4)
  val rowIdx = Array(1, 2, 1, 2)

  val csc = Matrices.sparse(rowCount, colCount, colPointers, rowIdx, nonZeros)
    .asInstanceOf[SparseMatrix]
  assert(csc.numRows === rowCount)
  assert(csc.numCols === colCount)
  // All three input arrays are expected to be used as-is, not cloned.
  assert(csc.values.eq(nonZeros), "should not copy data")
  assert(csc.colPtrs.eq(colPointers), "should not copy data")
  assert(csc.rowIndices.eq(rowIdx), "should not copy data")

  // Coordinate-format input: coordinates (2, 2) and (1, 2) appear twice and (0, 0) carries
  // an explicit zero, yet the result must match `csc` and store exactly four values.
  val cooEntries: Array[(Int, Int, Double)] = Array(
    (2, 2, 3.0), (1, 0, 1.0), (2, 0, 2.0), (1, 2, 2.0), (2, 2, 2.0), (1, 2, 2.0), (0, 0, 0.0))
  val fromCoo = SparseMatrix.fromCOO(rowCount, colCount, cooEntries)
  assert(csc.toBreeze === fromCoo.toBreeze)
  assert(fromCoo.values.length == 4)
}
|
||||
|
||||
test("sparse matrix construction with wrong number of elements") {
  // Column-pointer array has two entries for a two-column matrix.
  val shortColPtrs = Array(0, 1)
  intercept[IllegalArgumentException] {
    Matrices.sparse(3, 2, shortColPtrs, Array(1, 2, 1), Array(0.0, 1.0, 2.0))
  }

  // Two row indices are paired with three values.
  val shortRowIndices = Array(1, 2)
  intercept[IllegalArgumentException] {
    Matrices.sparse(3, 2, Array(0, 1, 2), shortRowIndices, Array(0.0, 1.0, 2.0))
  }
}
|
||||
|
||||
test("index in matrices incorrect input") {
  val sparse = Matrices.sparse(3, 2, Array(0, 2, 3), Array(1, 2, 1), Array(0.0, 1.0, 2.0))
  val dense = Matrices.dense(3, 2, Array(0.0, 2.3, 1.4, 3.2, 1.0, 9.1))
  // Row too large, column too large, negative row, negative column (for a 3 x 2 matrix).
  val outOfRange = Seq((4, 1), (1, 4), (-1, 2), (1, -2))
  for (mat <- Seq(sparse, dense); (i, j) <- outOfRange) {
    intercept[IllegalArgumentException] { mat.index(i, j) }
  }
}
|
||||
|
||||
test("equals") {
  val denseA = Matrices.dense(2, 2, Array(0.0, 1.0, 2.0, 3.0))
  assert(denseA === denseA)
  assert(denseA !== denseA.transpose)

  // denseB lists the same numbers with the two off-diagonal entries swapped,
  // so its transpose must compare equal to denseA.
  val denseB = Matrices.dense(2, 2, Array(0.0, 2.0, 1.0, 3.0))
  assert(denseA === denseB.transpose)

  // Sparse views of the same data must compare equal to both sparse and dense forms.
  val sparseA = denseA.asInstanceOf[DenseMatrix].toSparse
  assert(sparseA === sparseA)
  assert(sparseA === denseA)
  assert(sparseA !== sparseA.transpose)

  val sparseB = denseB.asInstanceOf[DenseMatrix].toSparse
  assert(sparseA === sparseB.transpose)
  assert(sparseA === denseB.transpose)
}
|
||||
|
||||
test("matrix copies are deep copies") {
  val rowCount = 3
  val colCount = 2

  // Dense case: the arrays obtained from the original and from the copy must be distinct.
  val denseOriginal = Matrices.dense(rowCount, colCount, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
  val denseDuplicate = denseOriginal.copy
  assert(!denseOriginal.toArray.eq(denseDuplicate.toArray))

  // Sparse case: same check on a CSC matrix.
  val nonZeros = Array(1.0, 2.0, 4.0, 5.0)
  val colPointers = Array(0, 2, 4)
  val rowIdx = Array(1, 2, 1, 2)
  val sparseOriginal = Matrices.sparse(rowCount, colCount, colPointers, rowIdx, nonZeros)
  val sparseDuplicate = sparseOriginal.copy
  assert(!sparseOriginal.toArray.eq(sparseDuplicate.toArray))
}
|
||||
|
||||
test("matrix indexing and updating") {
  val rowCount = 3
  val colCount = 2

  // ---- dense ----
  val denseValues = Array(0.0, 1.0, 2.0, 3.0, 4.0, 0.0)
  val dense = new DenseMatrix(rowCount, colCount, denseValues)

  // Element (0, 1) is the fourth stored value (linear index 3).
  assert(dense(0, 1) === 3.0)
  assert(dense(0, 1) === dense.values(3))
  assert(dense(0, 1) === dense(3))
  assert(dense(0, 0) === 0.0)

  dense.update(0, 0, 10.0)
  assert(dense(0, 0) === 10.0)
  assert(dense.values(0) === 10.0)

  // ---- sparse ----
  val storedValues = Array(1.0, 2.0, 3.0, 4.0)
  val colPointers = Array(0, 2, 4)
  val rowIdx = Array(1, 2, 0, 1)
  val sparse = new SparseMatrix(rowCount, colCount, colPointers, rowIdx, storedValues)

  assert(sparse(0, 1) === 3.0)
  assert(sparse(0, 1) === sparse.values(2))
  assert(sparse(0, 0) === 0.0)

  // Positions (0, 0) and (2, 1) are not stored entries, so updating them must fail.
  intercept[NoSuchElementException] {
    sparse.update(0, 0, 10.0)
  }
  intercept[NoSuchElementException] {
    sparse.update(2, 1, 10.0)
  }

  // Position (0, 1) is stored (third value), so it can be updated in place.
  sparse.update(0, 1, 10.0)
  assert(sparse(0, 1) === 10.0)
  assert(sparse.values(2) === 10.0)
}
|
||||
|
||||
test("toSparse, toDense") {
  val rowCount = 3
  val colCount = 2
  // The same 3 x 2 matrix expressed in CSC form and as a full column-major array.
  val nonZeros = Array(1.0, 2.0, 4.0, 5.0)
  val colPointers = Array(0, 2, 4)
  val rowIdx = Array(0, 1, 1, 2)
  val fullValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)

  val sparseSource = new SparseMatrix(rowCount, colCount, colPointers, rowIdx, nonZeros)
  val denseSource = new DenseMatrix(rowCount, colCount, fullValues)

  val sparseFromDense = denseSource.toSparse
  val denseFromSparse = sparseSource.toDense

  assert(sparseSource.toBreeze === sparseFromDense.toBreeze)
  assert(denseSource.toBreeze === denseFromSparse.toBreeze)
}
|
||||
|
||||
test("map, update") {
  val rowCount = 3
  val colCount = 2
  val nonZeros = Array(1.0, 2.0, 4.0, 5.0)
  val fullValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
  val colPointers = Array(0, 2, 4)
  val rowIdx = Array(0, 1, 1, 2)

  val sparse = new SparseMatrix(rowCount, colCount, colPointers, rowIdx, nonZeros)
  val dense = new DenseMatrix(rowCount, colCount, fullValues)

  // `map` results are captured first, then the same function is applied through `update`.
  val denseMapped = dense.map(_ * 2)
  val sparseMapped = sparse.map(_ * 2)
  dense.update(_ * 2)
  sparse.update(_ * 2)

  // After the update, the originals must match the mapped results.
  assert(sparse.toArray === sparseMapped.toArray)
  assert(dense.toArray === denseMapped.toArray)
}
|
||||
|
||||
test("transpose") {
  val denseSrc =
    new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
  val sparseSrc =
    new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))

  val denseT = denseSrc.transpose.asInstanceOf[DenseMatrix]
  val sparseT = sparseSrc.transpose.asInstanceOf[SparseMatrix]

  // Hand-written 3 x 4 transposes used as the reference.
  val denseTExpected =
    new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
  val sparseTExpected =
    new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))

  assert(denseT.toBreeze === denseTExpected.toBreeze)
  assert(sparseT.toBreeze === sparseTExpected.toBreeze)

  // Element (i, j) of the source must equal element (j, i) of the transpose.
  assert(denseSrc(1, 0) === denseT(0, 1))
  assert(denseSrc(2, 1) === denseT(1, 2))
  assert(sparseSrc(1, 0) === sparseT(0, 1))
  assert(sparseSrc(2, 1) === sparseT(1, 2))

  assert(!denseSrc.toArray.eq(denseT.toArray), "has to have a new array")
  // Transposing twice must hand back the original value array.
  val roundTrip = denseT.transpose.asInstanceOf[DenseMatrix]
  assert(denseSrc.values.eq(roundTrip.values), "should not copy array")

  assert(denseT.toSparse.toBreeze === sparseTExpected.toBreeze)
  assert(sparseT.toDense.toBreeze === denseTExpected.toBreeze)
}
|
||||
|
||||
test("foreachActive") {
  val rowCount = 3
  val colCount = 2
  val nonZeros = Array(1.0, 2.0, 4.0, 5.0)
  val fullValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
  val colPointers = Array(0, 2, 4)
  val rowIdx = Array(0, 1, 1, 2)

  val sparse = new SparseMatrix(rowCount, colCount, colPointers, rowIdx, nonZeros)
  val dense = new DenseMatrix(rowCount, colCount, fullValues)

  // Dense: every cell is visited, zeros included.
  val denseSeen = MutableMap[(Int, Int), Double]()
  dense.foreachActive { (i, j, v) =>
    denseSeen.put((i, j), v)
  }
  assert(denseSeen.size === 6)
  val denseExpected = Seq(
    ((0, 0), 1.0), ((1, 0), 2.0), ((2, 0), 0.0), ((0, 1), 0.0), ((1, 1), 4.0), ((2, 1), 5.0))
  for ((cell, expected) <- denseExpected) {
    assert(denseSeen(cell) === expected)
  }

  // Sparse: only the four stored entries are visited.
  val sparseSeen = MutableMap[(Int, Int), Double]()
  sparse.foreachActive { (i, j, v) =>
    sparseSeen.put((i, j), v)
  }
  assert(sparseSeen.size === 4)
  val sparseExpected = Seq(((0, 0), 1.0), ((1, 0), 2.0), ((1, 1), 4.0), ((2, 1), 5.0))
  for ((cell, expected) <- sparseExpected) {
    assert(sparseSeen(cell) === expected)
  }
}
|
||||
|
||||
test("horzcat, vertcat, eye, speye") {
  val rowCount = 3
  val colCount = 2
  val nonZeros = Array(1.0, 2.0, 4.0, 5.0)
  val fullValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
  val colPointers = Array(0, 2, 4)
  val rowIdx = Array(0, 1, 1, 2)
  // transposed versions
  val fullValuesT = Array(1.0, 0.0, 2.0, 4.0, 0.0, 5.0)
  val colPointersT = Array(0, 1, 3, 4)
  val rowIdxT = Array(0, 0, 1, 1)

  val sparseBase = new SparseMatrix(rowCount, colCount, colPointers, rowIdx, nonZeros)
  val denseBase = new DenseMatrix(rowCount, colCount, fullValues)
  val sparseBaseT = new SparseMatrix(colCount, rowCount, colPointersT, rowIdxT, nonZeros)
  val denseBaseT = new DenseMatrix(colCount, rowCount, fullValuesT)

  // should equal sparseBase & denseBase respectively
  val sparseBaseTT = sparseBaseT.transpose
  val denseBaseTT = denseBaseT.transpose

  val denseEye3 = Matrices.eye(3)
  val sparseEye3 = Matrices.speye(3)
  val denseEye2 = Matrices.eye(2)
  val sparseEye2 = Matrices.speye(2)

  // ---- horizontal concatenation: every sparse/dense pairing plus the empty input ----
  val horzSpSp = Matrices.horzcat(Array(sparseBase, sparseEye3))
  val horzSpDe = Matrices.horzcat(Array(sparseBase, denseEye3))
  val horzDeSp = Matrices.horzcat(Array(denseBase, sparseEye3))
  val horzDeDe = Matrices.horzcat(Array(denseBase, denseEye3))
  val horzEmpty = Matrices.horzcat(Array[Matrix]())

  val horzResults = Seq(horzDeDe, horzSpDe, horzDeSp, horzSpSp)
  horzResults.foreach(h => assert(h.numRows === 3))
  horzResults.foreach(h => assert(h.numCols === 5))
  assert(horzEmpty.numRows === 0)
  assert(horzEmpty.numCols === 0)
  assert(horzEmpty.toArray.length === 0)

  assert(horzDeDe ~== horzSpDe.asInstanceOf[SparseMatrix].toDense absTol 1e-15)
  assert(horzSpDe ~== horzDeSp absTol 1e-15)

  // Spot checks shared by the all-sparse and all-dense results: (row, col) -> value.
  val horzCells = Seq(
    ((0, 0), 1.0), ((2, 1), 5.0), ((0, 2), 1.0), ((1, 2), 0.0),
    ((1, 3), 1.0), ((2, 4), 1.0), ((1, 4), 0.0))
  for (h <- Seq(horzSpSp, horzDeDe); ((i, j), expected) <- horzCells) {
    assert(h(i, j) === expected)
  }

  // containing transposed matrices
  val horzSpSpT = Matrices.horzcat(Array(sparseBaseTT, sparseEye3))
  val horzSpDeT = Matrices.horzcat(Array(sparseBaseTT, denseEye3))
  val horzDeSpT = Matrices.horzcat(Array(denseBaseTT, sparseEye3))
  val horzDeDeT = Matrices.horzcat(Array(denseBaseTT, denseEye3))

  assert(horzDeDeT ~== horzDeDe absTol 1e-15)
  assert(horzSpSpT ~== horzSpSp absTol 1e-15)
  assert(horzSpDeT ~== horzSpDe absTol 1e-15)
  assert(horzDeSpT ~== horzDeSp absTol 1e-15)

  // A 3-row matrix cannot be placed beside a 2-row identity.
  intercept[IllegalArgumentException] {
    Matrices.horzcat(Array(sparseBase, sparseEye2))
  }
  intercept[IllegalArgumentException] {
    Matrices.horzcat(Array(denseBase, sparseEye2))
  }

  // ---- vertical concatenation ----
  val vertSpSp = Matrices.vertcat(Array(sparseBase, sparseEye2))
  val vertDeDe = Matrices.vertcat(Array(denseBase, denseEye2))
  val vertSpDe = Matrices.vertcat(Array(sparseBase, denseEye2))
  val vertDeSp = Matrices.vertcat(Array(denseBase, sparseEye2))
  val vertEmpty = Matrices.vertcat(Array[Matrix]())

  val vertResults = Seq(vertDeDe, vertSpDe, vertDeSp, vertSpSp)
  vertResults.foreach(v => assert(v.numRows === 5))
  vertResults.foreach(v => assert(v.numCols === 2))
  assert(vertEmpty.numRows === 0)
  assert(vertEmpty.numCols === 0)
  assert(vertEmpty.toArray.length === 0)

  assert(vertDeDe ~== vertSpDe.asInstanceOf[SparseMatrix].toDense absTol 1e-15)
  assert(vertSpDe ~== vertDeSp absTol 1e-15)

  val vertCells = Seq(
    ((0, 0), 1.0), ((2, 1), 5.0), ((3, 0), 1.0), ((3, 1), 0.0), ((4, 1), 1.0))
  for (v <- Seq(vertSpSp, vertDeDe); ((i, j), expected) <- vertCells) {
    assert(v(i, j) === expected)
  }

  // containing transposed matrices
  val vertSpSpT = Matrices.vertcat(Array(sparseBaseTT, sparseEye2))
  val vertDeDeT = Matrices.vertcat(Array(denseBaseTT, denseEye2))
  val vertSpDeT = Matrices.vertcat(Array(sparseBaseTT, denseEye2))
  val vertDeSpT = Matrices.vertcat(Array(denseBaseTT, sparseEye2))

  assert(vertDeDeT ~== vertDeDe absTol 1e-15)
  assert(vertSpSpT ~== vertSpSp absTol 1e-15)
  assert(vertSpDeT ~== vertSpDe absTol 1e-15)
  assert(vertDeSpT ~== vertDeSp absTol 1e-15)

  // A 2-column matrix cannot be stacked on a 3-column identity.
  intercept[IllegalArgumentException] {
    Matrices.vertcat(Array(sparseBase, sparseEye3))
  }
  intercept[IllegalArgumentException] {
    Matrices.vertcat(Array(denseBase, sparseEye3))
  }
}
|
||||
|
||||
test("zeros") {
  // A 2 x 3 zero matrix: check the shape, then that every stored value is 0.0.
  val allZero = Matrices.zeros(2, 3).asInstanceOf[DenseMatrix]
  assert(allZero.numRows === 2)
  assert(allZero.numCols === 3)
  assert(allZero.values.forall(v => v == 0.0))
}
|
||||
|
||||
test("ones") {
  // A 2 x 3 matrix of ones: check the shape, then that every stored value is 1.0.
  val allOne = Matrices.ones(2, 3).asInstanceOf[DenseMatrix]
  assert(allOne.numRows === 2)
  assert(allOne.numCols === 3)
  assert(allOne.values.forall(v => v == 1.0))
}
|
||||
|
||||
test("eye") {
  // 2 x 2 dense identity. Both dimensions are verified; the earlier version asserted
  // numCols twice and never looked at numRows.
  val identity = Matrices.eye(2).asInstanceOf[DenseMatrix]
  assert(identity.numRows === 2)
  assert(identity.numCols === 2)
  assert(identity.values.toSeq === Seq(1.0, 0.0, 0.0, 1.0))
}
|
||||
|
||||
test("rand") {
  // Stub the generator so the values drawn by Matrices.rand are known in advance.
  val stubbedRng = mock[Random]
  when(stubbedRng.nextDouble()).thenReturn(1.0, 2.0, 3.0, 4.0)

  val drawn = Matrices.rand(2, 2, stubbedRng).asInstanceOf[DenseMatrix]
  assert(drawn.numRows === 2)
  assert(drawn.numCols === 2)
  assert(drawn.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
}
|
||||
|
||||
test("randn") {
  // Stub the Gaussian draws so the resulting matrix contents are predictable.
  val stubbedRng = mock[Random]
  when(stubbedRng.nextGaussian()).thenReturn(1.0, 2.0, 3.0, 4.0)

  val drawn = Matrices.randn(2, 2, stubbedRng).asInstanceOf[DenseMatrix]
  assert(drawn.numRows === 2)
  assert(drawn.numCols === 2)
  assert(drawn.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
}
|
||||
|
||||
test("diag") {
  // The vector entries must land on the diagonal of a square matrix, zeros elsewhere.
  val diagonal = Vectors.dense(1.0, 2.0)
  val diagMat = Matrices.diag(diagonal).asInstanceOf[DenseMatrix]
  assert(diagMat.numRows === 2)
  assert(diagMat.numCols === 2)
  assert(diagMat.values.toSeq === Seq(1.0, 0.0, 0.0, 2.0))
}
|
||||
|
||||
test("sprand") {
  // Both the integer and the double draws are stubbed so the generated pattern is fixed.
  val stubbedRng = mock[Random]
  when(stubbedRng.nextInt(4)).thenReturn(0, 1, 1, 3, 2, 2, 0, 1, 3, 0)
  when(stubbedRng.nextDouble()).thenReturn(1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)

  // 4 x 4 at density 0.25.
  val quarterDense = SparseMatrix.sprand(4, 4, 0.25, stubbedRng)
  assert(quarterDense.numRows === 4)
  assert(quarterDense.numCols === 4)
  assert(quarterDense.rowIndices.toSeq === Seq(3, 0, 2, 1))
  assert(quarterDense.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))

  // 2 x 3 at density 1.0: every cell is stored, column by column.
  val fullyDense = SparseMatrix.sprand(2, 3, 1.0, stubbedRng)
  assert(fullyDense.rowIndices.toSeq === Seq(0, 1, 0, 1, 0, 1))
  assert(fullyDense.colPtrs.toSeq === Seq(0, 2, 4, 6))
}
|
||||
|
||||
test("sprandn") {
  // Integer draws and Gaussian draws are both stubbed so the output is fixed.
  val stubbedRng = mock[Random]
  when(stubbedRng.nextInt(4)).thenReturn(0, 1, 1, 3, 2, 2, 0, 1, 3, 0)
  when(stubbedRng.nextGaussian()).thenReturn(1.0, 2.0, 3.0, 4.0)

  val generated = SparseMatrix.sprandn(4, 4, 0.25, stubbedRng)
  assert(generated.numRows === 4)
  assert(generated.numCols === 4)
  assert(generated.rowIndices.toSeq === Seq(3, 0, 2, 1))
  assert(generated.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
}
|
||||
|
||||
test("toString") {
  // The first group of calls carries no assertions: it only checks that degenerate or
  // extreme limits do not make toString throw.
  val empty = Matrices.ones(0, 0)
  empty.toString(0, 0)

  val mat = Matrices.rand(5, 10, new Random())
  mat.toString(-1, -5)
  mat.toString(0, 0)
  mat.toString(Int.MinValue, Int.MinValue)
  mat.toString(Int.MaxValue, Int.MaxValue)

  // Split on line breaks explicitly instead of calling `.lines` on the String: on JDK 11+
  // that name resolves to java.lang.String#lines (a Java Stream), which breaks the
  // collection-style calls below. The pattern accepts both "\n" and "\r\n".
  val narrowLines = mat.toString(6, 50).split("\\r?\\n")
  assert(narrowLines.length == 5 && narrowLines.forall(_.length <= 50))

  val wideLines = mat.toString(5, 100).split("\\r?\\n")
  assert(wideLines.length == 5 && wideLines.forall(_.length <= 100))
}
|
||||
|
||||
test("numNonzeros and numActives") {
  // Dense: six stored cells, three of which are non-zero.
  val dense = Matrices.dense(3, 2, Array(0.0, 0.0, -1.0, 1.0, 0.0, 1.0))
  assert(dense.numNonzeros === 3)
  assert(dense.numActives === 6)

  // Sparse with explicit zeros: three stored entries, only one of them non-zero.
  val sparse = Matrices.sparse(3, 2, Array(0, 2, 3), Array(0, 2, 1), Array(0.0, -1.2, 0.0))
  assert(sparse.numNonzeros === 1)
  assert(sparse.numActives === 3)
}
|
||||
|
||||
test("fromBreeze with sparse matrix") {
  // colPtr.last does NOT always equal values.length in a Breeze CSCMatrix, so an
  // invocation of compact() may be necessary. Refer to SPARK-11507.
  // The test has no assertion: it passes when the conversion of the sum does not throw.
  val left: BM[Double] = new CSCMatrix[Double](
    Array(1.0, 1.0, 1.0), 3, 3, Array(0, 1, 2, 3), Array(0, 1, 2))
  val right: BM[Double] = new CSCMatrix[Double](
    Array(1.0, 2.0, 2.0, 4.0), 3, 3, Array(0, 0, 2, 4), Array(1, 2, 1, 2))
  val total = left + right
  Matrices.fromBreeze(total)
}
|
||||
|
||||
test("row/col iterator") {
  val dense = new DenseMatrix(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 0.0))
  val sparse = dense.toSparse
  // Expected rows and columns of the 3 x 2 matrix above.
  val expectedRows =
    Seq(Vectors.dense(0.0, 3.0), Vectors.dense(1.0, 4.0), Vectors.dense(2.0, 0.0))
  val expectedCols = Seq(Vectors.dense(0.0, 1.0, 2.0), Vectors.dense(3.0, 4.0, 0.0))

  for (mat <- Seq(dense, sparse)) {
    assert(mat.rowIter.toSeq === expectedRows)
    assert(mat.colIter.toSeq === expectedCols)
    // Transposing swaps the roles of rows and columns.
    val flipped = mat.transpose
    assert(flipped.rowIter.toSeq === expectedCols)
    assert(flipped.colIter.toSeq === expectedRows)
  }
}
|
||||
}
|
|
@ -0,0 +1,358 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml.linalg
|
||||
|
||||
import scala.util.Random
|
||||
|
||||
import breeze.linalg.{squaredDistance => breezeSquaredDistance, DenseMatrix => BDM}
|
||||
import org.json4s.jackson.JsonMethods.{parse => parseJson}
|
||||
|
||||
import org.apache.spark.ml.SparkMLFunSuite
|
||||
import org.apache.spark.ml.util.TestingUtils._
|
||||
|
||||
class VectorsSuite extends SparkMLFunSuite {
|
||||
|
||||
// Shared fixtures: `arr` is the dense form; `n`, `indices` and `values` give the sparse
// form of the same vector (index 1 holds the only zero).
val arr: Array[Double] = Array(0.1, 0.0, 0.3, 0.4)
val n: Int = 4
val indices: Array[Int] = Array(0, 2, 3)
val values: Array[Double] = Array(0.1, 0.3, 0.4)
|
||||
|
||||
test("dense vector construction with varargs") {
  // Pass the elements one by one so the varargs overload of Vectors.dense is the one under
  // test; the earlier version passed the array and duplicated the array-based test.
  val vec = Vectors.dense(arr.head, arr.tail: _*).asInstanceOf[DenseVector]
  assert(vec.size === arr.length)
  // The varargs path cannot share `arr`, so compare contents rather than references.
  assert(vec.values === arr)
}
|
||||
|
||||
test("dense vector construction from a double array") {
  val denseVec = Vectors.dense(arr).asInstanceOf[DenseVector]
  assert(denseVec.size === arr.length)
  // Reference equality: the vector is expected to wrap the given array directly.
  assert(denseVec.values.eq(arr))
}
|
||||
|
||||
test("sparse vector construction") {
  val sparseVec = Vectors.sparse(n, indices, values).asInstanceOf[SparseVector]
  assert(sparseVec.size === n)
  // Both input arrays are expected to be held by reference.
  assert(sparseVec.indices.eq(indices))
  assert(sparseVec.values.eq(values))
}
|
||||
|
||||
test("sparse vector construction with unordered elements") {
  // Feed the (index, value) pairs in descending index order; the result must come back sorted.
  val reversedPairs = indices.zip(values).reverse
  val sparseVec = Vectors.sparse(n, reversedPairs).asInstanceOf[SparseVector]
  assert(sparseVec.size === n)
  assert(sparseVec.indices === indices)
  assert(sparseVec.values === values)
}
|
||||
|
||||
test("sparse vector construction with mismatched indices/values array") {
  val threeIndices = Array(1, 2, 3)
  // More values than indices.
  intercept[IllegalArgumentException] {
    Vectors.sparse(4, threeIndices, Array(3.0, 5.0, 7.0, 9.0))
  }
  // Fewer values than indices.
  intercept[IllegalArgumentException] {
    Vectors.sparse(4, threeIndices, Array(3.0, 5.0))
  }
}
|
||||
|
||||
test("sparse vector construction with too many indices vs size") {
  // Four entries are supplied for a vector declared with size 3.
  val declaredSize = 3
  intercept[IllegalArgumentException] {
    Vectors.sparse(declaredSize, Array(1, 2, 3, 4), Array(3.0, 5.0, 7.0, 9.0))
  }
}
|
||||
|
||||
test("dense to array") {
  val denseVec = Vectors.dense(arr).asInstanceOf[DenseVector]
  // toArray on a dense vector is expected to return the backing array itself.
  val exported = denseVec.toArray
  assert(exported.eq(arr))
}
|
||||
|
||||
test("dense argmax") {
  // Empty vector: the expected result is -1.
  val emptyVec = Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector]
  assert(emptyVec.argmax === -1)

  // Shared fixture: the largest value (0.4) sits at the last index.
  val fixtureVec = Vectors.dense(arr).asInstanceOf[DenseVector]
  assert(fixtureVec.argmax === 3)

  // Mixed signs: the only positive entry is at index 3.
  val mixedVec = Vectors.dense(Array(-1.0, 0.0, -2.0, 1.0)).asInstanceOf[DenseVector]
  assert(mixedVec.argmax === 3)
}
|
||||
|
||||
test("sparse to array") {
  val sparseVec = Vectors.sparse(n, indices, values).asInstanceOf[SparseVector]
  // Expanding the sparse fixture must reproduce the dense fixture element by element.
  val expanded = sparseVec.toArray
  assert(expanded === arr)
}
|
||||
|
||||
test("sparse argmax") {
  // Empty vector: the expected result is -1.
  val emptyVec =
    Vectors.sparse(0, Array.empty[Int], Array.empty[Double]).asInstanceOf[SparseVector]
  assert(emptyVec.argmax === -1)

  val fixtureVec = Vectors.sparse(n, indices, values).asInstanceOf[SparseVector]
  assert(fixtureVec.argmax === 3)

  val positiveFirst = Vectors.sparse(5, Array(2, 3, 4), Array(1.0, 0.0, -0.7))
  assert(positiveFirst.argmax === 2)

  // check for case that sparse vector is created with
  // only negative values {0.0, 0.0,-1.0, -0.7, 0.0}
  val onlyNegatives = Vectors.sparse(5, Array(2, 3), Array(-1.0, -0.7))
  assert(onlyNegatives.argmax === 0)

  // The unstored position at index 1 comes before the explicit zero at index 10.
  val implicitZeroFirst = Vectors.sparse(11, Array(0, 3, 10), Array(-1.0, -0.7, 0.0))
  assert(implicitZeroFirst.argmax === 1)

  // The explicit zero at index 2 comes before any unstored position.
  val explicitZeroFirst = Vectors.sparse(11, Array(0, 1, 2), Array(-1.0, -0.7, 0.0))
  assert(explicitZeroFirst.argmax === 2)

  // Explicit zero at index 1 sits between two negative entries.
  val explicitZeroMiddle = Vectors.sparse(5, Array(0, 1, 3), Array(-1.0, 0.0, -0.7))
  assert(explicitZeroMiddle.argmax === 1)

  // The unstored position at index 0 comes before the explicit zero at index 1.
  val leadingImplicitZero = Vectors.sparse(5, Array(1, 2), Array(0.0, -1.0))
  assert(leadingImplicitZero.argmax === 0)
}
|
||||
|
||||
test("vector equals") {
  // Four vectors with identical contents, each backed by its own cloned arrays.
  val denseA = Vectors.dense(arr.clone())
  val denseB = Vectors.dense(arr.clone())
  val sparseA = Vectors.sparse(n, indices.clone(), values.clone())
  val sparseB = Vectors.sparse(n, indices.clone(), values.clone())

  val equalVectors = Seq(denseA, denseB, sparseA, sparseB)

  // Every pairing (including self-pairs) must be equal and share a hash code.
  for (lhs <- equalVectors; rhs <- equalVectors) {
    assert(lhs === rhs)
    assert(lhs.## === rhs.##)
  }

  // Differs from the fixture at index 1 only.
  val different = Vectors.dense(0.1, 0.2, 0.3, 0.4)

  for (candidate <- equalVectors) {
    assert(candidate != different)
    assert(candidate.## != different.##)
  }
}
|
||||
|
||||
test("vectors equals with explicit 0") {
  // Same logical vector three ways: dense, sparse without zeros, sparse with stored zeros.
  val dense = Vectors.dense(Array(0.0, 0.9, 0.0, 0.8, 0.0))
  val sparseCompact = Vectors.sparse(5, Array(1, 3), Array(0.9, 0.8))
  val sparseExplicit =
    Vectors.sparse(5, Array(0, 1, 2, 3, 4), Array(0.0, 0.9, 0.0, 0.8, 0.0))

  val equalVectors = Seq(dense, sparseCompact, sparseExplicit)
  for (lhs <- equalVectors; rhs <- equalVectors) {
    assert(lhs === rhs)
    assert(lhs.## === rhs.##)
  }

  // Differs at index 3 (0.2 instead of 0.8) and also stores an explicit zero.
  val different = Vectors.sparse(5, Array(0, 1, 3), Array(0.0, 0.9, 0.2))
  for (candidate <- equalVectors) {
    assert(candidate != different)
    assert(candidate.## != different.##)
  }
}
|
||||
|
||||
test("indexing dense vectors") {
  // Check the first and the last position.
  val denseVec = Vectors.dense(1.0, 2.0, 3.0, 4.0)
  assert(denseVec(0) === 1.0)
  assert(denseVec(3) === 4.0)
}
|
||||
|
||||
test("indexing sparse vectors") {
  val storedIdx = Array(0, 2, 4, 6)
  val storedVals = Array(1.0, 2.0, 3.0, 4.0)

  // Size 7: the last stored index is also the last position.
  val sizeSeven = Vectors.sparse(7, storedIdx, storedVals)
  assert(sizeSeven(0) === 1.0)
  assert(sizeSeven(1) === 0.0)
  assert(sizeSeven(2) === 2.0)
  assert(sizeSeven(3) === 0.0)
  assert(sizeSeven(6) === 4.0)

  // Size 8: one unstored position follows the last stored index.
  val sizeEight = Vectors.sparse(8, Array(0, 2, 4, 6), Array(1.0, 2.0, 3.0, 4.0))
  assert(sizeEight(6) === 4.0)
  assert(sizeEight(7) === 0.0)
}
|
||||
|
||||
test("zeros") {
  // A zero vector of size 3 must equal the dense vector written out by hand.
  val expected = Vectors.dense(0.0, 0.0, 0.0)
  assert(Vectors.zeros(3) === expected)
}
|
||||
|
||||
test("Vector.copy") {
  // Sparse: the copy must keep the concrete type, hold equal contents, and own fresh arrays.
  val svOrig = Vectors.sparse(4, Array(0, 2), Array(1.0, 2.0))
  val svDup = svOrig.copy
  (svOrig, svDup) match {
    case (orig: SparseVector, dup: SparseVector) =>
      assert(orig.size === dup.size)
      assert(orig.indices === dup.indices)
      assert(orig.values === dup.values)
      assert(!orig.indices.eq(dup.indices))
      assert(!orig.values.eq(dup.values))
    case _ =>
      throw new RuntimeException(s"copy returned ${svDup.getClass} on ${svOrig.getClass}.")
  }

  // Dense: same expectations for the value array.
  val dvOrig = Vectors.dense(1.0, 0.0, 2.0)
  val dvDup = dvOrig.copy
  (dvOrig, dvDup) match {
    case (orig: DenseVector, dup: DenseVector) =>
      assert(orig.size === dup.size)
      assert(orig.values === dup.values)
      assert(!orig.values.eq(dup.values))
    case _ =>
      throw new RuntimeException(s"copy returned ${dvDup.getClass} on ${dvOrig.getClass}.")
  }
}
|
||||
|
||||
test("fromBreeze") {
  // Convert the first column of a 10 x 10 Breeze matrix; its length must equal the row count.
  val grid = BDM.zeros[Double](10, 10)
  val firstColumn = grid(::, 0)
  val converted = Vectors.fromBreeze(firstColumn)
  assert(converted.size === grid.rows)
}
|
||||
|
||||
test("sqdist") {
  val rng = new Random()
  // Vector sizes 1, 101, 201, ..., 901.
  for (dim <- 1 until 1000 by 100) {
    val activeCount = rng.nextInt(dim)

    // Two random sparse vectors with the same number of stored entries.
    val idxA = rng.shuffle(0 to dim - 1).slice(0, activeCount).sorted.toArray
    val valsA = Array.fill(activeCount)(rng.nextDouble)
    val sparseA = Vectors.sparse(dim, idxA, valsA)

    val idxB = rng.shuffle(0 to dim - 1).slice(0, activeCount).sorted.toArray
    val valsB = Array.fill(activeCount)(rng.nextDouble)
    val sparseB = Vectors.sparse(dim, idxB, valsB)

    val denseA = Vectors.dense(sparseA.toArray)
    val denseB = Vectors.dense(sparseB.toArray)

    // Breeze supplies the reference value.
    val reference = breezeSquaredDistance(sparseA.toBreeze, sparseB.toBreeze)

    // SparseVector vs. SparseVector
    assert(Vectors.sqdist(sparseA, sparseB) ~== reference relTol 1E-8)
    // DenseVector vs. SparseVector
    assert(Vectors.sqdist(denseA, sparseB) ~== reference relTol 1E-8)
    // DenseVector vs. DenseVector
    assert(Vectors.sqdist(denseA, denseB) ~== reference relTol 1E-8)
  }
}
|
||||
|
||||
test("foreachActive") {
  val dense = Vectors.dense(0.0, 1.2, 3.1, 0.0)
  val sparse = Vectors.sparse(4, Seq((1, 1.2), (2, 3.1), (3, 0.0)))

  // Dense vector: all four positions are visited, including the ones holding 0.0.
  val denseSeen = scala.collection.mutable.Map[Int, Double]()
  dense.foreachActive { (i, v) => denseSeen(i) = v }
  assert(denseSeen.size === 4)
  Seq(0 -> 0.0, 1 -> 1.2, 2 -> 3.1, 3 -> 0.0).foreach { case (i, v) =>
    assert(denseSeen.get(i) === Some(v))
  }

  // Sparse vector: only the three stored entries are visited; the explicitly stored 0.0
  // at index 3 is among them.
  val sparseSeen = scala.collection.mutable.Map[Int, Double]()
  sparse.foreachActive { (i, v) => sparseSeen(i) = v }
  assert(sparseSeen.size === 3)
  Seq(1 -> 1.2, 2 -> 3.1, 3 -> 0.0).foreach { case (i, v) =>
    assert(sparseSeen.get(i) === Some(v))
  }
}
|
||||
|
||||
test("vector p-norm") {
  val dense = Vectors.dense(0.0, -1.2, 3.1, 0.0, -4.5, 1.9)
  val sparse = Vectors.sparse(6, Seq((1, -1.2), (2, 3.1), (3, 0.0), (4, -4.5), (5, 1.9)))

  // Reference implementations computed directly on the raw element arrays.
  def l1Norm(xs: Array[Double]): Double =
    xs.foldLeft(0.0)((acc, e) => acc + math.abs(e))

  def l2Norm(xs: Array[Double]): Double =
    math.sqrt(xs.foldLeft(0.0)((acc, e) => acc + e * e))

  def maxNorm(xs: Array[Double]): Double =
    xs.map(math.abs).max

  def pNorm(xs: Array[Double], p: Double): Double =
    math.pow(xs.foldLeft(0.0)((acc, e) => acc + math.pow(math.abs(e), p)), 1.0 / p)

  // p = 1
  assert(Vectors.norm(dense, 1.0) ~== l1Norm(dense.toArray) relTol 1E-8)
  assert(Vectors.norm(sparse, 1.0) ~== l1Norm(sparse.toArray) relTol 1E-8)

  // p = 2
  assert(Vectors.norm(dense, 2.0) ~== l2Norm(dense.toArray) relTol 1E-8)
  assert(Vectors.norm(sparse, 2.0) ~== l2Norm(sparse.toArray) relTol 1E-8)

  // p = infinity
  assert(Vectors.norm(dense, Double.PositiveInfinity) ~== maxNorm(dense.toArray) relTol 1E-8)
  assert(Vectors.norm(sparse, Double.PositiveInfinity) ~== maxNorm(sparse.toArray) relTol 1E-8)

  // A non-integer p
  assert(Vectors.norm(dense, 3.7) ~== pNorm(dense.toArray, 3.7) relTol 1E-8)
  assert(Vectors.norm(sparse, 3.7) ~== pNorm(sparse.toArray, 3.7) relTol 1E-8)
}
|
||||
|
||||
test("Vector numActive and numNonzeros") {
  // Dense: every slot counts as active, only the two non-zero slots count as non-zeros.
  val dense = Vectors.dense(0.0, 2.0, 3.0, 0.0)
  assert(dense.numActives === 4)
  assert(dense.numNonzeros === 2)

  // Sparse: the explicitly stored 0.0 is active but is not counted as a non-zero.
  val sparse = Vectors.sparse(4, Array(0, 1, 2), Array(0.0, 2.0, 3.0))
  assert(sparse.numActives === 3)
  assert(sparse.numNonzeros === 2)
}
|
||||
|
||||
test("Vector toSparse and toDense") {
  // Starting from a dense vector.
  val dense = Vectors.dense(0.0, 2.0, 3.0, 0.0)
  assert(dense.toDense === dense)
  val denseAsSparse = dense.toSparse
  assert(denseAsSparse.numActives === 2)
  assert(denseAsSparse === dense)

  // Starting from a sparse vector that stores an explicit zero; the test expects
  // toSparse to leave only the two non-zero entries active.
  val sparse = Vectors.sparse(4, Array(0, 1, 2), Array(0.0, 2.0, 3.0))
  assert(sparse.toDense === sparse)
  val sparseAsSparse = sparse.toSparse
  assert(sparseAsSparse.numActives === 2)
  assert(sparseAsSparse === sparse)
}
|
||||
|
||||
test("Vector.compressed") {
  // Each cast below doubles as an assertion: it throws if `compressed` returns the other
  // representation than the one this test expects.

  // Mostly non-zero dense vector: expected to stay dense.
  val mostlyFullDense = Vectors.dense(1.0, 2.0, 3.0, 0.0)
  val mostlyFullDenseCompressed = mostlyFullDense.compressed.asInstanceOf[DenseVector]
  assert(mostlyFullDenseCompressed === mostlyFullDense)

  // Mostly zero dense vector: expected to become sparse with a single active entry.
  val mostlyEmptyDense = Vectors.dense(0.0, 2.0, 0.0, 0.0)
  val mostlyEmptyDenseCompressed = mostlyEmptyDense.compressed.asInstanceOf[SparseVector]
  assert(mostlyEmptyDense === mostlyEmptyDenseCompressed)
  assert(mostlyEmptyDenseCompressed.numActives === 1)

  // Sparse vector with an explicit zero: expected to stay sparse and drop the stored zero.
  val sparseWithZero = Vectors.sparse(4, Array(1, 2), Array(2.0, 0.0))
  val sparseWithZeroCompressed = sparseWithZero.compressed.asInstanceOf[SparseVector]
  assert(sparseWithZero === sparseWithZeroCompressed)
  assert(sparseWithZeroCompressed.numActives === 1)

  // Sparse vector with three of four entries set: expected to become dense.
  val mostlyFullSparse = Vectors.sparse(4, Array(0, 1, 2), Array(1.0, 2.0, 3.0))
  val mostlyFullSparseCompressed = mostlyFullSparse.compressed.asInstanceOf[DenseVector]
  assert(mostlyFullSparse === mostlyFullSparseCompressed)
}
|
||||
|
||||
test("SparseVector.slice") {
  val source = new SparseVector(5, Array(1, 2, 4), Array(1.1, 2.2, 4.4))

  // Each case pairs the selected source indices with the sparse vector expected back.
  // The selection order determines the positions in the result.
  val cases = Seq(
    Array(0, 2) -> new SparseVector(2, Array(1), Array(2.2)),
    Array(2, 0) -> new SparseVector(2, Array(0), Array(2.2)),
    Array(2, 0, 3, 4) -> new SparseVector(4, Array(0, 3), Array(2.2, 4.4))
  )
  cases.foreach { case (selected, expected) =>
    assert(source.slice(selected) === expected)
  }
}
|
||||
|
||||
test("toJson/fromJson") {
  // Sparse and dense vectors of size 0, 1 and 2, covering the empty cases as well.
  val samples = Seq(
    Vectors.sparse(0, Array.empty, Array.empty),
    Vectors.sparse(1, Array.empty, Array.empty),
    Vectors.sparse(2, Array(1), Array(2.0)),
    Vectors.dense(Array.empty[Double]),
    Vectors.dense(1.0),
    Vectors.dense(0.0, 2.0)
  )
  samples.foreach { original =>
    val serialized = original.toJson
    parseJson(serialized) // `json` should be a valid JSON string
    val restored = Vectors.fromJson(serialized)
    assert(restored.getClass === original.getClass,
      "toJson/fromJson should preserve vector types.")
    assert(restored === original, "toJson/fromJson should preserve vector values.")
  }
}
|
||||
}
|
|
@ -0,0 +1,236 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml.util
|
||||
|
||||
import org.scalatest.exceptions.TestFailedException
|
||||
|
||||
import org.apache.spark.ml.linalg.{Matrix, Vector}
|
||||
|
||||
object TestingUtils {

  // Suffixes appended to failure messages to tell which kind of tolerance was used.
  val ABS_TOL_MSG = " using absolute tolerance"
  val REL_TOL_MSG = " using relative tolerance"

  /**
   * Private helper function for comparing two values using relative tolerance.
   * Returns true when `x == y`, or when `|x - y| < eps * min(|x|, |y|)`.
   *
   * Note that if x or y is extremely close to zero, i.e., its magnitude is smaller than
   * Double.MinPositiveValue, the relative tolerance is meaningless, so a
   * TestFailedException is raised to warn users (unless the two values are exactly equal).
   */
  private def RelativeErrorComparison(x: Double, y: Double, eps: Double): Boolean = {
    val absX = math.abs(x)
    val absY = math.abs(y)
    val diff = math.abs(x - y)
    if (x == y) {
      true
    } else if (absX < Double.MinPositiveValue || absY < Double.MinPositiveValue) {
      throw new TestFailedException(
        s"$x or $y is extremely close to zero, so the relative tolerance is meaningless.", 0)
    } else {
      diff < eps * math.min(absX, absY)
    }
  }

  /**
   * Private helper function for comparing two values using absolute tolerance.
   * Returns true when `|x - y| < eps` (strict inequality).
   */
  private def AbsoluteErrorComparison(x: Double, y: Double, eps: Double): Boolean = {
    math.abs(x - y) < eps
  }

  /**
   * Right-hand side of a double comparison: the comparison function, the value to compare
   * against, the tolerance, and the message suffix describing the tolerance kind.
   */
  case class CompareDoubleRightSide(
    fun: (Double, Double, Double) => Boolean, y: Double, eps: Double, method: String)

  /**
   * Implicit class for comparing two double values using relative tolerance or absolute tolerance.
   * Usage: `a ~== b relTol eps` or `a ~== b absTol eps`.
   */
  implicit class DoubleWithAlmostEquals(val x: Double) {

    /**
     * When the difference of two values are within eps, returns true; otherwise, returns false.
     */
    def ~=(r: CompareDoubleRightSide): Boolean = r.fun(x, r.y, r.eps)

    /**
     * When the difference of two values are within eps, returns false; otherwise, returns true.
     */
    def !~=(r: CompareDoubleRightSide): Boolean = !r.fun(x, r.y, r.eps)

    /**
     * Throws exception when the difference of two values are NOT within eps;
     * otherwise, returns true.
     */
    def ~==(r: CompareDoubleRightSide): Boolean = {
      if (!r.fun(x, r.y, r.eps)) {
        throw new TestFailedException(
          s"Expected $x and ${r.y} to be within ${r.eps}${r.method}.", 0)
      }
      true
    }

    /**
     * Throws exception when the difference of two values are within eps; otherwise, returns true.
     */
    def !~==(r: CompareDoubleRightSide): Boolean = {
      if (r.fun(x, r.y, r.eps)) {
        throw new TestFailedException(
          s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method}.", 0)
      }
      true
    }

    /**
     * Comparison using absolute tolerance.
     */
    def absTol(eps: Double): CompareDoubleRightSide =
      CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG)

    /**
     * Comparison using relative tolerance.
     */
    def relTol(eps: Double): CompareDoubleRightSide =
      CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG)

    override def toString: String = x.toString
  }

  /**
   * Right-hand side of a vector comparison: the comparison function, the vector to compare
   * against, the tolerance, and the message suffix describing the tolerance kind.
   */
  case class CompareVectorRightSide(
    fun: (Vector, Vector, Double) => Boolean, y: Vector, eps: Double, method: String)

  /**
   * Implicit class for comparing two vectors using relative tolerance or absolute tolerance.
   * Vectors of different sizes are never considered almost equal.
   */
  implicit class VectorWithAlmostEquals(val x: Vector) {

    /**
     * When the difference of two vectors are within eps, returns true; otherwise, returns false.
     */
    def ~=(r: CompareVectorRightSide): Boolean = r.fun(x, r.y, r.eps)

    /**
     * When the difference of two vectors are within eps, returns false; otherwise, returns true.
     */
    def !~=(r: CompareVectorRightSide): Boolean = !r.fun(x, r.y, r.eps)

    /**
     * Throws exception when the difference of two vectors are NOT within eps;
     * otherwise, returns true.
     */
    def ~==(r: CompareVectorRightSide): Boolean = {
      if (!r.fun(x, r.y, r.eps)) {
        throw new TestFailedException(
          s"Expected $x and ${r.y} to be within ${r.eps}${r.method} for all elements.", 0)
      }
      true
    }

    /**
     * Throws exception when the difference of two vectors are within eps; otherwise, returns true.
     */
    def !~==(r: CompareVectorRightSide): Boolean = {
      if (r.fun(x, r.y, r.eps)) {
        throw new TestFailedException(
          s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method} for all elements.", 0)
      }
      true
    }

    /**
     * Comparison using absolute tolerance.
     */
    def absTol(eps: Double): CompareVectorRightSide = CompareVectorRightSide(
      (x: Vector, y: Vector, eps: Double) => {
        // `zip` silently truncates to the shorter array, so the sizes are checked first;
        // otherwise a vector could compare equal to a longer one sharing its prefix.
        x.size == y.size &&
          x.toArray.zip(y.toArray).forall(pair => pair._1 ~= pair._2 absTol eps)
      }, x, eps, ABS_TOL_MSG)

    /**
     * Comparison using relative tolerance. Note that comparing against sparse vector
     * with elements having value of zero will raise exception because it involves with
     * comparing against zero.
     */
    def relTol(eps: Double): CompareVectorRightSide = CompareVectorRightSide(
      (x: Vector, y: Vector, eps: Double) => {
        // Size check first, for the same reason as in absTol: `zip` truncates.
        x.size == y.size &&
          x.toArray.zip(y.toArray).forall(pair => pair._1 ~= pair._2 relTol eps)
      }, x, eps, REL_TOL_MSG)

    override def toString: String = x.toString
  }

  /**
   * Right-hand side of a matrix comparison: the comparison function, the matrix to compare
   * against, the tolerance, and the message suffix describing the tolerance kind.
   */
  case class CompareMatrixRightSide(
    fun: (Matrix, Matrix, Double) => Boolean, y: Matrix, eps: Double, method: String)

  /**
   * Implicit class for comparing two matrices using relative tolerance or absolute tolerance.
   * Matrices of different dimensions are never considered almost equal.
   */
  implicit class MatrixWithAlmostEquals(val x: Matrix) {

    /**
     * When the difference of two matrices are within eps, returns true; otherwise, returns false.
     */
    def ~=(r: CompareMatrixRightSide): Boolean = r.fun(x, r.y, r.eps)

    /**
     * When the difference of two matrices are within eps, returns false; otherwise, returns true.
     */
    def !~=(r: CompareMatrixRightSide): Boolean = !r.fun(x, r.y, r.eps)

    /**
     * Throws exception when the difference of two matrices are NOT within eps;
     * otherwise, returns true.
     */
    def ~==(r: CompareMatrixRightSide): Boolean = {
      if (!r.fun(x, r.y, r.eps)) {
        throw new TestFailedException(
          s"Expected \n$x\n and \n${r.y}\n to be within ${r.eps}${r.method} for all elements.", 0)
      }
      true
    }

    /**
     * Throws exception when the difference of two matrices are within eps; otherwise, returns true.
     */
    def !~==(r: CompareMatrixRightSide): Boolean = {
      if (r.fun(x, r.y, r.eps)) {
        // Both halves of the message need the `s` interpolator; without it the second
        // half would print the literal text "${r.eps}${r.method}".
        throw new TestFailedException(
          s"Did not expect \n$x\n and \n${r.y}\n to be within " +
            s"${r.eps}${r.method} for all elements.", 0)
      }
      true
    }

    /**
     * Comparison using absolute tolerance.
     */
    def absTol(eps: Double): CompareMatrixRightSide = CompareMatrixRightSide(
      (x: Matrix, y: Matrix, eps: Double) => {
        // Compare the shapes first: `zip` truncates, and matrices of different shapes
        // (e.g. 2 x 3 vs. 3 x 2) can have element arrays of the same length.
        x.numRows == y.numRows && x.numCols == y.numCols &&
          x.toArray.zip(y.toArray).forall(pair => pair._1 ~= pair._2 absTol eps)
      }, x, eps, ABS_TOL_MSG)

    /**
     * Comparison using relative tolerance. Note that comparing against sparse matrix
     * with elements having value of zero will raise exception because it involves with
     * comparing against zero.
     */
    def relTol(eps: Double): CompareMatrixRightSide = CompareMatrixRightSide(
      (x: Matrix, y: Matrix, eps: Double) => {
        // Shape check first, for the same reason as in absTol.
        x.numRows == y.numRows && x.numCols == y.numCols &&
          x.toArray.zip(y.toArray).forall(pair => pair._1 ~= pair._2 relTol eps)
      }, x, eps, REL_TOL_MSG)

    override def toString: String = x.toString
  }

}
|
|
@ -0,0 +1,187 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.ml.util
|
||||
|
||||
import org.scalatest.exceptions.TestFailedException
|
||||
|
||||
import org.apache.spark.ml.SparkMLFunSuite
|
||||
import org.apache.spark.ml.linalg.Vectors
|
||||
import org.apache.spark.ml.util.TestingUtils._
|
||||
|
||||
class TestingUtilsSuite extends SparkMLFunSuite {

  test("Comparing doubles using relative error.") {
    val base = 23.1
    val nearAbove = 23.52
    val nearBelow = 22.74
    val farAbove = 23.63
    val farBelow = 22.34
    val tol = 0.02

    // Values inside the tolerance: all operators agree that they are almost equal.
    assert(base ~== nearAbove relTol tol)
    assert(base ~== nearBelow relTol tol)
    assert(base ~= nearAbove relTol tol)
    assert(base ~= nearBelow relTol tol)
    assert(!(base !~= nearAbove relTol tol))
    assert(!(base !~= nearBelow relTol tol))

    // Should throw exception with message when test fails.
    intercept[TestFailedException] { base !~== nearAbove relTol tol }
    intercept[TestFailedException] { base !~== nearBelow relTol tol }
    intercept[TestFailedException] { base ~== farAbove relTol tol }
    intercept[TestFailedException] { base ~== farBelow relTol tol }

    // Values outside the tolerance: all operators agree that they differ.
    assert(base !~== farAbove relTol tol)
    assert(base !~== farBelow relTol tol)
    assert(base !~= farAbove relTol tol)
    assert(base !~= farBelow relTol tol)
    assert(!(base ~= farAbove relTol tol))
    assert(!(base ~= farBelow relTol tol))

    // Comparing against zero should fail the test and throw exception with message
    // saying that the relative error is meaningless in this situation.
    val nonZero = 0.1
    val zero = 0.0
    val zeroTol = 0.032
    intercept[TestFailedException] { nonZero ~== zero relTol zeroTol }
    intercept[TestFailedException] { nonZero ~= zero relTol zeroTol }
    intercept[TestFailedException] { nonZero !~== zero relTol zeroTol }
    intercept[TestFailedException] { nonZero !~= zero relTol zeroTol }
    intercept[TestFailedException] { zero ~== nonZero relTol zeroTol }
    intercept[TestFailedException] { zero ~= nonZero relTol zeroTol }
    intercept[TestFailedException] { zero !~== nonZero relTol zeroTol }
    intercept[TestFailedException] { zero !~= nonZero relTol zeroTol }

    // Comparisons of numbers very close to zero.
    val tiny = Double.MinPositiveValue
    assert(10 * tiny ~== 9.5 * tiny relTol 0.01)
    assert(10 * tiny !~== 11 * tiny relTol 0.01)

    assert(-tiny ~== 1.18 * -tiny relTol 0.012)
    assert(-tiny ~== 1.38 * -tiny relTol 0.012)
  }

  test("Comparing doubles using absolute error.") {
    val base = 17.8
    val nearAbove = 17.99
    val nearBelow = 17.61
    val farAbove = 18.01
    val farBelow = 17.59
    val tol = 0.2

    // Values inside the tolerance: all operators agree that they are almost equal.
    assert(base ~== nearAbove absTol tol)
    assert(base ~== nearBelow absTol tol)
    assert(base ~= nearAbove absTol tol)
    assert(base ~= nearBelow absTol tol)
    assert(!(base !~= nearAbove absTol tol))
    assert(!(base !~= nearBelow absTol tol))

    // Should throw exception with message when test fails.
    intercept[TestFailedException] { base !~== nearAbove absTol tol }
    intercept[TestFailedException] { base !~== nearBelow absTol tol }
    intercept[TestFailedException] { base ~== farAbove absTol tol }
    intercept[TestFailedException] { base ~== farBelow absTol tol }

    // Values outside the tolerance: all operators agree that they differ.
    assert(base !~== farAbove absTol tol)
    assert(base !~== farBelow absTol tol)
    assert(base !~= farAbove absTol tol)
    assert(base !~= farBelow absTol tol)
    assert(!(base ~= farAbove absTol tol))
    assert(!(base ~= farBelow absTol tol))

    // Comparisons of numbers very close to zero, and both side of zeros
    val tiny = Double.MinPositiveValue
    assert(tiny ~== 4 * tiny absTol 5 * tiny)
    assert(tiny !~== 6 * tiny absTol 5 * tiny)

    assert(-tiny ~== 3 * tiny absTol 5 * tiny)
    assert(tiny !~== -4 * tiny absTol 5 * tiny)
  }

  test("Comparing vectors using relative error.") {
    val reference = Vectors.dense(Array(3.1, 3.5))
    val inside = Vectors.dense(Array(3.130, 3.534))
    val outside = Vectors.dense(Array(3.135, 3.534))
    val tol = 0.01

    // Comparisons of two dense vectors
    assert(reference ~== inside relTol tol)
    assert(reference !~== outside relTol tol)
    assert(reference ~= inside relTol tol)
    assert(reference !~= outside relTol tol)
    assert(!(reference !~= inside relTol tol))
    assert(!(reference ~= outside relTol tol))

    // Should throw exception with message when test fails.
    intercept[TestFailedException] { reference !~== inside relTol tol }

    intercept[TestFailedException] { reference ~== outside relTol tol }

    // Comparing against zero should fail the test and throw exception with message
    // saying that the relative error is meaningless in this situation.
    val withSmallEntry = Vectors.dense(Array(3.1, 0.01))
    intercept[TestFailedException] {
      withSmallEntry ~== Vectors.dense(Array(3.13, 0.0)) relTol tol
    }

    intercept[TestFailedException] {
      withSmallEntry ~== Vectors.sparse(2, Array(0), Array(3.13)) relTol tol
    }

    // Comparisons of a dense vector and a sparse vector
    assert(reference ~== Vectors.sparse(2, Array(0, 1), Array(3.130, 3.534)) relTol tol)

    assert(reference !~== Vectors.sparse(2, Array(0, 1), Array(3.135, 3.534)) relTol tol)
  }

  test("Comparing vectors using absolute error.") {
    val tol = 1E-6

    // Comparisons of two dense vectors
    val denseRef = Vectors.dense(Array(3.1, 3.5, 0.0))
    val denseInside = Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8))
    val denseOutside = Vectors.dense(Array(3.1 + 1E-5, 3.5 + 2E-7, 1 + 1E-3))

    assert(denseRef ~== denseInside absTol tol)

    assert(denseRef !~== denseOutside absTol tol)

    assert(denseRef ~= denseInside absTol tol)

    assert(denseRef !~= denseOutside absTol tol)

    assert(!(denseRef !~= denseInside absTol tol))

    assert(!(denseRef ~= denseOutside absTol tol))

    // Should throw exception with message when test fails.
    intercept[TestFailedException] { denseRef !~== denseInside absTol tol }

    intercept[TestFailedException] { denseRef ~== denseOutside absTol tol }

    // Comparisons of two sparse vectors
    val sparseRef = Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4))
    val sparseInside = Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-8, 2.4 + 1E-7))
    val sparseOutside = Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-3, 2.4))

    assert(sparseRef ~== sparseInside absTol tol)

    assert(sparseInside ~== sparseRef absTol tol)

    assert(sparseRef !~== sparseOutside absTol tol)

    assert(sparseOutside !~== sparseRef absTol tol)

    // Comparisons of a dense vector and a sparse vector
    val mixedInside = Vectors.dense(Array(3.1 + 1E-8, 0, 2.4 + 1E-7))

    assert(sparseRef ~== mixedInside absTol tol)

    assert(mixedInside ~== sparseRef absTol tol)

    assert(sparseRef !~== Vectors.dense(Array(3.1, 1E-3, 2.4)) absTol tol)
  }
}
|
|
@ -182,8 +182,8 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
|
|||
for (folds <- 2 to 10) {
|
||||
for (seed <- 1 to 5) {
|
||||
val foldedRdds = kFold(data, folds, seed)
|
||||
assert(foldedRdds.size === folds)
|
||||
foldedRdds.map { case (training, validation) =>
|
||||
assert(foldedRdds.length === folds)
|
||||
foldedRdds.foreach { case (training, validation) =>
|
||||
val result = validation.union(training).collect().sorted
|
||||
val validationSize = validation.collect().size.toFloat
|
||||
assert(validationSize > 0, "empty validation data")
|
||||
|
|
Loading…
Reference in a new issue