[SPARK-10491] [MLLIB] move RowMatrix.dspr to BLAS

jira: https://issues.apache.org/jira/browse/SPARK-10491

We implemented dspr with sparse vector support in `RowMatrix`. This method is also used in WeightedLeastSquares and other places. It would be useful to move it to `linalg.BLAS`.

Let me know if new UT needed.

Author: Yuhao Yang <hhbyyh@gmail.com>

Closes #8663 from hhbyyh/movedspr.
This commit is contained in:
Yuhao Yang 2015-09-15 09:58:49 -07:00 committed by Xiangrui Meng
parent 09b7e7c198
commit c35fdcb7e9
4 changed files with 72 additions and 41 deletions

View file

@ -88,7 +88,7 @@ private[ml] class WeightedLeastSquares(
if (fitIntercept) { if (fitIntercept) {
// shift centers // shift centers
// A^T A - aBar aBar^T // A^T A - aBar aBar^T
RowMatrix.dspr(-1.0, aBar, aaValues) BLAS.spr(-1.0, aBar, aaValues)
// A^T b - bBar aBar // A^T b - bBar aBar
BLAS.axpy(-bBar, aBar, abBar) BLAS.axpy(-bBar, aBar, abBar)
} }
@ -203,7 +203,7 @@ private[ml] object WeightedLeastSquares {
bbSum += w * b * b bbSum += w * b * b
BLAS.axpy(w, a, aSum) BLAS.axpy(w, a, aSum)
BLAS.axpy(w * b, a, abSum) BLAS.axpy(w * b, a, abSum)
RowMatrix.dspr(w, a, aaSum.values) BLAS.spr(w, a, aaSum)
this this
} }

View file

@ -236,6 +236,50 @@ private[spark] object BLAS extends Serializable with Logging {
_nativeBLAS _nativeBLAS
} }
/**
* Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR.
*
* @param U the upper triangular part of the matrix in a [[DenseVector]](column major)
*/
def spr(alpha: Double, v: Vector, U: DenseVector): Unit = {
spr(alpha, v, U.values)
}
/**
* Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR.
*
* @param U the upper triangular part of the matrix packed in an array (column major)
*/
def spr(alpha: Double, v: Vector, U: Array[Double]): Unit = {
val n = v.size
v match {
case DenseVector(values) =>
NativeBLAS.dspr("U", n, alpha, values, 1, U)
case SparseVector(size, indices, values) =>
val nnz = indices.length
var colStartIdx = 0
var prevCol = 0
var col = 0
var j = 0
var i = 0
var av = 0.0
while (j < nnz) {
col = indices(j)
// Skip empty columns.
colStartIdx += (col - prevCol) * (col + prevCol + 1) / 2
col = indices(j)
av = alpha * values(j)
i = 0
while (i <= j) {
U(colStartIdx + indices(i)) += av * values(i)
i += 1
}
j += 1
prevCol = col
}
}
}
/** /**
* A := alpha * x * x^T^ + A * A := alpha * x * x^T^ + A
* @param alpha a real scalar that will be multiplied to x * x^T^. * @param alpha a real scalar that will be multiplied to x * x^T^.

View file

@ -24,7 +24,6 @@ import scala.collection.mutable.ListBuffer
import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV, axpy => brzAxpy, import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV, axpy => brzAxpy,
svd => brzSvd, MatrixSingularException, inv} svd => brzSvd, MatrixSingularException, inv}
import breeze.numerics.{sqrt => brzSqrt} import breeze.numerics.{sqrt => brzSqrt}
import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.apache.spark.Logging import org.apache.spark.Logging
import org.apache.spark.SparkContext._ import org.apache.spark.SparkContext._
@ -123,7 +122,7 @@ class RowMatrix @Since("1.0.0") (
// Compute the upper triangular part of the gram matrix. // Compute the upper triangular part of the gram matrix.
val GU = rows.treeAggregate(new BDV[Double](new Array[Double](nt)))( val GU = rows.treeAggregate(new BDV[Double](new Array[Double](nt)))(
seqOp = (U, v) => { seqOp = (U, v) => {
RowMatrix.dspr(1.0, v, U.data) BLAS.spr(1.0, v, U.data)
U U
}, combOp = (U1, U2) => U1 += U2) }, combOp = (U1, U2) => U1 += U2)
@ -673,43 +672,6 @@ class RowMatrix @Since("1.0.0") (
@Experimental @Experimental
object RowMatrix { object RowMatrix {
/**
* Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's DSPR.
*
* @param U the upper triangular part of the matrix packed in an array (column major)
*/
// TODO: SPARK-10491 - move this method to linalg.BLAS
private[spark] def dspr(alpha: Double, v: Vector, U: Array[Double]): Unit = {
// TODO: Find a better home (breeze?) for this method.
val n = v.size
v match {
case DenseVector(values) =>
blas.dspr("U", n, alpha, values, 1, U)
case SparseVector(size, indices, values) =>
val nnz = indices.length
var colStartIdx = 0
var prevCol = 0
var col = 0
var j = 0
var i = 0
var av = 0.0
while (j < nnz) {
col = indices(j)
// Skip empty columns.
colStartIdx += (col - prevCol) * (col + prevCol + 1) / 2
col = indices(j)
av = alpha * values(j)
i = 0
while (i <= j) {
U(colStartIdx + indices(i)) += av * values(i)
i += 1
}
j += 1
prevCol = col
}
}
}
/** /**
* Fills a full square matrix from its upper triangular part. * Fills a full square matrix from its upper triangular part.
*/ */

View file

@ -126,6 +126,31 @@ class BLASSuite extends SparkFunSuite {
} }
} }
test("spr") {
// test dense vector
val alpha = 0.1
val x = new DenseVector(Array(1.0, 2, 2.1, 4))
val U = new DenseVector(Array(1.0, 2, 2, 3, 3, 3, 4, 4, 4, 4))
val expected = new DenseVector(Array(1.1, 2.2, 2.4, 3.21, 3.42, 3.441, 4.4, 4.8, 4.84, 5.6))
spr(alpha, x, U)
assert(U ~== expected absTol 1e-9)
val matrix33 = new DenseVector(Array(1.0, 2, 3, 4, 5))
withClue("Size of vector must match the rank of matrix") {
intercept[Exception] {
spr(alpha, x, matrix33)
}
}
// test sparse vector
val sv = new SparseVector(4, Array(0, 3), Array(1.0, 2))
val U2 = new DenseVector(Array(1.0, 2, 2, 3, 3, 3, 4, 4, 4, 4))
spr(0.1, sv, U2)
val expectedSparse = new DenseVector(Array(1.1, 2.0, 2.0, 3.0, 3.0, 3.0, 4.2, 4.0, 4.0, 4.4))
assert(U2 ~== expectedSparse absTol 1e-15)
}
test("syr") { test("syr") {
val dA = new DenseMatrix(4, 4, val dA = new DenseMatrix(4, 4,
Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0, 3.1, 4.6, 3.0, 0.8)) Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0, 3.1, 4.6, 3.0, 0.8))