[MLlib] [SPARK-5301] Missing conversions and operations on IndexedRowMatrix and CoordinateMatrix

* Transpose is missing from CoordinateMatrix (this is cheap to compute, so it should be there)
* IndexedRowMatrix should be convertible to CoordinateMatrix (conversion added)

Tests for both added.

Author: Reza Zadeh <reza@databricks.com>

Closes #4089 from rezazadeh/matutils and squashes the following commits:

ec5238b [Reza Zadeh] Array -> Iterator to avoid temp array
3ce0b5d [Reza Zadeh] Array -> Iterator
bbc907a [Reza Zadeh] Use 'i' for index, and zipWithIndex
cb10ae5 [Reza Zadeh] remove unnecessary import
a7ae048 [Reza Zadeh] Missing linear algebra utilities
This commit is contained in:
Reza Zadeh 2015-01-21 09:48:38 -08:00 committed by Xiangrui Meng
parent 2eeada373e
commit aa1e22b17b
4 changed files with 35 additions and 0 deletions

View file

@ -69,6 +69,11 @@ class CoordinateMatrix(
nRows
}
/**
 * Returns the transpose of this CoordinateMatrix: every entry (i, j, value) is
 * re-emitted as (j, i, value), and the row and column counts trade places.
 */
def transpose(): CoordinateMatrix = {
  val flipped = entries.map { entry =>
    MatrixEntry(entry.j, entry.i, entry.value)
  }
  new CoordinateMatrix(flipped, numCols(), numRows())
}
/** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
def toIndexedRowMatrix(): IndexedRowMatrix = {
val nl = numCols()

View file

@ -75,6 +75,23 @@ class IndexedRowMatrix(
new RowMatrix(rows.map(_.vector), 0L, nCols)
}
/**
 * Builds a [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]] holding
 * one entry per value stored in each row vector of this matrix.
 *
 * For a sparse row only the stored (index, value) pairs become entries; for a dense
 * row every position becomes an entry, including any that hold zero.
 */
def toCoordinateMatrix(): CoordinateMatrix = {
  val coordinates = rows.flatMap { indexedRow =>
    val i = indexedRow.index
    indexedRow.vector match {
      case SparseVector(_, colIndices, colValues) =>
        // Walk the stored positions lazily so no temporary array is materialized.
        Iterator.range(0, colIndices.length).map { k =>
          MatrixEntry(i, colIndices(k), colValues(k))
        }
      case DenseVector(colValues) =>
        Iterator.range(0, colValues.length).map { j =>
          MatrixEntry(i, j, colValues(j))
        }
    }
  }
  new CoordinateMatrix(coordinates, numRows(), numCols())
}
/**
* Computes the singular value decomposition of this IndexedRowMatrix.
* Denote this matrix by A (m x n), this will compute matrices U, S, V such that A = U * S * V'.

View file

@ -73,6 +73,11 @@ class CoordinateMatrixSuite extends FunSuite with MLlibTestSparkContext {
assert(mat.toBreeze() === expected)
}
test("transpose") {
  // The Breeze transpose of the local copy serves as the reference result.
  val expected = mat.toBreeze().t
  val actual = mat.transpose().toBreeze()
  assert(expected === actual)
}
test("toIndexedRowMatrix") {
val indexedRowMatrix = mat.toIndexedRowMatrix()
val expected = BDM(

View file

@ -80,6 +80,14 @@ class IndexedRowMatrixSuite extends FunSuite with MLlibTestSparkContext {
assert(rowMat.rows.collect().toSeq === data.map(_.vector).toSeq)
}
test("toCoordinateMatrix") {
  val source = new IndexedRowMatrix(indexedRows)
  val converted = source.toCoordinateMatrix()
  // Dimensions must carry over, and the local Breeze copies must be equal.
  assert(converted.numRows() === m)
  assert(converted.numCols() === n)
  assert(converted.toBreeze() === source.toBreeze())
}
test("multiply a local matrix") {
val A = new IndexedRowMatrix(indexedRows)
val B = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))