[MLlib] [SPARK-5301] Missing conversions and operations on IndexedRowMatrix and CoordinateMatrix
* Transpose is missing from CoordinateMatrix (this is cheap to compute, so it should be there). * IndexedRowMatrix should be convertible to CoordinateMatrix (conversion added). Tests for both added. Author: Reza Zadeh <reza@databricks.com> Closes #4089 from rezazadeh/matutils and squashes the following commits: ec5238b [Reza Zadeh] Array -> Iterator to avoid temp array 3ce0b5d [Reza Zadeh] Array -> Iterator bbc907a [Reza Zadeh] Use 'i' for index, and zipWithIndex cb10ae5 [Reza Zadeh] remove unnecessary import a7ae048 [Reza Zadeh] Missing linear algebra utilities
This commit is contained in:
parent
2eeada373e
commit
aa1e22b17b
|
@ -69,6 +69,11 @@ class CoordinateMatrix(
|
|||
nRows
|
||||
}
|
||||
|
||||
/**
 * Transposes this CoordinateMatrix.
 *
 * Every entry `(i, j, value)` is re-emitted as `(j, i, value)`, and the result is constructed
 * with `numCols()` and `numRows()` passed in swapped order as its dimensions.
 *
 * @return a new CoordinateMatrix built from the index-swapped entries
 */
def transpose(): CoordinateMatrix = {
  // Swap the row and column index of each entry; the value itself is untouched.
  val swappedEntries = entries.map { entry =>
    MatrixEntry(entry.j, entry.i, entry.value)
  }
  new CoordinateMatrix(swappedEntries, numCols(), numRows())
}
|
||||
|
||||
/** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
|
||||
def toIndexedRowMatrix(): IndexedRowMatrix = {
|
||||
val nl = numCols()
|
||||
|
|
|
@ -75,6 +75,23 @@ class IndexedRowMatrix(
|
|||
new RowMatrix(rows.map(_.vector), 0L, nCols)
|
||||
}
|
||||
|
||||
/**
 * Converts this matrix to a
 * [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]].
 *
 * Each row is expanded into one `MatrixEntry` per stored value, keyed by the row's index:
 *  - a dense row yields an entry for every position (no filtering of values is performed);
 *  - a sparse row yields an entry for each of its stored (index, value) pairs.
 *
 * Entries are produced through iterators, so no temporary array is built per row.
 *
 * @return a CoordinateMatrix constructed from the generated entries with this matrix's
 *         `numRows()` and `numCols()`
 */
def toCoordinateMatrix(): CoordinateMatrix = {
  val matrixEntries = rows.flatMap { indexedRow =>
    // Copy the index into a local so the closure does not touch anything beyond the row itself.
    val i = indexedRow.index
    indexedRow.vector match {
      case DenseVector(data) =>
        // Position in the dense array is the column index.
        data.indices.iterator.map(j => MatrixEntry(i, j, data(j)))
      case SparseVector(_, activeIndices, activeValues) =>
        // k walks the parallel index/value arrays; activeIndices(k) is the column index.
        activeIndices.indices.iterator.map { k =>
          MatrixEntry(i, activeIndices(k), activeValues(k))
        }
    }
  }
  new CoordinateMatrix(matrixEntries, numRows(), numCols())
}
|
||||
|
||||
/**
|
||||
* Computes the singular value decomposition of this IndexedRowMatrix.
|
||||
* Denote this matrix by A (m x n), this will compute matrices U, S, V such that A = U * S * V'.
|
||||
|
|
|
@ -73,6 +73,11 @@ class CoordinateMatrixSuite extends FunSuite with MLlibTestSparkContext {
|
|||
assert(mat.toBreeze() === expected)
|
||||
}
|
||||
|
||||
test("transpose") {
  // The distributed transpose must agree with transposing the local Breeze copy.
  val flipped = mat.transpose()
  val expected = mat.toBreeze().t
  assert(expected === flipped.toBreeze())
}
|
||||
|
||||
test("toIndexedRowMatrix") {
|
||||
val indexedRowMatrix = mat.toIndexedRowMatrix()
|
||||
val expected = BDM(
|
||||
|
|
|
@ -80,6 +80,14 @@ class IndexedRowMatrixSuite extends FunSuite with MLlibTestSparkContext {
|
|||
assert(rowMat.rows.collect().toSeq === data.map(_.vector).toSeq)
|
||||
}
|
||||
|
||||
test("toCoordinateMatrix") {
  val source = new IndexedRowMatrix(indexedRows)
  val converted = source.toCoordinateMatrix()
  // Dimensions must be carried over unchanged by the conversion.
  assert(converted.numRows() === m)
  assert(converted.numCols() === n)
  // Both representations must materialize to the same local Breeze matrix.
  assert(converted.toBreeze() === source.toBreeze())
}
|
||||
|
||||
test("multiply a local matrix") {
|
||||
val A = new IndexedRowMatrix(indexedRows)
|
||||
val B = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
|
||||
|
|
Loading…
Reference in a new issue