[SPARK-16372][MLLIB] Retag RDD to tallSkinnyQR of RowMatrix

## What changes were proposed in this pull request?

The following Java code fails because of type erasure:

```Java
JavaRDD<Vector> rows = jsc.parallelize(...);
RowMatrix mat = new RowMatrix(rows.rdd());
QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true);
```

We should use retag to restore the type to prevent the following exception:

```Java
java.lang.ClassCastException: [Ljava.lang.Object; cannot be cast to [Lorg.apache.spark.mllib.linalg.Vector;
```

## How was this patch tested?

Java unit test

Author: Xusen Yin <yinxusen@gmail.com>

Closes #14051 from yinxusen/SPARK-16372.
This commit is contained in:
Xusen Yin 2016-07-07 11:28:04 +01:00 committed by Sean Owen
parent 986b251401
commit 4c6f00d09c
3 changed files with 46 additions and 2 deletions

View file

@ -1127,7 +1127,7 @@ private[python] class PythonMLLibAPI extends Serializable {
* Wrapper around RowMatrix constructor.
*/
def createRowMatrix(rows: JavaRDD[Vector], numRows: Long, numCols: Int): RowMatrix = {
  // Unwrap the JavaRDD and hand its underlying RDD straight to the RowMatrix constructor.
  // Exactly one RowMatrix is built and returned; no retag(classOf[Vector]) is applied here,
  // so the matrix is created from rows.rdd as-is.
  new RowMatrix(rows.rdd, numRows, numCols)
}
/**

View file

@ -537,7 +537,7 @@ class RowMatrix @Since("1.0.0") (
def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = {
val col = numCols().toInt
// split rows horizontally into smaller matrices, and compute QR for each of them
val blockQRs = rows.glom().map { partRows =>
val blockQRs = rows.retag(classOf[Vector]).glom().map { partRows =>
val bdm = BDM.zeros[Double](partRows.length, col)
var i = 0
partRows.foreach { row =>

View file

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.mllib.linalg.distributed;
import java.util.Arrays;
import org.junit.Test;
import org.apache.spark.SharedSparkSession;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.Matrix;
import org.apache.spark.mllib.linalg.QRDecomposition;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
/**
 * Java-side test suite for {@link RowMatrix}.
 */
public class JavaRowMatrixSuite extends SharedSparkSession {

  /**
   * Builds a three-row, three-column {@link RowMatrix} from a single-partition
   * {@code JavaRDD<Vector>} and calls {@code tallSkinnyQR(true)} on it.
   *
   * The test contains no assertions: it passes as long as the call completes
   * without throwing.
   */
  @Test
  public void rowMatrixQRDecomposition() {
    // jsc is not declared in this class; presumably provided by SharedSparkSession.
    JavaRDD<Vector> vectors = jsc.parallelize(
      Arrays.asList(
        Vectors.dense(1.0, 10.0, 100.0),
        Vectors.dense(2.0, 20.0, 200.0),
        Vectors.dense(3.0, 30.0, 300.0)),
      1);
    // Construct the matrix from the Scala RDD that backs the JavaRDD.
    RowMatrix matrix = new RowMatrix(vectors.rdd());
    // The result is intentionally unused; only successful completion matters.
    QRDecomposition<RowMatrix, Matrix> qr = matrix.tallSkinnyQR(true);
  }
}