[SPARK-14613][ML] Add @Since into the matrix and vector classes in spark-mllib-local
## What changes were proposed in this pull request?

This PR adds the `@Since` annotation to the matrix and vector classes in spark-mllib-local.

## How was this patch tested?

Scala-style checks passed.

Author: Pravin Gadakh <prgadakh@in.ibm.com>

Closes #12416 from pravingadakh/SPARK-14613.
This commit is contained in:
parent
78c8aaf849
commit
dae538a4d7
|
@ -66,7 +66,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.mockito</groupId>
|
||||||
|
|
|
@ -80,7 +80,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>log4j</groupId>
|
<groupId>log4j</groupId>
|
||||||
|
|
|
@ -48,7 +48,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Provided dependencies -->
|
<!-- Provided dependencies -->
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
|
@ -27,12 +27,12 @@
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_2.11</artifactId>
|
<artifactId>spark-tags_2.11</artifactId>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<name>Spark Project Test Tags</name>
|
<name>Spark Project Tags</name>
|
||||||
<url>http://spark.apache.org/</url>
|
<url>http://spark.apache.org/</url>
|
||||||
<properties>
|
<properties>
|
||||||
<sbt.project.name>test-tags</sbt.project.name>
|
<sbt.project.name>tags</sbt.project.name>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -61,7 +61,7 @@
|
||||||
<!-- Test dependencies -->
|
<!-- Test dependencies -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.mockito</groupId>
|
||||||
|
|
|
@ -317,7 +317,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
|
|
|
@ -93,9 +93,18 @@ class Module(object):
|
||||||
return hash(self.name)
|
return hash(self.name)
|
||||||
|
|
||||||
|
|
||||||
|
tags = Module(
|
||||||
|
name="tags",
|
||||||
|
dependencies=[],
|
||||||
|
source_file_regexes=[
|
||||||
|
"common/tags/",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
catalyst = Module(
|
catalyst = Module(
|
||||||
name="catalyst",
|
name="catalyst",
|
||||||
dependencies=[],
|
dependencies=[tags],
|
||||||
source_file_regexes=[
|
source_file_regexes=[
|
||||||
"sql/catalyst/",
|
"sql/catalyst/",
|
||||||
],
|
],
|
||||||
|
@ -165,7 +174,7 @@ hivecontext_compatibility = Module(
|
||||||
|
|
||||||
sketch = Module(
|
sketch = Module(
|
||||||
name="sketch",
|
name="sketch",
|
||||||
dependencies=[],
|
dependencies=[tags],
|
||||||
source_file_regexes=[
|
source_file_regexes=[
|
||||||
"common/sketch/",
|
"common/sketch/",
|
||||||
],
|
],
|
||||||
|
@ -177,7 +186,7 @@ sketch = Module(
|
||||||
|
|
||||||
graphx = Module(
|
graphx = Module(
|
||||||
name="graphx",
|
name="graphx",
|
||||||
dependencies=[],
|
dependencies=[tags],
|
||||||
source_file_regexes=[
|
source_file_regexes=[
|
||||||
"graphx/",
|
"graphx/",
|
||||||
],
|
],
|
||||||
|
@ -189,7 +198,7 @@ graphx = Module(
|
||||||
|
|
||||||
streaming = Module(
|
streaming = Module(
|
||||||
name="streaming",
|
name="streaming",
|
||||||
dependencies=[],
|
dependencies=[tags],
|
||||||
source_file_regexes=[
|
source_file_regexes=[
|
||||||
"streaming",
|
"streaming",
|
||||||
],
|
],
|
||||||
|
@ -205,7 +214,7 @@ streaming = Module(
|
||||||
# fail other PRs.
|
# fail other PRs.
|
||||||
streaming_kinesis_asl = Module(
|
streaming_kinesis_asl = Module(
|
||||||
name="streaming-kinesis-asl",
|
name="streaming-kinesis-asl",
|
||||||
dependencies=[],
|
dependencies=[tags],
|
||||||
source_file_regexes=[
|
source_file_regexes=[
|
||||||
"external/kinesis-asl/",
|
"external/kinesis-asl/",
|
||||||
"external/kinesis-asl-assembly/",
|
"external/kinesis-asl-assembly/",
|
||||||
|
@ -270,7 +279,7 @@ streaming_flume_assembly = Module(
|
||||||
|
|
||||||
mllib_local = Module(
|
mllib_local = Module(
|
||||||
name="mllib-local",
|
name="mllib-local",
|
||||||
dependencies=[],
|
dependencies=[tags],
|
||||||
source_file_regexes=[
|
source_file_regexes=[
|
||||||
"mllib-local",
|
"mllib-local",
|
||||||
],
|
],
|
||||||
|
|
3
external/docker-integration-tests/pom.xml
vendored
3
external/docker-integration-tests/pom.xml
vendored
|
@ -128,9 +128,10 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
|
<classifier>tests</classifier>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>mysql</groupId>
|
<groupId>mysql</groupId>
|
||||||
|
|
2
external/flume-sink/pom.xml
vendored
2
external/flume-sink/pom.xml
vendored
|
@ -92,7 +92,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
|
|
2
external/flume/pom.xml
vendored
2
external/flume/pom.xml
vendored
|
@ -68,7 +68,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
|
|
2
external/java8-tests/pom.xml
vendored
2
external/java8-tests/pom.xml
vendored
|
@ -72,7 +72,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
2
external/kafka/pom.xml
vendored
2
external/kafka/pom.xml
vendored
|
@ -88,7 +88,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
|
|
2
external/kinesis-asl/pom.xml
vendored
2
external/kinesis-asl/pom.xml
vendored
|
@ -77,7 +77,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
|
|
|
@ -72,7 +72,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
|
|
|
@ -65,7 +65,7 @@
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Not needed by the test code, but referenced by SparkSubmit which is used by the tests. -->
|
<!-- Not needed by the test code, but referenced by SparkSubmit which is used by the tests. -->
|
||||||
|
|
|
@ -57,6 +57,10 @@
|
||||||
<artifactId>mockito-core</artifactId>
|
<artifactId>mockito-core</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<profiles>
|
<profiles>
|
||||||
<profile>
|
<profile>
|
||||||
|
|
|
@ -24,21 +24,28 @@ import scala.collection.mutable.{ArrayBuffer, ArrayBuilder => MArrayBuilder, Has
|
||||||
import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM}
|
import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM}
|
||||||
import com.github.fommil.netlib.BLAS.{getInstance => blas}
|
import com.github.fommil.netlib.BLAS.{getInstance => blas}
|
||||||
|
|
||||||
|
import org.apache.spark.annotation.Since
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Trait for a local matrix.
|
* Trait for a local matrix.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
sealed trait Matrix extends Serializable {
|
sealed trait Matrix extends Serializable {
|
||||||
|
|
||||||
/** Number of rows. */
|
/** Number of rows. */
|
||||||
|
@Since("2.0.0")
|
||||||
def numRows: Int
|
def numRows: Int
|
||||||
|
|
||||||
/** Number of columns. */
|
/** Number of columns. */
|
||||||
|
@Since("2.0.0")
|
||||||
def numCols: Int
|
def numCols: Int
|
||||||
|
|
||||||
/** Flag that keeps track whether the matrix is transposed or not. False by default. */
|
/** Flag that keeps track whether the matrix is transposed or not. False by default. */
|
||||||
|
@Since("2.0.0")
|
||||||
val isTransposed: Boolean = false
|
val isTransposed: Boolean = false
|
||||||
|
|
||||||
/** Converts to a dense array in column major. */
|
/** Converts to a dense array in column major. */
|
||||||
|
@Since("2.0.0")
|
||||||
def toArray: Array[Double] = {
|
def toArray: Array[Double] = {
|
||||||
val newArray = new Array[Double](numRows * numCols)
|
val newArray = new Array[Double](numRows * numCols)
|
||||||
foreachActive { (i, j, v) =>
|
foreachActive { (i, j, v) =>
|
||||||
|
@ -51,18 +58,21 @@ sealed trait Matrix extends Serializable {
|
||||||
* Returns an iterator of column vectors.
|
* Returns an iterator of column vectors.
|
||||||
* This operation could be expensive, depending on the underlying storage.
|
* This operation could be expensive, depending on the underlying storage.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def colIter: Iterator[Vector]
|
def colIter: Iterator[Vector]
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an iterator of row vectors.
|
* Returns an iterator of row vectors.
|
||||||
* This operation could be expensive, depending on the underlying storage.
|
* This operation could be expensive, depending on the underlying storage.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def rowIter: Iterator[Vector] = this.transpose.colIter
|
def rowIter: Iterator[Vector] = this.transpose.colIter
|
||||||
|
|
||||||
/** Converts to a breeze matrix. */
|
/** Converts to a breeze matrix. */
|
||||||
private[ml] def toBreeze: BM[Double]
|
private[ml] def toBreeze: BM[Double]
|
||||||
|
|
||||||
/** Gets the (i, j)-th element. */
|
/** Gets the (i, j)-th element. */
|
||||||
|
@Since("2.0.0")
|
||||||
def apply(i: Int, j: Int): Double
|
def apply(i: Int, j: Int): Double
|
||||||
|
|
||||||
/** Return the index for the (i, j)-th element in the backing array. */
|
/** Return the index for the (i, j)-th element in the backing array. */
|
||||||
|
@ -72,12 +82,15 @@ sealed trait Matrix extends Serializable {
|
||||||
private[ml] def update(i: Int, j: Int, v: Double): Unit
|
private[ml] def update(i: Int, j: Int, v: Double): Unit
|
||||||
|
|
||||||
/** Get a deep copy of the matrix. */
|
/** Get a deep copy of the matrix. */
|
||||||
|
@Since("2.0.0")
|
||||||
def copy: Matrix
|
def copy: Matrix
|
||||||
|
|
||||||
/** Transpose the Matrix. Returns a new `Matrix` instance sharing the same underlying data. */
|
/** Transpose the Matrix. Returns a new `Matrix` instance sharing the same underlying data. */
|
||||||
|
@Since("2.0.0")
|
||||||
def transpose: Matrix
|
def transpose: Matrix
|
||||||
|
|
||||||
/** Convenience method for `Matrix`-`DenseMatrix` multiplication. */
|
/** Convenience method for `Matrix`-`DenseMatrix` multiplication. */
|
||||||
|
@Since("2.0.0")
|
||||||
def multiply(y: DenseMatrix): DenseMatrix = {
|
def multiply(y: DenseMatrix): DenseMatrix = {
|
||||||
val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols)
|
val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols)
|
||||||
BLAS.gemm(1.0, this, y, 0.0, C)
|
BLAS.gemm(1.0, this, y, 0.0, C)
|
||||||
|
@ -85,11 +98,13 @@ sealed trait Matrix extends Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */
|
/** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */
|
||||||
|
@Since("2.0.0")
|
||||||
def multiply(y: DenseVector): DenseVector = {
|
def multiply(y: DenseVector): DenseVector = {
|
||||||
multiply(y.asInstanceOf[Vector])
|
multiply(y.asInstanceOf[Vector])
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Convenience method for `Matrix`-`Vector` multiplication. */
|
/** Convenience method for `Matrix`-`Vector` multiplication. */
|
||||||
|
@Since("2.0.0")
|
||||||
def multiply(y: Vector): DenseVector = {
|
def multiply(y: Vector): DenseVector = {
|
||||||
val output = new DenseVector(new Array[Double](numRows))
|
val output = new DenseVector(new Array[Double](numRows))
|
||||||
BLAS.gemv(1.0, this, y, 0.0, output)
|
BLAS.gemv(1.0, this, y, 0.0, output)
|
||||||
|
@ -100,6 +115,7 @@ sealed trait Matrix extends Serializable {
|
||||||
override def toString: String = toBreeze.toString()
|
override def toString: String = toBreeze.toString()
|
||||||
|
|
||||||
/** A human readable representation of the matrix with maximum lines and width */
|
/** A human readable representation of the matrix with maximum lines and width */
|
||||||
|
@Since("2.0.0")
|
||||||
def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth)
|
def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -129,11 +145,13 @@ sealed trait Matrix extends Serializable {
|
||||||
/**
|
/**
|
||||||
* Find the number of non-zero active values.
|
* Find the number of non-zero active values.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def numNonzeros: Int
|
def numNonzeros: Int
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the number of values stored explicitly. These values can be zero as well.
|
* Find the number of values stored explicitly. These values can be zero as well.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def numActives: Int
|
def numActives: Int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -154,10 +172,11 @@ sealed trait Matrix extends Serializable {
|
||||||
* @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in
|
* @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in
|
||||||
* row major.
|
* row major.
|
||||||
*/
|
*/
|
||||||
class DenseMatrix (
|
@Since("2.0.0")
|
||||||
val numRows: Int,
|
class DenseMatrix @Since("2.0.0") (
|
||||||
val numCols: Int,
|
@Since("2.0.0") val numRows: Int,
|
||||||
val values: Array[Double],
|
@Since("2.0.0") val numCols: Int,
|
||||||
|
@Since("2.0.0") val values: Array[Double],
|
||||||
override val isTransposed: Boolean) extends Matrix {
|
override val isTransposed: Boolean) extends Matrix {
|
||||||
|
|
||||||
require(values.length == numRows * numCols, "The number of values supplied doesn't match the " +
|
require(values.length == numRows * numCols, "The number of values supplied doesn't match the " +
|
||||||
|
@ -178,6 +197,7 @@ class DenseMatrix (
|
||||||
* @param numCols number of columns
|
* @param numCols number of columns
|
||||||
* @param values matrix entries in column major
|
* @param values matrix entries in column major
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def this(numRows: Int, numCols: Int, values: Array[Double]) =
|
def this(numRows: Int, numCols: Int, values: Array[Double]) =
|
||||||
this(numRows, numCols, values, false)
|
this(numRows, numCols, values, false)
|
||||||
|
|
||||||
|
@ -266,6 +286,7 @@ class DenseMatrix (
|
||||||
* Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
|
* Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
|
||||||
* set to false.
|
* set to false.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def toSparse: SparseMatrix = {
|
def toSparse: SparseMatrix = {
|
||||||
val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble
|
val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble
|
||||||
val colPtrs: Array[Int] = new Array[Int](numCols + 1)
|
val colPtrs: Array[Int] = new Array[Int](numCols + 1)
|
||||||
|
@ -307,6 +328,7 @@ class DenseMatrix (
|
||||||
/**
|
/**
|
||||||
* Factory methods for [[org.apache.spark.ml.linalg.DenseMatrix]].
|
* Factory methods for [[org.apache.spark.ml.linalg.DenseMatrix]].
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
object DenseMatrix {
|
object DenseMatrix {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -315,6 +337,7 @@ object DenseMatrix {
|
||||||
* @param numCols number of columns of the matrix
|
* @param numCols number of columns of the matrix
|
||||||
* @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros
|
* @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def zeros(numRows: Int, numCols: Int): DenseMatrix = {
|
def zeros(numRows: Int, numCols: Int): DenseMatrix = {
|
||||||
require(numRows.toLong * numCols <= Int.MaxValue,
|
require(numRows.toLong * numCols <= Int.MaxValue,
|
||||||
s"$numRows x $numCols dense matrix is too large to allocate")
|
s"$numRows x $numCols dense matrix is too large to allocate")
|
||||||
|
@ -327,6 +350,7 @@ object DenseMatrix {
|
||||||
* @param numCols number of columns of the matrix
|
* @param numCols number of columns of the matrix
|
||||||
* @return `DenseMatrix` with size `numRows` x `numCols` and values of ones
|
* @return `DenseMatrix` with size `numRows` x `numCols` and values of ones
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def ones(numRows: Int, numCols: Int): DenseMatrix = {
|
def ones(numRows: Int, numCols: Int): DenseMatrix = {
|
||||||
require(numRows.toLong * numCols <= Int.MaxValue,
|
require(numRows.toLong * numCols <= Int.MaxValue,
|
||||||
s"$numRows x $numCols dense matrix is too large to allocate")
|
s"$numRows x $numCols dense matrix is too large to allocate")
|
||||||
|
@ -338,6 +362,7 @@ object DenseMatrix {
|
||||||
* @param n number of rows and columns of the matrix
|
* @param n number of rows and columns of the matrix
|
||||||
* @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal
|
* @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def eye(n: Int): DenseMatrix = {
|
def eye(n: Int): DenseMatrix = {
|
||||||
val identity = DenseMatrix.zeros(n, n)
|
val identity = DenseMatrix.zeros(n, n)
|
||||||
var i = 0
|
var i = 0
|
||||||
|
@ -355,6 +380,7 @@ object DenseMatrix {
|
||||||
* @param rng a random number generator
|
* @param rng a random number generator
|
||||||
* @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
|
* @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
|
def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
|
||||||
require(numRows.toLong * numCols <= Int.MaxValue,
|
require(numRows.toLong * numCols <= Int.MaxValue,
|
||||||
s"$numRows x $numCols dense matrix is too large to allocate")
|
s"$numRows x $numCols dense matrix is too large to allocate")
|
||||||
|
@ -368,6 +394,7 @@ object DenseMatrix {
|
||||||
* @param rng a random number generator
|
* @param rng a random number generator
|
||||||
* @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
|
* @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
|
def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
|
||||||
require(numRows.toLong * numCols <= Int.MaxValue,
|
require(numRows.toLong * numCols <= Int.MaxValue,
|
||||||
s"$numRows x $numCols dense matrix is too large to allocate")
|
s"$numRows x $numCols dense matrix is too large to allocate")
|
||||||
|
@ -380,6 +407,7 @@ object DenseMatrix {
|
||||||
* @return Square `DenseMatrix` with size `values.length` x `values.length` and `values`
|
* @return Square `DenseMatrix` with size `values.length` x `values.length` and `values`
|
||||||
* on the diagonal
|
* on the diagonal
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def diag(vector: Vector): DenseMatrix = {
|
def diag(vector: Vector): DenseMatrix = {
|
||||||
val n = vector.size
|
val n = vector.size
|
||||||
val matrix = DenseMatrix.zeros(n, n)
|
val matrix = DenseMatrix.zeros(n, n)
|
||||||
|
@ -415,12 +443,13 @@ object DenseMatrix {
|
||||||
* Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs,
|
* Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs,
|
||||||
* and `rowIndices` behave as colIndices, and `values` are stored in row major.
|
* and `rowIndices` behave as colIndices, and `values` are stored in row major.
|
||||||
*/
|
*/
|
||||||
class SparseMatrix (
|
@Since("2.0.0")
|
||||||
val numRows: Int,
|
class SparseMatrix @Since("2.0.0") (
|
||||||
val numCols: Int,
|
@Since("2.0.0") val numRows: Int,
|
||||||
val colPtrs: Array[Int],
|
@Since("2.0.0") val numCols: Int,
|
||||||
val rowIndices: Array[Int],
|
@Since("2.0.0") val colPtrs: Array[Int],
|
||||||
val values: Array[Double],
|
@Since("2.0.0") val rowIndices: Array[Int],
|
||||||
|
@Since("2.0.0") val values: Array[Double],
|
||||||
override val isTransposed: Boolean) extends Matrix {
|
override val isTransposed: Boolean) extends Matrix {
|
||||||
|
|
||||||
require(values.length == rowIndices.length, "The number of row indices and values don't match! " +
|
require(values.length == rowIndices.length, "The number of row indices and values don't match! " +
|
||||||
|
@ -451,6 +480,7 @@ class SparseMatrix (
|
||||||
* order for each column
|
* order for each column
|
||||||
* @param values non-zero matrix entries in column major
|
* @param values non-zero matrix entries in column major
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def this(
|
def this(
|
||||||
numRows: Int,
|
numRows: Int,
|
||||||
numCols: Int,
|
numCols: Int,
|
||||||
|
@ -550,6 +580,7 @@ class SparseMatrix (
|
||||||
* Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed
|
* Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed
|
||||||
* set to false.
|
* set to false.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def toDense: DenseMatrix = {
|
def toDense: DenseMatrix = {
|
||||||
new DenseMatrix(numRows, numCols, toArray)
|
new DenseMatrix(numRows, numCols, toArray)
|
||||||
}
|
}
|
||||||
|
@ -594,6 +625,7 @@ class SparseMatrix (
|
||||||
/**
|
/**
|
||||||
* Factory methods for [[org.apache.spark.ml.linalg.SparseMatrix]].
|
* Factory methods for [[org.apache.spark.ml.linalg.SparseMatrix]].
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
object SparseMatrix {
|
object SparseMatrix {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -605,6 +637,7 @@ object SparseMatrix {
|
||||||
* @param entries Array of (i, j, value) tuples
|
* @param entries Array of (i, j, value) tuples
|
||||||
* @return The corresponding `SparseMatrix`
|
* @return The corresponding `SparseMatrix`
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix = {
|
def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix = {
|
||||||
val sortedEntries = entries.toSeq.sortBy(v => (v._2, v._1))
|
val sortedEntries = entries.toSeq.sortBy(v => (v._2, v._1))
|
||||||
val numEntries = sortedEntries.size
|
val numEntries = sortedEntries.size
|
||||||
|
@ -653,6 +686,7 @@ object SparseMatrix {
|
||||||
* @param n number of rows and columns of the matrix
|
* @param n number of rows and columns of the matrix
|
||||||
* @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal
|
* @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def speye(n: Int): SparseMatrix = {
|
def speye(n: Int): SparseMatrix = {
|
||||||
new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0))
|
new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0))
|
||||||
}
|
}
|
||||||
|
@ -722,6 +756,7 @@ object SparseMatrix {
|
||||||
* @param rng a random number generator
|
* @param rng a random number generator
|
||||||
* @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
|
* @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
|
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
|
||||||
val mat = genRandMatrix(numRows, numCols, density, rng)
|
val mat = genRandMatrix(numRows, numCols, density, rng)
|
||||||
mat.update(i => rng.nextDouble())
|
mat.update(i => rng.nextDouble())
|
||||||
|
@ -735,6 +770,7 @@ object SparseMatrix {
|
||||||
* @param rng a random number generator
|
* @param rng a random number generator
|
||||||
* @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
|
* @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
|
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
|
||||||
val mat = genRandMatrix(numRows, numCols, density, rng)
|
val mat = genRandMatrix(numRows, numCols, density, rng)
|
||||||
mat.update(i => rng.nextGaussian())
|
mat.update(i => rng.nextGaussian())
|
||||||
|
@ -746,6 +782,7 @@ object SparseMatrix {
|
||||||
* @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero
|
* @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero
|
||||||
* `values` on the diagonal
|
* `values` on the diagonal
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def spdiag(vector: Vector): SparseMatrix = {
|
def spdiag(vector: Vector): SparseMatrix = {
|
||||||
val n = vector.size
|
val n = vector.size
|
||||||
vector match {
|
vector match {
|
||||||
|
@ -762,6 +799,7 @@ object SparseMatrix {
|
||||||
/**
|
/**
|
||||||
* Factory methods for [[org.apache.spark.ml.linalg.Matrix]].
|
* Factory methods for [[org.apache.spark.ml.linalg.Matrix]].
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
object Matrices {
|
object Matrices {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -771,6 +809,7 @@ object Matrices {
|
||||||
* @param numCols number of columns
|
* @param numCols number of columns
|
||||||
* @param values matrix entries in column major
|
* @param values matrix entries in column major
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix = {
|
def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix = {
|
||||||
new DenseMatrix(numRows, numCols, values)
|
new DenseMatrix(numRows, numCols, values)
|
||||||
}
|
}
|
||||||
|
@ -784,6 +823,7 @@ object Matrices {
|
||||||
* @param rowIndices the row index of the entry
|
* @param rowIndices the row index of the entry
|
||||||
* @param values non-zero matrix entries in column major
|
* @param values non-zero matrix entries in column major
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def sparse(
|
def sparse(
|
||||||
numRows: Int,
|
numRows: Int,
|
||||||
numCols: Int,
|
numCols: Int,
|
||||||
|
@ -825,6 +865,7 @@ object Matrices {
|
||||||
* @param numCols number of columns of the matrix
|
* @param numCols number of columns of the matrix
|
||||||
* @return `Matrix` with size `numRows` x `numCols` and values of zeros
|
* @return `Matrix` with size `numRows` x `numCols` and values of zeros
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def zeros(numRows: Int, numCols: Int): Matrix = DenseMatrix.zeros(numRows, numCols)
|
def zeros(numRows: Int, numCols: Int): Matrix = DenseMatrix.zeros(numRows, numCols)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -833,6 +874,7 @@ object Matrices {
|
||||||
* @param numCols number of columns of the matrix
|
* @param numCols number of columns of the matrix
|
||||||
* @return `Matrix` with size `numRows` x `numCols` and values of ones
|
* @return `Matrix` with size `numRows` x `numCols` and values of ones
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def ones(numRows: Int, numCols: Int): Matrix = DenseMatrix.ones(numRows, numCols)
|
def ones(numRows: Int, numCols: Int): Matrix = DenseMatrix.ones(numRows, numCols)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -840,6 +882,7 @@ object Matrices {
|
||||||
* @param n number of rows and columns of the matrix
|
* @param n number of rows and columns of the matrix
|
||||||
* @return `Matrix` with size `n` x `n` and values of ones on the diagonal
|
* @return `Matrix` with size `n` x `n` and values of ones on the diagonal
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def eye(n: Int): Matrix = DenseMatrix.eye(n)
|
def eye(n: Int): Matrix = DenseMatrix.eye(n)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -847,6 +890,7 @@ object Matrices {
|
||||||
* @param n number of rows and columns of the matrix
|
* @param n number of rows and columns of the matrix
|
||||||
* @return `Matrix` with size `n` x `n` and values of ones on the diagonal
|
* @return `Matrix` with size `n` x `n` and values of ones on the diagonal
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def speye(n: Int): Matrix = SparseMatrix.speye(n)
|
def speye(n: Int): Matrix = SparseMatrix.speye(n)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -856,6 +900,7 @@ object Matrices {
|
||||||
* @param rng a random number generator
|
* @param rng a random number generator
|
||||||
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
|
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def rand(numRows: Int, numCols: Int, rng: Random): Matrix =
|
def rand(numRows: Int, numCols: Int, rng: Random): Matrix =
|
||||||
DenseMatrix.rand(numRows, numCols, rng)
|
DenseMatrix.rand(numRows, numCols, rng)
|
||||||
|
|
||||||
|
@ -867,6 +912,7 @@ object Matrices {
|
||||||
* @param rng a random number generator
|
* @param rng a random number generator
|
||||||
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
|
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
|
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
|
||||||
SparseMatrix.sprand(numRows, numCols, density, rng)
|
SparseMatrix.sprand(numRows, numCols, density, rng)
|
||||||
|
|
||||||
|
@ -877,6 +923,7 @@ object Matrices {
|
||||||
* @param rng a random number generator
|
* @param rng a random number generator
|
||||||
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
|
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def randn(numRows: Int, numCols: Int, rng: Random): Matrix =
|
def randn(numRows: Int, numCols: Int, rng: Random): Matrix =
|
||||||
DenseMatrix.randn(numRows, numCols, rng)
|
DenseMatrix.randn(numRows, numCols, rng)
|
||||||
|
|
||||||
|
@ -888,6 +935,7 @@ object Matrices {
|
||||||
* @param rng a random number generator
|
* @param rng a random number generator
|
||||||
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
|
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
|
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
|
||||||
SparseMatrix.sprandn(numRows, numCols, density, rng)
|
SparseMatrix.sprandn(numRows, numCols, density, rng)
|
||||||
|
|
||||||
|
@ -897,6 +945,7 @@ object Matrices {
|
||||||
* @return Square `Matrix` with size `values.length` x `values.length` and `values`
|
* @return Square `Matrix` with size `values.length` x `values.length` and `values`
|
||||||
* on the diagonal
|
* on the diagonal
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def diag(vector: Vector): Matrix = DenseMatrix.diag(vector)
|
def diag(vector: Vector): Matrix = DenseMatrix.diag(vector)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -906,6 +955,7 @@ object Matrices {
|
||||||
* @param matrices array of matrices
|
* @param matrices array of matrices
|
||||||
* @return a single `Matrix` composed of the matrices that were horizontally concatenated
|
* @return a single `Matrix` composed of the matrices that were horizontally concatenated
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def horzcat(matrices: Array[Matrix]): Matrix = {
|
def horzcat(matrices: Array[Matrix]): Matrix = {
|
||||||
if (matrices.isEmpty) {
|
if (matrices.isEmpty) {
|
||||||
return new DenseMatrix(0, 0, Array[Double]())
|
return new DenseMatrix(0, 0, Array[Double]())
|
||||||
|
@ -964,6 +1014,7 @@ object Matrices {
|
||||||
* @param matrices array of matrices
|
* @param matrices array of matrices
|
||||||
* @return a single `Matrix` composed of the matrices that were vertically concatenated
|
* @return a single `Matrix` composed of the matrices that were vertically concatenated
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def vertcat(matrices: Array[Matrix]): Matrix = {
|
def vertcat(matrices: Array[Matrix]): Matrix = {
|
||||||
if (matrices.isEmpty) {
|
if (matrices.isEmpty) {
|
||||||
return new DenseMatrix(0, 0, Array[Double]())
|
return new DenseMatrix(0, 0, Array[Double]())
|
||||||
|
|
|
@ -28,21 +28,26 @@ import org.json4s.DefaultFormats
|
||||||
import org.json4s.JsonDSL._
|
import org.json4s.JsonDSL._
|
||||||
import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}
|
import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}
|
||||||
|
|
||||||
|
import org.apache.spark.annotation.Since
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a numeric vector, whose index type is Int and value type is Double.
|
* Represents a numeric vector, whose index type is Int and value type is Double.
|
||||||
*
|
*
|
||||||
* Note: Users should not implement this interface.
|
* Note: Users should not implement this interface.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
sealed trait Vector extends Serializable {
|
sealed trait Vector extends Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Size of the vector.
|
* Size of the vector.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def size: Int
|
def size: Int
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts the instance to a double array.
|
* Converts the instance to a double array.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def toArray: Array[Double]
|
def toArray: Array[Double]
|
||||||
|
|
||||||
override def equals(other: Any): Boolean = {
|
override def equals(other: Any): Boolean = {
|
||||||
|
@ -96,11 +101,13 @@ sealed trait Vector extends Serializable {
|
||||||
* Gets the value of the ith element.
|
* Gets the value of the ith element.
|
||||||
* @param i index
|
* @param i index
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def apply(i: Int): Double = toBreeze(i)
|
def apply(i: Int): Double = toBreeze(i)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Makes a deep copy of this vector.
|
* Makes a deep copy of this vector.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def copy: Vector = {
|
def copy: Vector = {
|
||||||
throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.")
|
throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.")
|
||||||
}
|
}
|
||||||
|
@ -112,32 +119,38 @@ sealed trait Vector extends Serializable {
|
||||||
* the vector with type `Int`, and the second parameter is the corresponding value
|
* the vector with type `Int`, and the second parameter is the corresponding value
|
||||||
* with type `Double`.
|
* with type `Double`.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def foreachActive(f: (Int, Double) => Unit): Unit
|
def foreachActive(f: (Int, Double) => Unit): Unit
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Number of active entries. An "active entry" is an element which is explicitly stored,
|
* Number of active entries. An "active entry" is an element which is explicitly stored,
|
||||||
* regardless of its value. Note that inactive entries have value 0.
|
* regardless of its value. Note that inactive entries have value 0.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def numActives: Int
|
def numActives: Int
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Number of nonzero elements. This scans all active values and count nonzeros.
|
* Number of nonzero elements. This scans all active values and count nonzeros.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def numNonzeros: Int
|
def numNonzeros: Int
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts this vector to a sparse vector with all explicit zeros removed.
|
* Converts this vector to a sparse vector with all explicit zeros removed.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def toSparse: SparseVector
|
def toSparse: SparseVector
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts this vector to a dense vector.
|
* Converts this vector to a dense vector.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def toDense: DenseVector = new DenseVector(this.toArray)
|
def toDense: DenseVector = new DenseVector(this.toArray)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a vector in either dense or sparse format, whichever uses less storage.
|
* Returns a vector in either dense or sparse format, whichever uses less storage.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def compressed: Vector = {
|
def compressed: Vector = {
|
||||||
val nnz = numNonzeros
|
val nnz = numNonzeros
|
||||||
// A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
|
// A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
|
||||||
|
@ -152,11 +165,13 @@ sealed trait Vector extends Serializable {
|
||||||
* Find the index of a maximal element. Returns the first maximal element in case of a tie.
|
* Find the index of a maximal element. Returns the first maximal element in case of a tie.
|
||||||
* Returns -1 if vector has length 0.
|
* Returns -1 if vector has length 0.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def argmax: Int
|
def argmax: Int
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts the vector to a JSON string.
|
* Converts the vector to a JSON string.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def toJson: String
|
def toJson: String
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -165,12 +180,14 @@ sealed trait Vector extends Serializable {
|
||||||
* We don't use the name `Vector` because Scala imports
|
* We don't use the name `Vector` because Scala imports
|
||||||
* [[scala.collection.immutable.Vector]] by default.
|
* [[scala.collection.immutable.Vector]] by default.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
object Vectors {
|
object Vectors {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a dense vector from its values.
|
* Creates a dense vector from its values.
|
||||||
*/
|
*/
|
||||||
@varargs
|
@varargs
|
||||||
|
@Since("2.0.0")
|
||||||
def dense(firstValue: Double, otherValues: Double*): Vector =
|
def dense(firstValue: Double, otherValues: Double*): Vector =
|
||||||
new DenseVector((firstValue +: otherValues).toArray)
|
new DenseVector((firstValue +: otherValues).toArray)
|
||||||
|
|
||||||
|
@ -178,6 +195,7 @@ object Vectors {
|
||||||
/**
|
/**
|
||||||
* Creates a dense vector from a double array.
|
* Creates a dense vector from a double array.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def dense(values: Array[Double]): Vector = new DenseVector(values)
|
def dense(values: Array[Double]): Vector = new DenseVector(values)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -187,6 +205,7 @@ object Vectors {
|
||||||
* @param indices index array, must be strictly increasing.
|
* @param indices index array, must be strictly increasing.
|
||||||
* @param values value array, must have the same length as indices.
|
* @param values value array, must have the same length as indices.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector =
|
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector =
|
||||||
new SparseVector(size, indices, values)
|
new SparseVector(size, indices, values)
|
||||||
|
|
||||||
|
@ -196,6 +215,7 @@ object Vectors {
|
||||||
* @param size vector size.
|
* @param size vector size.
|
||||||
* @param elements vector elements in (index, value) pairs.
|
* @param elements vector elements in (index, value) pairs.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
|
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
|
||||||
require(size > 0, "The size of the requested sparse vector must be greater than 0.")
|
require(size > 0, "The size of the requested sparse vector must be greater than 0.")
|
||||||
|
|
||||||
|
@ -217,6 +237,7 @@ object Vectors {
|
||||||
* @param size vector size.
|
* @param size vector size.
|
||||||
* @param elements vector elements in (index, value) pairs.
|
* @param elements vector elements in (index, value) pairs.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = {
|
def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = {
|
||||||
sparse(size, elements.asScala.map { case (i, x) =>
|
sparse(size, elements.asScala.map { case (i, x) =>
|
||||||
(i.intValue(), x.doubleValue())
|
(i.intValue(), x.doubleValue())
|
||||||
|
@ -229,6 +250,7 @@ object Vectors {
|
||||||
* @param size vector size
|
* @param size vector size
|
||||||
* @return a zero vector
|
* @return a zero vector
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def zeros(size: Int): Vector = {
|
def zeros(size: Int): Vector = {
|
||||||
new DenseVector(new Array[Double](size))
|
new DenseVector(new Array[Double](size))
|
||||||
}
|
}
|
||||||
|
@ -236,6 +258,7 @@ object Vectors {
|
||||||
/**
|
/**
|
||||||
* Parses the JSON representation of a vector into a [[Vector]].
|
* Parses the JSON representation of a vector into a [[Vector]].
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def fromJson(json: String): Vector = {
|
def fromJson(json: String): Vector = {
|
||||||
implicit val formats = DefaultFormats
|
implicit val formats = DefaultFormats
|
||||||
val jValue = parseJson(json)
|
val jValue = parseJson(json)
|
||||||
|
@ -281,6 +304,7 @@ object Vectors {
|
||||||
* @param p norm.
|
* @param p norm.
|
||||||
* @return norm in L^p^ space.
|
* @return norm in L^p^ space.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def norm(vector: Vector, p: Double): Double = {
|
def norm(vector: Vector, p: Double): Double = {
|
||||||
require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
|
require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
|
||||||
s"You specified p=$p.")
|
s"You specified p=$p.")
|
||||||
|
@ -333,6 +357,7 @@ object Vectors {
|
||||||
* @param v2 second Vector.
|
* @param v2 second Vector.
|
||||||
* @return squared distance between two Vectors.
|
* @return squared distance between two Vectors.
|
||||||
*/
|
*/
|
||||||
|
@Since("2.0.0")
|
||||||
def sqdist(v1: Vector, v2: Vector): Double = {
|
def sqdist(v1: Vector, v2: Vector): Double = {
|
||||||
require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" +
|
require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" +
|
||||||
s"=${v2.size}.")
|
s"=${v2.size}.")
|
||||||
|
@ -449,7 +474,8 @@ object Vectors {
|
||||||
/**
|
/**
|
||||||
* A dense vector represented by a value array.
|
* A dense vector represented by a value array.
|
||||||
*/
|
*/
|
||||||
class DenseVector (val values: Array[Double]) extends Vector {
|
@Since("2.0.0")
|
||||||
|
class DenseVector @Since("2.0.0") (@Since("2.0.0") val values: Array[Double]) extends Vector {
|
||||||
|
|
||||||
override def size: Int = values.length
|
override def size: Int = values.length
|
||||||
|
|
||||||
|
@ -548,9 +574,11 @@ class DenseVector (val values: Array[Double]) extends Vector {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Since("2.0.0")
|
||||||
object DenseVector {
|
object DenseVector {
|
||||||
|
|
||||||
/** Extracts the value array from a dense vector. */
|
/** Extracts the value array from a dense vector. */
|
||||||
|
@Since("2.0.0")
|
||||||
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
|
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -561,10 +589,11 @@ object DenseVector {
|
||||||
* @param indices index array, assume to be strictly increasing.
|
* @param indices index array, assume to be strictly increasing.
|
||||||
* @param values value array, must have the same length as the index array.
|
* @param values value array, must have the same length as the index array.
|
||||||
*/
|
*/
|
||||||
class SparseVector (
|
@Since("2.0.0")
|
||||||
|
class SparseVector @Since("2.0.0") (
|
||||||
override val size: Int,
|
override val size: Int,
|
||||||
val indices: Array[Int],
|
@Since("2.0.0") val indices: Array[Int],
|
||||||
val values: Array[Double]) extends Vector {
|
@Since("2.0.0") val values: Array[Double]) extends Vector {
|
||||||
|
|
||||||
require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
|
require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
|
||||||
s" indices match the dimension of the values. You provided ${indices.length} indices and " +
|
s" indices match the dimension of the values. You provided ${indices.length} indices and " +
|
||||||
|
@ -734,7 +763,9 @@ class SparseVector (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Since("2.0.0")
|
||||||
object SparseVector {
|
object SparseVector {
|
||||||
|
@Since("2.0.0")
|
||||||
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
|
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
|
||||||
Some((sv.size, sv.indices, sv.values))
|
Some((sv.size, sv.indices, sv.values))
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,7 +116,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<profiles>
|
<profiles>
|
||||||
|
|
8
pom.xml
8
pom.xml
|
@ -284,8 +284,14 @@
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<classifier>tests</classifier>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -700,6 +700,10 @@ object MimaExcludes {
|
||||||
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.executor.ShuffleReadMetrics.localBlocksFetched"),
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.executor.ShuffleReadMetrics.localBlocksFetched"),
|
||||||
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.ShuffleReadMetrics.remoteBlocksFetched"),
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.ShuffleReadMetrics.remoteBlocksFetched"),
|
||||||
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.ShuffleReadMetrics.localBlocksFetched")
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.ShuffleReadMetrics.localBlocksFetched")
|
||||||
|
) ++ Seq(
|
||||||
|
// [SPARK-14613] Add @Since into the matrix and vector classes in spark-mllib-local
|
||||||
|
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.package$"),
|
||||||
|
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.package")
|
||||||
)
|
)
|
||||||
case v if v.startsWith("1.6") =>
|
case v if v.startsWith("1.6") =>
|
||||||
Seq(
|
Seq(
|
||||||
|
|
|
@ -50,10 +50,10 @@ object BuildCommons {
|
||||||
).map(ProjectRef(buildLocation, _))
|
).map(ProjectRef(buildLocation, _))
|
||||||
|
|
||||||
val allProjects@Seq(
|
val allProjects@Seq(
|
||||||
core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, testTags, sketch, _*
|
core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, tags, sketch, _*
|
||||||
) = Seq(
|
) = Seq(
|
||||||
"core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe",
|
"core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe",
|
||||||
"test-tags", "sketch"
|
"tags", "sketch"
|
||||||
).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects
|
).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects
|
||||||
|
|
||||||
val optionallyEnabledProjects@Seq(yarn, java8Tests, sparkGangliaLgpl,
|
val optionallyEnabledProjects@Seq(yarn, java8Tests, sparkGangliaLgpl,
|
||||||
|
@ -336,7 +336,7 @@ object SparkBuild extends PomBuild {
|
||||||
val mimaProjects = allProjects.filterNot { x =>
|
val mimaProjects = allProjects.filterNot { x =>
|
||||||
Seq(
|
Seq(
|
||||||
spark, hive, hiveThriftServer, hiveCompatibility, catalyst, repl, networkCommon, networkShuffle, networkYarn,
|
spark, hive, hiveThriftServer, hiveCompatibility, catalyst, repl, networkCommon, networkShuffle, networkYarn,
|
||||||
unsafe, testTags, sketch, mllibLocal
|
unsafe, tags, sketch, mllibLocal
|
||||||
).contains(x)
|
).contains(x)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -680,9 +680,9 @@ object Unidoc {
|
||||||
publish := {},
|
publish := {},
|
||||||
|
|
||||||
unidocProjectFilter in(ScalaUnidoc, unidoc) :=
|
unidocProjectFilter in(ScalaUnidoc, unidoc) :=
|
||||||
inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, streamingFlumeSink, yarn, testTags),
|
inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, streamingFlumeSink, yarn, tags),
|
||||||
unidocProjectFilter in(JavaUnidoc, unidoc) :=
|
unidocProjectFilter in(JavaUnidoc, unidoc) :=
|
||||||
inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, streamingFlumeSink, yarn, testTags),
|
inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, streamingFlumeSink, yarn, tags),
|
||||||
|
|
||||||
// Skip actual catalyst, but include the subproject.
|
// Skip actual catalyst, but include the subproject.
|
||||||
// Catalyst is not public API and contains quasiquotes which break scaladoc.
|
// Catalyst is not public API and contains quasiquotes which break scaladoc.
|
||||||
|
|
|
@ -87,7 +87,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.xbean</groupId>
|
<groupId>org.apache.xbean</groupId>
|
||||||
|
|
|
@ -55,7 +55,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|
|
@ -73,7 +73,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.parquet</groupId>
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
|
|
@ -95,7 +95,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
|
|
|
@ -60,7 +60,9 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!--
|
<!--
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -49,7 +49,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Explicit listing of transitive deps that are shaded. Otherwise, odd compiler crashes. -->
|
<!-- Explicit listing of transitive deps that are shaded. Otherwise, odd compiler crashes. -->
|
||||||
|
|
|
@ -53,7 +53,9 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
<classifier>tests</classifier>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
|
Loading…
Reference in a new issue