[SPARK-23085][ML] API parity for mllib.linalg.Vectors.sparse

## What changes were proposed in this pull request?
`ML.Vectors#sparse(size: Int, elements: Seq[(Int, Double)])` now supports zero-length vectors (`size == 0`); only negative sizes are rejected.

## How was this patch tested?
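Existing tests, plus a new unit test ("sparse vector only support non-negative length") added to each `VectorsSuite`.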

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #20275 from zhengruifeng/SparseVector_size.
This commit is contained in:
Zheng RuiFeng 2018-01-19 09:28:35 -06:00 committed by Sean Owen
parent 6c39654efc
commit 606a7485f1
4 changed files with 30 additions and 3 deletions

View file

@ -565,7 +565,7 @@ class SparseVector @Since("2.0.0") (
// validate the data // validate the data
{ {
require(size >= 0, "The size of the requested sparse vector must be greater than 0.") require(size >= 0, "The size of the requested sparse vector must be no less than 0.")
require(indices.length == values.length, "Sparse vectors require that the dimension of the" + require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
s" indices match the dimension of the values. You provided ${indices.length} indices and " + s" indices match the dimension of the values. You provided ${indices.length} indices and " +
s" ${values.length} values.") s" ${values.length} values.")

View file

@ -366,4 +366,18 @@ class VectorsSuite extends SparkMLFunSuite {
assert(v.slice(Array(2, 0)) === new SparseVector(2, Array(0), Array(2.2))) assert(v.slice(Array(2, 0)) === new SparseVector(2, Array(0), Array(2.2)))
assert(v.slice(Array(2, 0, 3, 4)) === new SparseVector(4, Array(0, 3), Array(2.2, 4.4))) assert(v.slice(Array(2, 0, 3, 4)) === new SparseVector(4, Array(0, 3), Array(2.2, 4.4)))
} }
test("sparse vector only support non-negative length") {
  // Size 0 is the smallest legal size: both factory overloads must accept it.
  val emptyFromArrays = Vectors.sparse(0, Array.emptyIntArray, Array.emptyDoubleArray)
  assert(emptyFromArrays.size === 0)

  val emptyFromPairs = Vectors.sparse(0, Array.empty[(Int, Double)])
  assert(emptyFromPairs.size === 0)

  // A negative size must be rejected by the (indices, values) overload ...
  intercept[IllegalArgumentException] {
    Vectors.sparse(-1, Array(1), Array(2.0))
  }

  // ... and by the Seq[(index, value)] overload.
  intercept[IllegalArgumentException] {
    Vectors.sparse(-1, Array((1, 2.0)))
  }
}
} }

View file

@ -326,8 +326,6 @@ object Vectors {
*/ */
@Since("1.0.0") @Since("1.0.0")
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = { def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
require(size > 0, "The size of the requested sparse vector must be greater than 0.")
val (indices, values) = elements.sortBy(_._1).unzip val (indices, values) = elements.sortBy(_._1).unzip
var prev = -1 var prev = -1
indices.foreach { i => indices.foreach { i =>
@ -758,6 +756,7 @@ class SparseVector @Since("1.0.0") (
@Since("1.0.0") val indices: Array[Int], @Since("1.0.0") val indices: Array[Int],
@Since("1.0.0") val values: Array[Double]) extends Vector { @Since("1.0.0") val values: Array[Double]) extends Vector {
require(size >= 0, "The size of the requested sparse vector must be no less than 0.")
require(indices.length == values.length, "Sparse vectors require that the dimension of the" + require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
s" indices match the dimension of the values. You provided ${indices.length} indices and " + s" indices match the dimension of the values. You provided ${indices.length} indices and " +
s" ${values.length} values.") s" ${values.length} values.")

View file

@ -495,4 +495,18 @@ class VectorsSuite extends SparkFunSuite with Logging {
assert(mlDenseVectorToArray(dv) === mlDenseVectorToArray(newDV)) assert(mlDenseVectorToArray(dv) === mlDenseVectorToArray(newDV))
assert(mlSparseVectorToArray(sv) === mlSparseVectorToArray(newSV)) assert(mlSparseVectorToArray(sv) === mlSparseVectorToArray(newSV))
} }
test("sparse vector only support non-negative length") {
  // Size 0 is the smallest legal size: both factory overloads must accept it.
  val emptyFromArrays = Vectors.sparse(0, Array.emptyIntArray, Array.emptyDoubleArray)
  assert(emptyFromArrays.size === 0)

  val emptyFromPairs = Vectors.sparse(0, Array.empty[(Int, Double)])
  assert(emptyFromPairs.size === 0)

  // A negative size must be rejected by the (indices, values) overload ...
  intercept[IllegalArgumentException] {
    Vectors.sparse(-1, Array(1), Array(2.0))
  }

  // ... and by the Seq[(index, value)] overload.
  intercept[IllegalArgumentException] {
    Vectors.sparse(-1, Array((1, 2.0)))
  }
}
} }