[SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace between label and features vector
Fix the LabeledPoint parser when there is whitespace between the label and the features vector, e.g.
(y, [x1, x2, x3])
Author: Oleksiy Dyagilev <oleksiy_dyagilev@epam.com>
Closes #6954 from fe2s/SPARK-8525 and squashes the following commits:
0755b9d [Oleksiy Dyagilev] [SPARK-8525][MLLIB] addressing comment, removing dep on commons-lang
c1abc2b [Oleksiy Dyagilev] [SPARK-8525][MLLIB] fix LabeledPoint parser when there is a whitespace on specific position
(cherry picked from commit a8031183af)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
This commit is contained in:
parent
27693e1757
commit
8d6e3636e9
|
@ -98,6 +98,8 @@ private[mllib] object NumericParser {
|
||||||
}
|
}
|
||||||
} else if (token == ")") {
|
} else if (token == ")") {
|
||||||
parsing = false
|
parsing = false
|
||||||
|
} else if (token.trim.isEmpty){
|
||||||
|
// ignore whitespaces between delim chars, e.g. ", ["
|
||||||
} else {
|
} else {
|
||||||
// expecting a number
|
// expecting a number
|
||||||
items.append(parseDouble(token))
|
items.append(parseDouble(token))
|
||||||
|
|
|
@ -31,6 +31,11 @@ class LabeledPointSuite extends SparkFunSuite {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("parse labeled points with whitespaces") {
|
||||||
|
val point = LabeledPoint.parse("(0.0, [1.0, 2.0])")
|
||||||
|
assert(point === LabeledPoint(0.0, Vectors.dense(1.0, 2.0)))
|
||||||
|
}
|
||||||
|
|
||||||
test("parse labeled points with v0.9 format") {
|
test("parse labeled points with v0.9 format") {
|
||||||
val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0")
|
val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0")
|
||||||
assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0)))
|
assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0)))
|
||||||
|
|
|
@ -37,4 +37,11 @@ class NumericParserSuite extends SparkFunSuite {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("parser with whitespaces") {
|
||||||
|
val s = "(0.0, [1.0, 2.0])"
|
||||||
|
val parsed = NumericParser.parse(s).asInstanceOf[Seq[_]]
|
||||||
|
assert(parsed(0).asInstanceOf[Double] === 0.0)
|
||||||
|
assert(parsed(1).asInstanceOf[Array[Double]] === Array(1.0, 2.0))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue