[SPARK-9337] [MLLIB] Add a unit test for Word2Vec to verify the empty vocabulary check

jira: https://issues.apache.org/jira/browse/SPARK-9337

Word2Vec should throw an exception when the vocabulary is empty.

Author: Yuhao Yang <hhbyyh@gmail.com>

Closes #7660 from hhbyyh/ut4Word2vec and squashes the following commits:

17a18cb [Yuhao Yang] add ut for word2vec
This commit is contained in:
Yuhao Yang 2015-07-26 14:02:20 +01:00 committed by Sean Owen
parent 4a01bfc2a2
commit b79bf1df62

View file

@ -45,6 +45,16 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext {
assert(newModel.getVectors.mapValues(_.toSeq) === word2VecMap.mapValues(_.toSeq)) assert(newModel.getVectors.mapValues(_.toSeq) === word2VecMap.mapValues(_.toSeq))
} }
test("Word2Vec throws exception when vocabulary is empty") {
  // The corpus is the sentence "a b c" repeated twice, so each token occurs
  // only twice while minCount is set to 10. Per the test's intent, no token
  // should qualify for the vocabulary, and fit() is expected to reject the
  // input with an IllegalArgumentException.
  intercept[IllegalArgumentException] {
    val tokenizedCorpus = sc
      .parallelize(Seq.fill(2)("a b c"))
      .map(_.split(" ").toSeq)
    new Word2Vec().setMinCount(10).fit(tokenizedCorpus)
  }
}
test("Word2VecModel") { test("Word2VecModel") {
val num = 2 val num = 2
val word2VecMap = Map( val word2VecMap = Map(