[SPARK-21285][ML] VectorAssembler reports the column name of unsupported data type

## What changes were proposed in this pull request?
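Add the name of the offending column to the exception message that `VectorAssembler` raises when an input column has an unsupported data type. All unsupported input columns are now collected and reported together in a single exception, one message per line.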

## How was this patch tested?
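+ [x] Passes all tests; the assertion in `VectorAssemblerSuite` is updated to expect the column names in the error message.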

Author: Yan Facai (颜发才) <facai.yan@gmail.com>

Closes #18523 from facaiy/ENH/vectorassembler_add_col.
This commit is contained in:
Yan Facai (颜发才) 2017-07-07 18:32:01 +08:00 committed by Yanbo Liang
parent 7fcbb9b57f
commit 56536e9992
2 changed files with 13 additions and 7 deletions

View file

@ -113,12 +113,15 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)
override def transformSchema(schema: StructType): StructType = {
val inputColNames = $(inputCols)
val outputColName = $(outputCol)
val inputDataTypes = inputColNames.map(name => schema(name).dataType)
inputDataTypes.foreach {
case _: NumericType | BooleanType =>
case t if t.isInstanceOf[VectorUDT] =>
case other =>
throw new IllegalArgumentException(s"Data type $other is not supported.")
val incorrectColumns = inputColNames.flatMap { name =>
schema(name).dataType match {
case _: NumericType | BooleanType => None
case t if t.isInstanceOf[VectorUDT] => None
case other => Some(s"Data type $other of column $name is not supported.")
}
}
if (incorrectColumns.nonEmpty) {
throw new IllegalArgumentException(incorrectColumns.mkString("\n"))
}
if (schema.fieldNames.contains(outputColName)) {
throw new IllegalArgumentException(s"Output column $outputColName already exists.")

View file

@ -79,7 +79,10 @@ class VectorAssemblerSuite
val thrown = intercept[IllegalArgumentException] {
assembler.transform(df)
}
assert(thrown.getMessage contains "Data type StringType is not supported")
assert(thrown.getMessage contains
"Data type StringType of column a is not supported.\n" +
"Data type StringType of column b is not supported.\n" +
"Data type StringType of column c is not supported.")
}
test("ML attributes") {