[SPARK-21285][ML] VectorAssembler reports the column name of unsupported data type
## What changes were proposed in this pull request? Add the name of the offending column to the exception message that is raised when an input column has an unsupported data type. ## How was this patch tested? + [x] Pass all tests. Author: Yan Facai (颜发才) <facai.yan@gmail.com> Closes #18523 from facaiy/ENH/vectorassembler_add_col.
This commit is contained in:
parent
7fcbb9b57f
commit
56536e9992
|
@ -113,12 +113,15 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)
|
|||
override def transformSchema(schema: StructType): StructType = {
|
||||
val inputColNames = $(inputCols)
|
||||
val outputColName = $(outputCol)
|
||||
val inputDataTypes = inputColNames.map(name => schema(name).dataType)
|
||||
inputDataTypes.foreach {
|
||||
case _: NumericType | BooleanType =>
|
||||
case t if t.isInstanceOf[VectorUDT] =>
|
||||
case other =>
|
||||
throw new IllegalArgumentException(s"Data type $other is not supported.")
|
||||
val incorrectColumns = inputColNames.flatMap { name =>
|
||||
schema(name).dataType match {
|
||||
case _: NumericType | BooleanType => None
|
||||
case t if t.isInstanceOf[VectorUDT] => None
|
||||
case other => Some(s"Data type $other of column $name is not supported.")
|
||||
}
|
||||
}
|
||||
if (incorrectColumns.nonEmpty) {
|
||||
throw new IllegalArgumentException(incorrectColumns.mkString("\n"))
|
||||
}
|
||||
if (schema.fieldNames.contains(outputColName)) {
|
||||
throw new IllegalArgumentException(s"Output column $outputColName already exists.")
|
||||
|
|
|
@ -79,7 +79,10 @@ class VectorAssemblerSuite
|
|||
val thrown = intercept[IllegalArgumentException] {
|
||||
assembler.transform(df)
|
||||
}
|
||||
assert(thrown.getMessage contains "Data type StringType is not supported")
|
||||
assert(thrown.getMessage contains
|
||||
"Data type StringType of column a is not supported.\n" +
|
||||
"Data type StringType of column b is not supported.\n" +
|
||||
"Data type StringType of column c is not supported.")
|
||||
}
|
||||
|
||||
test("ML attributes") {
|
||||
|
|
Loading…
Reference in a new issue