[SPARK-15211][SQL] Select features column from LibSVMRelation causes failure
## What changes were proposed in this pull request?

We need to use `requiredSchema` in `LibSVMRelation` to project the required columns when loading data from this data source. Otherwise, when users try to select the `features` column, it will cause a failure.

## How was this patch tested?

`LibSVMRelationSuite`.

Author: Liang-Chi Hsieh <simonh@tw.ibm.com>

Closes #12986 from viirya/fix-libsvmrelation.
This commit is contained in:
parent
a59ab594ca
commit
635ef407e1
|
@@ -203,10 +203,18 @@ class DefaultSource extends FileFormat with DataSourceRegister {
|
|||
}
|
||||
|
||||
val converter = RowEncoder(dataSchema)
|
||||
val fullOutput = dataSchema.map { f =>
|
||||
AttributeReference(f.name, f.dataType, f.nullable, f.metadata)()
|
||||
}
|
||||
val requiredOutput = fullOutput.filter { a =>
|
||||
requiredSchema.fieldNames.contains(a.name)
|
||||
}
|
||||
|
||||
val requiredColumns = GenerateUnsafeProjection.generate(requiredOutput, fullOutput)
|
||||
|
||||
points.map { pt =>
|
||||
val features = if (sparse) pt.features.toSparse else pt.features.toDense
|
||||
converter.toRow(Row(pt.label, features))
|
||||
requiredColumns(converter.toRow(Row(pt.label, features)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -108,5 +108,6 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
|
|||
test("select features from libsvm relation") {
|
||||
val df = sqlContext.read.format("libsvm").load(path)
|
||||
df.select("features").rdd.map { case Row(d: Vector) => d }.first
|
||||
df.select("features").collect
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue