[SPARK-23459][SQL] Improve the error message when unknown column is specified in partition columns
## What changes were proposed in this pull request? This PR avoids printing internal schema information when an unknown column is specified in the partition columns. Instead, it prints the column names in the schema in a more readable format. The following is an example. Source code ``` test("save with an unknown partition column") { withTempDir { dir => val path = dir.getCanonicalPath Seq(1L -> "a").toDF("i", "j").write .format("parquet") .partitionBy("unknownColumn") .save(path) } } ``` Output without this PR ``` Partition column unknownColumn not found in schema StructType(StructField(i,LongType,false), StructField(j,StringType,true)); ``` Output with this PR ``` Partition column `unknownColumn` not found in schema struct<i:bigint,j:string>; ``` ## How was this patch tested? Manually tested. Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com> Closes #20653 from kiszk/SPARK-23459.
This commit is contained in:
parent
855ce13d04
commit
1a198ce8f5
|
@ -486,7 +486,8 @@ object PartitioningUtils {
|
|||
val equality = columnNameEquality(caseSensitive)
|
||||
StructType(partitionColumns.map { col =>
|
||||
schema.find(f => equality(f.name, col)).getOrElse {
|
||||
throw new AnalysisException(s"Partition column $col not found in schema $schema")
|
||||
val schemaCatalog = schema.catalogString
|
||||
throw new AnalysisException(s"Partition column `$col` not found in schema $schemaCatalog")
|
||||
}
|
||||
}).asNullable
|
||||
}
|
||||
|
|
|
@ -126,4 +126,20 @@ class SaveLoadSuite extends DataSourceTest with SharedSQLContext with BeforeAndA
|
|||
|
||||
checkLoad(df2, "jsonTable2")
|
||||
}
|
||||
|
||||
test("SPARK-23459: Improve error message when specified unknown column in partition columns") {
|
||||
withTempDir { dir =>
|
||||
val path = dir.getCanonicalPath
|
||||
val unknown = "unknownColumn"
|
||||
val df = Seq(1L -> "a").toDF("i", "j")
|
||||
val schemaCatalog = df.schema.catalogString
|
||||
val e = intercept[AnalysisException] {
|
||||
df.write
|
||||
.format("parquet")
|
||||
.partitionBy(unknown)
|
||||
.save(path)
|
||||
}.getMessage
|
||||
assert(e.contains(s"Partition column `$unknown` not found in schema $schemaCatalog"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue