[SPARK-23459][SQL] Improve the error message when an unknown column is specified in the partition columns

## What changes were proposed in this pull request?

This PR stops printing the schema's internal representation when an unknown column is specified in the partition columns. Instead, the error message shows the schema's column names and types in a more readable format.

The following is an example.

Source code
```
test("save with an unknown partition column") {
  withTempDir { dir =>
    val path = dir.getCanonicalPath
    Seq(1L -> "a").toDF("i", "j").write
      .format("parquet")
      .partitionBy("unknownColumn")
      .save(path)
  }
}
```
Output without this PR
```
Partition column unknownColumn not found in schema StructType(StructField(i,LongType,false), StructField(j,StringType,true));
```

Output with this PR
```
Partition column unknownColumn not found in schema struct<i:bigint,j:string>;
```

## How was this patch tested?

Manually tested, and covered by a new test case added to `SaveLoadSuite`.

Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com>

Closes #20653 from kiszk/SPARK-23459.
This commit is contained in:
Kazuaki Ishizaki 2018-02-23 16:30:32 -08:00 committed by gatorsmile
parent 855ce13d04
commit 1a198ce8f5
2 changed files with 18 additions and 1 deletions

View file

@ -486,7 +486,8 @@ object PartitioningUtils {
val equality = columnNameEquality(caseSensitive)
StructType(partitionColumns.map { col =>
schema.find(f => equality(f.name, col)).getOrElse {
throw new AnalysisException(s"Partition column $col not found in schema $schema")
val schemaCatalog = schema.catalogString
throw new AnalysisException(s"Partition column `$col` not found in schema $schemaCatalog")
}
}).asNullable
}

View file

@ -126,4 +126,20 @@ class SaveLoadSuite extends DataSourceTest with SharedSQLContext with BeforeAndA
checkLoad(df2, "jsonTable2")
}
test("SPARK-23459: Improve error message when specified unknown column in partition columns") {
  // Writing with a partition column that is not part of the DataFrame schema must fail with an
  // AnalysisException whose message names the column and shows the schema's catalogString.
  withTempDir { tmpDir =>
    val outputPath = tmpDir.getCanonicalPath
    val missingColumn = "unknownColumn"
    val frame = Seq(1L -> "a").toDF("i", "j")
    val expectedMessage =
      s"Partition column `$missingColumn` not found in schema ${frame.schema.catalogString}"

    val thrown = intercept[AnalysisException] {
      frame.write.format("parquet").partitionBy(missingColumn).save(outputPath)
    }

    assert(thrown.getMessage.contains(expectedMessage))
  }
}
}