[SPARK-15655][SQL] Fix Wrong Partition Column Order when Fetching Partitioned Tables
#### What changes were proposed in this pull request? When fetching a partitioned table, the output contains wrong results. The order of partition key values does not match the order of partition key columns in the output schema. For example, ```SQL CREATE TABLE table_with_partition(c1 string) PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) INSERT OVERWRITE TABLE table_with_partition PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e') SELECT 'blarr' SELECT p1, p2, p3, p4, p5, c1 FROM table_with_partition ``` ``` +---+---+---+---+---+-----+ | p1| p2| p3| p4| p5| c1| +---+---+---+---+---+-----+ | d| e| c| b| a|blarr| +---+---+---+---+---+-----+ ``` The expected result should be ``` +---+---+---+---+---+-----+ | p1| p2| p3| p4| p5| c1| +---+---+---+---+---+-----+ | a| b| c| d| e|blarr| +---+---+---+---+---+-----+ ``` This PR fixes this by enforcing that the order matches the table partition definition. #### How was this patch tested? Added a test case into `SQLQuerySuite` Author: gatorsmile <gatorsmile@gmail.com> Closes #13400 from gatorsmile/partitionedTableFetch.
This commit is contained in:
parent
6151d2641f
commit
bc02d01129
|
@ -160,7 +160,7 @@ private[hive] case class MetastoreRelation(
|
||||||
val tPartition = new org.apache.hadoop.hive.metastore.api.Partition
|
val tPartition = new org.apache.hadoop.hive.metastore.api.Partition
|
||||||
tPartition.setDbName(databaseName)
|
tPartition.setDbName(databaseName)
|
||||||
tPartition.setTableName(tableName)
|
tPartition.setTableName(tableName)
|
||||||
tPartition.setValues(p.spec.values.toList.asJava)
|
tPartition.setValues(partitionKeys.map(a => p.spec(a.name)).asJava)
|
||||||
|
|
||||||
val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor()
|
val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor()
|
||||||
tPartition.setSd(sd)
|
tPartition.setSd(sd)
|
||||||
|
|
|
@ -1610,6 +1610,38 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
||||||
assert(fs.exists(path), "This is an external table, so the data should not have been dropped")
|
assert(fs.exists(path), "This is an external table, so the data should not have been dropped")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test for SPARK-15655: partition key values must come back in the
// order the partition columns are declared in the table definition, regardless
// of the order (or subset) in which the projection lists them.
test("select partitioned table") {
  val tableName = "table_with_partition"
  withTable(tableName) {
    // Table with five partition columns; write one row into a single,
    // fully-specified partition.
    sql(
      s"""
         |CREATE TABLE $tableName(c1 string)
         |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string)
       """.stripMargin)
    sql(
      s"""
         |INSERT OVERWRITE TABLE $tableName
         |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e')
         |SELECT 'blarr'
       """.stripMargin)

    // Projection in the same order as the partitioning columns in the table definition.
    checkAnswer(
      sql(s"SELECT p1, p2, p3, p4, p5, c1 FROM $tableName"),
      Row("a", "b", "c", "d", "e", "blarr") :: Nil)

    // Projection in a different order than the partitioning columns.
    checkAnswer(
      sql(s"SELECT p2, p3, p4, p1, p5, c1 FROM $tableName"),
      Row("b", "c", "d", "a", "e", "blarr") :: Nil)

    // Projection containing only a subset of the partitioning columns.
    checkAnswer(
      sql(s"SELECT p2, p1, p5, c1 FROM $tableName"),
      Row("b", "a", "e", "blarr") :: Nil)
  }
}
|
||||||
|
|
||||||
test("SPARK-14981: DESC not supported for sorting columns") {
|
test("SPARK-14981: DESC not supported for sorting columns") {
|
||||||
withTable("t") {
|
withTable("t") {
|
||||||
val cause = intercept[ParseException] {
|
val cause = intercept[ParseException] {
|
||||||
|
|
Loading…
Reference in a new issue