From bc02d011294fcd1ab07b9baf1011c3f2bdf749d9 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Tue, 14 Jun 2016 09:58:06 -0700
Subject: [PATCH] [SPARK-15655][SQL] Fix Wrong Partition Column Order when Fetching Partitioned Tables

#### What changes were proposed in this pull request?
When fetching a partitioned table, the output contains wrong results: the order of the partition key values does not match the order of the partition key columns in the output schema. For example,

```SQL
CREATE TABLE table_with_partition(c1 string)
PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string)

INSERT OVERWRITE TABLE table_with_partition
PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e')
SELECT 'blarr'

SELECT p1, p2, p3, p4, p5, c1 FROM table_with_partition
```
```
+---+---+---+---+---+-----+
| p1| p2| p3| p4| p5|   c1|
+---+---+---+---+---+-----+
|  d|  e|  c|  b|  a|blarr|
+---+---+---+---+---+-----+
```
The expected result should be
```
+---+---+---+---+---+-----+
| p1| p2| p3| p4| p5|   c1|
+---+---+---+---+---+-----+
|  a|  b|  c|  d|  e|blarr|
+---+---+---+---+---+-----+
```
This PR fixes the issue by emitting the partition values in the order of the partition key columns in the table definition.

#### How was this patch tested?
Added a test case to `SQLQuerySuite`.

Author: gatorsmile

Closes #13400 from gatorsmile/partitionedTableFetch.

---
 .../spark/sql/hive/MetastoreRelation.scala      |  2 +-
 .../sql/hive/execution/SQLQuerySuite.scala      | 32 +++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
index 9c820144ae..5596a4470f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
@@ -160,7 +160,7 @@ private[hive] case class MetastoreRelation(
       val tPartition = new org.apache.hadoop.hive.metastore.api.Partition
       tPartition.setDbName(databaseName)
       tPartition.setTableName(tableName)
-      tPartition.setValues(p.spec.values.toList.asJava)
+      tPartition.setValues(partitionKeys.map(a => p.spec(a.name)).asJava)
 
       val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor()
       tPartition.setSd(sd)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 1a0eaa66c1..9c1f218253 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1610,6 +1610,38 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
     assert(fs.exists(path), "This is an external table, so the data should not have been dropped")
   }
 
+  test("select partitioned table") {
+    val table = "table_with_partition"
+    withTable(table) {
+      sql(
+        s"""
+           |CREATE TABLE $table(c1 string)
+           |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string)
+         """.stripMargin)
+      sql(
+        s"""
+           |INSERT OVERWRITE TABLE $table
+           |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e')
+           |SELECT 'blarr'
+         """.stripMargin)
+
+      // project list is in the same order as the partitioning columns in the table definition
+      checkAnswer(
+        sql(s"SELECT p1, p2, p3, p4, p5, c1 FROM $table"),
+        Row("a", "b", "c", "d", "e", "blarr") :: Nil)
+
+      // project list is not in the same order as the partitioning columns in the table definition
+      checkAnswer(
+        sql(s"SELECT p2, p3, p4, p1, p5, c1 FROM $table"),
+        Row("b", "c", "d", "a", "e", "blarr") :: Nil)
+
+      // project list contains only a subset of the partition columns in the table definition
+      checkAnswer(
+        sql(s"SELECT p2, p1, p5, c1 FROM $table"),
+        Row("b", "a", "e", "blarr") :: Nil)
+    }
+  }
+
   test("SPARK-14981: DESC not supported for sorting columns") {
     withTable("t") {
       val cause = intercept[ParseException] {