[SPARK-15655][SQL] Fix Wrong Partition Column Order when Fetching Partitioned Tables
#### What changes were proposed in this pull request? When fetching a partitioned table, the output contains wrong results. The order of partition key values does not match the order of partition key columns in the output schema. For example, ```SQL CREATE TABLE table_with_partition(c1 string) PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) INSERT OVERWRITE TABLE table_with_partition PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e') SELECT 'blarr' SELECT p1, p2, p3, p4, p5, c1 FROM table_with_partition ``` ``` +---+---+---+---+---+-----+ | p1| p2| p3| p4| p5| c1| +---+---+---+---+---+-----+ | d| e| c| b| a|blarr| +---+---+---+---+---+-----+ ``` The expected result should be ``` +---+---+---+---+---+-----+ | p1| p2| p3| p4| p5| c1| +---+---+---+---+---+-----+ | a| b| c| d| e|blarr| +---+---+---+---+---+-----+ ``` This PR fixes this by enforcing that the order matches the table partition definition. #### How was this patch tested? Added a test case into `SQLQuerySuite` Author: gatorsmile <gatorsmile@gmail.com> Closes #13400 from gatorsmile/partitionedTableFetch.
This commit is contained in:
parent
6151d2641f
commit
bc02d01129
|
@ -160,7 +160,7 @@ private[hive] case class MetastoreRelation(
|
||||||
val tPartition = new org.apache.hadoop.hive.metastore.api.Partition
|
val tPartition = new org.apache.hadoop.hive.metastore.api.Partition
|
||||||
tPartition.setDbName(databaseName)
|
tPartition.setDbName(databaseName)
|
||||||
tPartition.setTableName(tableName)
|
tPartition.setTableName(tableName)
|
||||||
tPartition.setValues(p.spec.values.toList.asJava)
|
tPartition.setValues(partitionKeys.map(a => p.spec(a.name)).asJava)
|
||||||
|
|
||||||
val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor()
|
val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor()
|
||||||
tPartition.setSd(sd)
|
tPartition.setSd(sd)
|
||||||
|
|
|
@ -1610,6 +1610,38 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
||||||
assert(fs.exists(path), "This is an external table, so the data should not have been dropped")
|
assert(fs.exists(path), "This is an external table, so the data should not have been dropped")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test for SPARK-15655: partition key values must come back in the
// order the partition columns are declared in the table definition, regardless
// of the order (or subset) in which the projection lists them.
test("select partitioned table") {
  val tableName = "table_with_partition"
  withTable(tableName) {
    // Table with five partition columns; write one row into a single,
    // fully-specified partition.
    sql(
      s"""
         |CREATE TABLE $tableName(c1 string)
         |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string)
       """.stripMargin)
    sql(
      s"""
         |INSERT OVERWRITE TABLE $tableName
         |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e')
         |SELECT 'blarr'
       """.stripMargin)

    // Projection in the same order as the partitioning columns in the table definition.
    checkAnswer(
      sql(s"SELECT p1, p2, p3, p4, p5, c1 FROM $tableName"),
      Row("a", "b", "c", "d", "e", "blarr") :: Nil)

    // Projection in a different order than the partitioning columns.
    checkAnswer(
      sql(s"SELECT p2, p3, p4, p1, p5, c1 FROM $tableName"),
      Row("b", "c", "d", "a", "e", "blarr") :: Nil)

    // Projection containing only a subset of the partitioning columns.
    checkAnswer(
      sql(s"SELECT p2, p1, p5, c1 FROM $tableName"),
      Row("b", "a", "e", "blarr") :: Nil)
  }
}
|
||||||
|
|
||||||
test("SPARK-14981: DESC not supported for sorting columns") {
|
test("SPARK-14981: DESC not supported for sorting columns") {
|
||||||
withTable("t") {
|
withTable("t") {
|
||||||
val cause = intercept[ParseException] {
|
val cause = intercept[ParseException] {
|
||||||
|
|
Loading…
Reference in a new issue