[SPARK-16024][SQL][TEST] Verify Column Comment for Data Source Tables

#### What changes were proposed in this pull request?
This PR is to improve test coverage. It verifies whether the `Comment` of a `Column` is handled appropriately.

The test cases verify the related parts in the Parser, both the SQL and DataFrameWriter interfaces, and both the Hive Metastore catalog and the in-memory catalog.

#### How was this patch tested?
N/A

Author: gatorsmile <gatorsmile@gmail.com>

Closes #13764 from gatorsmile/dataSourceComment.
This commit is contained in:
gatorsmile 2016-06-23 09:12:20 +08:00 committed by Wenchen Fan
parent 4f869f88ee
commit 9f990fa3f9
3 changed files with 34 additions and 3 deletions

View file

@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical.Project
import org.apache.spark.sql.execution.SparkSqlParser
import org.apache.spark.sql.execution.datasources.{BucketSpec, CreateTableUsing}
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, StringType, StructType}
// TODO: merge this with DDLSuite (SPARK-14441)
@ -349,10 +349,14 @@ class DDLCommandSuite extends PlanTest {
}
test("create table using - with partitioned by") {
val query = "CREATE TABLE my_tab(a INT, b STRING) USING parquet PARTITIONED BY (a)"
val query = "CREATE TABLE my_tab(a INT comment 'test', b STRING) " +
"USING parquet PARTITIONED BY (a)"
val expected = CreateTableUsing(
TableIdentifier("my_tab"),
Some(new StructType().add("a", IntegerType).add("b", StringType)),
Some(new StructType()
.add("a", IntegerType, nullable = true,
new MetadataBuilder().putString("comment", s"test").build())
.add("b", StringType)),
"parquet",
false,
Map.empty,

View file

@ -252,6 +252,19 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
}
}
// Verifies that a column comment declared via data-source CREATE TABLE syntax
// ("a int comment 'test'") is persisted and surfaced by DESC <table> when the
// session uses the in-memory catalog (the Hive-catalog twin lives in HiveDDLSuite).
test("desc table for parquet data source table using in-memory catalog") {
// Guard: only meaningful when the catalog implementation is "in-memory";
// otherwise the test is skipped (assume), not failed.
assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory")
val tabName = "tab1"
// withTable drops tab1 afterwards even if the assertions throw.
withTable(tabName) {
sql(s"CREATE TABLE $tabName(a int comment 'test') USING parquet ")
// DESC must report the column name, its type, and the comment supplied above.
checkAnswer(
sql(s"DESC $tabName").select("col_name", "data_type", "comment"),
Row("a", "int", "test")
)
}
}
test("Alter/Describe Database") {
withTempDir { tmpDir =>
val path = tmpDir.toString

View file

@ -22,6 +22,7 @@ import java.io.File
import org.apache.hadoop.fs.Path
import org.scalatest.BeforeAndAfterEach
import org.apache.spark.internal.config._
import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTableType}
import org.apache.spark.sql.catalyst.TableIdentifier
@ -407,6 +408,19 @@ class HiveDDLSuite
}
}
// Verifies that a column comment declared via data-source CREATE TABLE syntax
// ("a int comment 'test'") survives a round trip through the Hive Metastore
// and is surfaced by DESC <table> (the in-memory-catalog twin lives in DDLSuite).
test("desc table for data source table using Hive Metastore") {
// Guard: only meaningful when the catalog implementation is "hive";
// otherwise the test is skipped (assume), not failed.
assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive")
val tabName = "tab1"
// withTable drops tab1 afterwards even if the assertions throw.
withTable(tabName) {
sql(s"CREATE TABLE $tabName(a int comment 'test') USING parquet ")
// DESC must report the column name, its type, and the comment supplied above.
checkAnswer(
sql(s"DESC $tabName").select("col_name", "data_type", "comment"),
Row("a", "int", "test")
)
}
}
private def createDatabaseWithLocation(tmpDir: File, dirExists: Boolean): Unit = {
val catalog = spark.sessionState.catalog
val dbName = "db1"