[SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE

### What changes were proposed in this pull request?
When executing the `SHOW CREATE TABLE` command, we should not lose the null
flag information when a table column is specified as `NOT NULL`.
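
A minimal sketch of the intended effect (illustrative; the table name `t` is hypothetical, and the output is flattened to one line as in the tests below):

```scala
spark.sql("CREATE TABLE t (a BIGINT NOT NULL, b BIGINT) USING json")
spark.sql("SHOW CREATE TABLE t").head().getString(0)
// Before this patch: CREATE TABLE `default`.`t` ( `a` BIGINT, `b` BIGINT) USING json
// With this patch:   CREATE TABLE `default`.`t` ( `a` BIGINT NOT NULL, `b` BIGINT) USING json
```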

### Why are the changes needed?
See [SPARK-36012](https://issues.apache.org/jira/browse/SPARK-36012): the `NOT NULL` constraint on a column should be preserved in the output of `SHOW CREATE TABLE`.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Added a unit test for V1; the V2 path is covered by an existing unit test.

Closes #33219 from Peng-Lei/SPARK-36012.

Authored-by: PengLei <peng.8lei@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
parent 2df67a1a1b
commit e071721a51
5 changed files with 98 additions and 34 deletions

StructField.scala

@@ -97,9 +97,12 @@ case class StructField(
   /**
    * Returns a string containing a schema in DDL format. For example, the following value:
-   * `StructField("eventId", IntegerType)` will be converted to `eventId` INT.
+   * `StructField("eventId", IntegerType, false)` will be converted to `eventId` INT NOT NULL.
+   * `StructField("eventId", IntegerType, true)` will be converted to `eventId` INT.
    *
    * @since 2.4.0
    */
-  def toDDL: String = s"${quoteIdentifier(name)} ${dataType.sql}$getDDLComment"
+  def toDDL: String = {
+    val nullString = if (nullable) "" else " NOT NULL"
+    s"${quoteIdentifier(name)} ${dataType.sql}${nullString}$getDDLComment"
+  }
 }
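
With this change, `toDDL` appends a `NOT NULL` flag for non-nullable fields. A minimal sketch of the new behavior (illustrative REPL session; expected results shown in comments):

```scala
import org.apache.spark.sql.types._

StructField("a", LongType, nullable = false).toDDL  // "`a` BIGINT NOT NULL"
StructField("b", LongType, nullable = true).toDDL   // "`b` BIGINT"
```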

DataFrameSetOperationsSuite.scala

@@ -654,7 +654,13 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
       Row(Row(3, 4, null), 0) :: Row(Row(1, 2, null), 1) :: Row(Row(2, 3, null), 2) :: Nil
     )
-    assert(unionDf.schema.toDDL == "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT>,`idx` INT")
+    var schema = new StructType()
+      .add("a", new StructType()
+        .add("_1", IntegerType, true)
+        .add("_2", IntegerType, true)
+        .add("_3", IntegerType, true), true)
+      .add("idx", IntegerType, false)
+    assert(unionDf.schema == schema)
 
     unionDf = df1.unionByName(df2, true).unionByName(df3, true)
@@ -669,8 +675,14 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
       Row(Row(110, 111, 112, 113), 1) ::
       Row(Row(120, 121, 122, 123), 2) :: Nil // df3
     )
-    assert(unionDf.schema.toDDL ==
-      "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT, `_4`: INT>,`idx` INT")
+    schema = new StructType()
+      .add("a", new StructType()
+        .add("_1", IntegerType, true)
+        .add("_2", IntegerType, true)
+        .add("_3", IntegerType, true)
+        .add("_4", IntegerType, true), true)
+      .add("idx", IntegerType, false)
+    assert(unionDf.schema == schema)
   }
 
   test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - nested") {
@@ -678,26 +690,38 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
     val df2 = Seq((1, UnionClass1b(1, 2L, UnionClass3(2, 3L)))).toDF("id", "a")
 
     var unionDf = df1.unionByName(df2, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
+    val schema1 = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("a", IntegerType, true)
+          .add("c", StringType, true)
+          .add("b", LongType, true), true), true)
+    assert(unionDf.schema == schema1)
     checkAnswer(unionDf,
       Row(0, Row(0, 1, Row(1, "2", null))) ::
       Row(1, Row(1, 2, Row(2, null, 3L))) :: Nil)
 
     unionDf = df2.unionByName(df1, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `b`: BIGINT, `c`: STRING>>")
+    val schema2 = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("a", IntegerType, true)
+          .add("b", LongType, true)
+          .add("c", StringType, true), true), true)
+    assert(unionDf.schema == schema2)
     checkAnswer(unionDf,
       Row(1, Row(1, 2, Row(2, 3L, null))) ::
       Row(0, Row(0, 1, Row(1, null, "2"))) :: Nil)
 
     val df3 = Seq((2, UnionClass1b(2, 3L, null))).toDF("id", "a")
     unionDf = df1.unionByName(df3, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
+    assert(unionDf.schema == schema1)
     checkAnswer(unionDf,
       Row(0, Row(0, 1, Row(1, "2", null))) ::
       Row(2, Row(2, 3, null)) :: Nil)
@@ -710,26 +734,49 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
     val df2 = Seq((1, UnionClass1c(1, 2L, UnionClass4(2, 3L)))).toDF("id", "a")
 
     var unionDf = df1.unionByName(df2, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `c`: STRING, `A`: INT, `b`: BIGINT>>")
+    var schema = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("a", IntegerType, true)
+          .add("c", StringType, true)
+          .add("A", IntegerType, true)
+          .add("b", LongType, true), true), true)
+    assert(unionDf.schema == schema)
     checkAnswer(unionDf,
       Row(0, Row(0, 1, Row(1, "2", null, null))) ::
      Row(1, Row(1, 2, Row(null, null, 2, 3L))) :: Nil)
 
     unionDf = df2.unionByName(df1, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT, `c`: STRING>>")
+    schema = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("A", IntegerType, true)
+          .add("b", LongType, true)
+          .add("a", IntegerType, true)
+          .add("c", StringType, true), true), true)
+    assert(unionDf.schema == schema)
     checkAnswer(unionDf,
       Row(1, Row(1, 2, Row(2, 3L, null, null))) ::
       Row(0, Row(0, 1, Row(null, null, 1, "2"))) :: Nil)
 
     val df3 = Seq((2, UnionClass1b(2, 3L, UnionClass3(4, 5L)))).toDF("id", "a")
     unionDf = df2.unionByName(df3, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT>>")
+    schema = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("A", IntegerType, true)
+          .add("b", LongType, true)
+          .add("a", IntegerType, true), true), true)
+    assert(unionDf.schema == schema)
     checkAnswer(unionDf,
       Row(1, Row(1, 2, Row(2, 3L, null))) ::
       Row(2, Row(2, 3, Row(null, 5L, 4))) :: Nil)
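
These assertions switch from comparing `schema.toDDL` strings to comparing `StructType` values: with `toDDL` now emitting `NOT NULL`, the old expected strings no longer match, and a structural comparison also pins down the nullability of every nested field. A short sketch of the new string form (illustrative; `StructType.toDDL` joins the per-field DDL with commas):

```scala
import org.apache.spark.sql.types._

val schema = new StructType()
  .add("a", IntegerType, nullable = true)
  .add("idx", IntegerType, nullable = false)
schema.toDDL  // "`a` INT,`idx` INT NOT NULL"
```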

ShowCreateTableSuite.scala

@@ -19,6 +19,7 @@ package org.apache.spark.sql
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.sources.SimpleInsertSource
 import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
 import org.apache.spark.util.Utils
@@ -176,19 +177,31 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
       val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)"
       sql(s"$createTable USING json")
       val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
-      assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)")
+      assert(shownDDL == "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>) USING json")
       checkCreateTable("t1")
     }
   }
 
+  test("SPARK-36012: Add NULL flag when SHOW CREATE TABLE") {
+    val t = "SPARK_36012"
+    withTable(t) {
+      sql(
+        s"""
+           |CREATE TABLE $t (
+           |  a bigint NOT NULL,
+           |  b bigint
+           |)
+           |USING ${classOf[SimpleInsertSource].getName}
+        """.stripMargin)
+      val showDDL = getShowDDL(s"SHOW CREATE TABLE $t")
+      assert(showDDL == s"CREATE TABLE `default`.`$t` ( `a` BIGINT NOT NULL," +
+        s" `b` BIGINT) USING ${classOf[SimpleInsertSource].getName}")
+    }
+  }
+
   protected def getShowDDL(showCreateTableSql: String): String = {
-    val result = sql(showCreateTableSql)
-      .head()
-      .getString(0)
-      .split("\n")
-      .map(_.trim)
-    if (result.length > 1) result(0) + result(1) else result.head
+    sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ")
   }
 
   protected def checkCreateTable(table: String, serde: Boolean = false): Unit = {
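
The rewritten `getShowDDL` trims every line of the multi-line `SHOW CREATE TABLE` output and joins them with single spaces, which is why the expected strings above contain a space after the opening parenthesis. A small sketch of the normalization (the input string is illustrative):

```scala
val ddl = "CREATE TABLE `default`.`t1` (\n  `a` BIGINT NOT NULL,\n  `b` BIGINT)\nUSING json"
ddl.split("\n").map(_.trim).mkString(" ")
// "CREATE TABLE `default`.`t1` ( `a` BIGINT NOT NULL, `b` BIGINT) USING json"
```

The same flattening explains the Hive suite change at the end of this diff, which compares only the prefix before `" USING"`, likely because Hive's generated DDL carries serde and storage information after that point.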

DataSourceV2SQLSuite.scala

@@ -1978,7 +1978,7 @@ class DataSourceV2SQLSuite
     sql(
       s"""
          |CREATE TABLE $t (
-         | a bigint,
+         | a bigint NOT NULL,
          | b bigint,
          | c bigint,
          | `extra col` ARRAY<INT>,
@@ -1996,7 +1996,7 @@ class DataSourceV2SQLSuite
     val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t")
     assert(showDDL === Array(
       "CREATE TABLE testcat.ns1.ns2.tbl (",
-      "`a` BIGINT,",
+      "`a` BIGINT NOT NULL,",
       "`b` BIGINT,",
       "`c` BIGINT,",
       "`extra col` ARRAY<INT>,",

HiveShowCreateTableSuite.scala

@@ -247,7 +247,8 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton {
       val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive"
       sql(createTable)
       val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
-      assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)")
+      assert(shownDDL.substring(0, shownDDL.indexOf(" USING")) ==
+        "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>)")
       checkCreateTable("t1", serde = true)
     }