[SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE

### What changes were proposed in this pull request?
When executing the `SHOW CREATE TABLE` command, we should not lose the null
flag information when a table column is specified as `NOT NULL`.
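
A minimal sketch of the intended effect (illustrative; the table name `t` is hypothetical, and the output is flattened to one line as in the tests below):

```scala
spark.sql("CREATE TABLE t (a BIGINT NOT NULL, b BIGINT) USING json")
spark.sql("SHOW CREATE TABLE t").head().getString(0)
// Before this patch: CREATE TABLE `default`.`t` ( `a` BIGINT, `b` BIGINT) USING json
// With this patch:   CREATE TABLE `default`.`t` ( `a` BIGINT NOT NULL, `b` BIGINT) USING json
```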

### Why are the changes needed?
See [SPARK-36012](https://issues.apache.org/jira/browse/SPARK-36012): the `NOT NULL` constraint on a column should be preserved in the output of `SHOW CREATE TABLE`.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Added a unit test for V1; the V2 path is covered by an existing unit test.

Closes #33219 from Peng-Lei/SPARK-36012.

Authored-by: PengLei <peng.8lei@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
parent 2df67a1a1b
commit e071721a51
5 changed files with 98 additions and 34 deletions

StructField.scala

@@ -97,9 +97,12 @@ case class StructField(
   /**
    * Returns a string containing a schema in DDL format. For example, the following value:
-   * `StructField("eventId", IntegerType)` will be converted to `eventId` INT.
+   * `StructField("eventId", IntegerType, false)` will be converted to `eventId` INT NOT NULL.
+   * `StructField("eventId", IntegerType, true)` will be converted to `eventId` INT.
    *
    * @since 2.4.0
    */
-  def toDDL: String = s"${quoteIdentifier(name)} ${dataType.sql}$getDDLComment"
+  def toDDL: String = {
+    val nullString = if (nullable) "" else " NOT NULL"
+    s"${quoteIdentifier(name)} ${dataType.sql}${nullString}$getDDLComment"
+  }
 }
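
With this change, `toDDL` appends a `NOT NULL` flag for non-nullable fields. A minimal sketch of the new behavior (illustrative REPL session; expected results shown in comments):

```scala
import org.apache.spark.sql.types._

StructField("a", LongType, nullable = false).toDDL  // "`a` BIGINT NOT NULL"
StructField("b", LongType, nullable = true).toDDL   // "`b` BIGINT"
```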

DataFrameSetOperationsSuite.scala

@@ -654,7 +654,13 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
       Row(Row(3, 4, null), 0) :: Row(Row(1, 2, null), 1) :: Row(Row(2, 3, null), 2) :: Nil
     )
-    assert(unionDf.schema.toDDL == "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT>,`idx` INT")
+    var schema = new StructType()
+      .add("a", new StructType()
+        .add("_1", IntegerType, true)
+        .add("_2", IntegerType, true)
+        .add("_3", IntegerType, true), true)
+      .add("idx", IntegerType, false)
+    assert(unionDf.schema == schema)
 
     unionDf = df1.unionByName(df2, true).unionByName(df3, true)
@@ -669,8 +675,14 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
       Row(Row(110, 111, 112, 113), 1) ::
       Row(Row(120, 121, 122, 123), 2) :: Nil // df3
     )
-    assert(unionDf.schema.toDDL ==
-      "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT, `_4`: INT>,`idx` INT")
+    schema = new StructType()
+      .add("a", new StructType()
+        .add("_1", IntegerType, true)
+        .add("_2", IntegerType, true)
+        .add("_3", IntegerType, true)
+        .add("_4", IntegerType, true), true)
+      .add("idx", IntegerType, false)
+    assert(unionDf.schema == schema)
   }
 
   test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - nested") {
@@ -678,26 +690,38 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
     val df2 = Seq((1, UnionClass1b(1, 2L, UnionClass3(2, 3L)))).toDF("id", "a")
 
     var unionDf = df1.unionByName(df2, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
+    val schema1 = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("a", IntegerType, true)
+          .add("c", StringType, true)
+          .add("b", LongType, true), true), true)
+    assert(unionDf.schema == schema1)
     checkAnswer(unionDf,
       Row(0, Row(0, 1, Row(1, "2", null))) ::
       Row(1, Row(1, 2, Row(2, null, 3L))) :: Nil)
 
     unionDf = df2.unionByName(df1, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `b`: BIGINT, `c`: STRING>>")
+    val schema2 = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("a", IntegerType, true)
+          .add("b", LongType, true)
+          .add("c", StringType, true), true), true)
+    assert(unionDf.schema == schema2)
     checkAnswer(unionDf,
       Row(1, Row(1, 2, Row(2, 3L, null))) ::
       Row(0, Row(0, 1, Row(1, null, "2"))) :: Nil)
 
     val df3 = Seq((2, UnionClass1b(2, 3L, null))).toDF("id", "a")
     unionDf = df1.unionByName(df3, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
+    assert(unionDf.schema == schema1)
     checkAnswer(unionDf,
       Row(0, Row(0, 1, Row(1, "2", null))) ::
       Row(2, Row(2, 3, null)) :: Nil)
@@ -710,26 +734,49 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
     val df2 = Seq((1, UnionClass1c(1, 2L, UnionClass4(2, 3L)))).toDF("id", "a")
 
     var unionDf = df1.unionByName(df2, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `c`: STRING, `A`: INT, `b`: BIGINT>>")
+    var schema = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("a", IntegerType, true)
+          .add("c", StringType, true)
+          .add("A", IntegerType, true)
+          .add("b", LongType, true), true), true)
+    assert(unionDf.schema == schema)
     checkAnswer(unionDf,
       Row(0, Row(0, 1, Row(1, "2", null, null))) ::
      Row(1, Row(1, 2, Row(null, null, 2, 3L))) :: Nil)
 
     unionDf = df2.unionByName(df1, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT, `c`: STRING>>")
+    schema = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("A", IntegerType, true)
+          .add("b", LongType, true)
+          .add("a", IntegerType, true)
+          .add("c", StringType, true), true), true)
+    assert(unionDf.schema == schema)
     checkAnswer(unionDf,
       Row(1, Row(1, 2, Row(2, 3L, null, null))) ::
       Row(0, Row(0, 1, Row(null, null, 1, "2"))) :: Nil)
 
     val df3 = Seq((2, UnionClass1b(2, 3L, UnionClass3(4, 5L)))).toDF("id", "a")
     unionDf = df2.unionByName(df3, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT>>")
+    schema = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("A", IntegerType, true)
+          .add("b", LongType, true)
+          .add("a", IntegerType, true), true), true)
+    assert(unionDf.schema == schema)
     checkAnswer(unionDf,
       Row(1, Row(1, 2, Row(2, 3L, null))) ::
       Row(2, Row(2, 3, Row(null, 5L, 4))) :: Nil)
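
These assertions switch from comparing `schema.toDDL` strings to comparing `StructType` values: with `toDDL` now emitting `NOT NULL`, the old expected strings no longer match, and a structural comparison also pins down the nullability of every nested field. A short sketch of the new string form (illustrative; `StructType.toDDL` joins the per-field DDL with commas):

```scala
import org.apache.spark.sql.types._

val schema = new StructType()
  .add("a", IntegerType, nullable = true)
  .add("idx", IntegerType, nullable = false)
schema.toDDL  // "`a` INT,`idx` INT NOT NULL"
```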

ShowCreateTableSuite.scala

@@ -19,6 +19,7 @@ package org.apache.spark.sql
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.sources.SimpleInsertSource
 import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
 import org.apache.spark.util.Utils
@@ -176,19 +177,31 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
       val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)"
       sql(s"$createTable USING json")
       val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
-      assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)")
+      assert(shownDDL == "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>) USING json")
       checkCreateTable("t1")
     }
   }
 
+  test("SPARK-36012: Add NULL flag when SHOW CREATE TABLE") {
+    val t = "SPARK_36012"
+    withTable(t) {
+      sql(
+        s"""
+           |CREATE TABLE $t (
+           |  a bigint NOT NULL,
+           |  b bigint
+           |)
+           |USING ${classOf[SimpleInsertSource].getName}
+        """.stripMargin)
+      val showDDL = getShowDDL(s"SHOW CREATE TABLE $t")
+      assert(showDDL == s"CREATE TABLE `default`.`$t` ( `a` BIGINT NOT NULL," +
+        s" `b` BIGINT) USING ${classOf[SimpleInsertSource].getName}")
+    }
+  }
+
   protected def getShowDDL(showCreateTableSql: String): String = {
-    val result = sql(showCreateTableSql)
-      .head()
-      .getString(0)
-      .split("\n")
-      .map(_.trim)
-    if (result.length > 1) result(0) + result(1) else result.head
+    sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ")
   }
 
   protected def checkCreateTable(table: String, serde: Boolean = false): Unit = {
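
The rewritten `getShowDDL` trims every line of the multi-line `SHOW CREATE TABLE` output and joins them with single spaces, which is why the expected strings above contain a space after the opening parenthesis. A small sketch of the normalization (the input string is illustrative):

```scala
val ddl = "CREATE TABLE `default`.`t1` (\n  `a` BIGINT NOT NULL,\n  `b` BIGINT)\nUSING json"
ddl.split("\n").map(_.trim).mkString(" ")
// "CREATE TABLE `default`.`t1` ( `a` BIGINT NOT NULL, `b` BIGINT) USING json"
```

The same flattening explains the Hive suite change at the end of this diff, which compares only the prefix before `" USING"`, likely because Hive's generated DDL carries serde and storage information after that point.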

DataSourceV2SQLSuite.scala

@@ -1978,7 +1978,7 @@ class DataSourceV2SQLSuite
     sql(
       s"""
          |CREATE TABLE $t (
-         | a bigint,
+         | a bigint NOT NULL,
          | b bigint,
          | c bigint,
          | `extra col` ARRAY<INT>,
@@ -1996,7 +1996,7 @@ class DataSourceV2SQLSuite
     val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t")
     assert(showDDL === Array(
       "CREATE TABLE testcat.ns1.ns2.tbl (",
-      "`a` BIGINT,",
+      "`a` BIGINT NOT NULL,",
       "`b` BIGINT,",
       "`c` BIGINT,",
       "`extra col` ARRAY<INT>,",

HiveShowCreateTableSuite.scala

@@ -247,7 +247,8 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton {
       val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive"
       sql(createTable)
       val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
-      assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)")
+      assert(shownDDL.substring(0, shownDDL.indexOf(" USING")) ==
+        "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>)")
       checkCreateTable("t1", serde = true)
     }