[SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE

### What changes were proposed in this pull request?
When executing the `SHOW CREATE TABLE` command, we should not lose the null-flag information for table columns that are declared `NOT NULL`.
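
For illustration, a minimal sketch (assuming a running `SparkSession` named `spark`; the table name `t1` is hypothetical) of the behavior this change targets:

```scala
// Create a table with a non-nullable column.
spark.sql("CREATE TABLE t1 (a BIGINT NOT NULL, b BIGINT) USING json")

// Before this change, SHOW CREATE TABLE dropped the NOT NULL constraint;
// after it, the generated DDL is expected to keep it.
val ddl = spark.sql("SHOW CREATE TABLE t1").head().getString(0)
assert(ddl.contains("`a` BIGINT NOT NULL"))
```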

### Why are the changes needed?
[SPARK-36012](https://issues.apache.org/jira/browse/SPARK-36012)

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Added a unit test for V1; V2 is covered by existing (updated) unit tests.

Closes #33219 from Peng-Lei/SPARK-36012.

Authored-by: PengLei <peng.8lei@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
PengLei 2021-07-09 01:21:38 +08:00 committed by Wenchen Fan
parent 2df67a1a1b
commit e071721a51
5 changed files with 98 additions and 34 deletions


@@ -97,9 +97,12 @@ case class StructField(
/**
* Returns a string containing a schema in DDL format. For example, the following value:
* `StructField("eventId", IntegerType)` will be converted to `eventId` INT.
*
* `StructField("eventId", IntegerType, false)` will be converted to `eventId` INT NOT NULL.
* `StructField("eventId", IntegerType, true)` will be converted to `eventId` INT.
* @since 2.4.0
*/
def toDDL: String = s"${quoteIdentifier(name)} ${dataType.sql}$getDDLComment"
def toDDL: String = {
val nullString = if (nullable) "" else " NOT NULL"
s"${quoteIdentifier(name)} ${dataType.sql}${nullString}$getDDLComment"
}
}
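
As a usage sketch (not part of the patch), the updated `toDDL` keeps the null flag for non-nullable fields:

```scala
import org.apache.spark.sql.types._

val field = StructField("eventId", IntegerType, nullable = false)
// field.toDDL is expected to be: `eventId` INT NOT NULL

val schema = StructType(Seq(field, StructField("name", StringType)))
// schema.toDDL joins the per-field DDL strings, e.g.: `eventId` INT NOT NULL,`name` STRING
```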


@@ -654,7 +654,13 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
Row(Row(3, 4, null), 0) :: Row(Row(1, 2, null), 1) :: Row(Row(2, 3, null), 2) :: Nil
)
assert(unionDf.schema.toDDL == "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT>,`idx` INT")
var schema = new StructType()
.add("a", new StructType()
.add("_1", IntegerType, true)
.add("_2", IntegerType, true)
.add("_3", IntegerType, true), true)
.add("idx", IntegerType, false)
assert(unionDf.schema == schema)
unionDf = df1.unionByName(df2, true).unionByName(df3, true)
@@ -669,8 +675,14 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
Row(Row(110, 111, 112, 113), 1) ::
Row(Row(120, 121, 122, 123), 2) :: Nil // df3
)
assert(unionDf.schema.toDDL ==
"`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT, `_4`: INT>,`idx` INT")
schema = new StructType()
.add("a", new StructType()
.add("_1", IntegerType, true)
.add("_2", IntegerType, true)
.add("_3", IntegerType, true)
.add("_4", IntegerType, true), true)
.add("idx", IntegerType, false)
assert(unionDf.schema == schema)
}
test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - nested") {
@@ -678,26 +690,38 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
val df2 = Seq((1, UnionClass1b(1, 2L, UnionClass3(2, 3L)))).toDF("id", "a")
var unionDf = df1.unionByName(df2, true)
assert(unionDf.schema.toDDL ==
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
"`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
val schema1 = new StructType()
.add("id", IntegerType, false)
.add("a", new StructType()
.add("a", IntegerType, true)
.add("b", LongType, true)
.add("nested", new StructType()
.add("a", IntegerType, true)
.add("c", StringType, true)
.add("b", LongType, true), true), true)
assert(unionDf.schema == schema1)
checkAnswer(unionDf,
Row(0, Row(0, 1, Row(1, "2", null))) ::
Row(1, Row(1, 2, Row(2, null, 3L))) :: Nil)
unionDf = df2.unionByName(df1, true)
assert(unionDf.schema.toDDL ==
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
"`nested`: STRUCT<`a`: INT, `b`: BIGINT, `c`: STRING>>")
val schema2 = new StructType()
.add("id", IntegerType, false)
.add("a", new StructType()
.add("a", IntegerType, true)
.add("b", LongType, true)
.add("nested", new StructType()
.add("a", IntegerType, true)
.add("b", LongType, true)
.add("c", StringType, true), true), true)
assert(unionDf.schema == schema2)
checkAnswer(unionDf,
Row(1, Row(1, 2, Row(2, 3L, null))) ::
Row(0, Row(0, 1, Row(1, null, "2"))) :: Nil)
val df3 = Seq((2, UnionClass1b(2, 3L, null))).toDF("id", "a")
unionDf = df1.unionByName(df3, true)
assert(unionDf.schema.toDDL ==
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
"`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
assert(unionDf.schema == schema1)
checkAnswer(unionDf,
Row(0, Row(0, 1, Row(1, "2", null))) ::
Row(2, Row(2, 3, null)) :: Nil)
@@ -710,26 +734,49 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
val df2 = Seq((1, UnionClass1c(1, 2L, UnionClass4(2, 3L)))).toDF("id", "a")
var unionDf = df1.unionByName(df2, true)
assert(unionDf.schema.toDDL ==
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
"`nested`: STRUCT<`a`: INT, `c`: STRING, `A`: INT, `b`: BIGINT>>")
var schema = new StructType()
.add("id", IntegerType, false)
.add("a", new StructType()
.add("a", IntegerType, true)
.add("b", LongType, true)
.add("nested", new StructType()
.add("a", IntegerType, true)
.add("c", StringType, true)
.add("A", IntegerType, true)
.add("b", LongType, true), true), true)
assert(unionDf.schema == schema)
checkAnswer(unionDf,
Row(0, Row(0, 1, Row(1, "2", null, null))) ::
Row(1, Row(1, 2, Row(null, null, 2, 3L))) :: Nil)
unionDf = df2.unionByName(df1, true)
assert(unionDf.schema.toDDL ==
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
"`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT, `c`: STRING>>")
schema = new StructType()
.add("id", IntegerType, false)
.add("a", new StructType()
.add("a", IntegerType, true)
.add("b", LongType, true)
.add("nested", new StructType()
.add("A", IntegerType, true)
.add("b", LongType, true)
.add("a", IntegerType, true)
.add("c", StringType, true), true), true)
assert(unionDf.schema == schema)
checkAnswer(unionDf,
Row(1, Row(1, 2, Row(2, 3L, null, null))) ::
Row(0, Row(0, 1, Row(null, null, 1, "2"))) :: Nil)
val df3 = Seq((2, UnionClass1b(2, 3L, UnionClass3(4, 5L)))).toDF("id", "a")
unionDf = df2.unionByName(df3, true)
assert(unionDf.schema.toDDL ==
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
"`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT>>")
schema = new StructType()
.add("id", IntegerType, false)
.add("a", new StructType()
.add("a", IntegerType, true)
.add("b", LongType, true)
.add("nested", new StructType()
.add("A", IntegerType, true)
.add("b", LongType, true)
.add("a", IntegerType, true), true), true)
assert(unionDf.schema == schema)
checkAnswer(unionDf,
Row(1, Row(1, 2, Row(2, 3L, null))) ::
Row(2, Row(2, 3, Row(null, 5L, 4))) :: Nil)


@@ -19,6 +19,7 @@ package org.apache.spark.sql
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.sources.SimpleInsertSource
import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
import org.apache.spark.util.Utils
@@ -176,19 +177,31 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)"
sql(s"$createTable USING json")
val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)")
assert(shownDDL == "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>) USING json")
checkCreateTable("t1")
}
}
test("SPARK-36012: Add NULL flag when SHOW CREATE TABLE") {
val t = "SPARK_36012"
withTable(t) {
sql(
s"""
|CREATE TABLE $t (
| a bigint NOT NULL,
| b bigint
|)
|USING ${classOf[SimpleInsertSource].getName}
""".stripMargin)
val showDDL = getShowDDL(s"SHOW CREATE TABLE $t")
assert(showDDL == s"CREATE TABLE `default`.`$t` ( `a` BIGINT NOT NULL," +
s" `b` BIGINT) USING ${classOf[SimpleInsertSource].getName}")
}
}
protected def getShowDDL(showCreateTableSql: String): String = {
val result = sql(showCreateTableSql)
.head()
.getString(0)
.split("\n")
.map(_.trim)
if (result.length > 1) result(0) + result(1) else result.head
sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ")
}
protected def checkCreateTable(table: String, serde: Boolean = false): Unit = {


@@ -1978,7 +1978,7 @@ class DataSourceV2SQLSuite
sql(
s"""
|CREATE TABLE $t (
| a bigint,
| a bigint NOT NULL,
| b bigint,
| c bigint,
| `extra col` ARRAY<INT>,
@@ -1996,7 +1996,7 @@ class DataSourceV2SQLSuite
val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t")
assert(showDDL === Array(
"CREATE TABLE testcat.ns1.ns2.tbl (",
"`a` BIGINT,",
"`a` BIGINT NOT NULL,",
"`b` BIGINT,",
"`c` BIGINT,",
"`extra col` ARRAY<INT>,",


@@ -247,7 +247,8 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive"
sql(createTable)
val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)")
assert(shownDDL.substring(0, shownDDL.indexOf(" USING")) ==
"CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>)")
checkCreateTable("t1", serde = true)
}