[SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE
### What changes were proposed in this pull request? When executing the command `SHOW CREATE TABLE`, we should not lose the null-flag information when a table column is specified as `NOT NULL`. ### Why are the changes needed? [SPARK-36012](https://issues.apache.org/jira/browse/SPARK-36012) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a UT for V1 and used an existing UT for V2. Closes #33219 from Peng-Lei/SPARK-36012. Authored-by: PengLei <peng.8lei@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
2df67a1a1b
commit
e071721a51
|
@ -97,9 +97,12 @@ case class StructField(
|
|||
|
||||
/**
|
||||
* Returns a string containing a schema in DDL format. For example, the following value:
|
||||
* `StructField("eventId", IntegerType)` will be converted to `eventId` INT.
|
||||
*
|
||||
* `StructField("eventId", IntegerType, false)` will be converted to `eventId` INT NOT NULL.
|
||||
* `StructField("eventId", IntegerType, true)` will be converted to `eventId` INT.
|
||||
* @since 2.4.0
|
||||
*/
|
||||
def toDDL: String = s"${quoteIdentifier(name)} ${dataType.sql}$getDDLComment"
|
||||
/**
 * Returns this field rendered as a DDL fragment, e.g. `` `eventId` INT NOT NULL ``.
 * The NOT NULL clause is emitted only when the field is declared non-nullable,
 * and any column comment is appended via `getDDLComment`.
 */
def toDDL: String = {
  // Name and SQL type form the mandatory part of the DDL fragment.
  val namedType = s"${quoteIdentifier(name)} ${dataType.sql}"
  // Nullable columns carry no flag; non-nullable ones get " NOT NULL".
  val nullabilityClause = if (nullable) "" else " NOT NULL"
  namedType + nullabilityClause + getDDLComment
}
|
||||
}
|
||||
|
|
|
@ -654,7 +654,13 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
|
|||
Row(Row(3, 4, null), 0) :: Row(Row(1, 2, null), 1) :: Row(Row(2, 3, null), 2) :: Nil
|
||||
)
|
||||
|
||||
assert(unionDf.schema.toDDL == "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT>,`idx` INT")
|
||||
var schema = new StructType()
|
||||
.add("a", new StructType()
|
||||
.add("_1", IntegerType, true)
|
||||
.add("_2", IntegerType, true)
|
||||
.add("_3", IntegerType, true), true)
|
||||
.add("idx", IntegerType, false)
|
||||
assert(unionDf.schema == schema)
|
||||
|
||||
unionDf = df1.unionByName(df2, true).unionByName(df3, true)
|
||||
|
||||
|
@ -669,8 +675,14 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
|
|||
Row(Row(110, 111, 112, 113), 1) ::
|
||||
Row(Row(120, 121, 122, 123), 2) :: Nil // df3
|
||||
)
|
||||
assert(unionDf.schema.toDDL ==
|
||||
"`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT, `_4`: INT>,`idx` INT")
|
||||
schema = new StructType()
|
||||
.add("a", new StructType()
|
||||
.add("_1", IntegerType, true)
|
||||
.add("_2", IntegerType, true)
|
||||
.add("_3", IntegerType, true)
|
||||
.add("_4", IntegerType, true), true)
|
||||
.add("idx", IntegerType, false)
|
||||
assert(unionDf.schema == schema)
|
||||
}
|
||||
|
||||
test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - nested") {
|
||||
|
@ -678,26 +690,38 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
|
|||
val df2 = Seq((1, UnionClass1b(1, 2L, UnionClass3(2, 3L)))).toDF("id", "a")
|
||||
|
||||
var unionDf = df1.unionByName(df2, true)
|
||||
assert(unionDf.schema.toDDL ==
|
||||
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
|
||||
"`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
|
||||
val schema1 = new StructType()
|
||||
.add("id", IntegerType, false)
|
||||
.add("a", new StructType()
|
||||
.add("a", IntegerType, true)
|
||||
.add("b", LongType, true)
|
||||
.add("nested", new StructType()
|
||||
.add("a", IntegerType, true)
|
||||
.add("c", StringType, true)
|
||||
.add("b", LongType, true), true), true)
|
||||
assert(unionDf.schema == schema1)
|
||||
checkAnswer(unionDf,
|
||||
Row(0, Row(0, 1, Row(1, "2", null))) ::
|
||||
Row(1, Row(1, 2, Row(2, null, 3L))) :: Nil)
|
||||
|
||||
unionDf = df2.unionByName(df1, true)
|
||||
assert(unionDf.schema.toDDL ==
|
||||
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
|
||||
"`nested`: STRUCT<`a`: INT, `b`: BIGINT, `c`: STRING>>")
|
||||
val schema2 = new StructType()
|
||||
.add("id", IntegerType, false)
|
||||
.add("a", new StructType()
|
||||
.add("a", IntegerType, true)
|
||||
.add("b", LongType, true)
|
||||
.add("nested", new StructType()
|
||||
.add("a", IntegerType, true)
|
||||
.add("b", LongType, true)
|
||||
.add("c", StringType, true), true), true)
|
||||
assert(unionDf.schema== schema2)
|
||||
checkAnswer(unionDf,
|
||||
Row(1, Row(1, 2, Row(2, 3L, null))) ::
|
||||
Row(0, Row(0, 1, Row(1, null, "2"))) :: Nil)
|
||||
|
||||
val df3 = Seq((2, UnionClass1b(2, 3L, null))).toDF("id", "a")
|
||||
unionDf = df1.unionByName(df3, true)
|
||||
assert(unionDf.schema.toDDL ==
|
||||
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
|
||||
"`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
|
||||
assert(unionDf.schema == schema1)
|
||||
checkAnswer(unionDf,
|
||||
Row(0, Row(0, 1, Row(1, "2", null))) ::
|
||||
Row(2, Row(2, 3, null)) :: Nil)
|
||||
|
@ -710,26 +734,49 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession {
|
|||
val df2 = Seq((1, UnionClass1c(1, 2L, UnionClass4(2, 3L)))).toDF("id", "a")
|
||||
|
||||
var unionDf = df1.unionByName(df2, true)
|
||||
assert(unionDf.schema.toDDL ==
|
||||
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
|
||||
"`nested`: STRUCT<`a`: INT, `c`: STRING, `A`: INT, `b`: BIGINT>>")
|
||||
var schema = new StructType()
|
||||
.add("id", IntegerType, false)
|
||||
.add("a", new StructType()
|
||||
.add("a", IntegerType, true)
|
||||
.add("b", LongType, true)
|
||||
.add("nested", new StructType()
|
||||
.add("a", IntegerType, true)
|
||||
.add("c", StringType, true)
|
||||
.add("A", IntegerType, true)
|
||||
.add("b", LongType, true), true), true)
|
||||
assert(unionDf.schema == schema)
|
||||
checkAnswer(unionDf,
|
||||
Row(0, Row(0, 1, Row(1, "2", null, null))) ::
|
||||
Row(1, Row(1, 2, Row(null, null, 2, 3L))) :: Nil)
|
||||
|
||||
unionDf = df2.unionByName(df1, true)
|
||||
assert(unionDf.schema.toDDL ==
|
||||
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
|
||||
"`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT, `c`: STRING>>")
|
||||
schema = new StructType()
|
||||
.add("id", IntegerType, false)
|
||||
.add("a", new StructType()
|
||||
.add("a", IntegerType, true)
|
||||
.add("b", LongType, true)
|
||||
.add("nested", new StructType()
|
||||
.add("A", IntegerType, true)
|
||||
.add("b", LongType, true)
|
||||
.add("a", IntegerType, true)
|
||||
.add("c", StringType, true), true), true)
|
||||
assert(unionDf.schema == schema)
|
||||
checkAnswer(unionDf,
|
||||
Row(1, Row(1, 2, Row(2, 3L, null, null))) ::
|
||||
Row(0, Row(0, 1, Row(null, null, 1, "2"))) :: Nil)
|
||||
|
||||
val df3 = Seq((2, UnionClass1b(2, 3L, UnionClass3(4, 5L)))).toDF("id", "a")
|
||||
unionDf = df2.unionByName(df3, true)
|
||||
assert(unionDf.schema.toDDL ==
|
||||
"`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
|
||||
"`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT>>")
|
||||
schema = new StructType()
|
||||
.add("id", IntegerType, false)
|
||||
.add("a", new StructType()
|
||||
.add("a", IntegerType, true)
|
||||
.add("b", LongType, true)
|
||||
.add("nested", new StructType()
|
||||
.add("A", IntegerType, true)
|
||||
.add("b", LongType, true)
|
||||
.add("a", IntegerType, true), true), true)
|
||||
assert(unionDf.schema == schema)
|
||||
checkAnswer(unionDf,
|
||||
Row(1, Row(1, 2, Row(2, 3L, null))) ::
|
||||
Row(2, Row(2, 3, Row(null, 5L, 4))) :: Nil)
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.spark.sql
|
|||
|
||||
import org.apache.spark.sql.catalyst.TableIdentifier
|
||||
import org.apache.spark.sql.catalyst.catalog.CatalogTable
|
||||
import org.apache.spark.sql.sources.SimpleInsertSource
|
||||
import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
|
||||
import org.apache.spark.util.Utils
|
||||
|
||||
|
@ -176,19 +177,31 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
|
|||
val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)"
|
||||
sql(s"$createTable USING json")
|
||||
val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
|
||||
assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)")
|
||||
assert(shownDDL == "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>) USING json")
|
||||
|
||||
checkCreateTable("t1")
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-36012: Add NULL flag when SHOW CREATE TABLE") {
|
||||
val t = "SPARK_36012"
|
||||
withTable(t) {
|
||||
sql(
|
||||
s"""
|
||||
|CREATE TABLE $t (
|
||||
| a bigint NOT NULL,
|
||||
| b bigint
|
||||
|)
|
||||
|USING ${classOf[SimpleInsertSource].getName}
|
||||
""".stripMargin)
|
||||
val showDDL = getShowDDL(s"SHOW CREATE TABLE $t")
|
||||
assert(showDDL == s"CREATE TABLE `default`.`$t` ( `a` BIGINT NOT NULL," +
|
||||
s" `b` BIGINT) USING ${classOf[SimpleInsertSource].getName}")
|
||||
}
|
||||
}
|
||||
|
||||
protected def getShowDDL(showCreateTableSql: String): String = {
|
||||
val result = sql(showCreateTableSql)
|
||||
.head()
|
||||
.getString(0)
|
||||
.split("\n")
|
||||
.map(_.trim)
|
||||
if (result.length > 1) result(0) + result(1) else result.head
|
||||
sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ")
|
||||
}
|
||||
|
||||
protected def checkCreateTable(table: String, serde: Boolean = false): Unit = {
|
||||
|
|
|
@ -1978,7 +1978,7 @@ class DataSourceV2SQLSuite
|
|||
sql(
|
||||
s"""
|
||||
|CREATE TABLE $t (
|
||||
| a bigint,
|
||||
| a bigint NOT NULL,
|
||||
| b bigint,
|
||||
| c bigint,
|
||||
| `extra col` ARRAY<INT>,
|
||||
|
@ -1996,7 +1996,7 @@ class DataSourceV2SQLSuite
|
|||
val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t")
|
||||
assert(showDDL === Array(
|
||||
"CREATE TABLE testcat.ns1.ns2.tbl (",
|
||||
"`a` BIGINT,",
|
||||
"`a` BIGINT NOT NULL,",
|
||||
"`b` BIGINT,",
|
||||
"`c` BIGINT,",
|
||||
"`extra col` ARRAY<INT>,",
|
||||
|
|
|
@ -247,7 +247,8 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
|
|||
val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive"
|
||||
sql(createTable)
|
||||
val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
|
||||
assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)")
|
||||
assert(shownDDL.substring(0, shownDDL.indexOf(" USING")) ==
|
||||
"CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>)")
|
||||
|
||||
checkCreateTable("t1", serde = true)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue