diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala index f4c7370be5..93d57a7fe6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -97,9 +97,12 @@ case class StructField( /** * Returns a string containing a schema in DDL format. For example, the following value: - * `StructField("eventId", IntegerType)` will be converted to `eventId` INT. - * + * `StructField("eventId", IntegerType, false)` will be converted to `eventId` INT NOT NULL. + * `StructField("eventId", IntegerType, true)` will be converted to `eventId` INT. * @since 2.4.0 */ - def toDDL: String = s"${quoteIdentifier(name)} ${dataType.sql}$getDDLComment" + def toDDL: String = { + val nullString = if (nullable) "" else " NOT NULL" + s"${quoteIdentifier(name)} ${dataType.sql}${nullString}$getDDLComment" + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index fcd3e8315b..e3259a2460 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -654,7 +654,13 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { Row(Row(3, 4, null), 0) :: Row(Row(1, 2, null), 1) :: Row(Row(2, 3, null), 2) :: Nil ) - assert(unionDf.schema.toDDL == "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT>,`idx` INT") + var schema = new StructType() + .add("a", new StructType() + .add("_1", IntegerType, true) + .add("_2", IntegerType, true) + .add("_3", IntegerType, true), true) + .add("idx", IntegerType, false) + assert(unionDf.schema == schema) unionDf = df1.unionByName(df2, true).unionByName(df3, true) @@ -669,8 +675,14 @@ class 
DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { Row(Row(110, 111, 112, 113), 1) :: Row(Row(120, 121, 122, 123), 2) :: Nil // df3 ) - assert(unionDf.schema.toDDL == - "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT, `_4`: INT>,`idx` INT") + schema = new StructType() + .add("a", new StructType() + .add("_1", IntegerType, true) + .add("_2", IntegerType, true) + .add("_3", IntegerType, true) + .add("_4", IntegerType, true), true) + .add("idx", IntegerType, false) + assert(unionDf.schema == schema) } test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - nested") { @@ -678,26 +690,38 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { val df2 = Seq((1, UnionClass1b(1, 2L, UnionClass3(2, 3L)))).toDF("id", "a") var unionDf = df1.unionByName(df2, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>") + val schema1 = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("a", IntegerType, true) + .add("c", StringType, true) + .add("b", LongType, true), true), true) + assert(unionDf.schema == schema1) checkAnswer(unionDf, Row(0, Row(0, 1, Row(1, "2", null))) :: Row(1, Row(1, 2, Row(2, null, 3L))) :: Nil) unionDf = df2.unionByName(df1, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`a`: INT, `b`: BIGINT, `c`: STRING>>") + val schema2 = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("c", StringType, true), true), true) + assert(unionDf.schema == schema2) checkAnswer(unionDf, Row(1, Row(1, 2, Row(2, 3L, null))) :: Row(0, Row(0, 1, Row(1,
null, "2"))) :: Nil) val df3 = Seq((2, UnionClass1b(2, 3L, null))).toDF("id", "a") unionDf = df1.unionByName(df3, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>") + assert(unionDf.schema == schema1) checkAnswer(unionDf, Row(0, Row(0, 1, Row(1, "2", null))) :: Row(2, Row(2, 3, null)) :: Nil) @@ -710,26 +734,49 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { val df2 = Seq((1, UnionClass1c(1, 2L, UnionClass4(2, 3L)))).toDF("id", "a") var unionDf = df1.unionByName(df2, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`a`: INT, `c`: STRING, `A`: INT, `b`: BIGINT>>") + var schema = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("a", IntegerType, true) + .add("c", StringType, true) + .add("A", IntegerType, true) + .add("b", LongType, true), true), true) + assert(unionDf.schema == schema) checkAnswer(unionDf, Row(0, Row(0, 1, Row(1, "2", null, null))) :: Row(1, Row(1, 2, Row(null, null, 2, 3L))) :: Nil) unionDf = df2.unionByName(df1, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT, `c`: STRING>>") + schema = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("A", IntegerType, true) + .add("b", LongType, true) + .add("a", IntegerType, true) + .add("c", StringType, true), true), true) + assert(unionDf.schema == schema) checkAnswer(unionDf, Row(1, Row(1, 2, Row(2, 3L, null, null))) :: Row(0, Row(0, 1, Row(null, null, 1, "2"))) :: Nil) val df3 = Seq((2, UnionClass1b(2, 3L, UnionClass3(4, 5L)))).toDF("id", "a") unionDf = df2.unionByName(df3, 
true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT>>") + schema = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("A", IntegerType, true) + .add("b", LongType, true) + .add("a", IntegerType, true), true), true) + assert(unionDf.schema == schema) checkAnswer(unionDf, Row(1, Row(1, 2, Row(2, 3L, null))) :: Row(2, Row(2, 3, Row(null, 5L, 4))) :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala index 5ce5d36c5e..6839294348 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.sources.SimpleInsertSource import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} import org.apache.spark.util.Utils @@ -176,19 +177,31 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)" sql(s"$createTable USING json") val shownDDL = getShowDDL("SHOW CREATE TABLE t1") - assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)") + assert(shownDDL == "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>) USING json") checkCreateTable("t1") } } + test("SPARK-36012: Add NULL flag when SHOW CREATE TABLE") { + val t = "SPARK_36012" + withTable(t) { + sql( + s""" + |CREATE TABLE $t ( + | a bigint NOT NULL, + | b bigint + |) + |USING ${classOf[SimpleInsertSource].getName} + """.stripMargin) + val showDDL = getShowDDL(s"SHOW CREATE TABLE $t") + assert(showDDL == 
s"CREATE TABLE `default`.`$t` ( `a` BIGINT NOT NULL," + + s" `b` BIGINT) USING ${classOf[SimpleInsertSource].getName}") + } + } + protected def getShowDDL(showCreateTableSql: String): String = { - val result = sql(showCreateTableSql) - .head() - .getString(0) - .split("\n") - .map(_.trim) - if (result.length > 1) result(0) + result(1) else result.head + sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ") } protected def checkCreateTable(table: String, serde: Boolean = false): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index bdf198bbf6..4f1f4c2389 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1978,7 +1978,7 @@ class DataSourceV2SQLSuite sql( s""" |CREATE TABLE $t ( - | a bigint, + | a bigint NOT NULL, | b bigint, | c bigint, | `extra col` ARRAY, @@ -1996,7 +1996,7 @@ class DataSourceV2SQLSuite val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t") assert(showDDL === Array( "CREATE TABLE testcat.ns1.ns2.tbl (", - "`a` BIGINT,", + "`a` BIGINT NOT NULL,", "`b` BIGINT,", "`c` BIGINT,", "`extra col` ARRAY,", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala index 2fb67c793d..e3a1034ad4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -247,7 +247,8 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive" sql(createTable) val shownDDL = getShowDDL("SHOW CREATE TABLE t1") - assert(shownDDL == "CREATE TABLE 
`default`.`t1` (`a` STRUCT<`b`: STRING>)") + assert(shownDDL.substring(0, shownDDL.indexOf(" USING")) == + "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>)") checkCreateTable("t1", serde = true) }