diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index 0bb1f00b7a..8bdc8379ff 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -511,8 +511,7 @@ class TypesTests(ReusedSQLTestCase):
     def test_parse_datatype_string(self):
         from pyspark.sql.types import _all_atomic_types, _parse_datatype_string
         for k, t in _all_atomic_types.items():
-            if t != NullType:
-                self.assertEqual(t(), _parse_datatype_string(k))
+            self.assertEqual(t(), _parse_datatype_string(k))
         self.assertEqual(IntegerType(), _parse_datatype_string("int"))
         self.assertEqual(DecimalType(1, 1), _parse_datatype_string("decimal(1 ,1)"))
         self.assertEqual(DecimalType(10, 1), _parse_datatype_string("decimal( 10,1 )"))
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index c302968741..13faf47c2b 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -107,7 +107,9 @@ class NullType(DataType, metaclass=DataTypeSingleton):
     The data type representing None, used for the types that cannot be inferred.
     """
 
-    pass
+    @classmethod
+    def typeName(cls):
+        return 'void'
 
 
 class AtomicType(DataType):
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
index ff6a49a10c..585045d898 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -195,6 +195,8 @@ object DataType {
     case FIXED_DECIMAL(precision, scale) => DecimalType(precision.toInt, scale.toInt)
     case CHAR_TYPE(length) => CharType(length.toInt)
     case VARCHAR_TYPE(length) => VarcharType(length.toInt)
+    // For backwards compatibility, previously the type name of NullType was "null"
+    case "null" => NullType
     case other => otherTypes.getOrElse(
       other,
       throw new IllegalArgumentException(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
index 14097a5280..d211fac70c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
@@ -32,6 +32,8 @@ class NullType private() extends DataType {
   override def defaultSize: Int = 1
 
   private[spark] override def asNullable: NullType = this
+
+  override def typeName: String = "void"
 }
 
 /**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
index 1c34b509f1..4ac82817a8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
@@ -183,6 +183,10 @@ class DataTypeSuite extends SparkFunSuite {
     assert(!arrayType.existsRecursively(_.isInstanceOf[IntegerType]))
   }
 
+  test("SPARK-36224: Backwards compatibility test for NullType.json") {
+    assert(DataType.fromJson("\"null\"") == NullType)
+  }
+
   def checkDataTypeFromJson(dataType: DataType): Unit = {
     test(s"from Json - $dataType") {
       assert(DataType.fromJson(dataType.json) === dataType)
     }
@@ -198,6 +202,7 @@ class DataTypeSuite extends SparkFunSuite {
   }
 
   checkDataTypeFromJson(NullType)
+  checkDataTypeFromDDL(NullType)
 
   checkDataTypeFromJson(BooleanType)
   checkDataTypeFromDDL(BooleanType)
@@ -424,6 +429,7 @@ class DataTypeSuite extends SparkFunSuite {
     i => StructField(s"col$i", IntegerType, nullable = true)
   })
 
+  checkCatalogString(NullType)
   checkCatalogString(BooleanType)
   checkCatalogString(ByteType)
   checkCatalogString(ShortType)
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 41692d20ed..6eafb38e7b 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -34,7 +34,7 @@
 | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct<ascii(222):int> |
 | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct<ASIN(0):double> |
 | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct<ASINH(0):double> |
-| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):null> |
+| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):void> |
 | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct<ATAN(0):double> |
 | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct<ATAN2(0, 0):double> |
 | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct<ATANH(0):double> |
@@ -223,7 +223,7 @@
 | org.apache.spark.sql.catalyst.expressions.RLike | regexp | SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP(%SystemDrive%\Users\John, %SystemDrive%\Users.*):boolean> |
 | org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP_LIKE(%SystemDrive%\Users\John, %SystemDrive%\Users.*):boolean> |
 | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<RLIKE(%SystemDrive%\Users\John, %SystemDrive%\Users.*):boolean> |
-| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct<raise_error(custom error message):null> |
+| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct<raise_error(custom error message):void> |
 | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct<rand():double> |
 | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct<random():double> |
 | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct<randn():double> |
@@ -366,4 +366,4 @@
 | org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') | struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
-| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
+| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
\ No newline at end of file
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out
index ab4bc738ce..ac740bd195 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out
@@ -5,7 +5,7 @@
 -- !query
 select null, Null, nUll
 -- !query schema
-struct<NULL:null,NULL:null,NULL:null>
+struct<NULL:void,NULL:void,NULL:void>
 -- !query output
 NULL	NULL	NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
index 3f4399fe08..3f01c8f755 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
@@ -74,7 +74,7 @@ select left(null, -2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'substring(NULL, 1, -2)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of null type.; line 1 pos 7
+cannot resolve 'substring(NULL, 1, -2)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of void type.; line 1 pos 7
 
 
 -- !query
@@ -101,7 +101,7 @@ select right(null, -2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'substring(NULL, (- -2), 2147483647)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of null type.; line 1 pos 7
+cannot resolve 'substring(NULL, (- -2), 2147483647)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of void type.; line 1 pos 7
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
index 9943b93c43..12dcf33143 100644
--- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
@@ -49,7 +49,7 @@ two	2
 -- !query
 select * from values ("one", null), ("two", null) as data(a, b)
 -- !query schema
-struct<a:string,b:null>
+struct<a:string,b:void>
 -- !query output
 one	NULL
 two	NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
index ab4bc738ce..ac740bd195 100644
--- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
@@ -5,7 +5,7 @@
 -- !query
 select null, Null, nUll
 -- !query schema
-struct<NULL:null,NULL:null,NULL:null>
+struct<NULL:void,NULL:void,NULL:void>
 -- !query output
 NULL	NULL	NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out
index bb08d07e5a..bf29cc26b7 100644
--- a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -7,7 +7,7 @@ select typeof(null)
 -- !query schema
 struct<typeof(NULL):string>
 -- !query output
-null
+void
 
 
 -- !query
@@ -61,7 +61,7 @@ array<int>	map<int,int>	struct<col1:int,col2:string>
 -- !query
 SELECT assert_true(true), assert_true(boolean(1))
 -- !query schema
-struct<assert_true(true, 'true' is not true!):null,assert_true(boolean(1), 'cast(1 as boolean)' is not true!):null>
+struct<assert_true(true, 'true' is not true!):void,assert_true(boolean(1), 'cast(1 as boolean)' is not true!):void>
 -- !query output
 NULL	NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out
index 1e59036b97..d3674d6bc1 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out
@@ -308,7 +308,7 @@ struct<1:int>
 -- !query
 select foo.* from (select null) as foo
 -- !query schema
-struct<NULL:null>
+struct<NULL:void>
 -- !query output
 NULL
@@ -316,7 +316,7 @@ NULL
 -- !query
 select foo.* from (select 'xyzzy',1,null) as foo
 -- !query schema
-struct<xyzzy:string,1:int,NULL:null>
+struct<xyzzy:string,1:int,NULL:void>
 -- !query output
 xyzzy	1	NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out
index 2387dd2441..e1c7a7f8fa 100755
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out
@@ -130,7 +130,7 @@ select concat_ws(',',10,20,null,30)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'concat_ws(',', 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array or string) type, however, '10' is of int type. argument 3 requires (array or string) type, however, '20' is of int type. argument 4 requires (array or string) type, however, 'NULL' is of null type. argument 5 requires (array or string) type, however, '30' is of int type.; line 1 pos 7
+cannot resolve 'concat_ws(',', 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array or string) type, however, '10' is of int type. argument 3 requires (array or string) type, however, '20' is of int type. argument 4 requires (array or string) type, however, 'NULL' is of void type. argument 5 requires (array or string) type, however, '30' is of int type.; line 1 pos 7
 
 
 -- !query
@@ -139,7 +139,7 @@ select concat_ws('',10,20,null,30)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'concat_ws('', 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array or string) type, however, '10' is of int type. argument 3 requires (array or string) type, however, '20' is of int type. argument 4 requires (array or string) type, however, 'NULL' is of null type. argument 5 requires (array or string) type, however, '30' is of int type.; line 1 pos 7
+cannot resolve 'concat_ws('', 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array or string) type, however, '10' is of int type. argument 3 requires (array or string) type, however, '20' is of int type. argument 4 requires (array or string) type, however, 'NULL' is of void type. argument 5 requires (array or string) type, however, '30' is of int type.; line 1 pos 7
 
 
 -- !query
@@ -148,7 +148,7 @@ select concat_ws(NULL,10,20,null,30) is null
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'concat_ws(CAST(NULL AS STRING), 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array or string) type, however, '10' is of int type. argument 3 requires (array or string) type, however, '20' is of int type. argument 4 requires (array or string) type, however, 'NULL' is of null type. argument 5 requires (array or string) type, however, '30' is of int type.; line 1 pos 7
+cannot resolve 'concat_ws(CAST(NULL AS STRING), 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array or string) type, however, '10' is of int type. argument 3 requires (array or string) type, however, '20' is of int type. argument 4 requires (array or string) type, however, 'NULL' is of void type. argument 5 requires (array or string) type, however, '30' is of int type.; line 1 pos 7
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
index 3549f2ccf9..065424dfd7 100644
--- a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
@@ -5,7 +5,7 @@
 -- !query
 SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null)
 -- !query schema
-struct<ifnull(NULL, x):string,ifnull(y, x):string,ifnull(NULL, NULL):null>
+struct<ifnull(NULL, x):string,ifnull(y, x):string,ifnull(NULL, NULL):void>
 -- !query output
 x	y	NULL
@@ -21,7 +21,7 @@ NULL	x
 -- !query
 SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null)
 -- !query schema
-struct<nvl(NULL, x):string,nvl(y, x):string,nvl(NULL, NULL):null>
+struct<nvl(NULL, x):string,nvl(y, x):string,nvl(NULL, NULL):void>
 -- !query output
 x	y	NULL
@@ -29,7 +29,7 @@ x	y	NULL
 -- !query
 SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null)
 -- !query schema
-struct<nvl2(NULL, x, y):string,nvl2(n, x, y):string,nvl2(NULL, NULL, NULL):null>
+struct<nvl2(NULL, x, y):string,nvl2(n, x, y):string,nvl2(NULL, NULL, NULL):void>
 -- !query output
 y	x	NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
index 157b2cebf3..cd85308299 100644
--- a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
@@ -89,7 +89,7 @@ Table-valued function range with alternatives:
   range(start: long, end: long, step: long)
   range(start: long, end: long)
   range(end: long)
-cannot be applied to (integer, null): Incompatible input data type. Expected: long; Found: null; line 1 pos 14
+cannot be applied to (integer, void): Incompatible input data type. Expected: long; Found: void; line 1 pos 14
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
index 78e9190820..2872f1bc10 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
@@ -49,7 +49,7 @@ two	2
 -- !query
 select udf(a), b from values ("one", null), ("two", null) as data(a, b)
 -- !query schema
-struct<udf(a):string,b:null>
+struct<udf(a):string,b:void>
 -- !query output
 one	NULL
 two	NULL
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
index 6452e6778e..c71f667939 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -421,7 +421,7 @@ class FileBasedDataSourceSuite extends QueryTest
         ""
       }
       def errorMessage(format: String): String = {
-        s"$format data source does not support null data type."
+        s"$format data source does not support void data type."
       }
       withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> useV1List) {
         withTempDir { dir =>
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
index cdee33932b..bfff5d7217 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
@@ -375,7 +375,6 @@ object SparkExecuteStatementOperation {
   def getTableSchema(structType: StructType): TableSchema = {
     val schema = structType.map { field =>
       val attrTypeString = field.dataType match {
-        case NullType => "void"
         case CalendarIntervalType => StringType.catalogString
         case _: YearMonthIntervalType => "interval_year_month"
         case _: DayTimeIntervalType => "interval_day_time"
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 87cd852694..b1c83af228 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1000,7 +1000,7 @@ private[hive] object HiveClientImpl extends Logging {
     // When reading data in parquet, orc, or avro file format with string type for char,
     // the tailing spaces may lost if we are not going to pad it.
     val typeString = CharVarcharUtils.getRawTypeString(c.metadata)
-      .getOrElse(HiveVoidType.replaceVoidType(c.dataType).catalogString)
+      .getOrElse(c.dataType.catalogString)
     new FieldSchema(c.name, typeString, c.getComment().orNull)
   }
 
@@ -1278,22 +1278,3 @@ private[hive] object HiveClientImpl extends Logging {
     hiveConf
   }
 }
-
-private[hive] case object HiveVoidType extends DataType {
-  override def defaultSize: Int = 1
-  override def asNullable: DataType = HiveVoidType
-  override def simpleString: String = "void"
-
-  def replaceVoidType(dt: DataType): DataType = dt match {
-    case ArrayType(et, nullable) =>
-      ArrayType(replaceVoidType(et), nullable)
-    case MapType(kt, vt, nullable) =>
-      MapType(replaceVoidType(kt), replaceVoidType(vt), nullable)
-    case StructType(fields) =>
-      StructType(fields.map { field =>
-        field.copy(dataType = replaceVoidType(field.dataType))
-      })
-    case _: NullType => HiveVoidType
-    case _ => dt
-  }
-}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 7f42b3c0f5..6d8938b387 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2393,12 +2393,12 @@ class HiveDDLSuite
     }
   }
 
-  test("SPARK-36241: support creating tables with null datatype") {
-    // CTAS with null type
+  test("SPARK-36241: support creating tables with void datatype") {
+    // CTAS with void type
     withTable("t1", "t2", "t3") {
       assertAnalysisError(
         "CREATE TABLE t1 USING PARQUET AS SELECT NULL AS null_col",
-        "Parquet data source does not support null data type")
+        "Parquet data source does not support void data type")
 
       assertAnalysisError(
         "CREATE TABLE t2 STORED AS PARQUET AS SELECT null as null_col",
@@ -2408,11 +2408,11 @@ class HiveDDLSuite
       checkAnswer(sql("SELECT * FROM t3"), Row(null))
     }
 
-    // Create table with null type
+    // Create table with void type
     withTable("t1", "t2", "t3", "t4") {
       assertAnalysisError(
         "CREATE TABLE t1 (v VOID) USING PARQUET",
-        "Parquet data source does not support null data type")
+        "Parquet data source does not support void data type")
 
       assertAnalysisError(
         "CREATE TABLE t2 (v VOID) STORED AS PARQUET",
@@ -2425,7 +2425,7 @@ class HiveDDLSuite
       checkAnswer(sql("SELECT * FROM t4"), Seq.empty)
     }
 
-    // Create table with null type using spark.catalog.createTable
+    // Create table with void type using spark.catalog.createTable
     withTable("t") {
       val schema = new StructType().add("c", NullType)
       spark.catalog.createTable(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
index e94e0b39c8..a66c337b6e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
@@ -121,7 +121,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
     msg = intercept[AnalysisException] {
      sql("select null").write.mode("overwrite").orc(orcDir)
    }.getMessage
-    assert(msg.contains("ORC data source does not support null data type."))
+    assert(msg.contains("ORC data source does not support void data type."))
 
     msg = intercept[AnalysisException] {
       spark.udf.register("testType", () => new IntervalData())
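
Reviewer note: the net effect of this patch is that NullType identifies itself as "void" everywhere (typeName, catalogString, DDL, typeof(), analysis error messages), while JSON deserialization keeps accepting the legacy "null" spelling. A minimal sketch of the expected behavior, assuming a Spark build with this patch applied; the object name below is illustrative, not part of the change:

    import org.apache.spark.sql.types.{DataType, NullType}

    object NullTypeVoidCheck {
      def main(args: Array[String]): Unit = {
        // NullType now reports "void" as its name, matching Hive's type name.
        assert(NullType.typeName == "void")
        assert(NullType.catalogString == "void")

        // JSON round-trips through the new name, and the explicit `case "null"`
        // branch added above keeps previously persisted schemas readable.
        assert(DataType.fromJson(NullType.json) == NullType) // json is "\"void\""
        assert(DataType.fromJson("\"null\"") == NullType)

        // DDL parsing accepts the new name too, per checkDataTypeFromDDL(NullType).
        assert(DataType.fromDDL("void") == NullType)
      }
    }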