[SPARK-34614][SQL] ANSI mode: Casting String to Boolean should throw exception on parse error
### What changes were proposed in this pull request? In ANSI mode, casting String to Boolean should throw an exception on parse error, instead of returning null. ### Why are the changes needed? For better ANSI compliance. ### Does this PR introduce _any_ user-facing change? Yes. In ANSI mode, casting a String value to Boolean type now throws an exception when the string cannot be parsed, instead of returning null. ### How was this patch tested? Unit tests. Closes #31734 from gengliangwang/ansiCastToBoolean. Authored-by: Gengliang Wang <gengliang.wang@databricks.com> Signed-off-by: Gengliang Wang <gengliang.wang@databricks.com>
This commit is contained in:
parent
53e4dba7c4
commit
2b1c170016
|
@ -165,6 +165,7 @@ The behavior of some SQL operators can be different under ANSI mode (`spark.sql.
|
|||
- `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map.
|
||||
- `CAST(string_col AS TIMESTAMP)`: This operator should fail with an exception if the input string can't be parsed.
|
||||
- `CAST(string_col AS DATE)`: This operator should fail with an exception if the input string can't be parsed.
|
||||
- `CAST(string_col AS BOOLEAN)`: This operator should fail with an exception if the input string can't be parsed.
|
||||
|
||||
### SQL Keywords
|
||||
|
||||
|
|
|
@ -426,9 +426,13 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
|
|||
true
|
||||
} else if (StringUtils.isFalseString(s)) {
|
||||
false
|
||||
} else {
|
||||
if (ansiEnabled) {
|
||||
throw new UnsupportedOperationException(s"invalid input syntax for type boolean: $s")
|
||||
} else {
|
||||
null
|
||||
}
|
||||
}
|
||||
})
|
||||
case TimestampType =>
|
||||
buildCast[Long](_, t => t != 0)
|
||||
|
@ -1349,13 +1353,19 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
|
|||
case StringType =>
|
||||
val stringUtils = inline"${StringUtils.getClass.getName.stripSuffix("$")}"
|
||||
(c, evPrim, evNull) =>
|
||||
val castFailureCode = if (ansiEnabled) {
|
||||
val errorMessage = s""""invalid input syntax for type boolean: " + $c"""
|
||||
s"throw new java.lang.UnsupportedOperationException($errorMessage);"
|
||||
} else {
|
||||
s"$evNull = true;"
|
||||
}
|
||||
code"""
|
||||
if ($stringUtils.isTrueString($c)) {
|
||||
$evPrim = true;
|
||||
} else if ($stringUtils.isFalseString($c)) {
|
||||
$evPrim = false;
|
||||
} else {
|
||||
$evNull = true;
|
||||
$castFailureCode
|
||||
}
|
||||
"""
|
||||
case TimestampType =>
|
||||
|
|
|
@ -351,12 +351,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
|
||||
checkNullCast(ArrayType(StringType), ArrayType(IntegerType))
|
||||
|
||||
{
|
||||
val ret = cast(array, ArrayType(BooleanType, containsNull = true))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, Seq(null, true, false, null))
|
||||
}
|
||||
|
||||
{
|
||||
val array = Literal.create(Seq.empty, ArrayType(NullType, containsNull = false))
|
||||
val ret = cast(array, ArrayType(IntegerType, containsNull = false))
|
||||
|
@ -369,11 +363,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
assert(ret.resolved === false)
|
||||
}
|
||||
|
||||
{
|
||||
val ret = cast(array_notNull, ArrayType(BooleanType, containsNull = true))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, Seq(null, true, false))
|
||||
}
|
||||
{
|
||||
val ret = cast(array_notNull, ArrayType(BooleanType, containsNull = false))
|
||||
assert(ret.resolved === false)
|
||||
|
@ -395,11 +384,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
|
||||
checkNullCast(MapType(StringType, IntegerType), MapType(StringType, StringType))
|
||||
|
||||
{
|
||||
val ret = cast(map, MapType(StringType, BooleanType, valueContainsNull = true))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, Map("a" -> null, "b" -> true, "c" -> false, "d" -> null))
|
||||
}
|
||||
{
|
||||
val ret = cast(map, MapType(StringType, BooleanType, valueContainsNull = false))
|
||||
assert(ret.resolved === false)
|
||||
|
@ -408,11 +392,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
val ret = cast(map, MapType(IntegerType, StringType, valueContainsNull = true))
|
||||
assert(ret.resolved === false)
|
||||
}
|
||||
{
|
||||
val ret = cast(map_notNull, MapType(StringType, BooleanType, valueContainsNull = true))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, Map("a" -> null, "b" -> true, "c" -> false))
|
||||
}
|
||||
{
|
||||
val ret = cast(map_notNull, MapType(StringType, BooleanType, valueContainsNull = false))
|
||||
assert(ret.resolved === false)
|
||||
|
@ -458,15 +437,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
StructField("b", StringType, nullable = false),
|
||||
StructField("c", StringType, nullable = false))))
|
||||
|
||||
{
|
||||
val ret = cast(struct, StructType(Seq(
|
||||
StructField("a", BooleanType, nullable = true),
|
||||
StructField("b", BooleanType, nullable = true),
|
||||
StructField("c", BooleanType, nullable = true),
|
||||
StructField("d", BooleanType, nullable = true))))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, InternalRow(null, true, false, null))
|
||||
}
|
||||
{
|
||||
val ret = cast(struct, StructType(Seq(
|
||||
StructField("a", BooleanType, nullable = true),
|
||||
|
@ -476,14 +446,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
assert(ret.resolved === false)
|
||||
}
|
||||
|
||||
{
|
||||
val ret = cast(struct_notNull, StructType(Seq(
|
||||
StructField("a", BooleanType, nullable = true),
|
||||
StructField("b", BooleanType, nullable = true),
|
||||
StructField("c", BooleanType, nullable = true))))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, InternalRow(null, true, false))
|
||||
}
|
||||
{
|
||||
val ret = cast(struct_notNull, StructType(Seq(
|
||||
StructField("a", BooleanType, nullable = true),
|
||||
|
@ -571,9 +533,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
checkCast("n", false)
|
||||
checkCast("no", false)
|
||||
checkCast("0", false)
|
||||
|
||||
checkEvaluation(cast("abc", BooleanType), null)
|
||||
checkEvaluation(cast("", BooleanType), null)
|
||||
}
|
||||
|
||||
protected def checkInvalidCastFromNumericType(to: DataType): Unit = {
|
||||
|
@ -955,6 +914,114 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
|
|||
"invalid input syntax for type numeric")
|
||||
}
|
||||
|
||||
protected def checkCastToBooleanError(l: Literal, to: DataType): Unit = {
|
||||
checkExceptionInExpression[UnsupportedOperationException](
|
||||
cast(l, to), s"invalid input syntax for type boolean")
|
||||
}
|
||||
|
||||
test("ANSI mode: cast string to boolean with parse error") {
|
||||
checkCastToBooleanError(Literal("abc"), BooleanType)
|
||||
checkCastToBooleanError(Literal(""), BooleanType)
|
||||
}
|
||||
|
||||
test("cast from array II") {
|
||||
val array = Literal.create(Seq("123", "true", "f", null),
|
||||
ArrayType(StringType, containsNull = true))
|
||||
val array_notNull = Literal.create(Seq("123", "true", "f"),
|
||||
ArrayType(StringType, containsNull = false))
|
||||
|
||||
{
|
||||
val to: DataType = ArrayType(BooleanType, containsNull = true)
|
||||
val ret = cast(array, to)
|
||||
assert(ret.resolved)
|
||||
checkCastToBooleanError(array, to)
|
||||
}
|
||||
|
||||
{
|
||||
val to: DataType = ArrayType(BooleanType, containsNull = true)
|
||||
val ret = cast(array_notNull, to)
|
||||
assert(ret.resolved)
|
||||
checkCastToBooleanError(array_notNull, to)
|
||||
}
|
||||
}
|
||||
|
||||
test("cast from map II") {
|
||||
val map = Literal.create(
|
||||
Map("a" -> "123", "b" -> "true", "c" -> "f", "d" -> null),
|
||||
MapType(StringType, StringType, valueContainsNull = true))
|
||||
val map_notNull = Literal.create(
|
||||
Map("a" -> "123", "b" -> "true", "c" -> "f"),
|
||||
MapType(StringType, StringType, valueContainsNull = false))
|
||||
|
||||
checkNullCast(MapType(StringType, IntegerType), MapType(StringType, StringType))
|
||||
|
||||
{
|
||||
val to: DataType = MapType(StringType, BooleanType, valueContainsNull = true)
|
||||
val ret = cast(map, to)
|
||||
assert(ret.resolved)
|
||||
checkCastToBooleanError(map, to)
|
||||
}
|
||||
|
||||
{
|
||||
val to: DataType = MapType(StringType, BooleanType, valueContainsNull = true)
|
||||
val ret = cast(map_notNull, to)
|
||||
assert(ret.resolved)
|
||||
checkCastToBooleanError(map_notNull, to)
|
||||
}
|
||||
}
|
||||
|
||||
test("cast from struct II") {
|
||||
checkNullCast(
|
||||
StructType(Seq(
|
||||
StructField("a", StringType),
|
||||
StructField("b", IntegerType))),
|
||||
StructType(Seq(
|
||||
StructField("a", StringType),
|
||||
StructField("b", StringType))))
|
||||
|
||||
val struct = Literal.create(
|
||||
InternalRow(
|
||||
UTF8String.fromString("123"),
|
||||
UTF8String.fromString("true"),
|
||||
UTF8String.fromString("f"),
|
||||
null),
|
||||
StructType(Seq(
|
||||
StructField("a", StringType, nullable = true),
|
||||
StructField("b", StringType, nullable = true),
|
||||
StructField("c", StringType, nullable = true),
|
||||
StructField("d", StringType, nullable = true))))
|
||||
val struct_notNull = Literal.create(
|
||||
InternalRow(
|
||||
UTF8String.fromString("123"),
|
||||
UTF8String.fromString("true"),
|
||||
UTF8String.fromString("f")),
|
||||
StructType(Seq(
|
||||
StructField("a", StringType, nullable = false),
|
||||
StructField("b", StringType, nullable = false),
|
||||
StructField("c", StringType, nullable = false))))
|
||||
|
||||
{
|
||||
val to: DataType = StructType(Seq(
|
||||
StructField("a", BooleanType, nullable = true),
|
||||
StructField("b", BooleanType, nullable = true),
|
||||
StructField("c", BooleanType, nullable = true),
|
||||
StructField("d", BooleanType, nullable = true)))
|
||||
val ret = cast(struct, to)
|
||||
assert(ret.resolved)
|
||||
checkCastToBooleanError(struct, to)
|
||||
}
|
||||
|
||||
{
|
||||
val to: DataType = StructType(Seq(
|
||||
StructField("a", BooleanType, nullable = true),
|
||||
StructField("b", BooleanType, nullable = true),
|
||||
StructField("c", BooleanType, nullable = true)))
|
||||
val ret = cast(struct_notNull, to)
|
||||
assert(ret.resolved)
|
||||
checkCastToBooleanError(struct_notNull, to)
|
||||
}
|
||||
}
|
||||
|
||||
test("ANSI mode: cast string to timestamp with parse error") {
|
||||
val activeConf = conf
|
||||
DateTimeTestUtils.outstandingZoneIds.foreach { zid =>
|
||||
|
@ -1185,6 +1252,101 @@ class CastSuite extends CastSuiteBase {
|
|||
StructType(StructField("a", IntegerType, true) :: Nil)))
|
||||
}
|
||||
|
||||
test("cast string to boolean II") {
|
||||
checkEvaluation(cast("abc", BooleanType), null)
|
||||
checkEvaluation(cast("", BooleanType), null)
|
||||
}
|
||||
|
||||
test("cast from array II") {
|
||||
val array = Literal.create(Seq("123", "true", "f", null),
|
||||
ArrayType(StringType, containsNull = true))
|
||||
val array_notNull = Literal.create(Seq("123", "true", "f"),
|
||||
ArrayType(StringType, containsNull = false))
|
||||
|
||||
{
|
||||
val ret = cast(array, ArrayType(BooleanType, containsNull = true))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, Seq(null, true, false, null))
|
||||
}
|
||||
|
||||
{
|
||||
val ret = cast(array_notNull, ArrayType(BooleanType, containsNull = true))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, Seq(null, true, false))
|
||||
}
|
||||
}
|
||||
|
||||
test("cast from map II") {
|
||||
val map = Literal.create(
|
||||
Map("a" -> "123", "b" -> "true", "c" -> "f", "d" -> null),
|
||||
MapType(StringType, StringType, valueContainsNull = true))
|
||||
val map_notNull = Literal.create(
|
||||
Map("a" -> "123", "b" -> "true", "c" -> "f"),
|
||||
MapType(StringType, StringType, valueContainsNull = false))
|
||||
|
||||
{
|
||||
val ret = cast(map, MapType(StringType, BooleanType, valueContainsNull = true))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, Map("a" -> null, "b" -> true, "c" -> false, "d" -> null))
|
||||
}
|
||||
|
||||
{
|
||||
val ret = cast(map_notNull, MapType(StringType, BooleanType, valueContainsNull = true))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, Map("a" -> null, "b" -> true, "c" -> false))
|
||||
}
|
||||
}
|
||||
|
||||
test("cast from struct II") {
|
||||
checkNullCast(
|
||||
StructType(Seq(
|
||||
StructField("a", StringType),
|
||||
StructField("b", IntegerType))),
|
||||
StructType(Seq(
|
||||
StructField("a", StringType),
|
||||
StructField("b", StringType))))
|
||||
|
||||
val struct = Literal.create(
|
||||
InternalRow(
|
||||
UTF8String.fromString("123"),
|
||||
UTF8String.fromString("true"),
|
||||
UTF8String.fromString("f"),
|
||||
null),
|
||||
StructType(Seq(
|
||||
StructField("a", StringType, nullable = true),
|
||||
StructField("b", StringType, nullable = true),
|
||||
StructField("c", StringType, nullable = true),
|
||||
StructField("d", StringType, nullable = true))))
|
||||
val struct_notNull = Literal.create(
|
||||
InternalRow(
|
||||
UTF8String.fromString("123"),
|
||||
UTF8String.fromString("true"),
|
||||
UTF8String.fromString("f")),
|
||||
StructType(Seq(
|
||||
StructField("a", StringType, nullable = false),
|
||||
StructField("b", StringType, nullable = false),
|
||||
StructField("c", StringType, nullable = false))))
|
||||
|
||||
{
|
||||
val ret = cast(struct, StructType(Seq(
|
||||
StructField("a", BooleanType, nullable = true),
|
||||
StructField("b", BooleanType, nullable = true),
|
||||
StructField("c", BooleanType, nullable = true),
|
||||
StructField("d", BooleanType, nullable = true))))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, InternalRow(null, true, false, null))
|
||||
}
|
||||
|
||||
{
|
||||
val ret = cast(struct_notNull, StructType(Seq(
|
||||
StructField("a", BooleanType, nullable = true),
|
||||
StructField("b", BooleanType, nullable = true),
|
||||
StructField("c", BooleanType, nullable = true))))
|
||||
assert(ret.resolved)
|
||||
checkEvaluation(ret, InternalRow(null, true, false))
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-31227: Non-nullable null type should not coerce to nullable type") {
|
||||
TypeCoercionSuite.allTypes.foreach { t =>
|
||||
assert(Cast.canCast(ArrayType(NullType, false), ArrayType(t, false)))
|
||||
|
|
|
@ -53,9 +53,10 @@ true
|
|||
-- !query
|
||||
SELECT boolean('test') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: test
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -69,9 +70,10 @@ false
|
|||
-- !query
|
||||
SELECT boolean('foo') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: foo
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -93,9 +95,10 @@ true
|
|||
-- !query
|
||||
SELECT boolean('yeah') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: yeah
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -117,57 +120,64 @@ false
|
|||
-- !query
|
||||
SELECT boolean('nay') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: nay
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT boolean('on') AS true
|
||||
-- !query schema
|
||||
struct<true:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: on
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT boolean('off') AS `false`
|
||||
-- !query schema
|
||||
struct<false:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: off
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT boolean('of') AS `false`
|
||||
-- !query schema
|
||||
struct<false:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: of
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT boolean('o') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: o
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT boolean('on_') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: on_
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT boolean('off_') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: off_
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -181,9 +191,10 @@ true
|
|||
-- !query
|
||||
SELECT boolean('11') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: 11
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -197,17 +208,19 @@ false
|
|||
-- !query
|
||||
SELECT boolean('000') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: 000
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT boolean('') AS error
|
||||
-- !query schema
|
||||
struct<error:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean:
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -310,17 +323,19 @@ true false
|
|||
-- !query
|
||||
SELECT boolean(string(' tru e ')) AS invalid
|
||||
-- !query schema
|
||||
struct<invalid:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean: tru e
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT boolean(string('')) AS invalid
|
||||
-- !query schema
|
||||
struct<invalid:boolean>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.lang.UnsupportedOperationException
|
||||
invalid input syntax for type boolean:
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -463,7 +478,8 @@ INSERT INTO BOOLTBL2
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
|
||||
org.apache.spark.sql.AnalysisException
|
||||
failed to evaluate expression CAST('XXX' AS BOOLEAN): invalid input syntax for type boolean: XXX; line 2 pos 3
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -471,7 +487,6 @@ SELECT '' AS f_4, BOOLTBL2.* FROM BOOLTBL2
|
|||
-- !query schema
|
||||
struct<f_4:string,f1:boolean>
|
||||
-- !query output
|
||||
NULL
|
||||
false
|
||||
false
|
||||
false
|
||||
|
@ -545,9 +560,6 @@ struct<tf_12_ff_4:string,f1:boolean,f1:boolean>
|
|||
false false
|
||||
false false
|
||||
false false
|
||||
true NULL
|
||||
true NULL
|
||||
true NULL
|
||||
true false
|
||||
true false
|
||||
true false
|
||||
|
@ -623,7 +635,7 @@ SELECT '' AS `Not False`, f1
|
|||
-- !query schema
|
||||
struct<Not False:string,f1:boolean>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -646,7 +658,6 @@ SELECT '' AS `Not True`, f1
|
|||
-- !query schema
|
||||
struct<Not True:string,f1:boolean>
|
||||
-- !query output
|
||||
NULL
|
||||
false
|
||||
false
|
||||
false
|
||||
|
|
Loading…
Reference in a new issue