[SPARK-34856][SQL] ANSI mode: Allow casting complex types as string type
### What changes were proposed in this pull request? Allow casting complex types as string type in ANSI mode. ### Why are the changes needed? Currently, complex types are not allowed to cast as string type. This breaks the DataFrame.show() API. E.g ``` scala> sql("select array(1, 2, 2)").show(false) org.apache.spark.sql.AnalysisException: cannot resolve 'CAST(`array(1, 2, 2)` AS STRING)' due to data type mismatch: cannot cast array<int> to string with ANSI mode on. ``` We should allow the conversion as the extension of the ANSI SQL standard, so that the DataFrame.show() still works in ANSI mode. ### Does this PR introduce _any_ user-facing change? Yes, casting complex types as string type is now allowed in ANSI mode. ### How was this patch tested? Unit tests. Closes #31954 from gengliangwang/fixExplicitCast. Authored-by: Gengliang Wang <ltnwgl@gmail.com> Signed-off-by: Gengliang Wang <ltnwgl@gmail.com>
This commit is contained in:
parent
0d91f9c3f3
commit
0515f49018
|
@@ -76,6 +76,9 @@ The type conversion of Spark ANSI mode follows the syntax rules of section 6.13
|
|||
straightforward type conversions which are disallowed as per the ANSI standard:
|
||||
* NumericType <=> BooleanType
|
||||
* StringType <=> BinaryType
|
||||
* ArrayType => String
|
||||
* MapType => String
|
||||
* StructType => String
|
||||
|
||||
The valid combinations of target data type and source data type in a `CAST` expression are given by the following table.
|
||||
"Y" indicates that the combination is syntactically valid without restriction and "N" indicates that the combination is not valid.
|
||||
|
@@ -89,9 +92,9 @@ The type conversion of Spark ANSI mode follows the syntax rules of section 6.13
|
|||
| Interval | N | Y | N | N | Y | N | N | N | N | N |
|
||||
| Boolean | Y | Y | N | N | N | Y | N | N | N | N |
|
||||
| Binary | N | Y | N | N | N | N | Y | N | N | N |
|
||||
| Array | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> | N | N |
|
||||
| Map | N | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> | N |
|
||||
| Struct | N | N | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> |
|
||||
| Array | N | Y | N | N | N | N | N | <span style="color:red">**Y**</span> | N | N |
|
||||
| Map | N | Y | N | N | N | N | N | N | <span style="color:red">**Y**</span> | N |
|
||||
| Struct | N | Y | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> |
|
||||
|
||||
In the table above, all the `CAST`s that can cause runtime exceptions are marked as red <span style="color:red">**Y**</span>:
|
||||
* CAST(Numeric AS Numeric): raise an overflow exception if the value is out of the target data type's range.
|
||||
|
|
|
@@ -1873,6 +1873,8 @@ object AnsiCast {
|
|||
|
||||
case (NullType, _) => true
|
||||
|
||||
case (_, StringType) => true
|
||||
|
||||
case (StringType, _: BinaryType) => true
|
||||
|
||||
case (StringType, BooleanType) => true
|
||||
|
@@ -1890,13 +1892,6 @@ object AnsiCast {
|
|||
case (StringType, _: NumericType) => true
|
||||
case (BooleanType, _: NumericType) => true
|
||||
|
||||
case (_: NumericType, StringType) => true
|
||||
case (_: DateType, StringType) => true
|
||||
case (_: TimestampType, StringType) => true
|
||||
case (_: CalendarIntervalType, StringType) => true
|
||||
case (BooleanType, StringType) => true
|
||||
case (BinaryType, StringType) => true
|
||||
|
||||
case (ArrayType(fromType, fn), ArrayType(toType, tn)) =>
|
||||
canCast(fromType, toType) &&
|
||||
resolvableNullability(fn || forceNullable(fromType, toType), tn)
|
||||
|
|
|
@@ -686,6 +686,117 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
checkEvaluation(cast(value, DoubleType), Double.NaN)
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-22825 Cast array to string") {
|
||||
val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)
|
||||
checkEvaluation(ret1, "[1, 2, 3, 4, 5]")
|
||||
val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType)
|
||||
checkEvaluation(ret2, "[ab, cde, f]")
|
||||
Seq(false, true).foreach { omitNull =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) {
|
||||
val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType)
|
||||
checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]")
|
||||
}
|
||||
}
|
||||
val ret4 =
|
||||
cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType)
|
||||
checkEvaluation(ret4, "[ab, cde, f]")
|
||||
val ret5 = cast(
|
||||
Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)),
|
||||
StringType)
|
||||
checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]")
|
||||
val ret6 = cast(
|
||||
Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00")
|
||||
.map(Timestamp.valueOf)),
|
||||
StringType)
|
||||
checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]")
|
||||
val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType)
|
||||
checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]")
|
||||
val ret8 = cast(
|
||||
Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))),
|
||||
StringType)
|
||||
checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]")
|
||||
}
|
||||
|
||||
test("SPARK-33291: Cast array with null elements to string") {
|
||||
Seq(false, true).foreach { omitNull =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) {
|
||||
val ret1 = cast(Literal.create(Array(null, null)), StringType)
|
||||
checkEvaluation(
|
||||
ret1,
|
||||
s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-22973 Cast map to string") {
|
||||
Seq(
|
||||
false -> ("{", "}"),
|
||||
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
|
||||
val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType)
|
||||
checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb")
|
||||
val ret2 = cast(
|
||||
Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)),
|
||||
StringType)
|
||||
checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb")
|
||||
val ret3 = cast(
|
||||
Literal.create(Map(
|
||||
1 -> Date.valueOf("2014-12-03"),
|
||||
2 -> Date.valueOf("2014-12-04"),
|
||||
3 -> Date.valueOf("2014-12-05"))),
|
||||
StringType)
|
||||
checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb")
|
||||
val ret4 = cast(
|
||||
Literal.create(Map(
|
||||
1 -> Timestamp.valueOf("2014-12-03 13:01:00"),
|
||||
2 -> Timestamp.valueOf("2014-12-04 15:05:00"))),
|
||||
StringType)
|
||||
checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb")
|
||||
val ret5 = cast(
|
||||
Literal.create(Map(
|
||||
1 -> Array(1, 2, 3),
|
||||
2 -> Array(4, 5, 6))),
|
||||
StringType)
|
||||
checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-22981 Cast struct to string") {
|
||||
Seq(
|
||||
false -> ("{", "}"),
|
||||
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
|
||||
val ret1 = cast(Literal.create((1, "a", 0.1)), StringType)
|
||||
checkEvaluation(ret1, s"${lb}1, a, 0.1$rb")
|
||||
val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType)
|
||||
checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb")
|
||||
val ret3 = cast(Literal.create(
|
||||
(Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType)
|
||||
checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb")
|
||||
val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType)
|
||||
checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb")
|
||||
val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType)
|
||||
checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb")
|
||||
val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType)
|
||||
checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-33291: Cast struct with null elements to string") {
|
||||
Seq(
|
||||
false -> ("{", "}"),
|
||||
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
|
||||
val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType)
|
||||
checkEvaluation(
|
||||
ret1,
|
||||
s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
abstract class AnsiCastSuiteBase extends CastSuiteBase {
|
||||
|
@@ -851,12 +962,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
|
|||
assert(cast(booleanLiteral, DateType).checkInputDataTypes().isFailure)
|
||||
}
|
||||
|
||||
test("ANSI mode: disallow casting complex types as String type") {
|
||||
verifyCastFailure(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType))
|
||||
verifyCastFailure(cast(Literal.create(Map(1 -> "a")), StringType))
|
||||
verifyCastFailure(cast(Literal.create((1, "a", 0.1)), StringType))
|
||||
}
|
||||
|
||||
test("cast from invalid string to numeric should throw NumberFormatException") {
|
||||
// cast to IntegerType
|
||||
Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType =>
|
||||
|
@@ -1569,117 +1674,6 @@ class CastSuite extends CastSuiteBase {
|
|||
checkEvaluation(cast("abcd", DecimalType(38, 1)), null)
|
||||
}
|
||||
|
||||
test("SPARK-22825 Cast array to string") {
|
||||
val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)
|
||||
checkEvaluation(ret1, "[1, 2, 3, 4, 5]")
|
||||
val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType)
|
||||
checkEvaluation(ret2, "[ab, cde, f]")
|
||||
Seq(false, true).foreach { omitNull =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) {
|
||||
val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType)
|
||||
checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]")
|
||||
}
|
||||
}
|
||||
val ret4 =
|
||||
cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType)
|
||||
checkEvaluation(ret4, "[ab, cde, f]")
|
||||
val ret5 = cast(
|
||||
Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)),
|
||||
StringType)
|
||||
checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]")
|
||||
val ret6 = cast(
|
||||
Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00")
|
||||
.map(Timestamp.valueOf)),
|
||||
StringType)
|
||||
checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]")
|
||||
val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType)
|
||||
checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]")
|
||||
val ret8 = cast(
|
||||
Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))),
|
||||
StringType)
|
||||
checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]")
|
||||
}
|
||||
|
||||
test("SPARK-33291: Cast array with null elements to string") {
|
||||
Seq(false, true).foreach { omitNull =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) {
|
||||
val ret1 = cast(Literal.create(Array(null, null)), StringType)
|
||||
checkEvaluation(
|
||||
ret1,
|
||||
s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-22973 Cast map to string") {
|
||||
Seq(
|
||||
false -> ("{", "}"),
|
||||
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
|
||||
val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType)
|
||||
checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb")
|
||||
val ret2 = cast(
|
||||
Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)),
|
||||
StringType)
|
||||
checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb")
|
||||
val ret3 = cast(
|
||||
Literal.create(Map(
|
||||
1 -> Date.valueOf("2014-12-03"),
|
||||
2 -> Date.valueOf("2014-12-04"),
|
||||
3 -> Date.valueOf("2014-12-05"))),
|
||||
StringType)
|
||||
checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb")
|
||||
val ret4 = cast(
|
||||
Literal.create(Map(
|
||||
1 -> Timestamp.valueOf("2014-12-03 13:01:00"),
|
||||
2 -> Timestamp.valueOf("2014-12-04 15:05:00"))),
|
||||
StringType)
|
||||
checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb")
|
||||
val ret5 = cast(
|
||||
Literal.create(Map(
|
||||
1 -> Array(1, 2, 3),
|
||||
2 -> Array(4, 5, 6))),
|
||||
StringType)
|
||||
checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-22981 Cast struct to string") {
|
||||
Seq(
|
||||
false -> ("{", "}"),
|
||||
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
|
||||
val ret1 = cast(Literal.create((1, "a", 0.1)), StringType)
|
||||
checkEvaluation(ret1, s"${lb}1, a, 0.1$rb")
|
||||
val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType)
|
||||
checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb")
|
||||
val ret3 = cast(Literal.create(
|
||||
(Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType)
|
||||
checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb")
|
||||
val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType)
|
||||
checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb")
|
||||
val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType)
|
||||
checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb")
|
||||
val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType)
|
||||
checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-33291: Cast struct with null elements to string") {
|
||||
Seq(
|
||||
false -> ("{", "}"),
|
||||
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
|
||||
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
|
||||
val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType)
|
||||
checkEvaluation(
|
||||
ret1,
|
||||
s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("data type casting II") {
|
||||
checkEvaluation(
|
||||
cast(cast(cast(cast(cast(cast("5", ByteType), TimestampType),
|
||||
|
|
Loading…
Reference in a new issue