[SPARK-35698][SQL] Support casting of timestamp without time zone to strings

### What changes were proposed in this pull request?

Extend the Cast expression to support casting TimestampWithoutTZType to StringType.

### Why are the changes needed?

To conform to the ANSI SQL standard, which requires support for such casting.

### Does this PR introduce _any_ user-facing change?

No, the new timestamp type is not released yet.

### How was this patch tested?

Unit test

Closes #32846 from gengliangwang/tswtzToString.

Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Gengliang Wang <gengliang@apache.org>
This commit is contained in:
Gengliang Wang 2021-06-10 02:29:37 +08:00
parent f49bf1a072
commit 74b3df86f3
2 changed files with 23 additions and 6 deletions

View file

@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.expressions
import java.time.ZoneId
import java.time.{ZoneId, ZoneOffset}
import java.util.Locale
import java.util.concurrent.TimeUnit._
@ -306,6 +306,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
private lazy val dateFormatter = DateFormatter()
private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)
// Formatter used when casting TimestampWithoutTZType values to strings. Unlike
// `timestampFormatter`, it is built with the fixed ZoneOffset.UTC instead of `zoneId`.
// NOTE(review): presumably this keeps the rendered wall-clock value independent of the
// session time zone -- confirm against the TimestampWithoutTZType storage contract.
private lazy val timestampWithoutTZFormatter =
TimestampFormatter.getFractionFormatter(ZoneOffset.UTC)
private val legacyCastToStr = SQLConf.get.getConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING)
// The brackets that are used in casting structs and maps to strings
@ -319,6 +321,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case DateType => buildCast[Int](_, d => UTF8String.fromString(dateFormatter.format(d)))
case TimestampType => buildCast[Long](_,
t => UTF8String.fromString(timestampFormatter.format(t)))
case TimestampWithoutTZType => buildCast[Long](_,
t => UTF8String.fromString(timestampWithoutTZFormatter.format(t)))
case ArrayType(et, _) =>
buildCast[ArrayData](_, array => {
val builder = new UTF8StringBuilder
@ -1100,6 +1104,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
ctx.addReferenceObj("timestampFormatter", timestampFormatter),
timestampFormatter.getClass)
(c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString($tf.format($c));"""
case TimestampWithoutTZType =>
val tf = JavaCode.global(
ctx.addReferenceObj("timestampWithoutTZFormatter", timestampWithoutTZFormatter),
timestampWithoutTZFormatter.getClass)
(c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString($tf.format($c));"""
case CalendarIntervalType =>
(c, evPrim, _) => code"""$evPrim = UTF8String.fromString($c.toString());"""
case ArrayType(et, _) =>

View file

@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.expressions
import java.sql.{Date, Timestamp}
import java.time.{DateTimeException, Duration, Period}
import java.time.{DateTimeException, Duration, LocalDateTime, Period}
import java.time.temporal.ChronoUnit
import java.util.{Calendar, TimeZone}
@ -63,10 +63,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
checkNullCast(from, to)
}
(atomicTypes -- Set(
// TODO(SPARK-35698): Support casting timestamp without time zone to strings.
TimestampWithoutTZType
)).foreach(dt => checkNullCast(NullType, dt))
atomicTypes.foreach(dt => checkNullCast(NullType, dt))
atomicTypes.foreach(dt => checkNullCast(dt, StringType))
checkNullCast(StringType, BinaryType)
checkNullCast(StringType, BooleanType)
@ -1711,6 +1708,17 @@ class CastSuite extends CastSuiteBase {
}
}
// Checks that casting a LocalDateTime to StringType yields the same
// "yyyy-MM-dd HH:mm:ss" text it was parsed from, for several boundary timestamps.
test("SPARK-35698: cast timestamp without time zone to string") {
Seq(
"0001-01-01 00:00:00", // the first timestamp of Common Era
"1582-10-15 23:59:59", // the cutover date from Julian to Gregorian calendar
"1970-01-01 00:00:00", // the epoch timestamp
"9999-12-31 23:59:59" // the last supported timestamp according to SQL standard
).foreach { s =>
// LocalDateTime.parse expects the ISO form, so swap the space separator for 'T'.
checkEvaluation(cast(LocalDateTime.parse(s.replace(" ", "T")), StringType), s)
}
}
test("SPARK-32828: cast from a derived user-defined type to a base type") {
val v = Literal.create(Row(1), new ExampleSubTypeUDT())
checkEvaluation(cast(v, new ExampleBaseTypeUDT), Row(1))