From 74b3df86f347ed6279bf127153a2c8c4927af21e Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 10 Jun 2021 02:29:37 +0800 Subject: [PATCH] [SPARK-35698][SQL] Support casting of timestamp without time zone to strings ### What changes were proposed in this pull request? Extend the Cast expression and support TimestampWithoutTZType in casting to StringType. ### Why are the changes needed? To conform to the ANSI SQL standard, which requires support for such casting. ### Does this PR introduce _any_ user-facing change? No, the new timestamp type is not released yet. ### How was this patch tested? Unit test Closes #32846 from gengliangwang/tswtzToString. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/expressions/Cast.scala | 11 ++++++++++- .../sql/catalyst/expressions/CastSuite.scala | 18 +++++++++++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index d879fb089e..aea4d54bec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import java.time.ZoneId +import java.time.{ZoneId, ZoneOffset} import java.util.Locale import java.util.concurrent.TimeUnit._ @@ -306,6 +306,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private lazy val dateFormatter = DateFormatter() private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) + private lazy val timestampWithoutTZFormatter = + TimestampFormatter.getFractionFormatter(ZoneOffset.UTC) private val legacyCastToStr = SQLConf.get.getConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING) // The brackets that are used in casting structs and maps to strings @@ -319,6 
+321,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case DateType => buildCast[Int](_, d => UTF8String.fromString(dateFormatter.format(d))) case TimestampType => buildCast[Long](_, t => UTF8String.fromString(timestampFormatter.format(t))) + case TimestampWithoutTZType => buildCast[Long](_, + t => UTF8String.fromString(timestampWithoutTZFormatter.format(t))) case ArrayType(et, _) => buildCast[ArrayData](_, array => { val builder = new UTF8StringBuilder @@ -1100,6 +1104,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit ctx.addReferenceObj("timestampFormatter", timestampFormatter), timestampFormatter.getClass) (c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString($tf.format($c));""" + case TimestampWithoutTZType => + val tf = JavaCode.global( + ctx.addReferenceObj("timestampWithoutTZFormatter", timestampWithoutTZFormatter), + timestampWithoutTZFormatter.getClass) + (c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString($tf.format($c));""" case CalendarIntervalType => (c, evPrim, _) => code"""$evPrim = UTF8String.fromString($c.toString());""" case ArrayType(et, _) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index ad1da9634a..946fdcdfe7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} -import java.time.{DateTimeException, Duration, Period} +import java.time.{DateTimeException, Duration, LocalDateTime, Period} import java.time.temporal.ChronoUnit import java.util.{Calendar, TimeZone} @@ -63,10 +63,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkNullCast(from, 
to) } - (atomicTypes -- Set( - // TODO(SPARK-35698): Support casting timestamp without time zone to strings. - TimestampWithoutTZType - )).foreach(dt => checkNullCast(NullType, dt)) + atomicTypes.foreach(dt => checkNullCast(NullType, dt)) atomicTypes.foreach(dt => checkNullCast(dt, StringType)) checkNullCast(StringType, BinaryType) checkNullCast(StringType, BooleanType) @@ -1711,6 +1708,17 @@ class CastSuite extends CastSuiteBase { } } + test("SPARK-35698: cast timestamp without time zone to string") { + Seq( + "0001-01-01 00:00:00", // the first timestamp of the Common Era + "1582-10-15 23:59:59", // the cutover date from Julian to Gregorian calendar + "1970-01-01 00:00:00", // the epoch timestamp + "9999-12-31 23:59:59" // the last supported timestamp according to SQL standard + ).foreach { s => + checkEvaluation(cast(LocalDateTime.parse(s.replace(" ", "T")), StringType), s) + } + } + test("SPARK-32828: cast from a derived user-defined type to a base type") { val v = Literal.create(Row(1), new ExampleSubTypeUDT()) checkEvaluation(cast(v, new ExampleBaseTypeUDT), Row(1))