From 6272222bc076ddd09d89ff952a546bc0d0b47e2d Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Sun, 13 Jun 2021 18:44:24 +0300 Subject: [PATCH] [SPARK-35719][SQL] Support type conversion between timestamp and timestamp without time zone type ### What changes were proposed in this pull request? 1. Extend the Cast expression and support TimestampType in casting to TimestampWithoutTZType. 2. There was a mistake in casting TimestampWithoutTZType as TimestampType in https://github.com/apache/spark/pull/32864. The target value should be `sourceValue - timeZoneOffset` instead of being the same value. ### Why are the changes needed? To conform to the ANSI SQL standard, which requires support for such casting. ### Does this PR introduce _any_ user-facing change? No, the new timestamp type is not released yet. ### How was this patch tested? Unit test Closes #32878 from gengliangwang/timestampToTimestampWithoutTZ. Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../spark/sql/catalyst/expressions/Cast.scala | 27 ++++++++++++++++--- .../sql/catalyst/expressions/CastSuite.scala | 24 ++++++++++++++--- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index cd99ee2a13..5598085eeb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -71,6 +71,7 @@ object Cast { case (TimestampWithoutTZType, TimestampType) => true case (DateType, TimestampWithoutTZType) => true + case (TimestampType, TimestampWithoutTZType) => true case (StringType, DateType) => true case (TimestampType, DateType) => true @@ -498,7 +499,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit buildCast[Byte](_, b => longToTimestamp(b.toLong)) case DateType => buildCast[Int](_, d 
=> daysToMicros(d, zoneId)) - case TimestampWithoutTZType => buildCast[Long](_, ts => ts) + case TimestampWithoutTZType => + buildCast[Long](_, ts => convertTz(ts, zoneId, ZoneOffset.UTC)) // TimestampWritable.decimalToTimestamp case DecimalType() => buildCast[Decimal](_, d => decimalToTimestamp(d)) @@ -513,6 +515,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToTimestampWithoutTZ(from: DataType): Any => Any = from match { case DateType => buildCast[Int](_, d => daysToMicros(d, ZoneOffset.UTC)) + case TimestampType => + buildCast[Long](_, ts => convertTz(ts, ZoneOffset.UTC, zoneId)) } private[this] def decimalToTimestamp(d: Decimal): Long = { @@ -930,7 +934,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case DateType => castToDateCode(from, ctx) case decimal: DecimalType => castToDecimalCode(from, decimal, ctx) case TimestampType => castToTimestampCode(from, ctx) - case TimestampWithoutTZType => castToTimestampWithoutTZCode(from) + case TimestampWithoutTZType => castToTimestampWithoutTZCode(from, ctx) case CalendarIntervalType => castToIntervalCode(from) case it: DayTimeIntervalType => castToDayTimeIntervalCode(from, it) case YearMonthIntervalType => castToYearMonthIntervalCode(from) @@ -1371,7 +1375,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit code"""$evPrim = org.apache.spark.sql.catalyst.util.DateTimeUtils.daysToMicros($c, $zid);""" case TimestampWithoutTZType => - (c, evPrim, evNull) => code"$evPrim = $c;" + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + (c, evPrim, evNull) => + code"$evPrim = $dateTimeUtilsCls.convertTz($c, $zid, java.time.ZoneOffset.UTC);" case DecimalType() => (c, evPrim, evNull) => code"$evPrim = ${decimalToTimestampCode(c)};" case DoubleType => @@ -1394,10 +1403,19 @@ abstract class CastBase extends 
UnaryExpression with TimeZoneAwareExpression wit """ } - private[this] def castToTimestampWithoutTZCode(from: DataType): CastFunction = from match { + private[this] def castToTimestampWithoutTZCode( + from: DataType, + ctx: CodegenContext): CastFunction = from match { case DateType => (c, evPrim, evNull) => code"$evPrim = $dateTimeUtilsCls.daysToMicros($c, java.time.ZoneOffset.UTC);" + case TimestampType => + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + (c, evPrim, evNull) => + code"$evPrim = $dateTimeUtilsCls.convertTz($c, java.time.ZoneOffset.UTC, $zid);" } private[this] def castToIntervalCode(from: DataType): CastFunction = from match { @@ -1990,6 +2008,7 @@ object AnsiCast { case (TimestampWithoutTZType, TimestampType) => true case (DateType, TimestampWithoutTZType) => true + case (TimestampType, TimestampWithoutTZType) => true case (StringType, _: CalendarIntervalType) => true case (StringType, _: DayTimeIntervalType) => true diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index d3d693b44d..c268d52c4f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -35,7 +35,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration import org.apache.spark.sql.internal.SQLConf @@ -1258,9 +1257,14 @@ abstract class 
AnsiCastSuiteBase extends CastSuiteBase { } test("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") { - specialTs.foreach { s => - val dt = LocalDateTime.parse(s) - checkEvaluation(cast(dt, TimestampType), DateTimeUtils.localDateTimeToMicros(dt)) + outstandingZoneIds.foreach { zoneId => + withDefaultTimeZone(zoneId) { + specialTs.foreach { s => + val input = LocalDateTime.parse(s) + val expectedTs = Timestamp.valueOf(s.replace("T", " ")) + checkEvaluation(cast(input, TimestampType), expectedTs) + } + } } } @@ -1279,6 +1283,18 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { checkEvaluation(cast(inputDate, TimestampWithoutTZType), expectedTs) } } + + test("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") { + outstandingZoneIds.foreach { zoneId => + withDefaultTimeZone(zoneId) { + specialTs.foreach { s => + val input = Timestamp.valueOf(s.replace("T", " ")) + val expectedTs = LocalDateTime.parse(s) + checkEvaluation(cast(input, TimestampWithoutTZType), expectedTs) + } + } + } + } } /**