[SPARK-35719][SQL] Support type conversion between timestamp and timestamp without time zone type

### What changes were proposed in this pull request?

1. Extend the Cast expression and support TimestampType in casting to TimestampWithoutTZType.
2. There was a mistake in casting TimestampWithoutTZType as TimestampType in https://github.com/apache/spark/pull/32864. The target value should be `sourceValue - timeZoneOffset` (adjusting for the session time zone) rather than the unchanged source value.

### Why are the changes needed?

To conform to the ANSI SQL standard, which requires support for such casting.

### Does this PR introduce _any_ user-facing change?

No, the new timestamp type is not released yet.

### How was this patch tested?

Unit test

Closes #32878 from gengliangwang/timestampToTimestampWithoutTZ.

Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
This commit is contained in:
Gengliang Wang 2021-06-13 18:44:24 +03:00 committed by Max Gekk
parent 0ba1d3852b
commit 6272222bc0
2 changed files with 43 additions and 8 deletions

View file

@ -71,6 +71,7 @@ object Cast {
case (TimestampWithoutTZType, TimestampType) => true
case (DateType, TimestampWithoutTZType) => true
case (TimestampType, TimestampWithoutTZType) => true
case (StringType, DateType) => true
case (TimestampType, DateType) => true
@ -498,7 +499,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
buildCast[Byte](_, b => longToTimestamp(b.toLong))
case DateType =>
buildCast[Int](_, d => daysToMicros(d, zoneId))
case TimestampWithoutTZType => buildCast[Long](_, ts => ts)
case TimestampWithoutTZType =>
buildCast[Long](_, ts => convertTz(ts, zoneId, ZoneOffset.UTC))
// TimestampWritable.decimalToTimestamp
case DecimalType() =>
buildCast[Decimal](_, d => decimalToTimestamp(d))
@ -513,6 +515,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
private[this] def castToTimestampWithoutTZ(from: DataType): Any => Any = from match {
case DateType =>
buildCast[Int](_, d => daysToMicros(d, ZoneOffset.UTC))
case TimestampType =>
buildCast[Long](_, ts => convertTz(ts, ZoneOffset.UTC, zoneId))
}
private[this] def decimalToTimestamp(d: Decimal): Long = {
@ -930,7 +934,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case DateType => castToDateCode(from, ctx)
case decimal: DecimalType => castToDecimalCode(from, decimal, ctx)
case TimestampType => castToTimestampCode(from, ctx)
case TimestampWithoutTZType => castToTimestampWithoutTZCode(from)
case TimestampWithoutTZType => castToTimestampWithoutTZCode(from, ctx)
case CalendarIntervalType => castToIntervalCode(from)
case it: DayTimeIntervalType => castToDayTimeIntervalCode(from, it)
case YearMonthIntervalType => castToYearMonthIntervalCode(from)
@ -1371,7 +1375,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
code"""$evPrim =
org.apache.spark.sql.catalyst.util.DateTimeUtils.daysToMicros($c, $zid);"""
case TimestampWithoutTZType =>
(c, evPrim, evNull) => code"$evPrim = $c;"
val zoneIdClass = classOf[ZoneId]
val zid = JavaCode.global(
ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName),
zoneIdClass)
(c, evPrim, evNull) =>
code"$evPrim = $dateTimeUtilsCls.convertTz($c, $zid, java.time.ZoneOffset.UTC);"
case DecimalType() =>
(c, evPrim, evNull) => code"$evPrim = ${decimalToTimestampCode(c)};"
case DoubleType =>
@ -1394,10 +1403,19 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
"""
}
private[this] def castToTimestampWithoutTZCode(from: DataType): CastFunction = from match {
private[this] def castToTimestampWithoutTZCode(
from: DataType,
ctx: CodegenContext): CastFunction = from match {
case DateType =>
(c, evPrim, evNull) =>
code"$evPrim = $dateTimeUtilsCls.daysToMicros($c, java.time.ZoneOffset.UTC);"
case TimestampType =>
val zoneIdClass = classOf[ZoneId]
val zid = JavaCode.global(
ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName),
zoneIdClass)
(c, evPrim, evNull) =>
code"$evPrim = $dateTimeUtilsCls.convertTz($c, java.time.ZoneOffset.UTC, $zid);"
}
private[this] def castToIntervalCode(from: DataType): CastFunction = from match {
@ -1990,6 +2008,7 @@ object AnsiCast {
case (TimestampWithoutTZType, TimestampType) => true
case (DateType, TimestampWithoutTZType) => true
case (TimestampType, TimestampWithoutTZType) => true
case (StringType, _: CalendarIntervalType) => true
case (StringType, _: DayTimeIntervalType) => true

View file

@ -35,7 +35,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration
import org.apache.spark.sql.internal.SQLConf
@ -1258,9 +1257,14 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
}
test("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") {
specialTs.foreach { s =>
val dt = LocalDateTime.parse(s)
checkEvaluation(cast(dt, TimestampType), DateTimeUtils.localDateTimeToMicros(dt))
outstandingZoneIds.foreach { zoneId =>
withDefaultTimeZone(zoneId) {
specialTs.foreach { s =>
val input = LocalDateTime.parse(s)
val expectedTs = Timestamp.valueOf(s.replace("T", " "))
checkEvaluation(cast(input, TimestampType), expectedTs)
}
}
}
}
@ -1279,6 +1283,18 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
checkEvaluation(cast(inputDate, TimestampWithoutTZType), expectedTs)
}
}
test("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") {
outstandingZoneIds.foreach { zoneId =>
withDefaultTimeZone(zoneId) {
specialTs.foreach { s =>
val input = Timestamp.valueOf(s.replace("T", " "))
val expectedTs = LocalDateTime.parse(s)
checkEvaluation(cast(input, TimestampWithoutTZType), expectedTs)
}
}
}
}
}
/**