[SPARK-27423][SQL] Cast DATE <-> TIMESTAMP according to the SQL standard

## What changes were proposed in this pull request?

According to the SQL standard, a value of the `DATE` type is a union of the year, month and day-of-month fields, and it is independent of any time zone. To convert it to Catalyst's `TIMESTAMP`, a `DATE` value should be "extended" by the time at midnight, `00:00:00`. The resulting local date+time should be considered a timestamp in the session time zone, and cast to microseconds since the epoch in `UTC` accordingly. A standalone `java.time` sketch of this direction is shown below.
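For illustration only, here is a minimal `java.time` sketch of this conversion; the zone and values are example assumptions, not code from this patch:

```scala
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZoneId}
import java.time.temporal.ChronoUnit

val sessionZone = ZoneId.of("America/Los_Angeles") // assumed session time zone
val epochDays = 17996L // DATE 2019-04-10 as days since the epoch 1970-01-01
// Extend the date with the time at midnight and interpret the resulting
// local date+time in the session time zone.
val localDateTime = LocalDateTime.of(LocalDate.ofEpochDay(epochDays), LocalTime.MIDNIGHT)
val instant = localDateTime.atZone(sessionZone).toInstant
// Catalyst's TIMESTAMP representation: microseconds since the epoch in UTC.
val micros = ChronoUnit.MICROS.between(Instant.EPOCH, instant)
```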

The reverse cast from `TIMESTAMP` to `DATE` should be performed in a similar way: a `TIMESTAMP` value should be represented as a local date+time in the session time zone, and the time component should simply be dropped. For example, `TIMESTAMP 2019-04-10 00:10:12` -> `DATE 2019-04-10`. The resulting date is converted to days since the epoch `1970-01-01`, as in the sketch below.
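A matching sketch for the reverse direction, under the same assumptions:

```scala
import java.time.{Instant, ZoneId}
import java.time.temporal.ChronoUnit

val sessionZone = ZoneId.of("America/Los_Angeles") // assumed session time zone
val micros = 1554854400000000L // 2019-04-10 00:00:00 UTC as microseconds since the epoch
// Represent the timestamp as a local date+time in the session time zone
// and drop the time component.
val localDate = Instant.EPOCH.plus(micros, ChronoUnit.MICROS).atZone(sessionZone).toLocalDate
val days = localDate.toEpochDay // DATE as days since the epoch 1970-01-01
```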

## How was this patch tested?

The changes were tested by the existing test suites `DateFunctionsSuite`, `DateExpressionsSuite` and `CastSuite`.
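For illustration only, the expected behavior can be sketched as a `spark-shell` session like the following; the time zone and the output comments are assumptions, not excerpts from the suites:

```scala
spark.conf.set("spark.sql.session.timeZone", "Europe/Amsterdam")
spark.sql("SELECT CAST(DATE '2019-04-10' AS TIMESTAMP)").show()
// expected: 2019-04-10 00:00:00, i.e. midnight in the session time zone
spark.sql("SELECT CAST(TIMESTAMP '2019-04-10 00:10:12' AS DATE)").show()
// expected: 2019-04-10, i.e. the time component is dropped
```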

Closes #24332 from MaxGekk/cast-timestamp-to-date2.

Lead-authored-by: Maxim Gekk <maxim.gekk@databricks.com>
Co-authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
Maxim Gekk authored on 2019-04-10 22:41:19 +08:00, committed by Wenchen Fan
commit ab8710b579
parent 1470f23ec9
2 changed files with 22 additions and 8 deletions

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

@@ -18,6 +18,7 @@
 package org.apache.spark.sql.catalyst.expressions

 import java.math.{BigDecimal => JavaBigDecimal}
+import java.time.{LocalDate, LocalDateTime, LocalTime}
 import java.util.concurrent.TimeUnit._

 import org.apache.spark.SparkException
@@ -381,7 +382,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
     case ByteType =>
       buildCast[Byte](_, b => longToTimestamp(b.toLong))
     case DateType =>
-      buildCast[Int](_, d => MILLISECONDS.toMicros(DateTimeUtils.daysToMillis(d, timeZone)))
+      buildCast[Int](_, d => epochDaysToMicros(d, zoneId))
     // TimestampWritable.decimalToTimestamp
     case DecimalType() =>
       buildCast[Decimal](_, d => decimalToTimestamp(d))
@@ -418,7 +419,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
     case TimestampType =>
       // throw valid precision more than seconds, according to Hive.
       // Timestamp.nanos is in 0 to 999,999,999, no more than a second.
-      buildCast[Long](_, t => DateTimeUtils.millisToDays(MICROSECONDS.toMillis(t), timeZone))
+      buildCast[Long](_, t => microsToEpochDays(t, zoneId))
   }
// IntervalConverter
@@ -935,11 +936,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
           }
         """
     case TimestampType =>
-      val tz = JavaCode.global(ctx.addReferenceObj("timeZone", timeZone), timeZone.getClass)
+      val zid = JavaCode.global(
+        ctx.addReferenceObj("zoneId", zoneId, "java.time.ZoneId"),
+        zoneId.getClass)
       (c, evPrim, evNull) =>
         code"""$evPrim =
-          org.apache.spark.sql.catalyst.util.DateTimeUtils.millisToDays(
-            $c / $MICROS_PER_MILLIS, $tz);"""
+          org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);"""
     case _ =>
       (c, evPrim, evNull) => code"$evNull = true;"
   }
@@ -1043,11 +1045,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
     case _: IntegralType =>
      (c, evPrim, evNull) => code"$evPrim = ${longToTimeStampCode(c)};"
     case DateType =>
-      val tz = JavaCode.global(ctx.addReferenceObj("timeZone", timeZone), timeZone.getClass)
+      val zid = JavaCode.global(
+        ctx.addReferenceObj("zoneId", zoneId, "java.time.ZoneId"),
+        zoneId.getClass)
       (c, evPrim, evNull) =>
         code"""$evPrim =
-          org.apache.spark.sql.catalyst.util.DateTimeUtils.daysToMillis(
-            $c, $tz) * $MICROS_PER_MILLIS;"""
+          org.apache.spark.sql.catalyst.util.DateTimeUtils.epochDaysToMicros($c, $zid);"""
     case DecimalType() =>
       (c, evPrim, evNull) => code"$evPrim = ${decimalToTimestampCode(c)};"
     case DoubleType =>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

@@ -170,6 +170,17 @@ object DateTimeUtils {
     MILLISECONDS.toMicros(millis)
   }

+  def microsToEpochDays(epochMicros: SQLTimestamp, zoneId: ZoneId): SQLDate = {
+    localDateToDays(microsToInstant(epochMicros).atZone(zoneId).toLocalDate)
+  }
+
+  def epochDaysToMicros(epochDays: SQLDate, zoneId: ZoneId): SQLTimestamp = {
+    val localDate = LocalDate.ofEpochDay(epochDays)
+    val zeroLocalTime = LocalTime.MIDNIGHT
+    val localDateTime = LocalDateTime.of(localDate, zeroLocalTime)
+    instantToMicros(localDateTime.atZone(zoneId).toInstant)
+  }
+
   /**
    * Trim and parse a given UTF8 date string to a corresponding [[Long]] value.
    * The return type is [[Option]] in order to distinguish between 0L and null. The following