From 0c364e607dae344a3eb88443ae87fd1d32819def Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 26 Aug 2021 10:09:18 +0800 Subject: [PATCH] [SPARK-36590][SQL] Convert special timestamp_ntz values in the session time zone In the PR, I propose to use the session time zone ( see the SQL config `spark.sql.session.timeZone`) instead of JVM default time zone while converting of special timestamp_ntz strings such as "today", "tomorrow" and so on. Current implementation is based on the system time zone, and it controverses to other functions/classes that use the session time zone. For example, Spark doesn't respects user's settings: ```sql $ export TZ="Europe/Amsterdam" $ ./bin/spark-sql -S spark-sql> select timestamp_ntz'now'; 2021-08-25 18:12:36.233 spark-sql> set spark.sql.session.timeZone=America/Los_Angeles; spark.sql.session.timeZone America/Los_Angeles spark-sql> select timestamp_ntz'now'; 2021-08-25 18:14:40.547 ``` Yes. For the example above, after the changes: ```sql spark-sql> select timestamp_ntz'now'; 2021-08-25 18:47:46.832 spark-sql> set spark.sql.session.timeZone=America/Los_Angeles; spark.sql.session.timeZone America/Los_Angeles spark-sql> select timestamp_ntz'now'; 2021-08-25 09:48:05.211 ``` By running the affected test suites: ``` $ build/sbt "test:testOnly *DateTimeUtilsSuite" ``` Closes #33838 from MaxGekk/fix-ts_ntz-special-values. Authored-by: Max Gekk Signed-off-by: Wenchen Fan (cherry picked from commit 159ff9fd14f7e0581833428c495c0e2c34f7e320) Signed-off-by: Wenchen Fan --- .../catalyst/optimizer/finishAnalysis.scala | 3 +- .../sql/catalyst/parser/AstBuilder.scala | 28 ++++++++++--------- .../sql/catalyst/util/DateTimeUtils.scala | 13 +++++---- .../SpecialDatetimeValuesSuite.scala | 6 ++-- .../catalyst/util/DateTimeUtilsSuite.scala | 22 ++++++++------- 5 files changed, 38 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index daf4c5e275..802e0b4ef7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -129,8 +129,7 @@ object SpecialDatetimeValues extends Rule[LogicalPlan] { private val conv = Map[DataType, (String, java.time.ZoneId) => Option[Any]]( DateType -> convertSpecialDate, TimestampType -> convertSpecialTimestamp, - TimestampNTZType -> ((s: String, _: java.time.ZoneId) => convertSpecialTimestampNTZ(s)) - ) + TimestampNTZType -> convertSpecialTimestampNTZ) def apply(plan: LogicalPlan): LogicalPlan = { plan.transformAllExpressionsWithPruning(_.containsPattern(CAST)) { case cast @ Cast(e, dt @ (DateType | TimestampType | TimestampNTZType), _, _) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index b447dc3ce4..51ef5df6bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2134,25 +2134,27 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg specialDate.getOrElse(toLiteral(stringToDate, DateType)) // SPARK-36227: Remove TimestampNTZ type support in Spark 3.2 with minimal code changes. case "TIMESTAMP_NTZ" if isTesting => - val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType)) - specialTs.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) + convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) + .map(Literal(_, TimestampNTZType)) + .getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) case "TIMESTAMP_LTZ" if isTesting => constructTimestampLTZLiteral(value) case "TIMESTAMP" => SQLConf.get.timestampType match { case TimestampNTZType => - val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType)) - specialTs.getOrElse { - val containsTimeZonePart = - DateTimeUtils.parseTimestampString(UTF8String.fromString(value))._2.isDefined - // If the input string contains time zone part, return a timestamp with local time - // zone literal. - if (containsTimeZonePart) { - constructTimestampLTZLiteral(value) - } else { - toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType) + convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) + .map(Literal(_, TimestampNTZType)) + .getOrElse { + val containsTimeZonePart = + DateTimeUtils.parseTimestampString(UTF8String.fromString(value))._2.isDefined + // If the input string contains time zone part, return a timestamp with local time + // zone literal. + if (containsTimeZonePart) { + constructTimestampLTZLiteral(value) + } else { + toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType) + } } - } case TimestampType => constructTimestampLTZLiteral(value) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 36d2b9b16b..89b7414528 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1043,7 +1043,7 @@ object DateTimeUtils { * Converts notational shorthands that are converted to ordinary timestamps. * * @param input A string to parse. It can contain trailing or leading whitespaces. - * @param zoneId Zone identifier used to get the current date. + * @param zoneId Zone identifier used to get the current timestamp. * @return Some of microseconds since the epoch if the conversion completed * successfully otherwise None. */ @@ -1063,18 +1063,19 @@ object DateTimeUtils { * Converts notational shorthands that are converted to ordinary timestamps without time zone. * * @param input A string to parse. It can contain trailing or leading whitespaces. + * @param zoneId Zone identifier used to get the current local timestamp. * @return Some of microseconds since the epoch if the conversion completed * successfully otherwise None. */ - def convertSpecialTimestampNTZ(input: String): Option[Long] = { + def convertSpecialTimestampNTZ(input: String, zoneId: ZoneId): Option[Long] = { val localDateTime = extractSpecialValue(input.trim).flatMap { case "epoch" => Some(LocalDateTime.of(1970, 1, 1, 0, 0)) - case "now" => Some(LocalDateTime.now()) - case "today" => Some(LocalDateTime.now().`with`(LocalTime.MIDNIGHT)) + case "now" => Some(LocalDateTime.now(zoneId)) + case "today" => Some(LocalDateTime.now(zoneId).`with`(LocalTime.MIDNIGHT)) case "tomorrow" => - Some(LocalDateTime.now().`with`(LocalTime.MIDNIGHT).plusDays(1)) + Some(LocalDateTime.now(zoneId).`with`(LocalTime.MIDNIGHT).plusDays(1)) case "yesterday" => - Some(LocalDateTime.now().`with`(LocalTime.MIDNIGHT).minusDays(1)) + Some(LocalDateTime.now(zoneId).`with`(LocalTime.MIDNIGHT).minusDays(1)) case _ => None } localDateTime.map(localDateTimeToMicros) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SpecialDatetimeValuesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SpecialDatetimeValuesSuite.scala index e68a751a6e..12dd8c9ae0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SpecialDatetimeValuesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SpecialDatetimeValuesSuite.scala @@ -91,9 +91,9 @@ class SpecialDatetimeValuesSuite extends PlanTest { testSpecialDatetimeValues { zoneId => val expected = Set( LocalDateTime.of(1970, 1, 1, 0, 0), - LocalDateTime.now(), - LocalDateTime.now().`with`(LocalTime.MIDNIGHT).plusDays(1), - LocalDateTime.now().`with`(LocalTime.MIDNIGHT).minusDays(1) + LocalDateTime.now(zoneId), + LocalDateTime.now(zoneId).`with`(LocalTime.MIDNIGHT).plusDays(1), + LocalDateTime.now(zoneId).`with`(LocalTime.MIDNIGHT).minusDays(1) ).map(localDateTimeToMicros) testSpecialTs(TimestampNTZType, expected, zoneId) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 9e61cb978a..a339f1c55c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -793,16 +793,18 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { test("SPARK-35979: special timestamp without time zone values") { val tolerance = TimeUnit.SECONDS.toMicros(30) - assert(convertSpecialTimestampNTZ("Epoch").get === 0) - val now = DateTimeUtils.localDateTimeToMicros(LocalDateTime.now()) - convertSpecialTimestampNTZ("NOW").get should be(now +- tolerance) - val localToday = LocalDateTime.now().`with`(LocalTime.MIDNIGHT) - val yesterday = DateTimeUtils.localDateTimeToMicros(localToday.minusDays(1)) - convertSpecialTimestampNTZ(" Yesterday").get should be(yesterday) - val today = DateTimeUtils.localDateTimeToMicros(localToday) - convertSpecialTimestampNTZ("Today ").get should be(today) - val tomorrow = DateTimeUtils.localDateTimeToMicros(localToday.plusDays(1)) - convertSpecialTimestampNTZ(" tomorrow ").get should be(tomorrow) + testSpecialDatetimeValues { zoneId => + assert(convertSpecialTimestampNTZ("Epoch", zoneId).get === 0) + val now = DateTimeUtils.localDateTimeToMicros(LocalDateTime.now(zoneId)) + convertSpecialTimestampNTZ("NOW", zoneId).get should be(now +- tolerance) + val localToday = LocalDateTime.now(zoneId).`with`(LocalTime.MIDNIGHT) + val yesterday = DateTimeUtils.localDateTimeToMicros(localToday.minusDays(1)) + convertSpecialTimestampNTZ(" Yesterday", zoneId).get should be(yesterday) + val today = DateTimeUtils.localDateTimeToMicros(localToday) + convertSpecialTimestampNTZ("Today ", zoneId).get should be(today) + val tomorrow = DateTimeUtils.localDateTimeToMicros(localToday.plusDays(1)) + convertSpecialTimestampNTZ(" tomorrow ", zoneId).get should be(tomorrow) + } } test("SPARK-28141: special date values") {