From 2781bf07218eae280184dcd1bbe38c68424358e9 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 13 Jul 2021 17:37:44 +0800 Subject: [PATCH] [SPARK-36119][SQL] Add new SQL function to_timestamp_ltz ### What changes were proposed in this pull request? Add new SQL function `to_timestamp_ltz` syntax: ``` to_timestamp_ltz(timestamp_str_column[, fmt]) to_timestamp_ltz(timestamp_column) to_timestamp_ltz(date_column) ``` ### Why are the changes needed? As the result of to_timestamp become consistent with the SQL configuration spark.sql.timestmapType and there is already a SQL function to_timestmap_ntz, we need new function to_timestamp_ltz to construct timestamp with local time zone values. ### Does this PR introduce _any_ user-facing change? Yes, a new function for constructing timestamp with local time zone values ### How was this patch tested? Unit test Closes #33318 from gengliangwang/to_timestamp_ltz. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 01ddaf3918c770dcbfdf3103f80995902f650339) Signed-off-by: Gengliang Wang --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/datetimeExpressions.scala | 47 +++++++++++++++++++ .../sql-functions/sql-expression-schema.md | 3 +- .../resources/sql-tests/inputs/datetime.sql | 4 ++ .../sql-tests/results/ansi/datetime.sql.out | 26 +++++++++- .../sql-tests/results/datetime-legacy.sql.out | 26 +++++++++- .../sql-tests/results/datetime.sql.out | 26 +++++++++- .../results/timestampNTZ/datetime.sql.out | 26 +++++++++- 8 files changed, 154 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 4fd871db75..d518bf3de8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -542,6 +542,7 @@ object FunctionRegistry { expression[ToUnixTimestamp]("to_unix_timestamp"), expression[ToUTCTimestamp]("to_utc_timestamp"), expression[ParseToTimestampNTZ]("to_timestamp_ntz"), + expression[ParseToTimestampLTZ]("to_timestamp_ltz"), expression[TruncDate]("trunc"), expression[TruncTimestamp]("date_trunc"), expression[UnixTimestamp]("unix_timestamp"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 2840b18a1c..bee3ae4917 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1079,6 +1079,53 @@ case class ParseToTimestampNTZ( copy(child = newChild) } +/** + * Parses a column to a timestamp with local time zone based on the supplied format. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(timestamp_str[, fmt]) - Parses the `timestamp_str` expression with the `fmt` expression + to a timestamp with local time zone. Returns null with invalid input. By default, it follows casting rules to + a timestamp if the `fmt` is omitted. + """, + arguments = """ + Arguments: + * timestamp_str - A string to be parsed to timestamp with local time zone. + * fmt - Timestamp format pattern to follow. See Datetime Patterns for valid + date and time format patterns. + """, + examples = """ + Examples: + > SELECT _FUNC_('2016-12-31 00:12:00'); + 2016-12-31 00:12:00 + > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd'); + 2016-12-31 00:00:00 + """, + group = "datetime_funcs", + since = "3.2.0") +// scalastyle:on line.size.limit +case class ParseToTimestampLTZ( + left: Expression, + format: Option[Expression], + child: Expression) extends RuntimeReplaceable { + + def this(left: Expression, format: Expression) = { + this(left, Option(format), GetTimestamp(left, format, TimestampType)) + } + + def this(left: Expression) = this(left, None, Cast(left, TimestampType)) + + override def flatArguments: Iterator[Any] = Iterator(left, format) + override def exprsReplaced: Seq[Expression] = left +: format.toSeq + + override def prettyName: String = "to_timestamp_ltz" + override def dataType: DataType = TimestampType + + override protected def withNewChildInternal(newChild: Expression): ParseToTimestampLTZ = + copy(child = newChild) +} + abstract class ToTimestamp extends BinaryExpression with TimestampFormatterHelper with ExpectsInputTypes { diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 76fc1b7509..f71f3a8639 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 358 + - Number of queries: 359 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -208,6 +208,7 @@ | org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct | | org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct | | org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct | +| org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZ | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct | | org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZ | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct | | org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct | | org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index c4a89a6657..db30c22f51 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -33,6 +33,10 @@ select to_timestamp_ntz(null), to_timestamp_ntz('2016-12-31 00:12:00'), to_times select to_timestamp_ntz(to_date(null)), to_timestamp_ntz(to_date('2016-12-31')), to_timestamp_ntz(to_date('2016-12-31', 'yyyy-MM-dd')); select to_timestamp_ntz(to_timestamp(null)), to_timestamp_ntz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ntz(to_timestamp('2016-12-31', 'yyyy-MM-dd')); +select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd'); +select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd')); +select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd')); + select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15'); -- [SPARK-22333]: timeFunctionCall has conflicts with columnReference diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 195bc69c8f..761511d309 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 202 +-- Number of queries: 205 -- !query @@ -181,6 +181,30 @@ struct +-- !query output +NULL 2016-12-31 00:12:00 2016-12-31 00:00:00 + + +-- !query +select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd')) +-- !query schema +struct +-- !query output +NULL 2016-12-31 00:00:00 2016-12-31 00:00:00 + + +-- !query +select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd')) +-- !query schema +struct +-- !query output +NULL 2016-12-31 00:12:00 2016-12-31 00:00:00 + + -- !query select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15') -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 965f179b55..ebeb127184 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 202 +-- Number of queries: 205 -- !query @@ -175,6 +175,30 @@ struct +-- !query output +NULL 2016-12-31 00:12:00 2016-12-31 00:00:00 + + +-- !query +select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd')) +-- !query schema +struct +-- !query output +NULL 2016-12-31 00:00:00 2016-12-31 00:00:00 + + +-- !query +select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd')) +-- !query schema +struct +-- !query output +NULL 2016-12-31 00:12:00 2016-12-31 00:00:00 + + -- !query select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15') -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index fdc9c03f59..e95cade7e2 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 202 +-- Number of queries: 205 -- !query @@ -175,6 +175,30 @@ struct +-- !query output +NULL 2016-12-31 00:12:00 2016-12-31 00:00:00 + + +-- !query +select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd')) +-- !query schema +struct +-- !query output +NULL 2016-12-31 00:00:00 2016-12-31 00:00:00 + + +-- !query +select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd')) +-- !query schema +struct +-- !query output +NULL 2016-12-31 00:12:00 2016-12-31 00:00:00 + + -- !query select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15') -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out index 4b148b0b73..ec384a51eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 202 +-- Number of queries: 205 -- !query @@ -175,6 +175,30 @@ struct +-- !query output +NULL 2016-12-31 00:12:00 2016-12-31 00:00:00 + + +-- !query +select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd')) +-- !query schema +struct +-- !query output +NULL 2016-12-31 00:00:00 2016-12-31 00:00:00 + + +-- !query +select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd')) +-- !query schema +struct +-- !query output +NULL 2016-12-31 00:12:00 2016-12-31 00:00:00 + + -- !query select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15') -- !query schema