[SPARK-36119][SQL] Add new SQL function to_timestamp_ltz

### What changes were proposed in this pull request?

Add new SQL function `to_timestamp_ltz`
syntax:
```
to_timestamp_ltz(timestamp_str_column[, fmt])
to_timestamp_ltz(timestamp_column)
to_timestamp_ltz(date_column)
```

### Why are the changes needed?

As the result of to_timestamp become consistent with the SQL configuration spark.sql.timestmapType and there is already a SQL function to_timestmap_ntz, we need new function to_timestamp_ltz to construct timestamp with local time zone values.

### Does this PR introduce _any_ user-facing change?

Yes, a new function for constructing timestamp with local time zone values

### How was this patch tested?

Unit test

Closes #33318 from gengliangwang/to_timestamp_ltz.

Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Gengliang Wang <gengliang@apache.org>
(cherry picked from commit 01ddaf3918)
Signed-off-by: Gengliang Wang <gengliang@apache.org>
This commit is contained in:
Gengliang Wang 2021-07-13 17:37:44 +08:00
parent 78c4e3710d
commit 2781bf0721
8 changed files with 154 additions and 5 deletions

View file

@ -542,6 +542,7 @@ object FunctionRegistry {
expression[ToUnixTimestamp]("to_unix_timestamp"),
expression[ToUTCTimestamp]("to_utc_timestamp"),
expression[ParseToTimestampNTZ]("to_timestamp_ntz"),
expression[ParseToTimestampLTZ]("to_timestamp_ltz"),
expression[TruncDate]("trunc"),
expression[TruncTimestamp]("date_trunc"),
expression[UnixTimestamp]("unix_timestamp"),

View file

@ -1079,6 +1079,53 @@ case class ParseToTimestampNTZ(
copy(child = newChild)
}
/**
* Parses a column to a timestamp with local time zone based on the supplied format.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """
_FUNC_(timestamp_str[, fmt]) - Parses the `timestamp_str` expression with the `fmt` expression
to a timestamp with local time zone. Returns null with invalid input. By default, it follows casting rules to
a timestamp if the `fmt` is omitted.
""",
arguments = """
Arguments:
* timestamp_str - A string to be parsed to timestamp with local time zone.
* fmt - Timestamp format pattern to follow. See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a> for valid
date and time format patterns.
""",
examples = """
Examples:
> SELECT _FUNC_('2016-12-31 00:12:00');
2016-12-31 00:12:00
> SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd');
2016-12-31 00:00:00
""",
group = "datetime_funcs",
since = "3.2.0")
// scalastyle:on line.size.limit
case class ParseToTimestampLTZ(
left: Expression,
format: Option[Expression],
child: Expression) extends RuntimeReplaceable {
def this(left: Expression, format: Expression) = {
this(left, Option(format), GetTimestamp(left, format, TimestampType))
}
def this(left: Expression) = this(left, None, Cast(left, TimestampType))
override def flatArguments: Iterator[Any] = Iterator(left, format)
override def exprsReplaced: Seq[Expression] = left +: format.toSeq
override def prettyName: String = "to_timestamp_ltz"
override def dataType: DataType = TimestampType
override protected def withNewChildInternal(newChild: Expression): ParseToTimestampLTZ =
copy(child = newChild)
}
abstract class ToTimestamp
extends BinaryExpression with TimestampFormatterHelper with ExpectsInputTypes {

View file

@ -1,6 +1,6 @@
<!-- Automatically generated by ExpressionsSchemaSuite -->
## Summary
- Number of queries: 358
- Number of queries: 359
- Number of expressions that missing example: 13
- Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window
## Schema of Built-in Functions
@ -208,6 +208,7 @@
| org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct<overlay(Spark SQL, _, 6, -1):string> |
| org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct<to_date(2009-07-30 04:17:52):date> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct<to_timestamp(2016-12-31 00:12:00):timestamp> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZ | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct<to_timestamp_ltz(2016-12-31 00:12:00):timestamp> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZ | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct<to_timestamp_ntz(2016-12-31 00:12:00):timestamp_ntz> |
| org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct<parse_url(http://spark.apache.org/path?query=1, HOST):string> |
| org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,PERCENT_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> |

View file

@ -33,6 +33,10 @@ select to_timestamp_ntz(null), to_timestamp_ntz('2016-12-31 00:12:00'), to_times
select to_timestamp_ntz(to_date(null)), to_timestamp_ntz(to_date('2016-12-31')), to_timestamp_ntz(to_date('2016-12-31', 'yyyy-MM-dd'));
select to_timestamp_ntz(to_timestamp(null)), to_timestamp_ntz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ntz(to_timestamp('2016-12-31', 'yyyy-MM-dd'));
select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd');
select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'));
select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'));
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15');
-- [SPARK-22333]: timeFunctionCall has conflicts with columnReference

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 202
-- Number of queries: 205
-- !query
@ -181,6 +181,30 @@ struct<to_timestamp_ntz(to_timestamp(NULL)):timestamp_ntz,to_timestamp_ntz(to_ti
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
-- !query schema
struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
-- !query output
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
-- !query schema
struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
-- !query output
NULL 2016-12-31 00:00:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
-- !query schema
struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
-- !query output
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
-- !query schema

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 202
-- Number of queries: 205
-- !query
@ -175,6 +175,30 @@ struct<to_timestamp_ntz(to_timestamp(NULL)):timestamp_ntz,to_timestamp_ntz(to_ti
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
-- !query schema
struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
-- !query output
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
-- !query schema
struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
-- !query output
NULL 2016-12-31 00:00:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
-- !query schema
struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
-- !query output
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
-- !query schema

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 202
-- Number of queries: 205
-- !query
@ -175,6 +175,30 @@ struct<to_timestamp_ntz(to_timestamp(NULL)):timestamp_ntz,to_timestamp_ntz(to_ti
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
-- !query schema
struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
-- !query output
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
-- !query schema
struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
-- !query output
NULL 2016-12-31 00:00:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
-- !query schema
struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
-- !query output
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
-- !query schema

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 202
-- Number of queries: 205
-- !query
@ -175,6 +175,30 @@ struct<to_timestamp_ntz(to_timestamp(NULL)):timestamp_ntz,to_timestamp_ntz(to_ti
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
-- !query schema
struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
-- !query output
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
-- !query schema
struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
-- !query output
NULL 2016-12-31 00:00:00 2016-12-31 00:00:00
-- !query
select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
-- !query schema
struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
-- !query output
NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
-- !query schema