[SPARK-36046][SQL] Support new functions make_timestamp_ntz and make_timestamp_ltz

### What changes were proposed in this pull request?

Support new functions make_timestamp_ntz and make_timestamp_ltz
Syntax:
* `make_timestamp_ntz(year, month, day, hour, min, sec)`: Create local date-time from year, month, day, hour, min, sec fields
* `make_timestamp_ltz(year, month, day, hour, min, sec[, timezone])`: Create current timestamp with local time zone from year, month, day, hour, min, sec and timezone fields

### Why are the changes needed?

Because the result of `make_timestamp` is now consistent with the SQL configuration `spark.sql.timestampType`, we need these two new functions to construct timestamp literals of an explicit type. They align with the functions [`make_timestamp` and `make_timestamptz`](https://www.postgresql.org/docs/9.4/functions-datetime.html) in PostgreSQL.

### Does this PR introduce _any_ user-facing change?

Yes, two new datetime functions: make_timestamp_ntz and make_timestamp_ltz.

### How was this patch tested?

End-to-end tests.

Closes #33299 from gengliangwang/make_timestamp_ntz_ltz.

Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
(cherry picked from commit 92bf83ed0a)
Signed-off-by: Max Gekk <max.gekk@gmail.com>
This commit is contained in:
Gengliang Wang 2021-07-12 22:44:26 +03:00 committed by Max Gekk
parent 12aecb4330
commit fba3e90863
8 changed files with 374 additions and 6 deletions

View file

@ -552,6 +552,8 @@ object FunctionRegistry {
expression[TimeWindow]("window"),
expression[MakeDate]("make_date"),
expression[MakeTimestamp]("make_timestamp"),
expression[MakeTimestampNTZ]("make_timestamp_ntz", true),
expression[MakeTimestampLTZ]("make_timestamp_ltz", true),
expression[MakeInterval]("make_interval"),
expression[MakeDTInterval]("make_dt_interval"),
expression[MakeYMInterval]("make_ym_interval"),

View file

@ -2270,6 +2270,128 @@ case class MakeDate(
copy(year = newFirst, month = newSecond, day = newThird)
}
/**
 * SQL function `make_timestamp_ntz(year, month, day, hour, min, sec)`.
 *
 * This expression is a [[RuntimeReplaceable]] wrapper: the work is delegated to `child`, which
 * the six-argument constructor builds as a [[MakeTimestamp]] whose `dataType` is pinned to
 * `TimestampNTZType`. Only the six-field form has a public constructor, so a seventh
 * (time zone) argument is not accepted.
 *
 * NOTE(review): `failOnError` is captured from `SQLConf.get.ansiEnabled` but is not forwarded
 * to `MakeTimestamp` in this class; presumably `MakeTimestamp` resolves the same setting
 * through its own default - confirm.
 *
 * @param year        the year field
 * @param month       the month-of-year field
 * @param day         the day-of-month field
 * @param hour        the hour-of-day field
 * @param min         the minute-of-hour field
 * @param sec         the second-of-minute field, including its fraction
 * @param failOnError ANSI flag captured when the expression is created
 * @param child       the replacement expression that produces the value
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(year, month, day, hour, min, sec) - Create local date-time from year, month, day, hour, min, sec fields. ",
  arguments = """
Arguments:
* year - the year to represent, from 1 to 9999
* month - the month-of-year to represent, from 1 (January) to 12 (December)
* day - the day-of-month to represent, from 1 to 31
* hour - the hour-of-day to represent, from 0 to 23
* min - the minute-of-hour to represent, from 0 to 59
* sec - the second-of-minute and its micro-fraction to represent, from
0 to 60. If the sec argument equals to 60, the seconds field is set
to 0 and 1 minute is added to the final timestamp.
""",
  examples = """
Examples:
> SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887);
2014-12-28 06:30:45.887
> SELECT _FUNC_(2019, 6, 30, 23, 59, 60);
2019-07-01 00:00:00
> SELECT _FUNC_(null, 7, 22, 15, 30, 0);
NULL
""",
  group = "datetime_funcs",
  since = "3.2.0")
// scalastyle:on line.size.limit
case class MakeTimestampNTZ(
    year: Expression,
    month: Expression,
    day: Expression,
    hour: Expression,
    min: Expression,
    sec: Expression,
    failOnError: Boolean = SQLConf.get.ansiEnabled,
    child: Expression) extends RuntimeReplaceable {

  /** Six-field form: wires up the `TimestampNTZType`-producing replacement. */
  def this(
      year: Expression,
      month: Expression,
      day: Expression,
      hour: Expression,
      min: Expression,
      sec: Expression) =
    this(
      year,
      month,
      day,
      hour,
      min,
      sec,
      failOnError = SQLConf.get.ansiEnabled,
      child = MakeTimestamp(year, month, day, hour, min, sec, dataType = TimestampNTZType))

  /** The user-supplied arguments, in call order. */
  override def exprsReplaced: Seq[Expression] =
    year :: month :: day :: hour :: min :: sec :: Nil

  override protected def withNewChildInternal(newChild: Expression): Expression =
    copy(child = newChild)
}
/**
 * SQL function `make_timestamp_ltz(year, month, day, hour, min, sec[, timezone])`.
 *
 * This expression is a [[RuntimeReplaceable]] wrapper: the work is delegated to `child`, which
 * the auxiliary constructors build as a [[MakeTimestamp]] whose `dataType` is pinned to
 * `TimestampType`. The six-argument form passes no time zone; the seven-argument form
 * forwards the given `timezone` expression to `MakeTimestamp`.
 *
 * NOTE(review): `failOnError` is captured from `SQLConf.get.ansiEnabled` but is not forwarded
 * to `MakeTimestamp` in this class; presumably `MakeTimestamp` resolves the same setting
 * through its own default - confirm.
 *
 * @param year        the year field
 * @param month       the month-of-year field
 * @param day         the day-of-month field
 * @param hour        the hour-of-day field
 * @param min         the minute-of-hour field
 * @param sec         the second-of-minute field, including its fraction
 * @param timezone    optional time zone identifier expression
 * @param failOnError ANSI flag captured when the expression is created
 * @param child       the replacement expression that produces the value
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Create the current timestamp with local time zone from year, month, day, hour, min, sec and timezone fields. ",
  arguments = """
Arguments:
* year - the year to represent, from 1 to 9999
* month - the month-of-year to represent, from 1 (January) to 12 (December)
* day - the day-of-month to represent, from 1 to 31
* hour - the hour-of-day to represent, from 0 to 23
* min - the minute-of-hour to represent, from 0 to 59
* sec - the second-of-minute and its micro-fraction to represent, from
0 to 60. If the sec argument equals to 60, the seconds field is set
to 0 and 1 minute is added to the final timestamp.
* timezone - the time zone identifier. For example, CET, UTC and etc.
""",
  examples = """
Examples:
> SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887);
2014-12-28 06:30:45.887
> SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887, 'CET');
2014-12-27 21:30:45.887
> SELECT _FUNC_(2019, 6, 30, 23, 59, 60);
2019-07-01 00:00:00
> SELECT _FUNC_(2019, 13, 1, 10, 11, 12, 'PST');
NULL
> SELECT _FUNC_(null, 7, 22, 15, 30, 0);
NULL
""",
  group = "datetime_funcs",
  since = "3.2.0")
// scalastyle:on line.size.limit
case class MakeTimestampLTZ(
    year: Expression,
    month: Expression,
    day: Expression,
    hour: Expression,
    min: Expression,
    sec: Expression,
    timezone: Option[Expression],
    failOnError: Boolean = SQLConf.get.ansiEnabled,
    child: Expression) extends RuntimeReplaceable {

  /** Six-field form: no explicit time zone is passed to the replacement. */
  def this(
      year: Expression,
      month: Expression,
      day: Expression,
      hour: Expression,
      min: Expression,
      sec: Expression) = {
    this(year, month, day, hour, min, sec, None, failOnError = SQLConf.get.ansiEnabled,
      MakeTimestamp(year, month, day, hour, min, sec, dataType = TimestampType))
  }

  /** Seven-field form: `timezone` is forwarded to the replacement expression. */
  def this(
      year: Expression,
      month: Expression,
      day: Expression,
      hour: Expression,
      min: Expression,
      sec: Expression,
      timezone: Expression) = {
    this(year, month, day, hour, min, sec, Some(timezone), failOnError = SQLConf.get.ansiEnabled,
      MakeTimestamp(year, month, day, hour, min, sec, Some(timezone), dataType = TimestampType))
  }

  // Every user-supplied argument must be listed here, including the optional time zone.
  // Leaving `timezone` out makes the seven-argument call render exactly like the
  // six-argument one, so calls that differ only in time zone share one name.
  override def exprsReplaced: Seq[Expression] =
    Seq(year, month, day, hour, min, sec) ++ timezone

  override protected def withNewChildInternal(newChild: Expression): Expression =
    copy(child = newChild)
}
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Create timestamp from year, month, day, hour, min, sec and timezone fields. " +

View file

@ -1,6 +1,6 @@
<!-- Automatically generated by ExpressionsSchemaSuite -->
## Summary
- Number of queries: 356
- Number of queries: 358
- Number of expressions that missing example: 13
- Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window
## Schema of Built-in Functions
@ -173,6 +173,8 @@
| org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct<make_date(2013, 7, 15):date> |
| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, 1.001001):interval> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp(2014, 12, 28, 6, 30, 45.887):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestampLTZ | make_timestamp_ltz | SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestampNTZ | make_timestamp_ntz | SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887):timestamp_ntz> |
| org.apache.spark.sql.catalyst.expressions.MakeYMInterval | make_ym_interval | SELECT make_ym_interval(1, 2) | struct<make_ym_interval(1, 2):interval year to month> |
| org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct<map_concat(map(1, a, 2, b), map(3, c)):map<int,string>> |
| org.apache.spark.sql.catalyst.expressions.MapEntries | map_entries | SELECT map_entries(map(1, 'a', 2, 'b')) | struct<map_entries(map(1, a, 2, b)):array<struct<key:int,value:string>>> |
@ -361,4 +363,4 @@
| org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') | struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |

View file

@ -260,4 +260,16 @@ select to_timestamp_ntz('2021-06-25 10:11:12') - interval '20 15:40:32.99899999'
-- timestamp numeric fields constructor
SELECT make_timestamp(2021, 07, 11, 6, 30, 45.678);
-- same fields with an explicit time zone id as the optional 7th argument
SELECT make_timestamp(2021, 07, 11, 6, 30, 45.678, 'CET');
-- invalid input: seconds of 60 must have a zero fraction, so this yields an error or NULL
SELECT make_timestamp(2021, 07, 11, 6, 30, 60.007);

-- TimestampNTZ numeric fields constructor
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678);
-- make_timestamp_ntz should not accept time zone input
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678, 'CET');
-- invalid input: seconds of 60 must have a zero fraction, so this yields an error or NULL
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007);

-- TimestampLTZ numeric fields constructor
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678);
-- unlike make_timestamp_ntz, the optional 7th time zone argument is accepted here
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678, 'CET');
-- invalid input: seconds of 60 must have a zero fraction, so this yields an error or NULL
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007);

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 195
-- Number of queries: 202
-- !query
@ -1652,6 +1652,14 @@ struct<make_timestamp(2021, 7, 11, 6, 30, 45.678):timestamp>
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<make_timestamp(2021, 7, 11, 6, 30, 45.678, CET):timestamp>
-- !query output
2021-07-10 21:30:45.678
-- !query
SELECT make_timestamp(2021, 07, 11, 6, 30, 60.007)
-- !query schema
@ -1659,3 +1667,54 @@ struct<>
-- !query output
java.time.DateTimeException
The fraction of sec must be zero. Valid range is [0, 60].
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678)
-- !query schema
struct<make_timestamp_ntz(2021, 7, 11, 6, 30, 45.678):timestamp_ntz>
-- !query output
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Invalid number of arguments for function make_timestamp_ntz. Expected: 6; Found: 7; line 1 pos 7
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<>
-- !query output
java.time.DateTimeException
The fraction of sec must be zero. Valid range is [0, 60].
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678)
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 45.678):timestamp>
-- !query output
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 45.678):timestamp>
-- !query output
2021-07-10 21:30:45.678
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<>
-- !query output
java.time.DateTimeException
The fraction of sec must be zero. Valid range is [0, 60].

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 195
-- Number of queries: 202
-- !query
@ -1594,9 +1594,66 @@ struct<make_timestamp(2021, 7, 11, 6, 30, 45.678):timestamp>
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<make_timestamp(2021, 7, 11, 6, 30, 45.678, CET):timestamp>
-- !query output
2021-07-10 21:30:45.678
-- !query
SELECT make_timestamp(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<make_timestamp(2021, 7, 11, 6, 30, 60.007):timestamp>
-- !query output
NULL
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678)
-- !query schema
struct<make_timestamp_ntz(2021, 7, 11, 6, 30, 45.678):timestamp_ntz>
-- !query output
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Invalid number of arguments for function make_timestamp_ntz. Expected: 6; Found: 7; line 1 pos 7
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<make_timestamp_ntz(2021, 7, 11, 6, 30, 60.007):timestamp_ntz>
-- !query output
NULL
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678)
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 45.678):timestamp>
-- !query output
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 45.678):timestamp>
-- !query output
2021-07-10 21:30:45.678
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 60.007):timestamp>
-- !query output
NULL

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 195
-- Number of queries: 202
-- !query
@ -1602,9 +1602,66 @@ struct<make_timestamp(2021, 7, 11, 6, 30, 45.678):timestamp>
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<make_timestamp(2021, 7, 11, 6, 30, 45.678, CET):timestamp>
-- !query output
2021-07-10 21:30:45.678
-- !query
SELECT make_timestamp(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<make_timestamp(2021, 7, 11, 6, 30, 60.007):timestamp>
-- !query output
NULL
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678)
-- !query schema
struct<make_timestamp_ntz(2021, 7, 11, 6, 30, 45.678):timestamp_ntz>
-- !query output
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Invalid number of arguments for function make_timestamp_ntz. Expected: 6; Found: 7; line 1 pos 7
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<make_timestamp_ntz(2021, 7, 11, 6, 30, 60.007):timestamp_ntz>
-- !query output
NULL
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678)
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 45.678):timestamp>
-- !query output
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 45.678):timestamp>
-- !query output
2021-07-10 21:30:45.678
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 60.007):timestamp>
-- !query output
NULL

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 195
-- Number of queries: 202
-- !query
@ -1603,9 +1603,66 @@ struct<make_timestamp(2021, 7, 11, 6, 30, 45.678):timestamp_ntz>
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<make_timestamp(2021, 7, 11, 6, 30, 45.678, CET):timestamp_ntz>
-- !query output
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<make_timestamp(2021, 7, 11, 6, 30, 60.007):timestamp_ntz>
-- !query output
NULL
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678)
-- !query schema
struct<make_timestamp_ntz(2021, 7, 11, 6, 30, 45.678):timestamp_ntz>
-- !query output
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Invalid number of arguments for function make_timestamp_ntz. Expected: 6; Found: 7; line 1 pos 7
-- !query
SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<make_timestamp_ntz(2021, 7, 11, 6, 30, 60.007):timestamp_ntz>
-- !query output
NULL
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678)
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 45.678):timestamp>
-- !query output
2021-07-11 06:30:45.678
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678, 'CET')
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 45.678):timestamp>
-- !query output
2021-07-10 21:30:45.678
-- !query
SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007)
-- !query schema
struct<make_timestamp_ltz(2021, 7, 11, 6, 30, 60.007):timestamp>
-- !query output
NULL