[SPARK-28598][SQL] A few date-time manipulation functions do not provide versions supporting Column as input through the DataFrame API

## What changes were proposed in this pull request?

Add the following functions:
```
def add_months(startDate: Column, numMonths: Column): Column
def date_add(start: Column, days: Column): Column
def date_sub(start: Column, days: Column): Column
```

## How was this patch tested?

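Unit tests: added cases to `DateFunctionsSuite` that call `date_add`, `date_sub` and `add_months` with a `Column` as the second argument.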

Please review https://spark.apache.org/contributing.html before opening a pull request.

Closes #25334 from WeichenXu123/datefunc_impr.

Authored-by: WeichenXu <weichen.xu@databricks.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
WeichenXu 2019-08-19 11:41:13 +09:00 committed by HyukjinKwon
parent f0834d3a7f
commit 4ddad79060
2 changed files with 53 additions and 4 deletions

View file

@ -2594,8 +2594,21 @@ object functions {
* @group datetime_funcs
* @since 1.5.0
*/
def add_months(startDate: Column, numMonths: Int): Column = withExpr {
AddMonths(startDate.expr, Literal(numMonths))
def add_months(startDate: Column, numMonths: Int): Column = {
  // Wrap the constant month count in a literal column and delegate to the Column-based overload.
  val monthsColumn = lit(numMonths)
  add_months(startDate, monthsColumn)
}
/**
 * Returns the date that is `numMonths` after `startDate`, with the month count taken from a
 * column rather than a constant.
 *
 * @param startDate A date, timestamp or string. If a string, the data must be in a format that
 *                  can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param numMonths A column holding the number of months to add to `startDate`; a negative
 *                  value subtracts months
 * @return A date, or null if `startDate` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 3.0.0
 */
def add_months(startDate: Column, numMonths: Column): Column = withExpr {
  val startExpr = startDate.expr
  val monthsExpr = numMonths.expr
  AddMonths(startExpr, monthsExpr)
}
/**
@ -2644,7 +2657,19 @@ object functions {
* @group datetime_funcs
* @since 1.5.0
*/
def date_add(start: Column, days: Int): Column = withExpr { DateAdd(start.expr, Literal(days)) }
def date_add(start: Column, days: Int): Column = {
  // Wrap the constant day count in a literal column and delegate to the Column-based overload.
  val daysColumn = lit(days)
  date_add(start, daysColumn)
}
/**
 * Returns the date that is `days` days after `start`, with the day count taken from a column
 * rather than a constant.
 *
 * @param start A date, timestamp or string. If a string, the data must be in a format that
 *              can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param days  A column holding the number of days to add to `start`; a negative value
 *              subtracts days
 * @return A date, or null if `start` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 3.0.0
 */
def date_add(start: Column, days: Column): Column = withExpr {
  val startExpr = start.expr
  val daysExpr = days.expr
  DateAdd(startExpr, daysExpr)
}
/**
* Returns the date that is `days` days before `start`
@ -2656,7 +2681,20 @@ object functions {
* @group datetime_funcs
* @since 1.5.0
*/
def date_sub(start: Column, days: Int): Column = withExpr { DateSub(start.expr, Literal(days)) }
def date_sub(start: Column, days: Int): Column = {
  // Wrap the constant day count in a literal column and delegate to the Column-based overload.
  val daysColumn = lit(days)
  date_sub(start, daysColumn)
}
/**
 * Returns the date that is `days` days before `start`, with the day count taken from a column
 * rather than a constant.
 *
 * @param start A date, timestamp or string. If a string, the data must be in a format that
 *              can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param days  A column holding the number of days to subtract from `start`; a negative value
 *              adds days
 * @return A date, or null if `start` was a string that could not be cast to a date
 * @group datetime_funcs
 * @since 3.0.0
 */
def date_sub(start: Column, days: Column): Column = withExpr {
  val startExpr = start.expr
  val daysExpr = days.expr
  DateSub(startExpr, daysExpr)
}
/**
* Returns the number of days from `start` to `end`.

View file

@ -239,6 +239,10 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
df.select(date_add(col("ss"), 7)),
Seq(Row(Date.valueOf("2015-06-08")), Row(Date.valueOf("2015-06-09"))))
checkAnswer(
df.withColumn("x", lit(1)).select(date_add(col("d"), col("x"))),
Seq(Row(Date.valueOf("2015-06-02")), Row(Date.valueOf("2015-06-03"))))
checkAnswer(df.selectExpr("DATE_ADD(null, 1)"), Seq(Row(null), Row(null)))
checkAnswer(
df.selectExpr("""DATE_ADD(d, 1)"""),
@ -270,6 +274,10 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
checkAnswer(
df.select(date_sub(lit(null), 1)).limit(1), Row(null))
checkAnswer(
df.withColumn("x", lit(1)).select(date_sub(col("d"), col("x"))),
Seq(Row(Date.valueOf("2015-05-31")), Row(Date.valueOf("2015-06-01"))))
checkAnswer(df.selectExpr("""DATE_SUB(d, null)"""), Seq(Row(null), Row(null)))
checkAnswer(
df.selectExpr("""DATE_SUB(d, 1)"""),
@ -318,6 +326,9 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
checkAnswer(
df.selectExpr("add_months(d, -1)"),
Seq(Row(Date.valueOf("2015-07-31")), Row(Date.valueOf("2015-01-28"))))
checkAnswer(
df.withColumn("x", lit(1)).select(add_months(col("d"), col("x"))),
Seq(Row(Date.valueOf("2015-09-30")), Row(Date.valueOf("2015-03-28"))))
}
test("function months_between") {