[SPARK-28598][SQL] A few date-time manipulation functions do not provide versions supporting Column as input through the DataFrame API
## What changes were proposed in this pull request?

Add the following functions:

```
def add_months(startDate: Column, numMonths: Column): Column
def date_add(start: Column, days: Column): Column
def date_sub(start: Column, days: Column): Column
```

## How was this patch tested?

Unit tests.

Please review https://spark.apache.org/contributing.html before opening a pull request.

Closes #25334 from WeichenXu123/datefunc_impr.

Authored-by: WeichenXu <weichen.xu@databricks.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
f0834d3a7f
commit
4ddad79060
|
@ -2594,8 +2594,21 @@ object functions {
|
|||
* @group datetime_funcs
|
||||
* @since 1.5.0
|
||||
*/
|
||||
def add_months(startDate: Column, numMonths: Int): Column = withExpr {
|
||||
AddMonths(startDate.expr, Literal(numMonths))
|
||||
// Int overload: wraps `numMonths` with `lit` and forwards to the `add_months` overload
// that takes the month count as a Column, so both overloads share one implementation.
def add_months(startDate: Column, numMonths: Int): Column = add_months(startDate, lit(numMonths))
|
||||
|
||||
/**
|
||||
* Returns the date that is `numMonths` after `startDate`.
|
||||
*
|
||||
* @param startDate A date, timestamp or string. If a string, the data must be in a format that
|
||||
* can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
|
||||
* @param numMonths A column of the number of months to add to `startDate`, can be negative to
|
||||
* subtract months
|
||||
* @return A date, or null if `startDate` was a string that could not be cast to a date
|
||||
* @group datetime_funcs
|
||||
* @since 3.0.0
|
||||
*/
|
||||
// Column overload: hands the expressions of both columns straight to AddMonths, so the
// number of months can differ per row instead of being a single literal.
def add_months(startDate: Column, numMonths: Column): Column = withExpr {
|
||||
AddMonths(startDate.expr, numMonths.expr)
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2644,7 +2657,19 @@ object functions {
|
|||
* @group datetime_funcs
|
||||
* @since 1.5.0
|
||||
*/
|
||||
// Int overload, Literal-based form: wraps `days` in a Literal and builds DateAdd directly.
def date_add(start: Column, days: Int): Column = withExpr { DateAdd(start.expr, Literal(days)) }
|
||||
// Int overload, delegating form: wraps `days` with `lit` and forwards to the `date_add`
// overload that takes the day count as a Column.
def date_add(start: Column, days: Int): Column = date_add(start, lit(days))
|
||||
|
||||
/**
|
||||
* Returns the date that is `days` days after `start`
|
||||
*
|
||||
* @param start A date, timestamp or string. If a string, the data must be in a format that
|
||||
* can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
|
||||
* @param days A column of the number of days to add to `start`, can be negative to subtract days
|
||||
* @return A date, or null if `start` was a string that could not be cast to a date
|
||||
* @group datetime_funcs
|
||||
* @since 3.0.0
|
||||
*/
|
||||
// Column overload: builds DateAdd from the expressions of both columns, so the number of
// days can come from another column rather than a constant.
def date_add(start: Column, days: Column): Column = withExpr { DateAdd(start.expr, days.expr) }
|
||||
|
||||
/**
|
||||
* Returns the date that is `days` days before `start`
|
||||
|
@ -2656,7 +2681,20 @@ object functions {
|
|||
* @group datetime_funcs
|
||||
* @since 1.5.0
|
||||
*/
|
||||
// Int overload, Literal-based form: wraps `days` in a Literal and builds DateSub directly.
def date_sub(start: Column, days: Int): Column = withExpr { DateSub(start.expr, Literal(days)) }
|
||||
// Int overload, delegating form: wraps `days` with `lit` and forwards to the `date_sub`
// overload that takes the day count as a Column.
def date_sub(start: Column, days: Int): Column = date_sub(start, lit(days))
|
||||
|
||||
/**
|
||||
* Returns the date that is `days` days before `start`
|
||||
*
|
||||
* @param start A date, timestamp or string. If a string, the data must be in a format that
|
||||
* can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
|
||||
* @param days A column of the number of days to subtract from `start`, can be negative to add
|
||||
* days
|
||||
* @return A date, or null if `start` was a string that could not be cast to a date
|
||||
* @group datetime_funcs
|
||||
* @since 3.0.0
|
||||
*/
|
||||
// Column overload: builds DateSub from the expressions of both columns, so the number of
// days can come from another column rather than a constant.
def date_sub(start: Column, days: Column): Column = withExpr { DateSub(start.expr, days.expr) }
|
||||
|
||||
/**
|
||||
* Returns the number of days from `start` to `end`.
|
||||
|
|
|
@ -239,6 +239,10 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
|
|||
df.select(date_add(col("ss"), 7)),
|
||||
Seq(Row(Date.valueOf("2015-06-08")), Row(Date.valueOf("2015-06-09"))))
|
||||
|
||||
checkAnswer(
|
||||
df.withColumn("x", lit(1)).select(date_add(col("d"), col("x"))),
|
||||
Seq(Row(Date.valueOf("2015-06-02")), Row(Date.valueOf("2015-06-03"))))
|
||||
|
||||
checkAnswer(df.selectExpr("DATE_ADD(null, 1)"), Seq(Row(null), Row(null)))
|
||||
checkAnswer(
|
||||
df.selectExpr("""DATE_ADD(d, 1)"""),
|
||||
|
@ -270,6 +274,10 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
|
|||
checkAnswer(
|
||||
df.select(date_sub(lit(null), 1)).limit(1), Row(null))
|
||||
|
||||
checkAnswer(
|
||||
df.withColumn("x", lit(1)).select(date_sub(col("d"), col("x"))),
|
||||
Seq(Row(Date.valueOf("2015-05-31")), Row(Date.valueOf("2015-06-01"))))
|
||||
|
||||
checkAnswer(df.selectExpr("""DATE_SUB(d, null)"""), Seq(Row(null), Row(null)))
|
||||
checkAnswer(
|
||||
df.selectExpr("""DATE_SUB(d, 1)"""),
|
||||
|
@ -318,6 +326,9 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
|
|||
checkAnswer(
|
||||
df.selectExpr("add_months(d, -1)"),
|
||||
Seq(Row(Date.valueOf("2015-07-31")), Row(Date.valueOf("2015-01-28"))))
|
||||
checkAnswer(
|
||||
df.withColumn("x", lit(1)).select(add_months(col("d"), col("x"))),
|
||||
Seq(Row(Date.valueOf("2015-09-30")), Row(Date.valueOf("2015-03-28"))))
|
||||
}
|
||||
|
||||
test("function months_between") {
|
||||
|
|
Loading…
Reference in a new issue