From 4ddad7906098ccde8d918f42afe856d9fe23b563 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Mon, 19 Aug 2019 11:41:13 +0900 Subject: [PATCH] [SPARK-28598][SQL] A few date-time manipulation functions do not provide versions supporting Column as input through the DataFrame API ## What changes were proposed in this pull request? Add the following functions: ``` def add_months(startDate: Column, numMonths: Column): Column def date_add(start: Column, days: Column): Column def date_sub(start: Column, days: Column): Column ``` ## How was this patch tested? Unit tests (added to `DateFunctionsSuite`). Please review https://spark.apache.org/contributing.html before opening a pull request. Closes #25334 from WeichenXu123/datefunc_impr. Authored-by: WeichenXu Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/functions.scala | 46 +++++++++++++++++-- .../apache/spark/sql/DateFunctionsSuite.scala | 11 +++++ 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index afafde114a..6b8127bab1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2594,8 +2594,21 @@ object functions { * @group datetime_funcs * @since 1.5.0 */ - def add_months(startDate: Column, numMonths: Int): Column = withExpr { - AddMonths(startDate.expr, Literal(numMonths)) + def add_months(startDate: Column, numMonths: Int): Column = add_months(startDate, lit(numMonths)) + + /** + * Returns the date that is `numMonths` after `startDate`. + * + * @param startDate A date, timestamp or string. 
If a string, the data must be in a format that + * can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` + * @param numMonths A column of the number of months to add to `startDate`, can be negative to + * subtract months + * @return A date, or null if `startDate` was a string that could not be cast to a date + * @group datetime_funcs + * @since 3.0.0 + */ + def add_months(startDate: Column, numMonths: Column): Column = withExpr { + AddMonths(startDate.expr, numMonths.expr) } /** @@ -2644,7 +2657,19 @@ object functions { * @group datetime_funcs * @since 1.5.0 */ - def date_add(start: Column, days: Int): Column = withExpr { DateAdd(start.expr, Literal(days)) } + def date_add(start: Column, days: Int): Column = date_add(start, lit(days)) + + /** + * Returns the date that is `days` days after `start` + * + * @param start A date, timestamp or string. If a string, the data must be in a format that + * can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` + * @param days A column of the number of days to add to `start`, can be negative to subtract days + * @return A date, or null if `start` was a string that could not be cast to a date + * @group datetime_funcs + * @since 3.0.0 + */ + def date_add(start: Column, days: Column): Column = withExpr { DateAdd(start.expr, days.expr) } /** * Returns the date that is `days` days before `start` @@ -2656,7 +2681,20 @@ object functions { * @group datetime_funcs * @since 1.5.0 */ - def date_sub(start: Column, days: Int): Column = withExpr { DateSub(start.expr, Literal(days)) } + def date_sub(start: Column, days: Int): Column = date_sub(start, lit(days)) + + /** + * Returns the date that is `days` days before `start` + * + * @param start A date, timestamp or string. 
If a string, the data must be in a format that + * can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` + * @param days A column of the number of days to subtract from `start`, can be negative to add + * days + * @return A date, or null if `start` was a string that could not be cast to a date + * @group datetime_funcs + * @since 3.0.0 + */ + def date_sub(start: Column, days: Column): Column = withExpr { DateSub(start.expr, days.expr) } /** * Returns the number of days from `start` to `end`. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 69f17f5e9c..a92c4177da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -239,6 +239,10 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext { df.select(date_add(col("ss"), 7)), Seq(Row(Date.valueOf("2015-06-08")), Row(Date.valueOf("2015-06-09")))) + checkAnswer( + df.withColumn("x", lit(1)).select(date_add(col("d"), col("x"))), + Seq(Row(Date.valueOf("2015-06-02")), Row(Date.valueOf("2015-06-03")))) + checkAnswer(df.selectExpr("DATE_ADD(null, 1)"), Seq(Row(null), Row(null))) checkAnswer( df.selectExpr("""DATE_ADD(d, 1)"""), @@ -270,6 +274,10 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext { checkAnswer( df.select(date_sub(lit(null), 1)).limit(1), Row(null)) + checkAnswer( + df.withColumn("x", lit(1)).select(date_sub(col("d"), col("x"))), + Seq(Row(Date.valueOf("2015-05-31")), Row(Date.valueOf("2015-06-01")))) + checkAnswer(df.selectExpr("""DATE_SUB(d, null)"""), Seq(Row(null), Row(null))) checkAnswer( df.selectExpr("""DATE_SUB(d, 1)"""), @@ -318,6 +326,9 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext { checkAnswer( df.selectExpr("add_months(d, -1)"), Seq(Row(Date.valueOf("2015-07-31")), Row(Date.valueOf("2015-01-28")))) + checkAnswer( 
+ df.withColumn("x", lit(1)).select(add_months(col("d"), col("x"))), + Seq(Row(Date.valueOf("2015-09-30")), Row(Date.valueOf("2015-03-28")))) } test("function months_between") {