[SPARK-33794][SQL] NextDay expression throws runtime IllegalArgumentException when receiving invalid input under ANSI mode

### What changes were proposed in this pull request?

Instead of returning NULL, the `next_day` function now throws a runtime `IllegalArgumentException` when ANSI mode is enabled and the `dayOfWeek` parameter receives an invalid input.

### Why are the changes needed?

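To make `next_day` consistent with ANSI mode, under which invalid input should fail with an exception instead of silently producing NULL.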

### Does this PR introduce _any_ user-facing change?

Yes.
When `spark.sql.ansi.enabled` = true, the `next_day` function throws `IllegalArgumentException` if the `dayOfWeek` parameter is not a valid day of week.
When `spark.sql.ansi.enabled` = false, the behaviour is the same as before: the function returns NULL for an invalid `dayOfWeek`.

### How was this patch tested?

ANSI mode is tested with existing tests.
End-to-end tests have been added.

Closes #30807 from chongguang/SPARK-33794.

Authored-by: Chongguang LIU <chongguang.liu@laposte.fr>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Chongguang LIU 2021-01-05 05:20:16 +00:00 committed by Wenchen Fan
parent bb6d6b5602
commit 976e97a80d
9 changed files with 220 additions and 35 deletions

View file

@ -156,6 +156,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql.
- `make_date`: This function should fail with an exception if the result date is invalid.
- `make_timestamp`: This function should fail with an exception if the result timestamp is invalid.
- `make_interval`: This function should fail with an exception if the result interval is invalid.
- `next_day`: This function throws `IllegalArgumentException` if input is not a valid day of week.
### SQL Operators

View file

@ -1162,7 +1162,12 @@ case class LastDay(startDate: Expression)
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(start_date, day_of_week) - Returns the first date which is later than `start_date` and named as indicated.",
usage =
"""_FUNC_(start_date, day_of_week) - Returns the first date which is later than `start_date` and named as indicated.
The function returns NULL if at least one of the input parameters is NULL.
When both of the input parameters are not NULL and day_of_week is an invalid input,
the function throws IllegalArgumentException if `spark.sql.ansi.enabled` is set to true, otherwise NULL.
""",
examples = """
Examples:
> SELECT _FUNC_('2015-01-14', 'TU');
@ -1171,52 +1176,73 @@ case class LastDay(startDate: Expression)
group = "datetime_funcs",
since = "1.5.0")
// scalastyle:on line.size.limit
case class NextDay(startDate: Expression, dayOfWeek: Expression)
case class NextDay(
startDate: Expression,
dayOfWeek: Expression,
failOnError: Boolean = SQLConf.get.ansiEnabled)
extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {
override def left: Expression = startDate
override def right: Expression = dayOfWeek
def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled)
override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType)
override def dataType: DataType = DateType
override def nullable: Boolean = true
override def nullSafeEval(start: Any, dayOfW: Any): Any = {
val dow = DateTimeUtils.getDayOfWeekFromString(dayOfW.asInstanceOf[UTF8String])
if (dow == -1) {
null
} else {
try {
val dow = DateTimeUtils.getDayOfWeekFromString(dayOfW.asInstanceOf[UTF8String])
val sd = start.asInstanceOf[Int]
DateTimeUtils.getNextDateForDayOfWeek(sd, dow)
} catch {
case _: IllegalArgumentException if !failOnError => null
}
}
// Class name of the DateTimeUtils object with the trailing `$` removed, so that the
// generated code can reference its methods by name.
private def dateTimeUtilClass: String = DateTimeUtils.getClass.getName.stripSuffix("$")
/**
 * Builds the generated-code snippet that parses the day-of-week string at runtime and
 * assigns the next matching date to `ev.value`.
 *
 * When `failOnError` is true the two helper calls are emitted unguarded, so an exception
 * raised by them propagates to the caller. Otherwise the calls are wrapped in a try/catch
 * that sets `ev.isNull` to true when an `IllegalArgumentException` is thrown.
 *
 * @param ev            expression code whose `value` / `isNull` terms are assigned
 * @param dayOfWeekTerm name of the local `int` variable that receives the parsed day of week
 * @param sd            term holding the start date passed to `getNextDateForDayOfWeek`
 * @param dowS          term holding the day-of-week string passed to `getDayOfWeekFromString`
 * @return the code snippet as a string
 */
private def nextDayGenCode(
ev: ExprCode,
dayOfWeekTerm: String,
sd: String,
dowS: String): String = {
if (failOnError) {
// ANSI behaviour: no guard, let the parsing error surface at runtime.
s"""
|int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS);
|${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm);
|""".stripMargin
} else {
// Non-ANSI behaviour: an invalid day of week yields a null result instead of an error.
s"""
|try {
|  int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS);
|  ${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm);
|} catch (IllegalArgumentException e) {
|  ${ev.isNull} = true;
|}
|""".stripMargin
}
}
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
nullSafeCodeGen(ctx, ev, (sd, dowS) => {
val dateTimeUtilClass = DateTimeUtils.getClass.getName.stripSuffix("$")
val dayOfWeekTerm = ctx.freshName("dayOfWeek")
if (dayOfWeek.foldable) {
val input = dayOfWeek.eval().asInstanceOf[UTF8String]
if ((input eq null) || DateTimeUtils.getDayOfWeekFromString(input) == -1) {
s"""
|${ev.isNull} = true;
""".stripMargin
if (input eq null) {
s"""${ev.isNull} = true;"""
} else {
val dayOfWeekValue = DateTimeUtils.getDayOfWeekFromString(input)
s"""
|${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekValue);
""".stripMargin
try {
val dayOfWeekValue = DateTimeUtils.getDayOfWeekFromString(input)
s"${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekValue);"
} catch {
case _: IllegalArgumentException => nextDayGenCode(ev, dayOfWeekTerm, sd, dowS)
}
}
} else {
s"""
|int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS);
|if ($dayOfWeekTerm == -1) {
| ${ev.isNull} = true;
|} else {
| ${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm);
|}
""".stripMargin
nextDayGenCode(ev, dayOfWeekTerm, sd, dowS)
}
})
}

View file

@ -670,9 +670,10 @@ object DateTimeUtils {
private val FRIDAY = 1
private val SATURDAY = 2
/*
/**
* Returns day of week from String. Starting from Thursday, marked as 0.
* (Because 1970-01-01 is Thursday).
* @throws IllegalArgumentException if the input is not a valid day of week.
*/
def getDayOfWeekFromString(string: UTF8String): Int = {
val dowString = string.toString.toUpperCase(Locale.ROOT)
@ -684,7 +685,8 @@ object DateTimeUtils {
case "TH" | "THU" | "THURSDAY" => THURSDAY
case "FR" | "FRI" | "FRIDAY" => FRIDAY
case "SA" | "SAT" | "SATURDAY" => SATURDAY
case _ => -1
case _ =>
throw new IllegalArgumentException(s"""Illegal input for day of week: $string""")
}
}

View file

@ -640,13 +640,33 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
testNextDay("2015-07-23", "Fri", "2015-07-24")
testNextDay("2015-07-23", "fr", "2015-07-24")
checkEvaluation(NextDay(Literal(Date.valueOf("2015-07-23")), Literal("xx")), null)
checkEvaluation(NextDay(Literal.create(null, DateType), Literal("xx")), null)
checkEvaluation(
NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)), null)
// Test escaping of dayOfWeek
GenerateUnsafeProjection.generate(
NextDay(Literal(Date.valueOf("2015-07-23")), Literal("\"quote")) :: Nil)
Seq(true, false).foreach { ansiEnabled =>
withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) {
var expr: Expression = NextDay(Literal(Date.valueOf("2015-07-23")), Literal("xx"))
if (ansiEnabled) {
val errMsg = "Illegal input for day of week: xx"
checkExceptionInExpression[Exception](expr, errMsg)
} else {
checkEvaluation(expr, null)
}
expr = NextDay(Literal.create(null, DateType), Literal("xx"))
checkEvaluation(expr, null)
expr = NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType))
checkEvaluation(expr, null)
// Test escaping of dayOfWeek
expr = NextDay(Literal(Date.valueOf("2015-07-23")), Literal("\"quote"))
GenerateUnsafeProjection.generate(expr :: Nil)
if (ansiEnabled) {
val errMsg = """Illegal input for day of week: "quote"""
checkExceptionInExpression[Exception](expr, errMsg)
} else {
checkEvaluation(expr, null)
}
}
}
}
private def testTruncDate(input: Date, fmt: String, expected: Date): Unit = {

View file

@ -675,4 +675,11 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
assert(toDate("tomorrow CET ", zoneId).get === today + 1)
}
}
test("parsing day of week") {
  // Shorthand for parsing a plain String through the UTF8String-based API.
  def parse(name: String): Int = getDayOfWeekFromString(UTF8String.fromString(name))

  // Valid names map to an offset counted from Thursday, which is marked as 0.
  assert(parse("THU") == 0)
  assert(parse("MONDAY") == 4)

  // Names that are not a day of week must be rejected with an exception.
  Seq("xx", "\"quote").foreach { invalid =>
    intercept[IllegalArgumentException](parse(invalid))
  }
}
}

View file

@ -172,3 +172,10 @@ select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS");
select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS");
select cast("Unparseable" as timestamp);
select cast("Unparseable" as date);
-- next_day
select next_day("2015-07-23", "Mon");
select next_day("2015-07-23", "xx");
select next_day("xx", "Mon");
select next_day(null, "Mon");
select next_day(null, "xx");

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 123
-- Number of queries: 128
-- !query
@ -1069,3 +1069,45 @@ struct<>
-- !query output
java.time.DateTimeException
Cannot cast Unparseable to DateType.
-- !query
select next_day("2015-07-23", "Mon")
-- !query schema
struct<next_day(CAST(2015-07-23 AS DATE), Mon):date>
-- !query output
2015-07-27
-- !query
select next_day("2015-07-23", "xx")
-- !query schema
struct<>
-- !query output
java.lang.IllegalArgumentException
Illegal input for day of week: xx
-- !query
select next_day("xx", "Mon")
-- !query schema
struct<>
-- !query output
java.time.DateTimeException
Cannot cast xx to DateType.
-- !query
select next_day(null, "Mon")
-- !query schema
struct<next_day(CAST(NULL AS DATE), Mon):date>
-- !query output
NULL
-- !query
select next_day(null, "xx")
-- !query schema
struct<next_day(CAST(NULL AS DATE), xx):date>
-- !query output
NULL

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 123
-- Number of queries: 128
-- !query
@ -1021,3 +1021,43 @@ select cast("Unparseable" as date)
struct<CAST(Unparseable AS DATE):date>
-- !query output
NULL
-- !query
select next_day("2015-07-23", "Mon")
-- !query schema
struct<next_day(CAST(2015-07-23 AS DATE), Mon):date>
-- !query output
2015-07-27
-- !query
select next_day("2015-07-23", "xx")
-- !query schema
struct<next_day(CAST(2015-07-23 AS DATE), xx):date>
-- !query output
NULL
-- !query
select next_day("xx", "Mon")
-- !query schema
struct<next_day(CAST(xx AS DATE), Mon):date>
-- !query output
NULL
-- !query
select next_day(null, "Mon")
-- !query schema
struct<next_day(CAST(NULL AS DATE), Mon):date>
-- !query output
NULL
-- !query
select next_day(null, "xx")
-- !query schema
struct<next_day(CAST(NULL AS DATE), xx):date>
-- !query output
NULL

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 123
-- Number of queries: 128
-- !query
@ -1029,3 +1029,43 @@ select cast("Unparseable" as date)
struct<CAST(Unparseable AS DATE):date>
-- !query output
NULL
-- !query
select next_day("2015-07-23", "Mon")
-- !query schema
struct<next_day(CAST(2015-07-23 AS DATE), Mon):date>
-- !query output
2015-07-27
-- !query
select next_day("2015-07-23", "xx")
-- !query schema
struct<next_day(CAST(2015-07-23 AS DATE), xx):date>
-- !query output
NULL
-- !query
select next_day("xx", "Mon")
-- !query schema
struct<next_day(CAST(xx AS DATE), Mon):date>
-- !query output
NULL
-- !query
select next_day(null, "Mon")
-- !query schema
struct<next_day(CAST(NULL AS DATE), Mon):date>
-- !query output
NULL
-- !query
select next_day(null, "xx")
-- !query schema
struct<next_day(CAST(NULL AS DATE), xx):date>
-- !query output
NULL