[SPARK-31528][SQL] Remove millennium, century, decade from trunc/date_trunc fucntions

### What changes were proposed in this pull request?

Similar to https://jira.apache.org/jira/browse/SPARK-31507, millennium, century, and decade are not commonly used in most modern platforms.

For example
Negative:
https://docs.snowflake.com/en/sql-reference/functions-date-time.html#supported-date-and-time-parts
https://prestodb.io/docs/current/functions/datetime.html#date_trunc
https://teradata.github.io/presto/docs/148t/functions/datetime.html#date_trunc
https://www.oracletutorial.com/oracle-date-functions/oracle-trunc/

Positive:
https://docs.aws.amazon.com/redshift/latest/dg/r_Dateparts_for_datetime_functions.html
https://www.postgresql.org/docs/9.1/functions-datetime.html#FUNCTIONS-DATETIME-TRUNC

This PR removes these `fmt`s support for trunc and date_trunc functions.

### Why are the changes needed?

clean uncommon datetime unit for easy maintenance, we can add them back if they are found very useful later.

### Does this PR introduce any user-facing change?
no, targeting 3.0.0, these are newly added in 3.0.0

### How was this patch tested?

remove and modify existing units tests

Closes #28313 from yaooqinn/SPARK-31528.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
Kent Yao 2020-04-24 18:28:41 -07:00 committed by Dongjoon Hyun
parent caf3ab8411
commit f92652d0b5
No known key found for this signature in database
GPG key ID: EDA00CE834F0FC5C
6 changed files with 79 additions and 101 deletions

View file

@ -1622,7 +1622,15 @@ trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes {
@ExpressionDescription(
usage = """
_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.
`fmt` should be one of ["week", "mon", "month", "mm", "quarter", "year", "yyyy", "yy", "decade", "century", "millennium"]
""",
arguments = """
Arguments:
* date - date value or valid date string
* fmt - the format representing the unit to be truncated to
- "YEAR", "YYYY", "YY" - truncate to the first date of the year that the `date` falls in
- "QUARTER" - truncate to the first date of the quarter that the `date` falls in
- "MONTH", "MM", "MON" - truncate to the first date of the month that the `date` falls in
- "WEEK" - truncate to the Monday of the week that the `date` falls in
""",
examples = """
Examples:
@ -1634,12 +1642,6 @@ trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes {
2009-02-01
> SELECT _FUNC_('2015-10-27', 'YEAR');
2015-01-01
> SELECT _FUNC_('2015-10-27', 'DECADE');
2010-01-01
> SELECT _FUNC_('1981-01-19', 'century');
1901-01-01
> SELECT _FUNC_('1981-01-19', 'millennium');
1001-01-01
""",
group = "datetime_funcs",
since = "1.5.0")
@ -1674,9 +1676,21 @@ case class TruncDate(date: Expression, format: Expression)
@ExpressionDescription(
usage = """
_FUNC_(fmt, ts) - Returns timestamp `ts` truncated to the unit specified by the format model `fmt`.
`fmt` should be one of ["MILLENNIUM", "CENTURY", "DECADE", "YEAR", "YYYY", "YY",
"QUARTER", "MON", "MONTH", "MM", "WEEK", "DAY", "DD",
"HOUR", "MINUTE", "SECOND", "MILLISECOND", "MICROSECOND"]
""",
arguments = """
Arguments:
* fmt - the format representing the unit to be truncated to
- "YEAR", "YYYY", "YY" - truncate to the first date of the year that the `ts` falls in, the time part will be zero out
- "QUARTER" - truncate to the first date of the quarter that the `ts` falls in, the time part will be zero out
- "MONTH", "MM", "MON" - truncate to the first date of the month that the `ts` falls in, the time part will be zero out
- "WEEK" - truncate to the Monday of the week that the `ts` falls in, the time part will be zero out
- "DAY", "DD" - zero out the time part
- "HOUR" - zero out the minute and second with fraction part
- "MINUTE"- zero out the second with fraction part
- "SECOND" - zero out the second fraction part
- "MILLISECOND" - zero out the microseconds
- "MICROSECOND" - everything remains
* ts - datetime value or valid timestamp string
""",
examples = """
Examples:
@ -1690,10 +1704,6 @@ case class TruncDate(date: Expression, format: Expression)
2015-03-05 09:00:00
> SELECT _FUNC_('MILLISECOND', '2015-03-05T09:32:05.123456');
2015-03-05 09:32:05.123
> SELECT _FUNC_('DECADE', '2015-03-05T09:32:05.123456');
2010-01-01 00:00:00
> SELECT _FUNC_('CENTURY', '2015-03-05T09:32:05.123456');
2001-01-01 00:00:00
""",
group = "datetime_funcs",
since = "2.3.0")

View file

@ -716,40 +716,25 @@ object DateTimeUtils {
private[sql] val TRUNC_TO_MINUTE = 3
private[sql] val TRUNC_TO_HOUR = 4
private[sql] val TRUNC_TO_DAY = 5
// The levels from TRUNC_TO_WEEK to TRUNC_TO_MILLENNIUM are used in truncations
// The levels from TRUNC_TO_WEEK to TRUNC_TO_YEAR are used in truncations
// of DATE and TIMESTAMP values.
private[sql] val TRUNC_TO_WEEK = 6
private[sql] val MIN_LEVEL_OF_DATE_TRUNC = TRUNC_TO_WEEK
private[sql] val TRUNC_TO_MONTH = 7
private[sql] val TRUNC_TO_QUARTER = 8
private[sql] val TRUNC_TO_YEAR = 9
private[sql] val TRUNC_TO_DECADE = 10
private[sql] val TRUNC_TO_CENTURY = 11
private[sql] val TRUNC_TO_MILLENNIUM = 12
/**
* Returns the trunc date from original date and trunc level.
* Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 6.
* Trunc level should be generated using `parseTruncLevel()`, should be between 6 and 9.
*/
def truncDate(d: SQLDate, level: Int): SQLDate = {
def truncToYearLevel(divider: Int, adjust: Int): SQLDate = {
val oldYear = getYear(d)
var newYear = Math.floorDiv(oldYear, divider) * divider
if (adjust > 0 && newYear == oldYear) {
newYear -= divider
}
newYear += adjust
localDateToDays(LocalDate.of(newYear, 1, 1))
}
level match {
case TRUNC_TO_WEEK => getNextDateForDayOfWeek(d - 7, MONDAY)
case TRUNC_TO_MONTH => d - DateTimeUtils.getDayOfMonth(d) + 1
case TRUNC_TO_QUARTER =>
localDateToDays(daysToLocalDate(d).`with`(IsoFields.DAY_OF_QUARTER, 1L))
case TRUNC_TO_YEAR => d - DateTimeUtils.getDayInYear(d) + 1
case TRUNC_TO_DECADE => truncToYearLevel(10, 0)
case TRUNC_TO_CENTURY => truncToYearLevel(100, 1)
case TRUNC_TO_MILLENNIUM => truncToYearLevel(1000, 1)
case _ =>
// caller make sure that this should never be reached
sys.error(s"Invalid trunc level: $level")
@ -763,7 +748,7 @@ object DateTimeUtils {
/**
* Returns the trunc date time from original date time and trunc level.
* Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 12.
* Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 9.
*/
def truncTimestamp(t: SQLTimestamp, level: Int, zoneId: ZoneId): SQLTimestamp = {
level match {
@ -783,7 +768,7 @@ object DateTimeUtils {
}
/**
* Returns the truncate level, could be from TRUNC_TO_MICROSECOND to TRUNC_TO_MILLENNIUM,
* Returns the truncate level, could be from TRUNC_TO_MICROSECOND to TRUNC_TO_YEAR,
* or TRUNC_INVALID, TRUNC_INVALID means unsupported truncate level.
*/
def parseTruncLevel(format: UTF8String): Int = {
@ -801,9 +786,6 @@ object DateTimeUtils {
case "MON" | "MONTH" | "MM" => TRUNC_TO_MONTH
case "QUARTER" => TRUNC_TO_QUARTER
case "YEAR" | "YYYY" | "YY" => TRUNC_TO_YEAR
case "DECADE" => TRUNC_TO_DECADE
case "CENTURY" => TRUNC_TO_CENTURY
case "MILLENNIUM" => TRUNC_TO_MILLENNIUM
case _ => TRUNC_INVALID
}
}

View file

@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
import java.sql.{Date, Timestamp}
import java.text.SimpleDateFormat
import java.time.{Instant, LocalDate, LocalDateTime, ZoneId}
import java.time.{Instant, LocalDate, ZoneId}
import java.util.{Calendar, Locale, TimeZone}
import java.util.concurrent.TimeUnit._
@ -27,7 +27,6 @@ import org.apache.spark.{SparkFunSuite, SparkUpgradeException}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
@ -611,105 +610,102 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
NextDay(Literal(Date.valueOf("2015-07-23")), Literal("\"quote")) :: Nil)
}
private def testTruncDate(input: Date, fmt: String, expected: Date): Unit = {
checkEvaluation(TruncDate(Literal.create(input, DateType), Literal.create(fmt, StringType)),
expected)
checkEvaluation(
TruncDate(Literal.create(input, DateType), NonFoldableLiteral.create(fmt, StringType)),
expected)
}
test("TruncDate") {
def testTrunc(input: Date, fmt: String, expected: Date): Unit = {
checkEvaluation(TruncDate(Literal.create(input, DateType), Literal.create(fmt, StringType)),
expected)
checkEvaluation(
TruncDate(Literal.create(input, DateType), NonFoldableLiteral.create(fmt, StringType)),
expected)
}
val date = Date.valueOf("2015-07-22")
Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt =>
testTrunc(date, fmt, Date.valueOf("2015-01-01"))
testTruncDate(date, fmt, Date.valueOf("2015-01-01"))
}
Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach { fmt =>
testTrunc(date, fmt, Date.valueOf("2015-07-01"))
testTruncDate(date, fmt, Date.valueOf("2015-07-01"))
}
testTrunc(date, "DD", null)
testTrunc(date, "SECOND", null)
testTrunc(date, "HOUR", null)
testTrunc(date, null, null)
testTrunc(null, "MON", null)
testTrunc(null, null, null)
testTruncDate(date, "DD", null)
testTruncDate(date, "SECOND", null)
testTruncDate(date, "HOUR", null)
testTruncDate(null, "MON", null)
// Test escaping of format
GenerateUnsafeProjection.generate(TruncDate(Literal(0, DateType), Literal("\"quote")) :: Nil)
}
testTrunc(Date.valueOf("2000-03-08"), "decade", Date.valueOf("2000-01-01"))
testTrunc(Date.valueOf("2000-03-08"), "century", Date.valueOf("1901-01-01"))
private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
checkEvaluation(
TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)),
expected)
checkEvaluation(
TruncTimestamp(
NonFoldableLiteral.create(fmt, StringType), Literal.create(input, TimestampType)),
expected)
}
test("TruncTimestamp") {
def testTrunc(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
checkEvaluation(
TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)),
expected)
checkEvaluation(
TruncTimestamp(
NonFoldableLiteral.create(fmt, StringType), Literal.create(input, TimestampType)),
expected)
}
withDefaultTimeZone(UTC) {
val inputDate = Timestamp.valueOf("2015-07-22 05:30:06")
Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt =>
testTrunc(
testTruncTimestamp(
inputDate, fmt,
Timestamp.valueOf("2015-01-01 00:00:00"))
}
Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach { fmt =>
testTrunc(
testTruncTimestamp(
inputDate, fmt,
Timestamp.valueOf("2015-07-01 00:00:00"))
}
Seq("DAY", "day", "DD", "dd").foreach { fmt =>
testTrunc(
testTruncTimestamp(
inputDate, fmt,
Timestamp.valueOf("2015-07-22 00:00:00"))
}
Seq("HOUR", "hour").foreach { fmt =>
testTrunc(
testTruncTimestamp(
inputDate, fmt,
Timestamp.valueOf("2015-07-22 05:00:00"))
}
Seq("MINUTE", "minute").foreach { fmt =>
testTrunc(
testTruncTimestamp(
inputDate, fmt,
Timestamp.valueOf("2015-07-22 05:30:00"))
}
Seq("SECOND", "second").foreach { fmt =>
testTrunc(
testTruncTimestamp(
inputDate, fmt,
Timestamp.valueOf("2015-07-22 05:30:06"))
}
Seq("WEEK", "week").foreach { fmt =>
testTrunc(
testTruncTimestamp(
inputDate, fmt,
Timestamp.valueOf("2015-07-20 00:00:00"))
}
Seq("QUARTER", "quarter").foreach { fmt =>
testTrunc(
testTruncTimestamp(
inputDate, fmt,
Timestamp.valueOf("2015-07-01 00:00:00"))
}
testTrunc(inputDate, "INVALID", null)
testTrunc(inputDate, null, null)
testTrunc(null, "MON", null)
testTrunc(null, null, null)
testTruncTimestamp(null, "MON", null)
}
}
testTrunc(Timestamp.valueOf("2000-03-08 11:12:13"), "decade",
Timestamp.valueOf("2000-01-01 00:00:00"))
testTrunc(Timestamp.valueOf("2000-03-08 11:12:13"), "century",
Timestamp.valueOf("1901-01-01 00:00:00"))
test("unsupported fmt fields for trunc/date_trunc results null") {
Seq("INVALID", "decade", "century", "millennium", "whatever", null).foreach { field =>
testTruncDate(Date.valueOf("2000-03-08"), field, null)
testTruncDate(null, field, null)
testTruncTimestamp(Timestamp.valueOf("2000-03-08 11:12:13"), field, null)
testTruncTimestamp(null, field, null)
}
}

View file

@ -547,9 +547,6 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", defaultInputTS2.get)
testTrunc(DateTimeUtils.TRUNC_TO_MICROSECOND, "2015-03-05T09:32:05.359123", defaultInputTS.get)
testTrunc(DateTimeUtils.TRUNC_TO_MILLISECOND, "2015-03-05T09:32:05.359", defaultInputTS.get)
testTrunc(DateTimeUtils.TRUNC_TO_DECADE, "2010-01-01", defaultInputTS.get)
testTrunc(DateTimeUtils.TRUNC_TO_CENTURY, "2001-01-01", defaultInputTS.get)
testTrunc(DateTimeUtils.TRUNC_TO_MILLENNIUM, "2001-01-01", defaultInputTS.get)
for (zid <- ALL_TIMEZONES) {
withDefaultTimeZone(zid) {
@ -580,9 +577,6 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS.get, zid)
testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS1.get, zid)
testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", inputTS2.get, zid)
testTrunc(DateTimeUtils.TRUNC_TO_DECADE, "1990-01-01", inputTS5.get, zid)
testTrunc(DateTimeUtils.TRUNC_TO_CENTURY, "1901-01-01", inputTS5.get, zid)
testTrunc(DateTimeUtils.TRUNC_TO_MILLENNIUM, "2001-01-01", inputTS.get, zid)
}
}
}

View file

@ -3122,6 +3122,7 @@ object functions {
* cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
* @param format: 'year', 'yyyy', 'yy' to truncate by year,
* or 'month', 'mon', 'mm' to truncate by month
* Other options are: 'week', 'quarter'
*
* @return A date, or null if `date` was a string that could not be cast to a date or `format`
* was an invalid value
@ -3140,7 +3141,8 @@ object functions {
* @param format: 'year', 'yyyy', 'yy' to truncate by year,
* 'month', 'mon', 'mm' to truncate by month,
* 'day', 'dd' to truncate by day,
* Other options are: 'second', 'minute', 'hour', 'week', 'month', 'quarter'
* Other options are:
* 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
* @param timestamp A date, timestamp or string. If a string, the data must be in a format that
* can be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
* @return A timestamp, or null if `timestamp` was a string that could not be cast to a timestamp

View file

@ -474,10 +474,6 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
checkAnswer(
df.selectExpr("trunc(t, 'Month')"),
Seq(Row(Date.valueOf("2015-07-01")), Row(Date.valueOf("2014-12-01"))))
checkAnswer(
df.selectExpr("trunc(t, 'decade')"),
Seq(Row(Date.valueOf("2010-01-01")), Row(Date.valueOf("2010-01-01"))))
}
test("function date_trunc") {
@ -529,17 +525,15 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
df.selectExpr("date_trunc('MILLISECOND', t)"),
Seq(Row(Timestamp.valueOf("2015-07-22 10:01:40.123")),
Row(Timestamp.valueOf("2014-12-31 05:29:06.123"))))
}
test("unsupported fmt fields for trunc/date_trunc results null") {
Seq("INVALID", "decade", "century", "millennium", "whatever", null).foreach { f =>
checkAnswer(
df.selectExpr("date_trunc('DECADE', t)"),
Seq(Row(Timestamp.valueOf("2010-01-01 00:00:00")),
Row(Timestamp.valueOf("2010-01-01 00:00:00"))))
Seq("century", "millennium").foreach { level =>
checkAnswer(
df.selectExpr(s"date_trunc('$level', t)"),
Seq(Row(Timestamp.valueOf("2001-01-01 00:00:00")),
Row(Timestamp.valueOf("2001-01-01 00:00:00"))))
Seq(Date.valueOf("2014-12-31"))
.toDF("dt")
.selectExpr(s"date_trunc('$f', dt)", "trunc(dt, '$f')"),
Row(null, null))
}
}