[SPARK-31528][SQL] Remove millennium, century, decade from trunc/date_trunc fucntions
### What changes were proposed in this pull request? Similar to https://jira.apache.org/jira/browse/SPARK-31507, millennium, century, and decade are not commonly used in most modern platforms. For example Negative: https://docs.snowflake.com/en/sql-reference/functions-date-time.html#supported-date-and-time-parts https://prestodb.io/docs/current/functions/datetime.html#date_trunc https://teradata.github.io/presto/docs/148t/functions/datetime.html#date_trunc https://www.oracletutorial.com/oracle-date-functions/oracle-trunc/ Positive: https://docs.aws.amazon.com/redshift/latest/dg/r_Dateparts_for_datetime_functions.html https://www.postgresql.org/docs/9.1/functions-datetime.html#FUNCTIONS-DATETIME-TRUNC This PR removes these `fmt`s support for trunc and date_trunc functions. ### Why are the changes needed? clean uncommon datetime unit for easy maintenance, we can add them back if they are found very useful later. ### Does this PR introduce any user-facing change? no, targeting 3.0.0, these are newly added in 3.0.0 ### How was this patch tested? remove and modify existing units tests Closes #28313 from yaooqinn/SPARK-31528. Authored-by: Kent Yao <yaooqinn@hotmail.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
caf3ab8411
commit
f92652d0b5
|
@ -1622,7 +1622,15 @@ trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes {
|
|||
@ExpressionDescription(
|
||||
usage = """
|
||||
_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.
|
||||
`fmt` should be one of ["week", "mon", "month", "mm", "quarter", "year", "yyyy", "yy", "decade", "century", "millennium"]
|
||||
""",
|
||||
arguments = """
|
||||
Arguments:
|
||||
* date - date value or valid date string
|
||||
* fmt - the format representing the unit to be truncated to
|
||||
- "YEAR", "YYYY", "YY" - truncate to the first date of the year that the `date` falls in
|
||||
- "QUARTER" - truncate to the first date of the quarter that the `date` falls in
|
||||
- "MONTH", "MM", "MON" - truncate to the first date of the month that the `date` falls in
|
||||
- "WEEK" - truncate to the Monday of the week that the `date` falls in
|
||||
""",
|
||||
examples = """
|
||||
Examples:
|
||||
|
@ -1634,12 +1642,6 @@ trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes {
|
|||
2009-02-01
|
||||
> SELECT _FUNC_('2015-10-27', 'YEAR');
|
||||
2015-01-01
|
||||
> SELECT _FUNC_('2015-10-27', 'DECADE');
|
||||
2010-01-01
|
||||
> SELECT _FUNC_('1981-01-19', 'century');
|
||||
1901-01-01
|
||||
> SELECT _FUNC_('1981-01-19', 'millennium');
|
||||
1001-01-01
|
||||
""",
|
||||
group = "datetime_funcs",
|
||||
since = "1.5.0")
|
||||
|
@ -1674,9 +1676,21 @@ case class TruncDate(date: Expression, format: Expression)
|
|||
@ExpressionDescription(
|
||||
usage = """
|
||||
_FUNC_(fmt, ts) - Returns timestamp `ts` truncated to the unit specified by the format model `fmt`.
|
||||
`fmt` should be one of ["MILLENNIUM", "CENTURY", "DECADE", "YEAR", "YYYY", "YY",
|
||||
"QUARTER", "MON", "MONTH", "MM", "WEEK", "DAY", "DD",
|
||||
"HOUR", "MINUTE", "SECOND", "MILLISECOND", "MICROSECOND"]
|
||||
""",
|
||||
arguments = """
|
||||
Arguments:
|
||||
* fmt - the format representing the unit to be truncated to
|
||||
- "YEAR", "YYYY", "YY" - truncate to the first date of the year that the `ts` falls in, the time part will be zero out
|
||||
- "QUARTER" - truncate to the first date of the quarter that the `ts` falls in, the time part will be zero out
|
||||
- "MONTH", "MM", "MON" - truncate to the first date of the month that the `ts` falls in, the time part will be zero out
|
||||
- "WEEK" - truncate to the Monday of the week that the `ts` falls in, the time part will be zero out
|
||||
- "DAY", "DD" - zero out the time part
|
||||
- "HOUR" - zero out the minute and second with fraction part
|
||||
- "MINUTE"- zero out the second with fraction part
|
||||
- "SECOND" - zero out the second fraction part
|
||||
- "MILLISECOND" - zero out the microseconds
|
||||
- "MICROSECOND" - everything remains
|
||||
* ts - datetime value or valid timestamp string
|
||||
""",
|
||||
examples = """
|
||||
Examples:
|
||||
|
@ -1690,10 +1704,6 @@ case class TruncDate(date: Expression, format: Expression)
|
|||
2015-03-05 09:00:00
|
||||
> SELECT _FUNC_('MILLISECOND', '2015-03-05T09:32:05.123456');
|
||||
2015-03-05 09:32:05.123
|
||||
> SELECT _FUNC_('DECADE', '2015-03-05T09:32:05.123456');
|
||||
2010-01-01 00:00:00
|
||||
> SELECT _FUNC_('CENTURY', '2015-03-05T09:32:05.123456');
|
||||
2001-01-01 00:00:00
|
||||
""",
|
||||
group = "datetime_funcs",
|
||||
since = "2.3.0")
|
||||
|
|
|
@ -716,40 +716,25 @@ object DateTimeUtils {
|
|||
private[sql] val TRUNC_TO_MINUTE = 3
|
||||
private[sql] val TRUNC_TO_HOUR = 4
|
||||
private[sql] val TRUNC_TO_DAY = 5
|
||||
// The levels from TRUNC_TO_WEEK to TRUNC_TO_MILLENNIUM are used in truncations
|
||||
// The levels from TRUNC_TO_WEEK to TRUNC_TO_YEAR are used in truncations
|
||||
// of DATE and TIMESTAMP values.
|
||||
private[sql] val TRUNC_TO_WEEK = 6
|
||||
private[sql] val MIN_LEVEL_OF_DATE_TRUNC = TRUNC_TO_WEEK
|
||||
private[sql] val TRUNC_TO_MONTH = 7
|
||||
private[sql] val TRUNC_TO_QUARTER = 8
|
||||
private[sql] val TRUNC_TO_YEAR = 9
|
||||
private[sql] val TRUNC_TO_DECADE = 10
|
||||
private[sql] val TRUNC_TO_CENTURY = 11
|
||||
private[sql] val TRUNC_TO_MILLENNIUM = 12
|
||||
|
||||
/**
|
||||
* Returns the trunc date from original date and trunc level.
|
||||
* Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 6.
|
||||
* Trunc level should be generated using `parseTruncLevel()`, should be between 6 and 9.
|
||||
*/
|
||||
def truncDate(d: SQLDate, level: Int): SQLDate = {
|
||||
def truncToYearLevel(divider: Int, adjust: Int): SQLDate = {
|
||||
val oldYear = getYear(d)
|
||||
var newYear = Math.floorDiv(oldYear, divider) * divider
|
||||
if (adjust > 0 && newYear == oldYear) {
|
||||
newYear -= divider
|
||||
}
|
||||
newYear += adjust
|
||||
localDateToDays(LocalDate.of(newYear, 1, 1))
|
||||
}
|
||||
level match {
|
||||
case TRUNC_TO_WEEK => getNextDateForDayOfWeek(d - 7, MONDAY)
|
||||
case TRUNC_TO_MONTH => d - DateTimeUtils.getDayOfMonth(d) + 1
|
||||
case TRUNC_TO_QUARTER =>
|
||||
localDateToDays(daysToLocalDate(d).`with`(IsoFields.DAY_OF_QUARTER, 1L))
|
||||
case TRUNC_TO_YEAR => d - DateTimeUtils.getDayInYear(d) + 1
|
||||
case TRUNC_TO_DECADE => truncToYearLevel(10, 0)
|
||||
case TRUNC_TO_CENTURY => truncToYearLevel(100, 1)
|
||||
case TRUNC_TO_MILLENNIUM => truncToYearLevel(1000, 1)
|
||||
case _ =>
|
||||
// caller make sure that this should never be reached
|
||||
sys.error(s"Invalid trunc level: $level")
|
||||
|
@ -763,7 +748,7 @@ object DateTimeUtils {
|
|||
|
||||
/**
|
||||
* Returns the trunc date time from original date time and trunc level.
|
||||
* Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 12.
|
||||
* Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 9.
|
||||
*/
|
||||
def truncTimestamp(t: SQLTimestamp, level: Int, zoneId: ZoneId): SQLTimestamp = {
|
||||
level match {
|
||||
|
@ -783,7 +768,7 @@ object DateTimeUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the truncate level, could be from TRUNC_TO_MICROSECOND to TRUNC_TO_MILLENNIUM,
|
||||
* Returns the truncate level, could be from TRUNC_TO_MICROSECOND to TRUNC_TO_YEAR,
|
||||
* or TRUNC_INVALID, TRUNC_INVALID means unsupported truncate level.
|
||||
*/
|
||||
def parseTruncLevel(format: UTF8String): Int = {
|
||||
|
@ -801,9 +786,6 @@ object DateTimeUtils {
|
|||
case "MON" | "MONTH" | "MM" => TRUNC_TO_MONTH
|
||||
case "QUARTER" => TRUNC_TO_QUARTER
|
||||
case "YEAR" | "YYYY" | "YY" => TRUNC_TO_YEAR
|
||||
case "DECADE" => TRUNC_TO_DECADE
|
||||
case "CENTURY" => TRUNC_TO_CENTURY
|
||||
case "MILLENNIUM" => TRUNC_TO_MILLENNIUM
|
||||
case _ => TRUNC_INVALID
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
|
|||
|
||||
import java.sql.{Date, Timestamp}
|
||||
import java.text.SimpleDateFormat
|
||||
import java.time.{Instant, LocalDate, LocalDateTime, ZoneId}
|
||||
import java.time.{Instant, LocalDate, ZoneId}
|
||||
import java.util.{Calendar, Locale, TimeZone}
|
||||
import java.util.concurrent.TimeUnit._
|
||||
|
||||
|
@ -27,7 +27,6 @@ import org.apache.spark.{SparkFunSuite, SparkUpgradeException}
|
|||
import org.apache.spark.sql.catalyst.InternalRow
|
||||
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
|
||||
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter}
|
||||
import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND
|
||||
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
import org.apache.spark.sql.types._
|
||||
|
@ -611,105 +610,102 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
|
|||
NextDay(Literal(Date.valueOf("2015-07-23")), Literal("\"quote")) :: Nil)
|
||||
}
|
||||
|
||||
private def testTruncDate(input: Date, fmt: String, expected: Date): Unit = {
|
||||
checkEvaluation(TruncDate(Literal.create(input, DateType), Literal.create(fmt, StringType)),
|
||||
expected)
|
||||
checkEvaluation(
|
||||
TruncDate(Literal.create(input, DateType), NonFoldableLiteral.create(fmt, StringType)),
|
||||
expected)
|
||||
}
|
||||
|
||||
test("TruncDate") {
|
||||
def testTrunc(input: Date, fmt: String, expected: Date): Unit = {
|
||||
checkEvaluation(TruncDate(Literal.create(input, DateType), Literal.create(fmt, StringType)),
|
||||
expected)
|
||||
checkEvaluation(
|
||||
TruncDate(Literal.create(input, DateType), NonFoldableLiteral.create(fmt, StringType)),
|
||||
expected)
|
||||
}
|
||||
val date = Date.valueOf("2015-07-22")
|
||||
Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt =>
|
||||
testTrunc(date, fmt, Date.valueOf("2015-01-01"))
|
||||
testTruncDate(date, fmt, Date.valueOf("2015-01-01"))
|
||||
}
|
||||
Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach { fmt =>
|
||||
testTrunc(date, fmt, Date.valueOf("2015-07-01"))
|
||||
testTruncDate(date, fmt, Date.valueOf("2015-07-01"))
|
||||
}
|
||||
testTrunc(date, "DD", null)
|
||||
testTrunc(date, "SECOND", null)
|
||||
testTrunc(date, "HOUR", null)
|
||||
testTrunc(date, null, null)
|
||||
testTrunc(null, "MON", null)
|
||||
testTrunc(null, null, null)
|
||||
testTruncDate(date, "DD", null)
|
||||
testTruncDate(date, "SECOND", null)
|
||||
testTruncDate(date, "HOUR", null)
|
||||
testTruncDate(null, "MON", null)
|
||||
// Test escaping of format
|
||||
GenerateUnsafeProjection.generate(TruncDate(Literal(0, DateType), Literal("\"quote")) :: Nil)
|
||||
}
|
||||
|
||||
testTrunc(Date.valueOf("2000-03-08"), "decade", Date.valueOf("2000-01-01"))
|
||||
testTrunc(Date.valueOf("2000-03-08"), "century", Date.valueOf("1901-01-01"))
|
||||
private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
|
||||
checkEvaluation(
|
||||
TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)),
|
||||
expected)
|
||||
checkEvaluation(
|
||||
TruncTimestamp(
|
||||
NonFoldableLiteral.create(fmt, StringType), Literal.create(input, TimestampType)),
|
||||
expected)
|
||||
}
|
||||
|
||||
test("TruncTimestamp") {
|
||||
def testTrunc(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
|
||||
checkEvaluation(
|
||||
TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)),
|
||||
expected)
|
||||
checkEvaluation(
|
||||
TruncTimestamp(
|
||||
NonFoldableLiteral.create(fmt, StringType), Literal.create(input, TimestampType)),
|
||||
expected)
|
||||
}
|
||||
|
||||
withDefaultTimeZone(UTC) {
|
||||
val inputDate = Timestamp.valueOf("2015-07-22 05:30:06")
|
||||
|
||||
Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt =>
|
||||
testTrunc(
|
||||
testTruncTimestamp(
|
||||
inputDate, fmt,
|
||||
Timestamp.valueOf("2015-01-01 00:00:00"))
|
||||
}
|
||||
|
||||
Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach { fmt =>
|
||||
testTrunc(
|
||||
testTruncTimestamp(
|
||||
inputDate, fmt,
|
||||
Timestamp.valueOf("2015-07-01 00:00:00"))
|
||||
}
|
||||
|
||||
Seq("DAY", "day", "DD", "dd").foreach { fmt =>
|
||||
testTrunc(
|
||||
testTruncTimestamp(
|
||||
inputDate, fmt,
|
||||
Timestamp.valueOf("2015-07-22 00:00:00"))
|
||||
}
|
||||
|
||||
Seq("HOUR", "hour").foreach { fmt =>
|
||||
testTrunc(
|
||||
testTruncTimestamp(
|
||||
inputDate, fmt,
|
||||
Timestamp.valueOf("2015-07-22 05:00:00"))
|
||||
}
|
||||
|
||||
Seq("MINUTE", "minute").foreach { fmt =>
|
||||
testTrunc(
|
||||
testTruncTimestamp(
|
||||
inputDate, fmt,
|
||||
Timestamp.valueOf("2015-07-22 05:30:00"))
|
||||
}
|
||||
|
||||
Seq("SECOND", "second").foreach { fmt =>
|
||||
testTrunc(
|
||||
testTruncTimestamp(
|
||||
inputDate, fmt,
|
||||
Timestamp.valueOf("2015-07-22 05:30:06"))
|
||||
}
|
||||
|
||||
Seq("WEEK", "week").foreach { fmt =>
|
||||
testTrunc(
|
||||
testTruncTimestamp(
|
||||
inputDate, fmt,
|
||||
Timestamp.valueOf("2015-07-20 00:00:00"))
|
||||
}
|
||||
|
||||
Seq("QUARTER", "quarter").foreach { fmt =>
|
||||
testTrunc(
|
||||
testTruncTimestamp(
|
||||
inputDate, fmt,
|
||||
Timestamp.valueOf("2015-07-01 00:00:00"))
|
||||
}
|
||||
|
||||
testTrunc(inputDate, "INVALID", null)
|
||||
testTrunc(inputDate, null, null)
|
||||
testTrunc(null, "MON", null)
|
||||
testTrunc(null, null, null)
|
||||
testTruncTimestamp(null, "MON", null)
|
||||
}
|
||||
}
|
||||
|
||||
testTrunc(Timestamp.valueOf("2000-03-08 11:12:13"), "decade",
|
||||
Timestamp.valueOf("2000-01-01 00:00:00"))
|
||||
testTrunc(Timestamp.valueOf("2000-03-08 11:12:13"), "century",
|
||||
Timestamp.valueOf("1901-01-01 00:00:00"))
|
||||
test("unsupported fmt fields for trunc/date_trunc results null") {
|
||||
Seq("INVALID", "decade", "century", "millennium", "whatever", null).foreach { field =>
|
||||
testTruncDate(Date.valueOf("2000-03-08"), field, null)
|
||||
testTruncDate(null, field, null)
|
||||
testTruncTimestamp(Timestamp.valueOf("2000-03-08 11:12:13"), field, null)
|
||||
testTruncTimestamp(null, field, null)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -547,9 +547,6 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
|
|||
testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", defaultInputTS2.get)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_MICROSECOND, "2015-03-05T09:32:05.359123", defaultInputTS.get)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_MILLISECOND, "2015-03-05T09:32:05.359", defaultInputTS.get)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_DECADE, "2010-01-01", defaultInputTS.get)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_CENTURY, "2001-01-01", defaultInputTS.get)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_MILLENNIUM, "2001-01-01", defaultInputTS.get)
|
||||
|
||||
for (zid <- ALL_TIMEZONES) {
|
||||
withDefaultTimeZone(zid) {
|
||||
|
@ -580,9 +577,6 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
|
|||
testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS.get, zid)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS1.get, zid)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", inputTS2.get, zid)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_DECADE, "1990-01-01", inputTS5.get, zid)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_CENTURY, "1901-01-01", inputTS5.get, zid)
|
||||
testTrunc(DateTimeUtils.TRUNC_TO_MILLENNIUM, "2001-01-01", inputTS.get, zid)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3122,6 +3122,7 @@ object functions {
|
|||
* cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
|
||||
* @param format: 'year', 'yyyy', 'yy' to truncate by year,
|
||||
* or 'month', 'mon', 'mm' to truncate by month
|
||||
* Other options are: 'week', 'quarter'
|
||||
*
|
||||
* @return A date, or null if `date` was a string that could not be cast to a date or `format`
|
||||
* was an invalid value
|
||||
|
@ -3140,7 +3141,8 @@ object functions {
|
|||
* @param format: 'year', 'yyyy', 'yy' to truncate by year,
|
||||
* 'month', 'mon', 'mm' to truncate by month,
|
||||
* 'day', 'dd' to truncate by day,
|
||||
* Other options are: 'second', 'minute', 'hour', 'week', 'month', 'quarter'
|
||||
* Other options are:
|
||||
* 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
|
||||
* @param timestamp A date, timestamp or string. If a string, the data must be in a format that
|
||||
* can be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
|
||||
* @return A timestamp, or null if `timestamp` was a string that could not be cast to a timestamp
|
||||
|
|
|
@ -474,10 +474,6 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
|
|||
checkAnswer(
|
||||
df.selectExpr("trunc(t, 'Month')"),
|
||||
Seq(Row(Date.valueOf("2015-07-01")), Row(Date.valueOf("2014-12-01"))))
|
||||
|
||||
checkAnswer(
|
||||
df.selectExpr("trunc(t, 'decade')"),
|
||||
Seq(Row(Date.valueOf("2010-01-01")), Row(Date.valueOf("2010-01-01"))))
|
||||
}
|
||||
|
||||
test("function date_trunc") {
|
||||
|
@ -529,17 +525,15 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
|
|||
df.selectExpr("date_trunc('MILLISECOND', t)"),
|
||||
Seq(Row(Timestamp.valueOf("2015-07-22 10:01:40.123")),
|
||||
Row(Timestamp.valueOf("2014-12-31 05:29:06.123"))))
|
||||
}
|
||||
|
||||
test("unsupported fmt fields for trunc/date_trunc results null") {
|
||||
Seq("INVALID", "decade", "century", "millennium", "whatever", null).foreach { f =>
|
||||
checkAnswer(
|
||||
df.selectExpr("date_trunc('DECADE', t)"),
|
||||
Seq(Row(Timestamp.valueOf("2010-01-01 00:00:00")),
|
||||
Row(Timestamp.valueOf("2010-01-01 00:00:00"))))
|
||||
|
||||
Seq("century", "millennium").foreach { level =>
|
||||
checkAnswer(
|
||||
df.selectExpr(s"date_trunc('$level', t)"),
|
||||
Seq(Row(Timestamp.valueOf("2001-01-01 00:00:00")),
|
||||
Row(Timestamp.valueOf("2001-01-01 00:00:00"))))
|
||||
Seq(Date.valueOf("2014-12-31"))
|
||||
.toDF("dt")
|
||||
.selectExpr(s"date_trunc('$f', dt)", "trunc(dt, '$f')"),
|
||||
Row(null, null))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue