[SPARK-35937][SQL] Extracting date field from timestamp should work in ANSI mode
### What changes were proposed in this pull request?
Add a new ANSI type coercion rule: when getting a date field from a Timestamp column, cast the column as Date type. This is Spark's current hack to keep the implementation simple. Under the default type coercion rules, the implicit cast rule does this work. However, the ANSI implicit cast rule doesn't allow converting Timestamp type to Date type, so we need this additional rule to make sure date field extraction from Timestamp columns works.

### Why are the changes needed?
Fix a bug.

### Does this PR introduce _any_ user-facing change?
No, the new type coercion rules are not released yet.

### How was this patch tested?
Unit test

Closes #33138 from gengliangwang/fixGetDateField.

Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Gengliang Wang <gengliang@apache.org>
This commit is contained in:
parent 8d28839689
commit ad4b6796f6
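For context, here is a minimal sketch of the user-visible behavior this change enables. The session setup (the `spark` name, `local[*]` master, and app name) is illustrative and not part of this commit; only the ANSI config key and the `extract` query mirror the patch and its tests.

```scala
import org.apache.spark.sql.SparkSession

// Illustrative local session; the builder settings are assumptions for a quick demo run.
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("spark-35937-sketch")
  .config("spark.sql.ansi.enabled", "true")
  .getOrCreate()

// With ANSI mode on, extracting a date field from a timestamp previously failed analysis
// ("argument 1 requires date type"); with GetDateFieldOperations it resolves and returns 2021.
spark.sql("SELECT extract(year FROM to_timestamp('2021-01-02 03:04:05'))").show()
```

The golden-file and SQLQuerySuite changes further down assert this behavior end to end.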
@@ -91,7 +91,8 @@ object AnsiTypeCoercion extends TypeCoercionBase {
       ImplicitTypeCasts ::
       DateTimeOperations ::
       WindowFrameCoercion ::
-      StringLiteralCoercion :: Nil) :: Nil
+      StringLiteralCoercion ::
+      GetDateFieldOperations:: Nil) :: Nil
 
   val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
     case (t1, t2) if t1 == t2 => Some(t1)
@@ -289,4 +290,19 @@ object AnsiTypeCoercion extends TypeCoercionBase {
         p.makeCopy(Array(a, newList))
     }
   }
+
+  /**
+   * When getting a date field from a Timestamp column, cast the column as date type.
+   *
+   * This is Spark's hack to make the implementation simple. In the default type coercion rules,
+   * the implicit cast rule does the work. However, The ANSI implicit cast rule doesn't allow
+   * converting Timestamp type as Date type, so we need to have this additional rule
+   * to make sure the date field extraction from Timestamp columns works.
+   */
+  object GetDateFieldOperations extends TypeCoercionRule {
+    override def transform: PartialFunction[Expression, Expression] = {
+      case g: GetDateField if g.child.dataType == TimestampType =>
+        g.withNewChildren(Seq(Cast(g.child, DateType)))
+    }
+  }
 }
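To make the rewrite concrete, here is a hedged sketch of what the rule does at the Catalyst expression level; the literal and surrounding code are illustrative only and mirror the unit test added later in this commit.

```scala
import java.sql.Timestamp

import org.apache.spark.sql.catalyst.expressions.{Cast, Literal, Year}
import org.apache.spark.sql.types.DateType

// A timestamp-typed child, as in the AnsiTypeCoercionSuite test below.
val ts = Literal(Timestamp.valueOf("2021-01-01 01:30:00"))

// Before the rule: Year(ts) cannot resolve in ANSI mode, because Year expects a date child.
val before = Year(ts)

// After GetDateFieldOperations: the child is wrapped in Cast(_, DateType), which resolves.
val after = Year(Cast(ts, DateType))
```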
@@ -159,6 +159,7 @@ object RuleIdCollection {
       // In the production code path, the following rules are run in CombinedTypeCoercionRule, and
       // hence we only need to add them for unit testing.
       "org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$PromoteStringLiterals" ::
+      "org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$GetDateFieldOperations" ::
       "org.apache.spark.sql.catalyst.analysis.DecimalPrecision" ::
       "org.apache.spark.sql.catalyst.analysis.TypeCoercion$BooleanEquality" ::
       "org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$CaseWhenCoercion" ::
@@ -1425,4 +1425,14 @@ class AnsiTypeCoercionSuite extends AnalysisTest {
       In(timestampLiteral, Seq(stringLiteral)),
       In(timestampLiteral, Seq(castStringLiteralAsTimestamp)))
   }
+
+  test("SPARK-35937: GetDateFieldOperations") {
+    val ts = Literal(Timestamp.valueOf("2021-01-01 01:30:00"))
+    Seq(
+      DayOfYear, Year, YearOfWeek, Quarter, Month, DayOfMonth, DayOfWeek, WeekDay, WeekOfYear
+    ).foreach { operation =>
+      ruleTest(
+        AnsiTypeCoercion.GetDateFieldOperations, operation(ts), operation(Cast(ts, DateType)))
+    }
+  }
 }
@@ -256,10 +256,13 @@ SELECT '' AS `54`, d1 as `timestamp`,
    date_part( 'minute', d1) AS `minute`, date_part( 'second', d1) AS `second`
    FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'
 -- !query schema
-struct<>
+struct<54:string,timestamp:timestamp,year:int,month:int,day:int,hour:int,minute:int,second:decimal(8,6)>
 -- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'year(spark_catalog.default.timestamp_tbl.d1)' due to data type mismatch: argument 1 requires date type, however, 'spark_catalog.default.timestamp_tbl.d1' is of timestamp type.; line 2 pos 4
+1969-12-31 16:00:00    1969    12    31    16    0    0.000000
+1997-01-02 00:00:00    1997    1    2    0    0    0.000000
+1997-01-02 03:04:05    1997    1    2    3    4    5.000000
+1997-02-10 17:32:01    1997    2    10    17    32    1.000000
+2001-09-22 18:19:20    2001    9    22    18    19    20.000000


-- !query
@@ -4025,6 +4025,14 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
     assert(minuteToSecDF.schema.head.dataType === DayTimeIntervalType(2, 3))
   }
 
+  test("SPARK-35937: Extract date field from timestamp should work in ANSI mode") {
+    withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
+      checkAnswer(sql("select extract(year from to_timestamp('2021-01-02 03:04:05'))"), Row(2021))
+      checkAnswer(sql("select extract(month from to_timestamp('2021-01-02 03:04:05'))"), Row(1))
+      checkAnswer(sql("select extract(day from to_timestamp('2021-01-02 03:04:05'))"), Row(2))
+    }
+  }
+
   test("SPARK-35545: split SubqueryExpression's children field into outer attributes and " +
     "join conditions") {
     withView("t") {