[SPARK-35937][SQL] Extracting date field from timestamp should work in ANSI mode

### What changes were proposed in this pull request?

Add a new ANSI type coercion rule: when getting a date field from a Timestamp column, cast the column as Date type.

This is Spark's current hack to make the implementation simple. In the default type coercion rules, the implicit cast rule does the work. However, the ANSI implicit cast rule doesn't allow converting Timestamp type as Date type, so we need to have this additional rule to make sure the date field extraction from Timestamp columns works.

### Why are the changes needed?

Fix a bug.

### Does this PR introduce _any_ user-facing change?

No, the new type coercion rules are not released yet.

### How was this patch tested?

Unit test

Closes #33138 from gengliangwang/fixGetDateField.

Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Gengliang Wang <gengliang@apache.org>
This commit is contained in:
Gengliang Wang 2021-06-30 13:53:51 +08:00
parent 8d28839689
commit ad4b6796f6
5 changed files with 42 additions and 4 deletions

View file

@ -91,7 +91,8 @@ object AnsiTypeCoercion extends TypeCoercionBase {
ImplicitTypeCasts ::
DateTimeOperations ::
WindowFrameCoercion ::
StringLiteralCoercion :: Nil) :: Nil
StringLiteralCoercion ::
GetDateFieldOperations:: Nil) :: Nil
val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
case (t1, t2) if t1 == t2 => Some(t1)
@ -289,4 +290,19 @@ object AnsiTypeCoercion extends TypeCoercionBase {
p.makeCopy(Array(a, newList))
}
}
/**
 * When getting a date field from a Timestamp column, cast the column as date type.
 *
 * This is Spark's hack to make the implementation simple. In the default type coercion rules,
 * the implicit cast rule does the work. However, the ANSI implicit cast rule doesn't allow
 * converting Timestamp type as Date type, so we need to have this additional rule
 * to make sure the date field extraction from Timestamp columns works.
 */
object GetDateFieldOperations extends TypeCoercionRule {
override def transform: PartialFunction[Expression, Expression] = {
// Only rewrite when the child is exactly TimestampType; the inserted Cast makes the
// GetDateField expression (Year, Month, DayOfYear, ...) resolvable under ANSI coercion.
case g: GetDateField if g.child.dataType == TimestampType =>
g.withNewChildren(Seq(Cast(g.child, DateType)))
}
}
}

View file

@ -159,6 +159,7 @@ object RuleIdCollection {
// In the production code path, the following rules are run in CombinedTypeCoercionRule, and
// hence we only need to add them for unit testing.
"org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$PromoteStringLiterals" ::
"org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$GetDateFieldOperations" ::
"org.apache.spark.sql.catalyst.analysis.DecimalPrecision" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercion$BooleanEquality" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$CaseWhenCoercion" ::

View file

@ -1425,4 +1425,14 @@ class AnsiTypeCoercionSuite extends AnalysisTest {
In(timestampLiteral, Seq(stringLiteral)),
In(timestampLiteral, Seq(castStringLiteralAsTimestamp)))
}
// Verifies that the new ANSI coercion rule rewrites each date-field extraction on a
// Timestamp literal into the same extraction over Cast(ts, DateType).
test("SPARK-35937: GetDateFieldOperations") {
val ts = Literal(Timestamp.valueOf("2021-01-01 01:30:00"))
// Every GetDateField expression covered by the rule must receive the inserted cast.
Seq(
DayOfYear, Year, YearOfWeek, Quarter, Month, DayOfMonth, DayOfWeek, WeekDay, WeekOfYear
).foreach { operation =>
ruleTest(
AnsiTypeCoercion.GetDateFieldOperations, operation(ts), operation(Cast(ts, DateType)))
}
}
}

View file

@ -256,10 +256,13 @@ SELECT '' AS `54`, d1 as `timestamp`,
date_part( 'minute', d1) AS `minute`, date_part( 'second', d1) AS `second`
FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'
-- !query schema
struct<>
struct<54:string,timestamp:timestamp,year:int,month:int,day:int,hour:int,minute:int,second:decimal(8,6)>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'year(spark_catalog.default.timestamp_tbl.d1)' due to data type mismatch: argument 1 requires date type, however, 'spark_catalog.default.timestamp_tbl.d1' is of timestamp type.; line 2 pos 4
1969-12-31 16:00:00 1969 12 31 16 0 0.000000
1997-01-02 00:00:00 1997 1 2 0 0 0.000000
1997-01-02 03:04:05 1997 1 2 3 4 5.000000
1997-02-10 17:32:01 1997 2 10 17 32 1.000000
2001-09-22 18:19:20 2001 9 22 18 19 20.000000
-- !query

View file

@ -4025,6 +4025,14 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
assert(minuteToSecDF.schema.head.dataType === DayTimeIntervalType(2, 3))
}
// End-to-end check: with ANSI mode enabled, extract(year/month/day FROM timestamp)
// resolves and returns the expected field values instead of failing analysis.
test("SPARK-35937: Extract date field from timestamp should work in ANSI mode") {
withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
checkAnswer(sql("select extract(year from to_timestamp('2021-01-02 03:04:05'))"), Row(2021))
checkAnswer(sql("select extract(month from to_timestamp('2021-01-02 03:04:05'))"), Row(1))
checkAnswer(sql("select extract(day from to_timestamp('2021-01-02 03:04:05'))"), Row(2))
}
}
test("SPARK-35545: split SubqueryExpression's children field into outer attributes and " +
"join conditions") {
withView("t") {