diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index 3732b2eb0e..b5c7f27ce1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -91,7 +91,8 @@ object AnsiTypeCoercion extends TypeCoercionBase {
       ImplicitTypeCasts ::
       DateTimeOperations ::
       WindowFrameCoercion ::
-      StringLiteralCoercion :: Nil) :: Nil
+      StringLiteralCoercion ::
+      GetDateFieldOperations :: Nil) :: Nil
 
   val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
     case (t1, t2) if t1 == t2 => Some(t1)
@@ -289,4 +290,19 @@ object AnsiTypeCoercion extends TypeCoercionBase {
       p.makeCopy(Array(a, newList))
     }
   }
+
+  /**
+   * When getting a date field from a Timestamp column, cast the column to date type.
+   *
+   * This is Spark's hack to make the implementation simple. In the default type coercion rules,
+   * the implicit cast rule does the work. However, the ANSI implicit cast rule doesn't allow
+   * converting Timestamp type to Date type, so we need this additional rule to make sure that
+   * date field extraction from Timestamp columns works.
+   */
+  object GetDateFieldOperations extends TypeCoercionRule {
+    override def transform: PartialFunction[Expression, Expression] = {
+      case g: GetDateField if g.child.dataType == TimestampType =>
+        g.withNewChildren(Seq(Cast(g.child, DateType)))
+    }
+  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
index 3fe7be03ec..2a05b8533b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
@@ -159,6 +159,7 @@ object RuleIdCollection {
       // In the production code path, the following rules are run in CombinedTypeCoercionRule, and
       // hence we only need to add them for unit testing.
       "org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$PromoteStringLiterals" ::
+      "org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$GetDateFieldOperations" ::
      "org.apache.spark.sql.catalyst.analysis.DecimalPrecision" ::
      "org.apache.spark.sql.catalyst.analysis.TypeCoercion$BooleanEquality" ::
      "org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$CaseWhenCoercion" ::
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
index 03d885cd43..ab8d9d9806 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
@@ -1425,4 +1425,14 @@ class AnsiTypeCoercionSuite extends AnalysisTest {
       In(timestampLiteral, Seq(stringLiteral)),
       In(timestampLiteral, Seq(castStringLiteralAsTimestamp)))
   }
+
+  test("SPARK-35937: GetDateFieldOperations") {
+    val ts = Literal(Timestamp.valueOf("2021-01-01 01:30:00"))
+    Seq(
+      DayOfYear, Year, YearOfWeek, Quarter, Month, DayOfMonth, DayOfWeek, WeekDay, WeekOfYear
+    ).foreach { operation =>
+      ruleTest(
+        AnsiTypeCoercion.GetDateFieldOperations, operation(ts), operation(Cast(ts, DateType)))
+    }
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out
index 9847386d76..5068a37130 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out
@@ -256,10 +256,13 @@ SELECT '' AS `54`, d1 as `timestamp`,
    date_part( 'minute', d1) AS `minute`, date_part( 'second', d1) AS `second`
    FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'
 -- !query schema
-struct<>
+struct<54:string,timestamp:timestamp,year:int,month:int,day:int,hour:int,minute:int,second:decimal(8,6)>
 -- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'year(spark_catalog.default.timestamp_tbl.d1)' due to data type mismatch: argument 1 requires date type, however, 'spark_catalog.default.timestamp_tbl.d1' is of timestamp type.; line 2 pos 4
+	1969-12-31 16:00:00	1969	12	31	16	0	0.000000
+	1997-01-02 00:00:00	1997	1	2	0	0	0.000000
+	1997-01-02 03:04:05	1997	1	2	3	4	5.000000
+	1997-02-10 17:32:01	1997	2	10	17	32	1.000000
+	2001-09-22 18:19:20	2001	9	22	18	19	20.000000


 -- !query
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 6dbffd7a8b..867738d418 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4025,6 +4025,14 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
     assert(minuteToSecDF.schema.head.dataType === DayTimeIntervalType(2, 3))
   }
 
+  test("SPARK-35937: Extract date field from timestamp should work in ANSI mode") {
+    withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
+      checkAnswer(sql("select extract(year from to_timestamp('2021-01-02 03:04:05'))"), Row(2021))
+      checkAnswer(sql("select extract(month from to_timestamp('2021-01-02 03:04:05'))"), Row(1))
+      checkAnswer(sql("select extract(day from to_timestamp('2021-01-02 03:04:05'))"), Row(2))
+    }
+  }
+
   test("SPARK-35545: split SubqueryExpression's children field into outer attributes and " +
     "join conditions") {
     withView("t") {
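
For reference, a minimal Scala sketch (not taken from the patch, and assuming a locally built Spark with this change applied) of the end-to-end behaviour the new rule enables. The config key and query mirror the SQLQuerySuite test above, which expects Row(2021).

  import org.apache.spark.sql.SparkSession

  val spark = SparkSession.builder().master("local[1]").getOrCreate()
  // ANSI mode uses the stricter implicit casts that previously failed to resolve
  // date-field extraction on a timestamp input.
  spark.conf.set("spark.sql.ansi.enabled", "true")
  // GetDateFieldOperations rewrites Year(ts) into Year(Cast(ts, DateType)), so this now resolves.
  spark.sql("SELECT extract(year FROM to_timestamp('2021-01-02 03:04:05'))").show()
  // expected single value: 2021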