From 4b5fc1da752ec008468ef80a5717c8beab468387 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 6 Apr 2021 17:59:50 +0300 Subject: [PATCH] [SPARK-34667][SQL] Support casting of year-month intervals to strings ### What changes were proposed in this pull request? 1. Added a new method `toYearMonthIntervalString()` to `IntervalUtils`, which converts a year-month interval given as a number of months to a string in the form **"INTERVAL '[sign]yearField-monthField' YEAR TO MONTH"**. 2. Extended the `Cast` expression to support casting of `YearMonthIntervalType` to `StringType`. ### Why are the changes needed? To conform to the ANSI SQL standard, which requires support for such casting. ### Does this PR introduce _any_ user-facing change? It should not, because the new year-month interval type has not been released yet. ### How was this patch tested? Added new tests for casting: ``` $ build/sbt "testOnly *CastSuite*" ``` Closes #32056 from MaxGekk/cast-ym-interval-to-string. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../spark/sql/catalyst/expressions/Cast.scala | 6 +++++ .../sql/catalyst/util/IntervalUtils.scala | 17 ++++++++++++ .../sql/catalyst/expressions/CastSuite.scala | 27 ++++++++++++++++--- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 6b18563f36..1c37713204 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -406,6 +406,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case pudt: PythonUserDefinedType => castToString(pudt.sqlType) case udt: UserDefinedType[_] => buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString)) + case YearMonthIntervalType => + buildCast[Int](_, i => 
UTF8String.fromString(IntervalUtils.toYearMonthIntervalString(i))) case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString)) } @@ -1121,6 +1123,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => { code"$evPrim = UTF8String.fromString($udtRef.deserialize($c).toString());" } + case YearMonthIntervalType => + val iu = IntervalUtils.getClass.getName.stripSuffix("$") + (c, evPrim, _) => + code"""$evPrim = UTF8String.fromString($iu.toYearMonthIntervalString($c));""" case _ => (c, evPrim, evNull) => code"$evPrim = UTF8String.fromString(String.valueOf($c));" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 58a52475b6..8cd9d28154 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -834,4 +834,21 @@ object IntervalUtils { * @return The period of months, not null */ def monthsToPeriod(months: Int): Period = Period.ofMonths(months).normalized() + + /** + * Converts a year-month interval given as a number of months to its textual representation + * which conforms to the ANSI SQL standard. 
+ * + * @param months The number of months, positive or negative + * @return Year-month interval string + */ + def toYearMonthIntervalString(months: Int): String = { + var sign = "" + var absMonths: Long = months + if (months < 0) { + sign = "-" + absMonths = -absMonths + } + s"INTERVAL '$sign${absMonths / MONTHS_PER_YEAR}-${absMonths % MONTHS_PER_YEAR}' YEAR TO MONTH" + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 3a79e8d383..547bf88767 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} -import java.time.DateTimeException +import java.time.{DateTimeException, Period} import java.util.{Calendar, TimeZone} import scala.collection.parallel.immutable.ParVector @@ -64,9 +64,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { atomicTypes.foreach(dt => checkNullCast(NullType, dt)) (atomicTypes -- Set( // TODO(SPARK-34668): Support casting of day-time intervals to strings - DayTimeIntervalType, - // TODO(SPARK-34667): Support casting of year-month intervals to strings - YearMonthIntervalType)).foreach(dt => checkNullCast(dt, StringType)) + DayTimeIntervalType)).foreach(dt => checkNullCast(dt, StringType)) checkNullCast(StringType, BinaryType) checkNullCast(StringType, BooleanType) numericTypes.foreach(dt => checkNullCast(dt, BooleanType)) @@ -799,6 +797,27 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-34667: cast year-month interval to string") { + Seq( + Period.ofMonths(0) -> "0-0", + Period.ofMonths(1) -> "0-1", + Period.ofMonths(-1) -> "-0-1", + Period.ofYears(1) -> "1-0", + Period.ofYears(-1) 
-> "-1-0", + Period.ofYears(10).plusMonths(10) -> "10-10", + Period.ofYears(-123).minusMonths(6) -> "-123-6", + Period.ofMonths(Int.MaxValue) -> "178956970-7", + Period.ofMonths(Int.MinValue) -> "-178956970-8" + ).foreach { case (period, intervalPayload) => + checkEvaluation( + Cast(Literal(period), StringType), + s"INTERVAL '$intervalPayload' YEAR TO MONTH") + } + + checkConsistencyBetweenInterpretedAndCodegen( + (child: Expression) => Cast(child, StringType), YearMonthIntervalType) + } } abstract class AnsiCastSuiteBase extends CastSuiteBase {