[SPARK-34667][SQL] Support casting of year-month intervals to strings

### What changes were proposed in this pull request?
1. Added a new method `toYearMonthIntervalString()` to `IntervalUtils`, which converts a year-month interval, given as a number of months, to a string in the form **"INTERVAL '[sign]yearField-monthField' YEAR TO MONTH"**.
2. Extended the `Cast` expression to support casting of `YearMonthIntervalType` to `StringType`.

### Why are the changes needed?
To conform to the ANSI SQL standard, which requires support for such casting.

### Does this PR introduce _any_ user-facing change?
It should not, because the new year-month interval type has not been released yet.

### How was this patch tested?
Added new tests for casting:
```
$ build/sbt "testOnly *CastSuite*"
```

Closes #32056 from MaxGekk/cast-ym-interval-to-string.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
This commit is contained in:
Max Gekk 2021-04-06 17:59:50 +03:00
parent 19c7d2f3d8
commit 4b5fc1da75
3 changed files with 46 additions and 4 deletions

View file

@ -406,6 +406,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case pudt: PythonUserDefinedType => castToString(pudt.sqlType)
case udt: UserDefinedType[_] =>
buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString))
case YearMonthIntervalType =>
buildCast[Int](_, i => UTF8String.fromString(IntervalUtils.toYearMonthIntervalString(i)))
case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString))
}
@ -1121,6 +1123,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
(c, evPrim, evNull) => {
code"$evPrim = UTF8String.fromString($udtRef.deserialize($c).toString());"
}
case YearMonthIntervalType =>
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
(c, evPrim, _) =>
code"""$evPrim = UTF8String.fromString($iu.toYearMonthIntervalString($c));"""
case _ =>
(c, evPrim, evNull) => code"$evPrim = UTF8String.fromString(String.valueOf($c));"
}

View file

@ -834,4 +834,21 @@ object IntervalUtils {
* @return The period of months, not null
*/
def monthsToPeriod(months: Int): Period = {
  // Build a months-only period first, then let java.time normalize it
  // so that whole years are carried out of the months field.
  val monthsOnly = Period.ofMonths(months)
  monthsOnly.normalized()
}
/**
 * Renders a year-month interval, given as a total number of months, in the form
 * `INTERVAL '[sign]years-months' YEAR TO MONTH`.
 *
 * A leading `-` is emitted only for negative inputs; the year and month fields
 * themselves are always printed as non-negative numbers.
 *
 * @param months The number of months, positive or negative
 * @return Year-month interval string
 */
def toYearMonthIntervalString(months: Int): String = {
  val signPrefix = if (months < 0) "-" else ""
  // Widen to Long before taking the magnitude so that Int.MinValue does not overflow.
  val magnitude: Long = math.abs(months.toLong)
  val yearField = magnitude / MONTHS_PER_YEAR
  val monthField = magnitude % MONTHS_PER_YEAR
  s"INTERVAL '$signPrefix$yearField-$monthField' YEAR TO MONTH"
}
}

View file

@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.expressions
import java.sql.{Date, Timestamp}
import java.time.DateTimeException
import java.time.{DateTimeException, Period}
import java.util.{Calendar, TimeZone}
import scala.collection.parallel.immutable.ParVector
@ -64,9 +64,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
atomicTypes.foreach(dt => checkNullCast(NullType, dt))
(atomicTypes -- Set(
// TODO(SPARK-34668): Support casting of day-time intervals to strings
DayTimeIntervalType,
// TODO(SPARK-34667): Support casting of year-month intervals to strings
YearMonthIntervalType)).foreach(dt => checkNullCast(dt, StringType))
DayTimeIntervalType)).foreach(dt => checkNullCast(dt, StringType))
checkNullCast(StringType, BinaryType)
checkNullCast(StringType, BooleanType)
numericTypes.foreach(dt => checkNullCast(dt, BooleanType))
@ -799,6 +797,27 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
}
}
}
test("SPARK-34667: cast year-month interval to string") {
  // Each entry pairs an input period with the expected '[sign]years-months' payload
  // of the resulting interval string, including both extremes of the Int range.
  val periodsWithPayloads = Seq(
    Period.ofMonths(0) -> "0-0",
    Period.ofMonths(1) -> "0-1",
    Period.ofMonths(-1) -> "-0-1",
    Period.ofYears(1) -> "1-0",
    Period.ofYears(-1) -> "-1-0",
    Period.ofYears(10).plusMonths(10) -> "10-10",
    Period.ofYears(-123).minusMonths(6) -> "-123-6",
    Period.ofMonths(Int.MaxValue) -> "178956970-7",
    Period.ofMonths(Int.MinValue) -> "-178956970-8")

  periodsWithPayloads.foreach { case (input, payload) =>
    val castToString = Cast(Literal(input), StringType)
    val expected = s"INTERVAL '$payload' YEAR TO MONTH"
    checkEvaluation(castToString, expected)
  }

  // Interpreted and generated code paths must agree for this cast.
  val castFactory = (child: Expression) => Cast(child, StringType)
  checkConsistencyBetweenInterpretedAndCodegen(castFactory, YearMonthIntervalType)
}
}
abstract class AnsiCastSuiteBase extends CastSuiteBase {