[SPARK-35698][SQL] Support casting of timestamp without time zone to strings
### What changes were proposed in this pull request? Extend the Cast expression and support TimestampWithoutTZType in casting to StringType. ### Why are the changes needed? To conform to the ANSI SQL standard, which requires support for such casting. ### Does this PR introduce _any_ user-facing change? No, the new timestamp type is not released yet. ### How was this patch tested? Unit test. Closes #32846 from gengliangwang/tswtzToString. Authored-by: Gengliang Wang <gengliang@apache.org> Signed-off-by: Gengliang Wang <gengliang@apache.org>
This commit is contained in:
parent
f49bf1a072
commit
74b3df86f3
|
@ -17,7 +17,7 @@
|
|||
|
||||
package org.apache.spark.sql.catalyst.expressions
|
||||
|
||||
import java.time.ZoneId
|
||||
import java.time.{ZoneId, ZoneOffset}
|
||||
import java.util.Locale
|
||||
import java.util.concurrent.TimeUnit._
|
||||
|
||||
|
@ -306,6 +306,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
|
|||
|
||||
private lazy val dateFormatter = DateFormatter()
|
||||
private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)
|
||||
private lazy val timestampWithoutTZFormatter =
|
||||
TimestampFormatter.getFractionFormatter(ZoneOffset.UTC)
|
||||
|
||||
private val legacyCastToStr = SQLConf.get.getConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING)
|
||||
// The brackets that are used in casting structs and maps to strings
|
||||
|
@ -319,6 +321,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
|
|||
case DateType => buildCast[Int](_, d => UTF8String.fromString(dateFormatter.format(d)))
|
||||
case TimestampType => buildCast[Long](_,
|
||||
t => UTF8String.fromString(timestampFormatter.format(t)))
|
||||
case TimestampWithoutTZType => buildCast[Long](_,
|
||||
t => UTF8String.fromString(timestampWithoutTZFormatter.format(t)))
|
||||
case ArrayType(et, _) =>
|
||||
buildCast[ArrayData](_, array => {
|
||||
val builder = new UTF8StringBuilder
|
||||
|
@ -1100,6 +1104,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
|
|||
ctx.addReferenceObj("timestampFormatter", timestampFormatter),
|
||||
timestampFormatter.getClass)
|
||||
(c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString($tf.format($c));"""
|
||||
case TimestampWithoutTZType =>
|
||||
val tf = JavaCode.global(
|
||||
ctx.addReferenceObj("timestampWithoutTZFormatter", timestampWithoutTZFormatter),
|
||||
timestampWithoutTZFormatter.getClass)
|
||||
(c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString($tf.format($c));"""
|
||||
case CalendarIntervalType =>
|
||||
(c, evPrim, _) => code"""$evPrim = UTF8String.fromString($c.toString());"""
|
||||
case ArrayType(et, _) =>
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
package org.apache.spark.sql.catalyst.expressions
|
||||
|
||||
import java.sql.{Date, Timestamp}
|
||||
import java.time.{DateTimeException, Duration, Period}
|
||||
import java.time.{DateTimeException, Duration, LocalDateTime, Period}
|
||||
import java.time.temporal.ChronoUnit
|
||||
import java.util.{Calendar, TimeZone}
|
||||
|
||||
|
@ -63,10 +63,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
checkNullCast(from, to)
|
||||
}
|
||||
|
||||
(atomicTypes -- Set(
|
||||
// TODO(SPARK-35698): Support casting timestamp without time zone to strings.
|
||||
TimestampWithoutTZType
|
||||
)).foreach(dt => checkNullCast(NullType, dt))
|
||||
atomicTypes.foreach(dt => checkNullCast(NullType, dt))
|
||||
atomicTypes.foreach(dt => checkNullCast(dt, StringType))
|
||||
checkNullCast(StringType, BinaryType)
|
||||
checkNullCast(StringType, BooleanType)
|
||||
|
@ -1711,6 +1708,17 @@ class CastSuite extends CastSuiteBase {
|
|||
}
|
||||
}
|
||||
|
||||
test("SPARK-35698: cast timestamp without time zone to string") {
|
||||
Seq(
|
||||
"0001-01-01 00:00:00", // the first timestamp of Common Era
|
||||
"1582-10-15 23:59:59", // the cutover date from Julian to Gregorian calendar
|
||||
"1970-01-01 00:00:00", // the epoch timestamp
|
||||
"9999-12-31 23:59:59" // the last supported timestamp according to SQL standard
|
||||
).foreach { s =>
|
||||
checkEvaluation(cast(LocalDateTime.parse(s.replace(" ", "T")), StringType), s)
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-32828: cast from a derived user-defined type to a base type") {
|
||||
val v = Literal.create(Row(1), new ExampleSubTypeUDT())
|
||||
checkEvaluation(cast(v, new ExampleBaseTypeUDT), Row(1))
|
||||
|
|
Loading…
Reference in a new issue