[SPARK-34984][SQL] ANSI intervals formatting in hive results
### What changes were proposed in this pull request?
Extend `HiveResult.toHiveString()` to support new interval types `YearMonthIntervalType` and `DayTimeIntervalType`.

### Why are the changes needed?
To fix failures while formatting ANSI intervals as Hive strings. For example:
```sql
spark-sql> select timestamp'now' - date'2021-01-01';
21/04/08 09:42:49 ERROR SparkSQLDriver: Failed in [select timestamp'now' - date'2021-01-01']
scala.MatchError: (PT2337H42M46.649S,DayTimeIntervalType) (of class scala.Tuple2)
	at org.apache.spark.sql.execution.HiveResult$.toHiveString(HiveResult.scala:97)
```

### Does this PR introduce _any_ user-facing change?
Yes. After the changes:
```sql
spark-sql> select timestamp'now' - date'2021-01-01';
INTERVAL '97 09:37:52.171' DAY TO SECOND
```

### How was this patch tested?
By running new tests:
```
$ build/sbt -Phive-2.3 -Phive-thriftserver "testOnly *HiveResultSuite"
```

Closes #32087 from MaxGekk/ansi-interval-hiveResultString.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
c1c9a318c2
commit
96a3533de8
|
@ -19,10 +19,11 @@ package org.apache.spark.sql.execution
|
|||
|
||||
import java.nio.charset.StandardCharsets
|
||||
import java.sql.{Date, Timestamp}
|
||||
import java.time.{Instant, LocalDate, ZoneOffset}
|
||||
import java.time.{Duration, Instant, LocalDate, Period, ZoneOffset}
|
||||
|
||||
import org.apache.spark.sql.Row
|
||||
import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter}
|
||||
import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString}
|
||||
import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand}
|
||||
import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec}
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
|
@ -117,6 +118,10 @@ object HiveResult {
|
|||
struct.toSeq.zip(fields).map { case (v, t) =>
|
||||
s""""${t.name}":${toHiveString((v, t.dataType), true, formatters)}"""
|
||||
}.mkString("{", ",", "}")
|
||||
case (period: Period, YearMonthIntervalType) =>
|
||||
toYearMonthIntervalString(periodToMonths(period))
|
||||
case (duration: Duration, DayTimeIntervalType) =>
|
||||
toDayTimeIntervalString(durationToMicros(duration))
|
||||
case (other, _: UserDefinedType[_]) => other.toString
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
package org.apache.spark.sql.execution
|
||||
|
||||
import java.time.{Duration, Period}
|
||||
|
||||
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils
|
||||
import org.apache.spark.sql.connector.InMemoryTableCatalog
|
||||
import org.apache.spark.sql.execution.HiveResult._
|
||||
|
@ -107,4 +109,20 @@ class HiveResultSuite extends SharedSparkSession {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-34984: year-month interval formatting in hive result") {
  // A single-row frame holding a negative period: -10 years and -1 month.
  val periods = Seq(Period.ofYears(-10).minusMonths(1)).toDF("i")

  // Top-level column: the value is rendered as an ANSI year-month interval literal.
  val topLevel = hiveResultString(periods.queryExecution.executedPlan)
  assert(topLevel === Seq("INTERVAL '-10-1' YEAR TO MONTH"))

  // Nested inside an array: the same literal appears between square brackets.
  val nested = hiveResultString(periods.selectExpr("array(i)").queryExecution.executedPlan)
  assert(nested === Seq("[INTERVAL '-10-1' YEAR TO MONTH]"))
}
|
||||
|
||||
test("SPARK-34984: day-time interval formatting in hive result") {
  // A single-row frame holding a duration of 5 days plus 10 milliseconds.
  val durations = Seq(Duration.ofDays(5).plusMillis(10)).toDF("i")

  // Top-level column: the value is rendered as an ANSI day-time interval literal.
  val topLevel = hiveResultString(durations.queryExecution.executedPlan)
  assert(topLevel === Seq("INTERVAL '5 00:00:00.01' DAY TO SECOND"))

  // Nested inside an array: the same literal appears between square brackets.
  val nested = hiveResultString(durations.selectExpr("array(i)").queryExecution.executedPlan)
  assert(nested === Seq("[INTERVAL '5 00:00:00.01' DAY TO SECOND]"))
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue