[SPARK-34984][SQL] ANSI intervals formatting in hive results

### What changes were proposed in this pull request?
Extend `HiveResult.toHiveString()` to support new interval types `YearMonthIntervalType` and `DayTimeIntervalType`.

### Why are the changes needed?
To fix failures while formatting ANSI intervals as Hive strings. For example:
```sql
spark-sql> select timestamp'now' - date'2021-01-01';
21/04/08 09:42:49 ERROR SparkSQLDriver: Failed in [select timestamp'now' - date'2021-01-01']
scala.MatchError: (PT2337H42M46.649S,DayTimeIntervalType) (of class scala.Tuple2)
	at org.apache.spark.sql.execution.HiveResult$.toHiveString(HiveResult.scala:97)
```

### Does this PR introduce _any_ user-facing change?
Yes. After the changes:
```sql
spark-sql> select timestamp'now' - date'2021-01-01';
INTERVAL '97 09:37:52.171' DAY TO SECOND
```

### How was this patch tested?
By running new tests:
```
$ build/sbt -Phive-2.3 -Phive-thriftserver "testOnly *HiveResultSuite"
```

Closes #32087 from MaxGekk/ansi-interval-hiveResultString.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Max Gekk 2021-04-08 16:18:15 +00:00 committed by Wenchen Fan
parent c1c9a318c2
commit 96a3533de8
2 changed files with 24 additions and 1 deletions

View file

@@ -19,10 +19,11 @@ package org.apache.spark.sql.execution
import java.nio.charset.StandardCharsets
import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, ZoneOffset}
import java.time.{Duration, Instant, LocalDate, Period, ZoneOffset}
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString}
import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand}
import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec}
import org.apache.spark.sql.internal.SQLConf
@@ -117,6 +118,10 @@ object HiveResult {
struct.toSeq.zip(fields).map { case (v, t) =>
s""""${t.name}":${toHiveString((v, t.dataType), true, formatters)}"""
}.mkString("{", ",", "}")
case (period: Period, YearMonthIntervalType) =>
toYearMonthIntervalString(periodToMonths(period))
case (duration: Duration, DayTimeIntervalType) =>
toDayTimeIntervalString(durationToMicros(duration))
case (other, _: UserDefinedType[_]) => other.toString
}
}

View file

@@ -17,6 +17,8 @@
package org.apache.spark.sql.execution
import java.time.{Duration, Period}
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils
import org.apache.spark.sql.connector.InMemoryTableCatalog
import org.apache.spark.sql.execution.HiveResult._
@@ -107,4 +109,20 @@ class HiveResultSuite extends SharedSparkSession {
}
}
}
// SPARK-34984: a negative year-month interval must be rendered in Hive's
// `INTERVAL '...' YEAR TO MONTH` form, both as a plain column value and
// when nested inside an array.
test("SPARK-34984: year-month interval formatting in hive result") {
  val intervalDF = Seq(Period.ofYears(-10).minusMonths(1)).toDF("i")
  val scalarPlan = intervalDF.queryExecution.executedPlan
  assert(hiveResultString(scalarPlan) === Seq("INTERVAL '-10-1' YEAR TO MONTH"))
  val arrayPlan = intervalDF.selectExpr("array(i)").queryExecution.executedPlan
  assert(hiveResultString(arrayPlan) === Seq("[INTERVAL '-10-1' YEAR TO MONTH]"))
}
// SPARK-34984: a day-time interval must be rendered in Hive's
// `INTERVAL '...' DAY TO SECOND` form, both as a plain column value and
// when nested inside an array. Note the 10 ms tail prints as `.01`.
test("SPARK-34984: day-time interval formatting in hive result") {
  val intervalDF = Seq(Duration.ofDays(5).plusMillis(10)).toDF("i")
  val scalarPlan = intervalDF.queryExecution.executedPlan
  assert(hiveResultString(scalarPlan) === Seq("INTERVAL '5 00:00:00.01' DAY TO SECOND"))
  val arrayPlan = intervalDF.selectExpr("array(i)").queryExecution.executedPlan
  assert(hiveResultString(arrayPlan) === Seq("[INTERVAL '5 00:00:00.01' DAY TO SECOND]"))
}
}