From 647422685292cd1a46766afa9b07b6fcfc181bbd Mon Sep 17 00:00:00 2001
From: Kousuke Saruta
Date: Mon, 5 Jul 2021 10:35:50 +0300
Subject: [PATCH] [SPARK-35982][SQL] Allow from_json/to_json for map types
 where value types are year-month intervals

### What changes were proposed in this pull request?

This PR fixes two issues. One is that `to_json` doesn't support `map` types whose value types are year-month interval types:

```
spark-sql> select to_json(map('a', interval '1-2' year to month));
21/07/02 11:38:15 ERROR SparkSQLDriver: Failed in [select to_json(map('a', interval '1-2' year to month))]
java.lang.RuntimeException: Failed to convert value 14 (class of class java.lang.Integer) with the type of YearMonthIntervalType(0,1) to JSON.
```

The other issue is that, even once `to_json` is fixed, `from_json` can't parse a year-month interval string back out of JSON, so the following query returns `null`:

```
spark-sql> select from_json(to_json(map('a', interval '1-2' year to month)), 'a interval year to month');
{"a":null}
```

### Why are the changes needed?

There should be no reason why year-month intervals cannot be used as map value types; `CalendarIntervalType` values already can be.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New tests.

Closes #33181 from sarutak/map-json-yminterval.

Authored-by: Kousuke Saruta
Signed-off-by: Max Gekk
---
 .../sql/catalyst/json/JacksonGenerator.scala  |  9 +++++++
 .../sql/catalyst/json/JacksonParser.scala     |  7 +++++
 .../apache/spark/sql/JsonFunctionsSuite.scala | 26 +++++++++++++++++++
 3 files changed, 42 insertions(+)
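For reference, the round trip this patch enables looks like the following from the DataFrame API. This is a minimal sketch of a `spark-shell` session against a build with this patch; the column name `v` and the map key `a` are illustrative, not part of the change:

```scala
import java.time.Period

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

import spark.implicits._  // `spark` is the shell's SparkSession

// Period.of(1, 2, 0) is encoded as a year-month interval of 14 months.
val df = Seq(Period.of(1, 2, 0)).toDF("v")

// to_json now renders the interval value as an ANSI interval string.
val json = df.select(to_json(map(lit("a"), $"v")) as "json")
json.show(false)  // {"a":"INTERVAL '1-2' YEAR TO MONTH"}

// from_json turns the string back into a year-month interval field.
json
  .select(from_json($"json", StructType(StructField("a", YearMonthIntervalType()) :: Nil)) as "s")
  .select($"s.a")
  .collect()        // Array([P1Y2M]), i.e. Period.of(1, 2, 0)
```

As the new test asserts, reading the string back with a mismatched interval width (e.g. `INTERVAL YEAR` for a value written as `YEAR TO MONTH`) makes `from_json` produce `null` for that field rather than fail.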
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
index 2567438313..9777d562c5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
@@ -147,6 +147,15 @@ private[sql] class JacksonGenerator(
       (row: SpecializedGetters, ordinal: Int) =>
         gen.writeString(row.getInterval(ordinal).toString)
 
+    case YearMonthIntervalType(start, end) =>
+      (row: SpecializedGetters, ordinal: Int) =>
+        val ymString = IntervalUtils.toYearMonthIntervalString(
+          row.getInt(ordinal),
+          IntervalStringStyles.ANSI_STYLE,
+          start,
+          end)
+        gen.writeString(ymString)
+
     case BinaryType =>
       (row: SpecializedGetters, ordinal: Int) =>
         gen.writeBinary(row.getBinary(ordinal))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index 27e1411cf3..2aa735d8eb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -295,6 +295,13 @@ class JacksonParser(
           IntervalUtils.safeStringToInterval(UTF8String.fromString(parser.getText))
       }
 
+    case ym: YearMonthIntervalType => (parser: JsonParser) =>
+      parseJsonToken[Integer](parser, dataType) {
+        case VALUE_STRING =>
+          val expr = Cast(Literal(parser.getText), ym)
+          Integer.valueOf(expr.eval(EmptyRow).asInstanceOf[Int])
+      }
+
     case st: StructType =>
       val fieldConverters = st.map(_.dataType).map(makeConverter).toArray
       (parser: JsonParser) => parseJsonToken[InternalRow](parser, dataType) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 5485cc1b83..c2bea8cbf1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql
 
 import java.text.SimpleDateFormat
+import java.time.Period
 import java.util.Locale
 
 import collection.JavaConverters._
@@ -27,6 +28,7 @@ import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.YearMonthIntervalType.{MONTH, YEAR}
 
 class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
   import testImplicits._
@@ -821,4 +823,28 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
       }
     }
   }
+
+  test("SPARK-35982: from_json/to_json for map types where value types are year-month intervals") {
+    val ymDF = Seq(Period.of(1, 2, 0)).toDF
+    Seq(
+      (YearMonthIntervalType(), """{"key":"INTERVAL '1-2' YEAR TO MONTH"}""", Period.of(1, 2, 0)),
+      (YearMonthIntervalType(YEAR), """{"key":"INTERVAL '1' YEAR"}""", Period.of(1, 0, 0)),
+      (YearMonthIntervalType(MONTH), """{"key":"INTERVAL '14' MONTH"}""", Period.of(1, 2, 0))
+    ).foreach { case (toJsonDtype, toJsonExpected, fromJsonExpected) =>
+      val toJsonDF = ymDF.select(to_json(map(lit("key"), $"value" cast toJsonDtype)) as "json")
+      checkAnswer(toJsonDF, Row(toJsonExpected))
+
+      DataTypeTestUtils.yearMonthIntervalTypes.foreach { fromJsonDtype =>
+        val fromJsonDF = toJsonDF
+          .select(
+            from_json($"json", StructType(StructField("key", fromJsonDtype) :: Nil)) as "value")
+          .selectExpr("value['key']")
+        if (toJsonDtype == fromJsonDtype) {
+          checkAnswer(fromJsonDF, Row(fromJsonExpected))
+        } else {
+          checkAnswer(fromJsonDF, Row(null))
+        }
+      }
+    }
+  }
 }
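On the read side, the new `JacksonParser` branch delegates to `Cast` to turn the ANSI interval string back into the internal representation, which for year-month intervals is a single `Int` count of months. A self-contained sketch of that conversion (it uses catalyst classes directly, which are internal API, purely to illustrate the mechanism):

```scala
import org.apache.spark.sql.catalyst.expressions.{Cast, EmptyRow, Literal}
import org.apache.spark.sql.types.YearMonthIntervalType

// INTERVAL '1-2' YEAR TO MONTH is stored as 1 * 12 + 2 = 14 months,
// the same "value 14" that appears in the to_json error message above.
val months = Cast(Literal("INTERVAL '1-2' YEAR TO MONTH"), YearMonthIntervalType())
  .eval(EmptyRow)
  .asInstanceOf[Int]
assert(months == 14)
```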