[SPARK-21677][SQL] json_tuple throws NullPointerException when column is null as string type
## What changes were proposed in this pull request?

```scala
scala> Seq(("""{"Hyukjin": 224, "John": 1225}""")).toDS.selectExpr("json_tuple(value, trim(null))").show()
...
java.lang.NullPointerException
  at ...
```

Currently, a `null` field name causes `json_tuple` to throw a NullPointerException. Since a null field name cannot match any field name in the JSON, we simply output null as the value of that column. This PR achieves it by returning a very unlikely column name `__NullFieldName` in evaluation of the field names.

## How was this patch tested?

Added unit test.

Author: Jen-Ming Chung <jenmingisme@gmail.com>

Closes #18930 from jmchung/SPARK-21677.
This commit is contained in:
parent
bfdc361ede
commit
7ab951885f
|
@ -362,9 +362,9 @@ case class JsonTuple(children: Seq[Expression])
|
|||
@transient private lazy val fieldExpressions: Seq[Expression] = children.tail
|
||||
|
||||
// eagerly evaluate any foldable field names
|
||||
@transient private lazy val foldableFieldNames: IndexedSeq[String] = {
|
||||
@transient private lazy val foldableFieldNames: IndexedSeq[Option[String]] = {
|
||||
fieldExpressions.map {
|
||||
case expr if expr.foldable => expr.eval().asInstanceOf[UTF8String].toString
|
||||
case expr if expr.foldable => Option(expr.eval()).map(_.asInstanceOf[UTF8String].toString)
|
||||
case _ => null
|
||||
}.toIndexedSeq
|
||||
}
|
||||
|
@ -417,7 +417,7 @@ case class JsonTuple(children: Seq[Expression])
|
|||
val fieldNames = if (constantFields == fieldExpressions.length) {
|
||||
// typically the user will provide the field names as foldable expressions
|
||||
// so we can use the cached copy
|
||||
foldableFieldNames
|
||||
foldableFieldNames.map(_.orNull)
|
||||
} else if (constantFields == 0) {
|
||||
// none are foldable so all field names need to be evaluated from the input row
|
||||
fieldExpressions.map(_.eval(input).asInstanceOf[UTF8String].toString)
|
||||
|
@ -426,7 +426,7 @@ case class JsonTuple(children: Seq[Expression])
|
|||
// prefer the cached copy when available
|
||||
foldableFieldNames.zip(fieldExpressions).map {
|
||||
case (null, expr) => expr.eval(input).asInstanceOf[UTF8String].toString
|
||||
case (fieldName, _) => fieldName
|
||||
case (fieldName, _) => fieldName.orNull
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -363,6 +363,16 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
|
|||
InternalRow(UTF8String.fromString("b\nc")))
|
||||
}
|
||||
|
||||
test("SPARK-21677: json_tuple throws NullPointException when column is null as string type") {
|
||||
checkJsonTuple(
|
||||
JsonTuple(Literal("""{"f1": 1, "f2": 2}""") ::
|
||||
NonFoldableLiteral("f1") ::
|
||||
NonFoldableLiteral("cast(NULL AS STRING)") ::
|
||||
NonFoldableLiteral("f2") ::
|
||||
Nil),
|
||||
InternalRow(UTF8String.fromString("1"), null, UTF8String.fromString("2")))
|
||||
}
|
||||
|
||||
val gmtId = Option(DateTimeUtils.TimeZoneGMT.getID)
|
||||
|
||||
test("from_json") {
|
||||
|
|
|
@ -20,3 +20,9 @@ select from_json('{"a":1}', 'a InvalidType');
|
|||
select from_json('{"a":1}', 'a INT', named_struct('mode', 'PERMISSIVE'));
|
||||
select from_json('{"a":1}', 'a INT', map('mode', 1));
|
||||
select from_json();
|
||||
-- json_tuple
|
||||
SELECT json_tuple('{"a" : 1, "b" : 2}', CAST(NULL AS STRING), 'b', CAST(NULL AS STRING), 'a');
|
||||
CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a');
|
||||
SELECT json_tuple(jsonField, 'b', CAST(NULL AS STRING), a) FROM jsonTable;
|
||||
-- Clean up
|
||||
DROP VIEW IF EXISTS jsonTable;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 17
|
||||
-- Number of queries: 21
|
||||
|
||||
|
||||
-- !query 0
|
||||
|
@ -178,3 +178,35 @@ struct<>
|
|||
-- !query 16 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
Invalid number of arguments for function from_json; line 1 pos 7
|
||||
|
||||
|
||||
-- !query 17
|
||||
SELECT json_tuple('{"a" : 1, "b" : 2}', CAST(NULL AS STRING), 'b', CAST(NULL AS STRING), 'a')
|
||||
-- !query 17 schema
|
||||
struct<c0:string,c1:string,c2:string,c3:string>
|
||||
-- !query 17 output
|
||||
NULL 2 NULL 1
|
||||
|
||||
|
||||
-- !query 18
|
||||
CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a')
|
||||
-- !query 18 schema
|
||||
struct<>
|
||||
-- !query 18 output
|
||||
|
||||
|
||||
|
||||
-- !query 19
|
||||
SELECT json_tuple(jsonField, 'b', CAST(NULL AS STRING), a) FROM jsonTable
|
||||
-- !query 19 schema
|
||||
struct<c0:string,c1:string,c2:string>
|
||||
-- !query 19 output
|
||||
2 NULL 1
|
||||
|
||||
|
||||
-- !query 20
|
||||
DROP VIEW IF EXISTS jsonTable
|
||||
-- !query 20 schema
|
||||
struct<>
|
||||
-- !query 20 output
|
||||
|
||||
|
|
Loading…
Reference in a new issue