[SPARK-13651] Generator outputs are not resolved correctly resulting in run time error
## What changes were proposed in this pull request?

The following query:

```
Seq(("id1", "value1")).toDF("key", "value").registerTempTable("src")
sqlContext.sql("SELECT t1.* FROM src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) t1 AS key, value")
```

results in the following logical plan:

```
Project [key#2,value#3]
+- Generate explode(HiveGenericUDF#org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap(key1,100,key2,200)), true, false, Some(genoutput), [key#2,value#3]
   +- SubqueryAlias src
      +- Project [_1#0 AS key#2,_2#1 AS value#3]
         +- LocalRelation [_1#0,_2#1], [[id1,value1]]
```

The above query fails with the following runtime error:

```
java.lang.ClassCastException: java.lang.Integer cannot be cast to org.apache.spark.unsafe.types.UTF8String
  at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getUTF8String(rows.scala:46)
  at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.getUTF8String(rows.scala:221)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(generated.java:42)
  at org.apache.spark.sql.execution.Generate$$anonfun$doExecute$1$$anonfun$apply$9.apply(Generate.scala:98)
  at org.apache.spark.sql.execution.Generate$$anonfun$doExecute$1$$anonfun$apply$9.apply(Generate.scala:96)
  at scala.collection.Iterator$$anon$11.next(Iterator.scala:370)
  at scala.collection.Iterator$$anon$11.next(Iterator.scala:370)
  at scala.collection.Iterator$class.foreach(Iterator.scala:742)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
  <stack-trace omitted.....>
```

In this case the generator's output attributes (`key`, `value`) are wrongly resolved from the output of the Generate node's child (the `src` relation, which has columns with the same names) because of https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala#L537-L548

## How was this patch tested?

(Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests)

Added unit tests in hive/SQLQuerySuite and AnalysisSuite.

Author: Dilip Biswal <dbiswal@us.ibm.com>

Closes #11497 from dilipbiswal/spark-13651.
This commit is contained in:
parent
03f57a6c2d
commit
d7eac9d795
|
@ -512,8 +512,9 @@ class Analyzer(
|
|||
|
||||
// A special case for Generate, because the output of Generate should not be resolved by
|
||||
// ResolveReferences. Attributes in the output will be resolved by ResolveGenerate.
|
||||
case g @ Generate(generator, join, outer, qualifier, output, child)
|
||||
if child.resolved && !generator.resolved =>
|
||||
case g @ Generate(generator, _, _, _, _, _) if generator.resolved => g
|
||||
|
||||
case g @ Generate(generator, join, outer, qualifier, output, child) =>
|
||||
val newG = resolveExpression(generator, child, throws = true)
|
||||
if (newG.fastEquals(generator)) {
|
||||
g
|
||||
|
|
|
@ -92,6 +92,16 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
checkAnswer(query, Row(1, 1) :: Row(1, 2) :: Row(1, 3) :: Nil)
|
||||
}
|
||||
|
||||
// Regression test for SPARK-13651: the temp table `src` has columns named key/value, and the
// LATERAL VIEW names its generator outputs key/value as well. The expected rows are the
// exploded map entries ("key1" -> 100, "key2" -> 200), not the values stored in `src`.
test("SPARK-13651: generator outputs shouldn't be resolved from its child's output") {
|
||||
withTempTable("src") {
|
||||
Seq(("id1", "value1")).toDF("key", "value").registerTempTable("src")
|
||||
val query =
|
||||
sql("SELECT genoutput.* FROM src " +
|
||||
"LATERAL VIEW explode(map('key1', 100, 'key2', 200)) genoutput AS key, value")
|
||||
checkAnswer(query, Row("key1", 100) :: Row("key2", 200) :: Nil)
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-6851: Self-joined converted parquet tables") {
|
||||
val orders = Seq(
|
||||
Order(1, "Atlas", "MTB", 234, "2015-01-07", "John D", "Pacifica", "CA", 20151),
|
||||
|
|
Loading…
Reference in a new issue