[SPARK-4244] [SQL] Support Hive Generic UDFs with constant object inspector parameters
The query `SELECT named_struct(lower("AA"), "12", lower("Bb"), "13") FROM src LIMIT 1` throws an exception: some Hive generic UDFs/UDAFs require their input object inspector to be a `ConstantObjectInspector`, but we cannot provide one until expression optimization (constant folding) has been executed. This PR is a workaround for that. (Ideally, the `output` of a LogicalPlan should be identical before and after optimization.) Author: Cheng Hao <hao.cheng@intel.com> Closes #3109 from chenghao-intel/optimized and squashes the following commits: 487ff79 [Cheng Hao] rebase to the latest master & update the unittest
This commit is contained in:
parent
d39f2e9c68
commit
84d79ee9ec
|
@ -326,6 +326,8 @@ private[hive] trait HiveInspectors {
|
|||
})
|
||||
ObjectInspectorFactory.getStandardConstantMapObjectInspector(keyOI, valueOI, map)
|
||||
}
|
||||
case Literal(_, dt) => sys.error(s"Hive doesn't support the constant type [$dt].")
|
||||
case _ if expr.foldable => toInspector(Literal(expr.eval(), expr.dataType))
|
||||
case _ => toInspector(expr.dataType)
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ConversionHelper
|
|||
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ConstantObjectInspector}
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory
|
||||
import org.apache.hadoop.hive.ql.exec.{UDF, UDAF}
|
||||
|
@ -108,9 +108,7 @@ private[hive] case class HiveSimpleUdf(functionClassName: String, children: Seq[
|
|||
udfType != null && udfType.deterministic()
|
||||
}
|
||||
|
||||
override def foldable = {
|
||||
isUDFDeterministic && children.foldLeft(true)((prev, n) => prev && n.foldable)
|
||||
}
|
||||
override def foldable = isUDFDeterministic && children.forall(_.foldable)
|
||||
|
||||
// Create parameter converters
|
||||
@transient
|
||||
|
@ -154,7 +152,8 @@ private[hive] case class HiveGenericUdf(functionClassName: String, children: Seq
|
|||
protected lazy val argumentInspectors = children.map(toInspector)
|
||||
|
||||
@transient
|
||||
protected lazy val returnInspector = function.initialize(argumentInspectors.toArray)
|
||||
protected lazy val returnInspector =
|
||||
function.initializeAndFoldConstants(argumentInspectors.toArray)
|
||||
|
||||
@transient
|
||||
protected lazy val isUDFDeterministic = {
|
||||
|
@ -162,9 +161,8 @@ private[hive] case class HiveGenericUdf(functionClassName: String, children: Seq
|
|||
(udfType != null && udfType.deterministic())
|
||||
}
|
||||
|
||||
override def foldable = {
|
||||
isUDFDeterministic && children.foldLeft(true)((prev, n) => prev && n.foldable)
|
||||
}
|
||||
override def foldable =
|
||||
isUDFDeterministic && returnInspector.isInstanceOf[ConstantObjectInspector]
|
||||
|
||||
@transient
|
||||
protected lazy val deferedObjects =
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
{"aa":"10","aaaaaa":"11","aaaaaa":"12","bb12":"13","s14s14":"14"}
|
|
@ -56,6 +56,14 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
|
|||
Locale.setDefault(originalLocale)
|
||||
}
|
||||
|
||||
createQueryTest("constant object inspector for generic udf",
|
||||
"""SELECT named_struct(
|
||||
lower("AA"), "10",
|
||||
repeat(lower("AA"), 3), "11",
|
||||
lower(repeat("AA", 3)), "12",
|
||||
printf("Bb%d", 12), "13",
|
||||
repeat(printf("s%d", 14), 2), "14") FROM src LIMIT 1""")
|
||||
|
||||
createQueryTest("NaN to Decimal",
|
||||
"SELECT CAST(CAST('NaN' AS DOUBLE) AS DECIMAL(1,1)) FROM src LIMIT 1")
|
||||
|
||||
|
|
Loading…
Reference in a new issue