[SPARK-17989][SQL] Check ascendingOrder type in sort_array function rather than throwing ClassCastException
## What changes were proposed in this pull request?

This PR proposes to check the type of the second argument, `ascendingOrder`, rather than throwing a `ClassCastException`.

```sql
select sort_array(array('b', 'd'), '1');
```

**Before**

```
16/10/19 13:16:08 ERROR SparkSQLDriver: Failed in [select sort_array(array('b', 'd'), '1')]
java.lang.ClassCastException: org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.Boolean
	at scala.runtime.BoxesRunTime.unboxToBoolean(BoxesRunTime.java:85)
	at org.apache.spark.sql.catalyst.expressions.SortArray.nullSafeEval(collectionOperations.scala:185)
	at org.apache.spark.sql.catalyst.expressions.BinaryExpression.eval(Expression.scala:416)
	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:50)
	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:43)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:292)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:292)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:74)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:291)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:297)
```

**After**

```
Error in query: cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7;
```

## How was this patch tested?

Unit test in `DataFrameFunctionsSuite`.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #15532 from HyukjinKwon/SPARK-17989.
This commit is contained in:
parent
444c2d22e3
commit
4b2011ec9d
|
@ -124,7 +124,13 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
|
|||
|
||||
override def checkInputDataTypes(): TypeCheckResult = base.dataType match {
|
||||
case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
|
||||
TypeCheckResult.TypeCheckSuccess
|
||||
ascendingOrder match {
|
||||
case Literal(_: Boolean, BooleanType) =>
|
||||
TypeCheckResult.TypeCheckSuccess
|
||||
case _ =>
|
||||
TypeCheckResult.TypeCheckFailure(
|
||||
"Sort order in second argument requires a boolean literal.")
|
||||
}
|
||||
case ArrayType(dt, _) =>
|
||||
TypeCheckResult.TypeCheckFailure(
|
||||
s"$prettyName does not support sorting array of type ${dt.simpleString}")
|
||||
|
|
|
@ -71,6 +71,12 @@ select
|
|||
sort_array(timestamp_array)
|
||||
from primitive_arrays;
|
||||
|
||||
-- sort_array with an invalid string literal for the argument of sort order.
|
||||
select sort_array(array('b', 'd'), '1');
|
||||
|
||||
-- sort_array with an invalid null literal cast as boolean for the argument of sort order.
|
||||
select sort_array(array('b', 'd'), cast(NULL as boolean));
|
||||
|
||||
-- size
|
||||
select
|
||||
size(boolean_array),
|
||||
|
|
|
@ -124,8 +124,23 @@ struct<sort_array(boolean_array, true):array<boolean>,sort_array(tinyint_array,
|
|||
-- !query 8 output
|
||||
[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00.0,2016-11-15 20:54:00.0]
|
||||
|
||||
|
||||
-- !query 9
|
||||
select sort_array(array('b', 'd'), '1')
|
||||
-- !query 9 schema
|
||||
struct<>
|
||||
-- !query 9 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7
|
||||
|
||||
-- !query 10
|
||||
select sort_array(array('b', 'd'), cast(NULL as boolean))
|
||||
-- !query 10 schema
|
||||
struct<>
|
||||
-- !query 10 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'sort_array(array('b', 'd'), CAST(NULL AS BOOLEAN))' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7
|
||||
|
||||
-- !query 11
|
||||
select
|
||||
size(boolean_array),
|
||||
size(tinyint_array),
|
||||
|
@ -138,7 +153,7 @@ select
|
|||
size(date_array),
|
||||
size(timestamp_array)
|
||||
from primitive_arrays
|
||||
-- !query 9 schema
|
||||
-- !query 11 schema
|
||||
struct<size(boolean_array):int,size(tinyint_array):int,size(smallint_array):int,size(int_array):int,size(bigint_array):int,size(decimal_array):int,size(double_array):int,size(float_array):int,size(date_array):int,size(timestamp_array):int>
|
||||
-- !query 9 output
|
||||
-- !query 11 output
|
||||
1 2 2 2 2 2 2 2 2 2
|
||||
|
|
Loading…
Reference in a new issue