[SPARK-33879][SQL] Char Varchar values fail w/ match error as partition columns
### What changes were proposed in this pull request? ```sql spark-sql> select * from t10 where c0='abcd'; 20/12/22 15:43:38 ERROR SparkSQLDriver: Failed in [select * from t10 where c0='abcd'] scala.MatchError: CharType(10) (of class org.apache.spark.sql.types.CharType) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:815) at org.apache.spark.sql.catalyst.expressions.CastBase.cast$lzycompute(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.nullSafeEval(Cast.scala:844) at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:476) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.$anonfun$toRow$2(interface.scala:164) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) at scala.collection.Iterator.foreach(Iterator.scala:941) at scala.collection.Iterator.foreach$(Iterator.scala:941) at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at org.apache.spark.sql.types.StructType.foreach(StructType.scala:102) at scala.collection.TraversableLike.map(TraversableLike.scala:238) at scala.collection.TraversableLike.map$(TraversableLike.scala:231) at org.apache.spark.sql.types.StructType.map(StructType.scala:102) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.toRow(interface.scala:158) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3(ExternalCatalogUtils.scala:157) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3$adapted(ExternalCatalogUtils.scala:156) ``` c0 is a partition column; it fails in the partition pruning rule. In this PR, we replace char/varchar w/ string type before the CAST happens. ### Why are the changes needed? 
bugfix, see the case above ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? yes, new tests Closes #30887 from yaooqinn/SPARK-33879. Authored-by: Kent Yao <yaooqinn@hotmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
e853f068f6
commit
2287f56a3e
|
@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException
|
|||
import org.apache.spark.sql.catalyst.analysis.Resolver
|
||||
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
|
||||
import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, BoundReference, Expression, Predicate}
|
||||
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
|
||||
|
||||
object ExternalCatalogUtils {
|
||||
// This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since catalyst doesn't
|
||||
|
@ -135,7 +136,8 @@ object ExternalCatalogUtils {
|
|||
if (predicates.isEmpty) {
|
||||
inputPartitions
|
||||
} else {
|
||||
val partitionSchema = catalogTable.partitionSchema
|
||||
val partitionSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(
|
||||
catalogTable.partitionSchema)
|
||||
val partitionColumnNames = catalogTable.partitionColumnNames.toSet
|
||||
|
||||
val nonPartitionPruningPredicates = predicates.filterNot {
|
||||
|
|
|
@ -356,6 +356,26 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {
|
|||
}
|
||||
}
|
||||
|
||||
test("char type comparison: partition pruning") {
|
||||
withTable("t") {
|
||||
sql(s"CREATE TABLE t(i INT, c1 CHAR(2), c2 VARCHAR(5)) USING $format PARTITIONED BY (c1, c2)")
|
||||
sql("INSERT INTO t VALUES (1, 'a', 'a')")
|
||||
Seq(("c1 = 'a'", true),
|
||||
("'a' = c1", true),
|
||||
("c1 = 'a '", true),
|
||||
("c1 > 'a'", false),
|
||||
("c1 IN ('a', 'b')", true),
|
||||
("c2 = 'a '", false),
|
||||
("c2 = 'a'", true),
|
||||
("c2 IN ('a', 'b')", true)).foreach { case (con, res) =>
|
||||
val df = spark.table("t")
|
||||
withClue(con) {
|
||||
checkAnswer(df.where(con), df.where(res.toString))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("char type comparison: join") {
|
||||
withTable("t1", "t2") {
|
||||
sql(s"CREATE TABLE t1(c CHAR(2)) USING $format")
|
||||
|
|
Loading…
Reference in a new issue