[SPARK-33879][SQL] Char Varchar values fails w/ match error as partition columns

### What changes were proposed in this pull request?

```sql
spark-sql> select * from t10 where c0='abcd';
20/12/22 15:43:38 ERROR SparkSQLDriver: Failed in [select * from t10 where c0='abcd']
scala.MatchError: CharType(10) (of class org.apache.spark.sql.types.CharType)
	at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:815)
	at org.apache.spark.sql.catalyst.expressions.CastBase.cast$lzycompute(Cast.scala:842)
	at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:842)
	at org.apache.spark.sql.catalyst.expressions.CastBase.nullSafeEval(Cast.scala:844)
	at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:476)
	at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.$anonfun$toRow$2(interface.scala:164)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
	at scala.collection.Iterator.foreach(Iterator.scala:941)
	at scala.collection.Iterator.foreach$(Iterator.scala:941)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
	at scala.collection.IterableLike.foreach(IterableLike.scala:74)
	at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
	at org.apache.spark.sql.types.StructType.foreach(StructType.scala:102)
	at scala.collection.TraversableLike.map(TraversableLike.scala:238)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
	at org.apache.spark.sql.types.StructType.map(StructType.scala:102)
	at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.toRow(interface.scala:158)
	at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3(ExternalCatalogUtils.scala:157)
	at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3$adapted(ExternalCatalogUtils.scala:156)
```
c0 is a partition column, it fails in the partition pruning rule

In this PR, we relace char/varchar w/ string type before the CAST happends

### Why are the changes needed?

bugfix, see the case above

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

yes, new tests

Closes #30887 from yaooqinn/SPARK-33879.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Kent Yao 2020-12-23 16:14:27 +09:00 committed by HyukjinKwon
parent e853f068f6
commit 2287f56a3e
2 changed files with 23 additions and 1 deletions

View file

@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.Resolver
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, BoundReference, Expression, Predicate}
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
object ExternalCatalogUtils {
// This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since catalyst doesn't
@ -135,7 +136,8 @@ object ExternalCatalogUtils {
if (predicates.isEmpty) {
inputPartitions
} else {
val partitionSchema = catalogTable.partitionSchema
val partitionSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(
catalogTable.partitionSchema)
val partitionColumnNames = catalogTable.partitionColumnNames.toSet
val nonPartitionPruningPredicates = predicates.filterNot {

View file

@ -356,6 +356,26 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {
}
}
test("char type comparison: partition pruning") {
withTable("t") {
sql(s"CREATE TABLE t(i INT, c1 CHAR(2), c2 VARCHAR(5)) USING $format PARTITIONED BY (c1, c2)")
sql("INSERT INTO t VALUES (1, 'a', 'a')")
Seq(("c1 = 'a'", true),
("'a' = c1", true),
("c1 = 'a '", true),
("c1 > 'a'", false),
("c1 IN ('a', 'b')", true),
("c2 = 'a '", false),
("c2 = 'a'", true),
("c2 IN ('a', 'b')", true)).foreach { case (con, res) =>
val df = spark.table("t")
withClue(con) {
checkAnswer(df.where(con), df.where(res.toString))
}
}
}
}
test("char type comparison: join") {
withTable("t1", "t2") {
sql(s"CREATE TABLE t1(c CHAR(2)) USING $format")