[SPARK-25988][SQL] Keep names unchanged when deduplicating the column names in Analyzer
## What changes were proposed in this pull request? When the queries do not use the column names with the same case, users might hit various errors. Below is a typical test failure they can hit. ``` Expected only partition pruning predicates: ArrayBuffer(isnotnull(tdate#237), (cast(tdate#237 as string) >= 2017-08-15)); org.apache.spark.sql.AnalysisException: Expected only partition pruning predicates: ArrayBuffer(isnotnull(tdate#237), (cast(tdate#237 as string) >= 2017-08-15)); at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.prunePartitionsByFilter(ExternalCatalogUtils.scala:146) at org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.listPartitionsByFilter(InMemoryCatalog.scala:560) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.listPartitionsByFilter(SessionCatalog.scala:925) ``` ## How was this patch tested? Added two test cases. Closes #22990 from gatorsmile/fix1283. Authored-by: gatorsmile <gatorsmile@gmail.com> Signed-off-by: gatorsmile <gatorsmile@gmail.com>
This commit is contained in:
parent
25f506e2ad
commit
657fd00b52
|
@ -824,7 +824,8 @@ class Analyzer(
|
|||
}
|
||||
|
||||
private def dedupAttr(attr: Attribute, attrMap: AttributeMap[Attribute]): Attribute = {
|
||||
attrMap.get(attr).getOrElse(attr).withQualifier(attr.qualifier)
|
||||
val exprId = attrMap.getOrElse(attr, attr).exprId
|
||||
attr.withExprId(exprId)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -112,6 +112,7 @@ case class UnresolvedAttribute(nameParts: Seq[String]) extends Attribute with Un
|
|||
override def withQualifier(newQualifier: Seq[String]): UnresolvedAttribute = this
|
||||
override def withName(newName: String): UnresolvedAttribute = UnresolvedAttribute.quoted(newName)
|
||||
override def withMetadata(newMetadata: Metadata): Attribute = this
|
||||
override def withExprId(newExprId: ExprId): UnresolvedAttribute = this
|
||||
|
||||
override def toString: String = s"'$name"
|
||||
|
||||
|
|
|
@ -115,6 +115,7 @@ abstract class Attribute extends LeafExpression with NamedExpression with NullIn
|
|||
def withQualifier(newQualifier: Seq[String]): Attribute
|
||||
def withName(newName: String): Attribute
|
||||
def withMetadata(newMetadata: Metadata): Attribute
|
||||
def withExprId(newExprId: ExprId): Attribute
|
||||
|
||||
override def toAttribute: Attribute = this
|
||||
def newInstance(): Attribute
|
||||
|
@ -299,7 +300,7 @@ case class AttributeReference(
|
|||
}
|
||||
}
|
||||
|
||||
def withExprId(newExprId: ExprId): AttributeReference = {
|
||||
override def withExprId(newExprId: ExprId): AttributeReference = {
|
||||
if (exprId == newExprId) {
|
||||
this
|
||||
} else {
|
||||
|
@ -362,6 +363,8 @@ case class PrettyAttribute(
|
|||
throw new UnsupportedOperationException
|
||||
override def qualifier: Seq[String] = throw new UnsupportedOperationException
|
||||
override def exprId: ExprId = throw new UnsupportedOperationException
|
||||
override def withExprId(newExprId: ExprId): Attribute =
|
||||
throw new UnsupportedOperationException
|
||||
override def nullable: Boolean = true
|
||||
}
|
||||
|
||||
|
|
|
@ -2856,6 +2856,59 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
|
|||
checkAnswer(sql("select 26393499451 / (1e6 * 1000)"), Row(BigDecimal("26.3934994510000")))
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-25988: self join with aliases on partitioned tables #1") {
|
||||
withTempView("tmpView1", "tmpView2") {
|
||||
withTable("tab1", "tab2") {
|
||||
sql(
|
||||
"""
|
||||
|CREATE TABLE `tab1` (`col1` INT, `TDATE` DATE)
|
||||
|USING CSV
|
||||
|PARTITIONED BY (TDATE)
|
||||
""".stripMargin)
|
||||
spark.table("tab1").where("TDATE >= '2017-08-15'").createOrReplaceTempView("tmpView1")
|
||||
sql("CREATE TABLE `tab2` (`TDATE` DATE) USING parquet")
|
||||
sql(
|
||||
"""
|
||||
|CREATE OR REPLACE TEMPORARY VIEW tmpView2 AS
|
||||
|SELECT N.tdate, col1 AS aliasCol1
|
||||
|FROM tmpView1 N
|
||||
|JOIN tab2 Z
|
||||
|ON N.tdate = Z.tdate
|
||||
""".stripMargin)
|
||||
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "0") {
|
||||
sql("SELECT * FROM tmpView2 x JOIN tmpView2 y ON x.tdate = y.tdate").collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-25988: self join with aliases on partitioned tables #2") {
|
||||
withTempView("tmp") {
|
||||
withTable("tab1", "tab2") {
|
||||
sql(
|
||||
"""
|
||||
|CREATE TABLE `tab1` (`EX` STRING, `TDATE` DATE)
|
||||
|USING parquet
|
||||
|PARTITIONED BY (tdate)
|
||||
""".stripMargin)
|
||||
sql("CREATE TABLE `tab2` (`TDATE` DATE) USING parquet")
|
||||
sql(
|
||||
"""
|
||||
|CREATE OR REPLACE TEMPORARY VIEW TMP as
|
||||
|SELECT N.tdate, EX AS new_ex
|
||||
|FROM tab1 N
|
||||
|JOIN tab2 Z
|
||||
|ON N.tdate = Z.tdate
|
||||
""".stripMargin)
|
||||
sql(
|
||||
"""
|
||||
|SELECT * FROM TMP x JOIN TMP y
|
||||
|ON x.tdate = y.tdate
|
||||
""".stripMargin).queryExecution.executedPlan
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case class Foo(bar: Option[String])
|
||||
|
|
Loading…
Reference in a new issue