[SPARK-13947][SQL] The error message from using an invalid column reference is not clear

## What changes were proposed in this pull request?

Rewrote the error message for clarity. Added extra information for the case of an attribute name collision, hinting that the user should double-check whether attributes from two different tables are being referenced.

## How was this patch tested?

No functional changes; only the final error message has changed. It has been tested manually against the situation described in the JIRA ticket. The automated tests in the repository pass.

This PR is my original work, and I license this work to the Spark project.

Author: Ruben Berenguel Montoro <ruben@mostlymaths.net>
Author: Ruben Berenguel Montoro <ruben@dreamattic.com>
Author: Ruben Berenguel <ruben@mostlymaths.net>

Closes #17100 from rberenguel/SPARK-13947-error-message.
This commit is contained in:
Ruben Berenguel Montoro 2017-10-24 23:02:11 -07:00 committed by gatorsmile
parent 524abb996a
commit 427359f077
3 changed files with 33 additions and 11 deletions

View file

@ -272,10 +272,23 @@ trait CheckAnalysis extends PredicateHelper {
case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
val missingAttributes = o.missingInput.mkString(",")
val input = o.inputSet.mkString(",")
val msgForMissingAttributes = s"Resolved attribute(s) $missingAttributes missing " +
s"from $input in operator ${operator.simpleString}."
failAnalysis(
s"resolved attribute(s) $missingAttributes missing from $input " +
s"in operator ${operator.simpleString}")
val resolver = plan.conf.resolver
val attrsWithSameName = o.missingInput.filter { missing =>
o.inputSet.exists(input => resolver(missing.name, input.name))
}
val msg = if (attrsWithSameName.nonEmpty) {
val sameNames = attrsWithSameName.map(_.name).mkString(",")
s"$msgForMissingAttributes Attribute(s) with the same name appear in the " +
s"operation: $sameNames. Please check if the right attribute(s) are used."
} else {
msgForMissingAttributes
}
failAnalysis(msg)
case p @ Project(exprs, _) if containsMultipleGenerators(exprs) =>
failAnalysis(

View file

@ -408,16 +408,25 @@ class AnalysisErrorSuite extends AnalysisTest {
// CheckAnalysis should throw AnalysisException when Aggregate contains missing attribute(s)
// Since we manually construct the logical plan here and Sum only accepts
// LongType, DoubleType, and DecimalType, we use LongType as the type of a.
val plan =
Aggregate(
Nil,
Alias(sum(AttributeReference("a", LongType)(exprId = ExprId(1))), "b")() :: Nil,
LocalRelation(
AttributeReference("a", LongType)(exprId = ExprId(2))))
val attrA = AttributeReference("a", LongType)(exprId = ExprId(1))
val otherA = AttributeReference("a", LongType)(exprId = ExprId(2))
val attrC = AttributeReference("c", LongType)(exprId = ExprId(3))
val aliases = Alias(sum(attrA), "b")() :: Alias(sum(attrC), "d")() :: Nil
val plan = Aggregate(
Nil,
aliases,
LocalRelation(otherA))
assert(plan.resolved)
assertAnalysisError(plan, "resolved attribute(s) a#1L missing from a#2L" :: Nil)
val resolved = s"${attrA.toString},${attrC.toString}"
val errorMsg = s"Resolved attribute(s) $resolved missing from ${otherA.toString} " +
s"in operator !Aggregate [${aliases.mkString(", ")}]. " +
s"Attribute(s) with the same name appear in the operation: a. " +
"Please check if the right attribute(s) are used."
assertAnalysisError(plan, errorMsg :: Nil)
}
test("error test for self-join") {

View file

@ -63,7 +63,7 @@ WHERE t1a IN (SELECT min(t2a)
struct<>
-- !query 4 output
org.apache.spark.sql.AnalysisException
resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]);
Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]).;
-- !query 5