[SPARK-24385][SQL] Resolve self-join condition ambiguity for EqualNullSafe
## What changes were proposed in this pull request?

`Dataset.join` contains a small hack that resolves ambiguous column names in self-joins. The current code handles only `EqualTo` conditions; this PR extends the fix to `EqualNullSafe`.

Credit for this PR should be given to daniel-shields.

## How was this patch tested?

Added a unit test.

Author: Marco Gaido <marcogaido91@gmail.com>

Closes #21605 from mgaido91/SPARK-24385_2.
This commit is contained in:
parent
85fe1297e3
commit
a7c8f0c8cb
|
@ -1016,6 +1016,11 @@ class Dataset[T] private[sql](
|
|||
catalyst.expressions.EqualTo(
|
||||
withPlan(plan.left).resolve(a.name),
|
||||
withPlan(plan.right).resolve(b.name))
|
||||
case catalyst.expressions.EqualNullSafe(a: AttributeReference, b: AttributeReference)
|
||||
if a.sameRef(b) =>
|
||||
catalyst.expressions.EqualNullSafe(
|
||||
withPlan(plan.left).resolve(a.name),
|
||||
withPlan(plan.right).resolve(b.name))
|
||||
}}
|
||||
|
||||
withPlan {
|
||||
|
|
|
@ -287,4 +287,12 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
|
|||
dfOne.join(dfTwo, $"a" === $"b", "left").queryExecution.optimizedPlan
|
||||
}
|
||||
}
|
||||
|
||||
// Regression test for SPARK-24385: a self-join whose condition is a null-safe
// equality (`<=>`) between the same column of the same Dataset.
test("SPARK-24385: Resolve ambiguity in self-joins with EqualNullSafe") {
|
||||
// Run with cross joins explicitly disabled.
// NOTE(review): presumably this is what turns the unresolved ambiguity into a
// failure (the un-rewritten condition compares a column with itself) -- confirm.
withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
|
||||
val df = spark.range(2)
|
||||
// this throws an exception before the fix
|
||||
// Only the optimized plan is forced here; no rows are collected or compared,
// so the test passes as long as building that plan does not throw.
df.join(df, df("id") <=> df("id")).queryExecution.optimizedPlan
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue