[SPARK-24385][SQL] Resolve self-join condition ambiguity for EqualNullSafe

## What changes were proposed in this pull request? In Dataset.join we have a small hack for resolving ambiguity in the column name for self-joins. The current code supports only `EqualTo`. The PR extends the fix to `EqualNullSafe`. Credit for this PR should be given to daniel-shields. ## How was this patch tested? added UT Author: Marco Gaido <marcogaido91@gmail.com> Closes #21605 from mgaido91/SPARK-24385_2.
2018-07-03 12:20:03 +08:00 · 2018-07-03 12:20:03 +08:00 · a7c8f0c8cb
parent 85fe1297e3
commit a7c8f0c8cb
2 changed files with 13 additions and 0 deletions
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@ -1016,6 +1016,11 @@ class Dataset[T] private[sql](
        catalyst.expressions.EqualTo(
          withPlan(plan.left).resolve(a.name),
          withPlan(plan.right).resolve(b.name))
+      case catalyst.expressions.EqualNullSafe(a: AttributeReference, b: AttributeReference)
+        if a.sameRef(b) =>
+        catalyst.expressions.EqualNullSafe(
+          withPlan(plan.left).resolve(a.name),
+          withPlan(plan.right).resolve(b.name))
    }}

    withPlan {
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@ -287,4 +287,12 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
      dfOne.join(dfTwo, $"a" === $"b", "left").queryExecution.optimizedPlan
    }
  }
+
+  test("SPARK-24385: Resolve ambiguity in self-joins with EqualNullSafe") {
+    withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
+      val df = spark.range(2)
+      // this throws an exception before the fix
+      df.join(df, df("id") <=> df("id")).queryExecution.optimizedPlan
+    }
+  }
 }