From 2553d53dc85fdf1127446941e2bc749e721c1b57 Mon Sep 17 00:00:00 2001 From: kozakana Date: Sat, 26 Dec 2020 16:30:50 +0900 Subject: [PATCH] [SPARK-33897][SQL] Can't set option 'cross' in join method ### What changes were proposed in this pull request? [The PySpark documentation](https://spark.apache.org/docs/3.0.1/api/python/pyspark.sql.html#pyspark.sql.DataFrame.join) says "Must be one of: inner, cross, outer, full, fullouter, full_outer, left, leftouter, left_outer, right, rightouter, right_outer, semi, leftsemi, left_semi, anti, leftanti and left_anti." However, I get the following error when I set the cross option. ``` scala> val df1 = spark.createDataFrame(Seq((1,"a"),(2,"b"))) df1: org.apache.spark.sql.DataFrame = [_1: int, _2: string] scala> val df2 = spark.createDataFrame(Seq((1,"A"),(2,"B"), (3, "C"))) df2: org.apache.spark.sql.DataFrame = [_1: int, _2: string] scala> df1.join(right = df2, usingColumns = Seq("_1"), joinType = "cross").show() java.lang.IllegalArgumentException: requirement failed: Unsupported using join type Cross at scala.Predef$.require(Predef.scala:281) at org.apache.spark.sql.catalyst.plans.UsingJoin.(joinTypes.scala:106) at org.apache.spark.sql.Dataset.join(Dataset.scala:1025) ... 53 elided ``` ### Why are the changes needed? The documentation says cross option can be set, but when I try to set it, I get an java.lang.IllegalArgumentException. ### Does this PR introduce _any_ user-facing change? Accepting this PR fix will behave the same as the documentation. ### How was this patch tested? There is already a test for [JoinTypes](https://github.com/apache/spark/blob/1b9fd67904671ea08526bfb7a97d694815d47665/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala), but I can't find a test for the join option itself. Closes #30803 from kozakana/allow_cross_option. Authored-by: kozakana Signed-off-by: HyukjinKwon --- .../apache/spark/sql/catalyst/plans/joinTypes.scala | 2 +- .../org/apache/spark/sql/DataFrameJoinSuite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala index feea1d2177..da3cfb4c9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala @@ -102,7 +102,7 @@ case class NaturalJoin(tpe: JoinType) extends JoinType { } case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType { - require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti).contains(tpe), + require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti, Cross).contains(tpe), "Unsupported using join type " + tpe) override def sql: String = "USING " + tpe.sql } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index c317f562c6..1513c2e90e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -122,6 +122,16 @@ class DataFrameJoinSuite extends QueryTest df2.crossJoin(df1), Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") :: Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil) + + checkAnswer( + df1.join(df2, Nil, "cross"), + Row(1, "1", 2, "2") :: Row(1, "1", 4, "4") :: + Row(3, "3", 2, "2") :: Row(3, "3", 4, "4") :: Nil) + + checkAnswer( + df2.join(df1, Nil, "cross"), + Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") :: + Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil) } test("broadcast join hint using broadcast function") {