[SPARK-33897][SQL] Can't set option 'cross' in join method
### What changes were proposed in this pull request?
[The PySpark documentation](https://spark.apache.org/docs/3.0.1/api/python/pyspark.sql.html#pyspark.sql.DataFrame.join) says "Must be one of: inner, cross, outer, full, fullouter, full_outer, left, leftouter, left_outer, right, rightouter, right_outer, semi, leftsemi, left_semi, anti, leftanti and left_anti."
However, I get the following error when I set the cross option.
```
scala> val df1 = spark.createDataFrame(Seq((1,"a"),(2,"b")))
df1: org.apache.spark.sql.DataFrame = [_1: int, _2: string]
scala> val df2 = spark.createDataFrame(Seq((1,"A"),(2,"B"), (3, "C")))
df2: org.apache.spark.sql.DataFrame = [_1: int, _2: string]
scala> df1.join(right = df2, usingColumns = Seq("_1"), joinType = "cross").show()
java.lang.IllegalArgumentException: requirement failed: Unsupported using join type Cross
at scala.Predef$.require(Predef.scala:281)
at org.apache.spark.sql.catalyst.plans.UsingJoin.<init>(joinTypes.scala:106)
at org.apache.spark.sql.Dataset.join(Dataset.scala:1025)
... 53 elided
```
### Why are the changes needed?
The documentation says the cross option can be set, but when I try to set it, I get a java.lang.IllegalArgumentException.
### Does this PR introduce _any_ user-facing change?
With this fix, the join method behaves as the documentation describes: "cross" is accepted as a join type instead of throwing an IllegalArgumentException.
### How was this patch tested?
There is already a test for [JoinTypes](1b9fd67904/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala), but I can't find a test for the join option itself.
Closes #30803 from kozakana/allow_cross_option.
Authored-by: kozakana <goki727@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
10b6466e91
commit
2553d53dc8
|
@ -102,7 +102,7 @@ case class NaturalJoin(tpe: JoinType) extends JoinType {
|
||||||
}
|
}
|
||||||
|
|
||||||
case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType {
|
case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType {
|
||||||
require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti).contains(tpe),
|
require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti, Cross).contains(tpe),
|
||||||
"Unsupported using join type " + tpe)
|
"Unsupported using join type " + tpe)
|
||||||
override def sql: String = "USING " + tpe.sql
|
override def sql: String = "USING " + tpe.sql
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,6 +122,16 @@ class DataFrameJoinSuite extends QueryTest
|
||||||
df2.crossJoin(df1),
|
df2.crossJoin(df1),
|
||||||
Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") ::
|
Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") ::
|
||||||
Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil)
|
Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil)
|
||||||
|
|
||||||
|
checkAnswer(
|
||||||
|
df1.join(df2, Nil, "cross"),
|
||||||
|
Row(1, "1", 2, "2") :: Row(1, "1", 4, "4") ::
|
||||||
|
Row(3, "3", 2, "2") :: Row(3, "3", 4, "4") :: Nil)
|
||||||
|
|
||||||
|
checkAnswer(
|
||||||
|
df2.join(df1, Nil, "cross"),
|
||||||
|
Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") ::
|
||||||
|
Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
test("broadcast join hint using broadcast function") {
|
test("broadcast join hint using broadcast function") {
|
||||||
|
|
Loading…
Reference in a new issue