diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 0ceeb53e1d..9906a30b48 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -517,11 +517,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Connect two queries by a Set operator. * * Supported Set operators are: - * - UNION [DISTINCT] - * - UNION ALL - * - EXCEPT [DISTINCT] - * - MINUS [DISTINCT] - * - INTERSECT [DISTINCT] + * - UNION [ DISTINCT | ALL ] + * - EXCEPT [ DISTINCT | ALL ] + * - MINUS [ DISTINCT | ALL ] + * - INTERSECT [ DISTINCT | ALL ] */ override def visitSetOperation(ctx: SetOperationContext): LogicalPlan = withOrigin(ctx) { val left = plan(ctx.left) @@ -541,7 +540,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging case SqlBaseParser.EXCEPT => Except(left, right) case SqlBaseParser.SETMINUS if all => - throw new ParseException("MINUS ALL is not supported.", ctx) + Except(left, right, isAll = true) case SqlBaseParser.SETMINUS => Except(left, right) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 38efd89156..924700483d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -67,11 +67,13 @@ class PlanParserSuite extends AnalysisTest { assertEqual("select * from a union all select * from b", a.union(b)) assertEqual("select * from a except select * from b", a.except(b)) assertEqual("select * from a except distinct select * from b", a.except(b)) + assertEqual("select * from a except all 
select * from b", a.except(b, isAll = true)) assertEqual("select * from a minus select * from b", a.except(b)) - intercept("select * from a minus all select * from b", "MINUS ALL is not supported.") + assertEqual("select * from a minus all select * from b", a.except(b, isAll = true)) assertEqual("select * from a minus distinct select * from b", a.except(b)) assertEqual("select * from a intersect select * from b", a.intersect(b)) assertEqual("select * from a intersect distinct select * from b", a.intersect(b)) + assertEqual("select * from a intersect all select * from b", a.intersect(b, isAll = true)) } test("common table expressions") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/except-all.sql b/sql/core/src/test/resources/sql-tests/inputs/except-all.sql index 08b9a437b3..e28f0721a6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/except-all.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/except-all.sql @@ -17,12 +17,17 @@ CREATE TEMPORARY VIEW tab4 AS SELECT * FROM VALUES (2, 20) AS tab4(k, v); --- Basic ExceptAll +-- Basic EXCEPT ALL SELECT * FROM tab1 EXCEPT ALL SELECT * FROM tab2; --- ExceptAll same table in both branches +-- MINUS ALL (synonym for EXCEPT ALL) +SELECT * FROM tab1 +MINUS ALL +SELECT * FROM tab2; + +-- EXCEPT ALL same table in both branches SELECT * FROM tab1 EXCEPT ALL SELECT * FROM tab2 WHERE c1 IS NOT NULL; @@ -57,14 +62,14 @@ SELECT * FROM tab4 EXCEPT ALL SELECT * FROM tab3; --- ExceptAll + Intersect +-- EXCEPT ALL + INTERSECT SELECT * FROM tab4 EXCEPT ALL SELECT * FROM tab3 INTERSECT DISTINCT SELECT * FROM tab4; --- ExceptAll + Except +-- EXCEPT ALL + EXCEPT SELECT * FROM tab4 EXCEPT ALL SELECT * FROM tab3 @@ -94,6 +99,15 @@ SELECT * FROM tab3 EXCEPT DISTINCT SELECT * FROM tab4; +-- Using MINUS ALL +SELECT * FROM tab3 +MINUS ALL +SELECT * FROM tab4 +UNION +SELECT * FROM tab3 +MINUS DISTINCT +SELECT * FROM tab4; + -- Chain of set operations SELECT * FROM tab3 EXCEPT ALL diff --git 
a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out index 2a21c15053..01091a2f75 100644 --- a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 25 +-- Number of queries: 27 -- !query 0 @@ -63,8 +63,8 @@ NULL -- !query 5 SELECT * FROM tab1 -EXCEPT ALL -SELECT * FROM tab2 WHERE c1 IS NOT NULL +MINUS ALL +SELECT * FROM tab2 -- !query 5 schema struct -- !query 5 output @@ -72,45 +72,41 @@ struct 2 2 NULL -NULL -- !query 6 -SELECT * FROM tab1 WHERE c1 > 5 +SELECT * FROM tab1 EXCEPT ALL -SELECT * FROM tab2 +SELECT * FROM tab2 WHERE c1 IS NOT NULL -- !query 6 schema struct -- !query 6 output - +0 +2 +2 +NULL +NULL -- !query 7 -SELECT * FROM tab1 +SELECT * FROM tab1 WHERE c1 > 5 EXCEPT ALL -SELECT * FROM tab2 WHERE c1 > 6 +SELECT * FROM tab2 -- !query 7 schema struct -- !query 7 output -0 -1 -2 -2 -2 -2 -3 -NULL -NULL + -- !query 8 SELECT * FROM tab1 EXCEPT ALL -SELECT CAST(1 AS BIGINT) +SELECT * FROM tab2 WHERE c1 > 6 -- !query 8 schema -struct +struct -- !query 8 output 0 +1 2 2 2 @@ -123,42 +119,46 @@ NULL -- !query 9 SELECT * FROM tab1 EXCEPT ALL -SELECT array(1) +SELECT CAST(1 AS BIGINT) -- !query 9 schema -struct<> +struct -- !query 9 output +0 +2 +2 +2 +2 +3 +NULL +NULL + + +-- !query 10 +SELECT * FROM tab1 +EXCEPT ALL +SELECT array(1) +-- !query 10 schema +struct<> +-- !query 10 output org.apache.spark.sql.AnalysisException ExceptAll can only be performed on tables with the compatible column types. 
array <> int at the first column of the second table; --- !query 10 -SELECT * FROM tab3 -EXCEPT ALL -SELECT * FROM tab4 --- !query 10 schema -struct --- !query 10 output -1 2 -1 3 - - -- !query 11 -SELECT * FROM tab4 -EXCEPT ALL SELECT * FROM tab3 +EXCEPT ALL +SELECT * FROM tab4 -- !query 11 schema struct -- !query 11 output -2 2 -2 20 +1 2 +1 3 -- !query 12 SELECT * FROM tab4 EXCEPT ALL SELECT * FROM tab3 -INTERSECT DISTINCT -SELECT * FROM tab4 -- !query 12 schema struct -- !query 12 output @@ -170,15 +170,28 @@ struct SELECT * FROM tab4 EXCEPT ALL SELECT * FROM tab3 -EXCEPT DISTINCT +INTERSECT DISTINCT SELECT * FROM tab4 -- !query 13 schema struct -- !query 13 output - +2 2 +2 20 -- !query 14 +SELECT * FROM tab4 +EXCEPT ALL +SELECT * FROM tab3 +EXCEPT DISTINCT +SELECT * FROM tab4 +-- !query 14 schema +struct +-- !query 14 output + + + +-- !query 15 SELECT * FROM tab3 EXCEPT ALL SELECT * FROM tab4 @@ -186,24 +199,24 @@ UNION ALL SELECT * FROM tab3 EXCEPT DISTINCT SELECT * FROM tab4 --- !query 14 schema +-- !query 15 schema struct --- !query 14 output +-- !query 15 output 1 3 --- !query 15 +-- !query 16 SELECT k FROM tab3 EXCEPT ALL SELECT k, v FROM tab4 --- !query 15 schema +-- !query 16 schema struct<> --- !query 15 output +-- !query 16 output org.apache.spark.sql.AnalysisException ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; --- !query 16 +-- !query 17 SELECT * FROM tab3 EXCEPT ALL SELECT * FROM tab4 @@ -211,13 +224,27 @@ UNION SELECT * FROM tab3 EXCEPT DISTINCT SELECT * FROM tab4 --- !query 16 schema +-- !query 17 schema struct --- !query 16 output +-- !query 17 output 1 3 --- !query 17 +-- !query 18 +SELECT * FROM tab3 +MINUS ALL +SELECT * FROM tab4 +UNION +SELECT * FROM tab3 +MINUS DISTINCT +SELECT * FROM tab4 +-- !query 18 schema +struct +-- !query 18 output +1 3 + + +-- !query 19 SELECT * FROM tab3 EXCEPT ALL SELECT * FROM tab4 @@ -225,13 +252,13 @@ EXCEPT 
DISTINCT SELECT * FROM tab3 EXCEPT DISTINCT SELECT * FROM tab4 --- !query 17 schema +-- !query 19 schema struct --- !query 17 output +-- !query 19 output --- !query 18 +-- !query 20 SELECT * FROM (SELECT tab3.k, tab4.v @@ -245,13 +272,13 @@ FROM (SELECT tab3.k, FROM tab3 JOIN tab4 ON tab3.k = tab4.k) --- !query 18 schema +-- !query 20 schema struct --- !query 18 output +-- !query 20 output --- !query 19 +-- !query 21 SELECT * FROM (SELECT tab3.k, tab4.v @@ -265,46 +292,30 @@ FROM (SELECT tab4.v AS k, FROM tab3 JOIN tab4 ON tab3.k = tab4.k) --- !query 19 schema -struct --- !query 19 output -1 2 -1 2 -1 2 -2 20 -2 20 -2 3 -2 3 - - --- !query 20 -SELECT v FROM tab3 GROUP BY v -EXCEPT ALL -SELECT k FROM tab4 GROUP BY k --- !query 20 schema -struct --- !query 20 output -3 - - --- !query 21 -DROP VIEW IF EXISTS tab1 -- !query 21 schema -struct<> +struct -- !query 21 output - +1 2 +1 2 +1 2 +2 20 +2 20 +2 3 +2 3 -- !query 22 -DROP VIEW IF EXISTS tab2 +SELECT v FROM tab3 GROUP BY v +EXCEPT ALL +SELECT k FROM tab4 GROUP BY k -- !query 22 schema -struct<> +struct -- !query 22 output - +3 -- !query 23 -DROP VIEW IF EXISTS tab3 +DROP VIEW IF EXISTS tab1 -- !query 23 schema struct<> -- !query 23 output @@ -312,8 +323,24 @@ struct<> -- !query 24 -DROP VIEW IF EXISTS tab4 +DROP VIEW IF EXISTS tab2 -- !query 24 schema struct<> -- !query 24 output + + +-- !query 25 +DROP VIEW IF EXISTS tab3 +-- !query 25 schema +struct<> +-- !query 25 output + + + +-- !query 26 +DROP VIEW IF EXISTS tab4 +-- !query 26 schema +struct<> +-- !query 26 output +