[SPARK-24997][SQL] Enable support of MINUS ALL
## What changes were proposed in this pull request? Enable support for MINUS ALL which was gated at AstBuilder. ## How was this patch tested? Added tests in SQLQueryTestSuite and modify PlanParserSuite. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Dilip Biswal <dbiswal@us.ibm.com> Closes #21963 from dilipbiswal/minus-all.
This commit is contained in:
parent
b0d6967d45
commit
19a4531913
|
@ -517,11 +517,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
|
|||
* Connect two queries by a Set operator.
|
||||
*
|
||||
* Supported Set operators are:
|
||||
* - UNION [DISTINCT]
|
||||
* - UNION ALL
|
||||
* - EXCEPT [DISTINCT]
|
||||
* - MINUS [DISTINCT]
|
||||
* - INTERSECT [DISTINCT]
|
||||
* - UNION [ DISTINCT | ALL ]
|
||||
* - EXCEPT [ DISTINCT | ALL ]
|
||||
* - MINUS [ DISTINCT | ALL ]
|
||||
* - INTERSECT [DISTINCT | ALL]
|
||||
*/
|
||||
override def visitSetOperation(ctx: SetOperationContext): LogicalPlan = withOrigin(ctx) {
|
||||
val left = plan(ctx.left)
|
||||
|
@ -541,7 +540,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
|
|||
case SqlBaseParser.EXCEPT =>
|
||||
Except(left, right)
|
||||
case SqlBaseParser.SETMINUS if all =>
|
||||
throw new ParseException("MINUS ALL is not supported.", ctx)
|
||||
Except(left, right, isAll = true)
|
||||
case SqlBaseParser.SETMINUS =>
|
||||
Except(left, right)
|
||||
}
|
||||
|
|
|
@ -67,11 +67,13 @@ class PlanParserSuite extends AnalysisTest {
|
|||
assertEqual("select * from a union all select * from b", a.union(b))
|
||||
assertEqual("select * from a except select * from b", a.except(b))
|
||||
assertEqual("select * from a except distinct select * from b", a.except(b))
|
||||
assertEqual("select * from a except all select * from b", a.except(b, isAll = true))
|
||||
assertEqual("select * from a minus select * from b", a.except(b))
|
||||
intercept("select * from a minus all select * from b", "MINUS ALL is not supported.")
|
||||
assertEqual("select * from a minus all select * from b", a.except(b, isAll = true))
|
||||
assertEqual("select * from a minus distinct select * from b", a.except(b))
|
||||
assertEqual("select * from a intersect select * from b", a.intersect(b))
|
||||
assertEqual("select * from a intersect distinct select * from b", a.intersect(b))
|
||||
assertEqual("select * from a intersect all select * from b", a.intersect(b, isAll = true))
|
||||
}
|
||||
|
||||
test("common table expressions") {
|
||||
|
|
|
@ -17,12 +17,17 @@ CREATE TEMPORARY VIEW tab4 AS SELECT * FROM VALUES
|
|||
(2, 20)
|
||||
AS tab4(k, v);
|
||||
|
||||
-- Basic ExceptAll
|
||||
-- Basic EXCEPT ALL
|
||||
SELECT * FROM tab1
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab2;
|
||||
|
||||
-- ExceptAll same table in both branches
|
||||
-- MINUS ALL (synonym for EXCEPT)
|
||||
SELECT * FROM tab1
|
||||
MINUS ALL
|
||||
SELECT * FROM tab2;
|
||||
|
||||
-- EXCEPT ALL same table in both branches
|
||||
SELECT * FROM tab1
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab2 WHERE c1 IS NOT NULL;
|
||||
|
@ -57,14 +62,14 @@ SELECT * FROM tab4
|
|||
EXCEPT ALL
|
||||
SELECT * FROM tab3;
|
||||
|
||||
-- ExceptAll + Intersect
|
||||
-- EXCEPT ALL + INTERSECT
|
||||
SELECT * FROM tab4
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab3
|
||||
INTERSECT DISTINCT
|
||||
SELECT * FROM tab4;
|
||||
|
||||
-- ExceptAll + Except
|
||||
-- EXCEPT ALL + EXCEPT
|
||||
SELECT * FROM tab4
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab3
|
||||
|
@ -94,6 +99,15 @@ SELECT * FROM tab3
|
|||
EXCEPT DISTINCT
|
||||
SELECT * FROM tab4;
|
||||
|
||||
-- Using MINUS ALL
|
||||
SELECT * FROM tab3
|
||||
MINUS ALL
|
||||
SELECT * FROM tab4
|
||||
UNION
|
||||
SELECT * FROM tab3
|
||||
MINUS DISTINCT
|
||||
SELECT * FROM tab4;
|
||||
|
||||
-- Chain of set operations
|
||||
SELECT * FROM tab3
|
||||
EXCEPT ALL
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 25
|
||||
-- Number of queries: 27
|
||||
|
||||
|
||||
-- !query 0
|
||||
|
@ -63,8 +63,8 @@ NULL
|
|||
|
||||
-- !query 5
|
||||
SELECT * FROM tab1
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab2 WHERE c1 IS NOT NULL
|
||||
MINUS ALL
|
||||
SELECT * FROM tab2
|
||||
-- !query 5 schema
|
||||
struct<c1:int>
|
||||
-- !query 5 output
|
||||
|
@ -72,45 +72,41 @@ struct<c1:int>
|
|||
2
|
||||
2
|
||||
NULL
|
||||
NULL
|
||||
|
||||
|
||||
-- !query 6
|
||||
SELECT * FROM tab1 WHERE c1 > 5
|
||||
SELECT * FROM tab1
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab2
|
||||
SELECT * FROM tab2 WHERE c1 IS NOT NULL
|
||||
-- !query 6 schema
|
||||
struct<c1:int>
|
||||
-- !query 6 output
|
||||
|
||||
0
|
||||
2
|
||||
2
|
||||
NULL
|
||||
NULL
|
||||
|
||||
|
||||
-- !query 7
|
||||
SELECT * FROM tab1
|
||||
SELECT * FROM tab1 WHERE c1 > 5
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab2 WHERE c1 > 6
|
||||
SELECT * FROM tab2
|
||||
-- !query 7 schema
|
||||
struct<c1:int>
|
||||
-- !query 7 output
|
||||
0
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
3
|
||||
NULL
|
||||
NULL
|
||||
|
||||
|
||||
|
||||
-- !query 8
|
||||
SELECT * FROM tab1
|
||||
EXCEPT ALL
|
||||
SELECT CAST(1 AS BIGINT)
|
||||
SELECT * FROM tab2 WHERE c1 > 6
|
||||
-- !query 8 schema
|
||||
struct<c1:bigint>
|
||||
struct<c1:int>
|
||||
-- !query 8 output
|
||||
0
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
|
@ -123,42 +119,46 @@ NULL
|
|||
-- !query 9
|
||||
SELECT * FROM tab1
|
||||
EXCEPT ALL
|
||||
SELECT array(1)
|
||||
SELECT CAST(1 AS BIGINT)
|
||||
-- !query 9 schema
|
||||
struct<>
|
||||
struct<c1:bigint>
|
||||
-- !query 9 output
|
||||
0
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
3
|
||||
NULL
|
||||
NULL
|
||||
|
||||
|
||||
-- !query 10
|
||||
SELECT * FROM tab1
|
||||
EXCEPT ALL
|
||||
SELECT array(1)
|
||||
-- !query 10 schema
|
||||
struct<>
|
||||
-- !query 10 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
ExceptAll can only be performed on tables with the compatible column types. array<int> <> int at the first column of the second table;
|
||||
|
||||
|
||||
-- !query 10
|
||||
SELECT * FROM tab3
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab4
|
||||
-- !query 10 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 10 output
|
||||
1 2
|
||||
1 3
|
||||
|
||||
|
||||
-- !query 11
|
||||
SELECT * FROM tab4
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab3
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab4
|
||||
-- !query 11 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 11 output
|
||||
2 2
|
||||
2 20
|
||||
1 2
|
||||
1 3
|
||||
|
||||
|
||||
-- !query 12
|
||||
SELECT * FROM tab4
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab3
|
||||
INTERSECT DISTINCT
|
||||
SELECT * FROM tab4
|
||||
-- !query 12 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 12 output
|
||||
|
@ -170,15 +170,28 @@ struct<k:int,v:int>
|
|||
SELECT * FROM tab4
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab3
|
||||
EXCEPT DISTINCT
|
||||
INTERSECT DISTINCT
|
||||
SELECT * FROM tab4
|
||||
-- !query 13 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 13 output
|
||||
|
||||
2 2
|
||||
2 20
|
||||
|
||||
|
||||
-- !query 14
|
||||
SELECT * FROM tab4
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab3
|
||||
EXCEPT DISTINCT
|
||||
SELECT * FROM tab4
|
||||
-- !query 14 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 14 output
|
||||
|
||||
|
||||
|
||||
-- !query 15
|
||||
SELECT * FROM tab3
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab4
|
||||
|
@ -186,24 +199,24 @@ UNION ALL
|
|||
SELECT * FROM tab3
|
||||
EXCEPT DISTINCT
|
||||
SELECT * FROM tab4
|
||||
-- !query 14 schema
|
||||
-- !query 15 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 14 output
|
||||
-- !query 15 output
|
||||
1 3
|
||||
|
||||
|
||||
-- !query 15
|
||||
-- !query 16
|
||||
SELECT k FROM tab3
|
||||
EXCEPT ALL
|
||||
SELECT k, v FROM tab4
|
||||
-- !query 15 schema
|
||||
-- !query 16 schema
|
||||
struct<>
|
||||
-- !query 15 output
|
||||
-- !query 16 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns;
|
||||
|
||||
|
||||
-- !query 16
|
||||
-- !query 17
|
||||
SELECT * FROM tab3
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab4
|
||||
|
@ -211,13 +224,27 @@ UNION
|
|||
SELECT * FROM tab3
|
||||
EXCEPT DISTINCT
|
||||
SELECT * FROM tab4
|
||||
-- !query 16 schema
|
||||
-- !query 17 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 16 output
|
||||
-- !query 17 output
|
||||
1 3
|
||||
|
||||
|
||||
-- !query 17
|
||||
-- !query 18
|
||||
SELECT * FROM tab3
|
||||
MINUS ALL
|
||||
SELECT * FROM tab4
|
||||
UNION
|
||||
SELECT * FROM tab3
|
||||
MINUS DISTINCT
|
||||
SELECT * FROM tab4
|
||||
-- !query 18 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 18 output
|
||||
1 3
|
||||
|
||||
|
||||
-- !query 19
|
||||
SELECT * FROM tab3
|
||||
EXCEPT ALL
|
||||
SELECT * FROM tab4
|
||||
|
@ -225,13 +252,13 @@ EXCEPT DISTINCT
|
|||
SELECT * FROM tab3
|
||||
EXCEPT DISTINCT
|
||||
SELECT * FROM tab4
|
||||
-- !query 17 schema
|
||||
-- !query 19 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 17 output
|
||||
-- !query 19 output
|
||||
|
||||
|
||||
|
||||
-- !query 18
|
||||
-- !query 20
|
||||
SELECT *
|
||||
FROM (SELECT tab3.k,
|
||||
tab4.v
|
||||
|
@ -245,13 +272,13 @@ FROM (SELECT tab3.k,
|
|||
FROM tab3
|
||||
JOIN tab4
|
||||
ON tab3.k = tab4.k)
|
||||
-- !query 18 schema
|
||||
-- !query 20 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 18 output
|
||||
-- !query 20 output
|
||||
|
||||
|
||||
|
||||
-- !query 19
|
||||
-- !query 21
|
||||
SELECT *
|
||||
FROM (SELECT tab3.k,
|
||||
tab4.v
|
||||
|
@ -265,46 +292,30 @@ FROM (SELECT tab4.v AS k,
|
|||
FROM tab3
|
||||
JOIN tab4
|
||||
ON tab3.k = tab4.k)
|
||||
-- !query 19 schema
|
||||
struct<k:int,v:int>
|
||||
-- !query 19 output
|
||||
1 2
|
||||
1 2
|
||||
1 2
|
||||
2 20
|
||||
2 20
|
||||
2 3
|
||||
2 3
|
||||
|
||||
|
||||
-- !query 20
|
||||
SELECT v FROM tab3 GROUP BY v
|
||||
EXCEPT ALL
|
||||
SELECT k FROM tab4 GROUP BY k
|
||||
-- !query 20 schema
|
||||
struct<v:int>
|
||||
-- !query 20 output
|
||||
3
|
||||
|
||||
|
||||
-- !query 21
|
||||
DROP VIEW IF EXISTS tab1
|
||||
-- !query 21 schema
|
||||
struct<>
|
||||
struct<k:int,v:int>
|
||||
-- !query 21 output
|
||||
|
||||
1 2
|
||||
1 2
|
||||
1 2
|
||||
2 20
|
||||
2 20
|
||||
2 3
|
||||
2 3
|
||||
|
||||
|
||||
-- !query 22
|
||||
DROP VIEW IF EXISTS tab2
|
||||
SELECT v FROM tab3 GROUP BY v
|
||||
EXCEPT ALL
|
||||
SELECT k FROM tab4 GROUP BY k
|
||||
-- !query 22 schema
|
||||
struct<>
|
||||
struct<v:int>
|
||||
-- !query 22 output
|
||||
|
||||
3
|
||||
|
||||
|
||||
-- !query 23
|
||||
DROP VIEW IF EXISTS tab3
|
||||
DROP VIEW IF EXISTS tab1
|
||||
-- !query 23 schema
|
||||
struct<>
|
||||
-- !query 23 output
|
||||
|
@ -312,8 +323,24 @@ struct<>
|
|||
|
||||
|
||||
-- !query 24
|
||||
DROP VIEW IF EXISTS tab4
|
||||
DROP VIEW IF EXISTS tab2
|
||||
-- !query 24 schema
|
||||
struct<>
|
||||
-- !query 24 output
|
||||
|
||||
|
||||
|
||||
-- !query 25
|
||||
DROP VIEW IF EXISTS tab3
|
||||
-- !query 25 schema
|
||||
struct<>
|
||||
-- !query 25 output
|
||||
|
||||
|
||||
|
||||
-- !query 26
|
||||
DROP VIEW IF EXISTS tab4
|
||||
-- !query 26 schema
|
||||
struct<>
|
||||
-- !query 26 output
|
||||
|
||||
|
|
Loading…
Reference in a new issue