[SPARK-24997][SQL] Enable support of MINUS ALL

## What changes were proposed in this pull request?
Enable support for MINUS ALL which was gated at AstBuilder.

## How was this patch tested?
Added tests in SQLQueryTestSuite and modify PlanParserSuite.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Dilip Biswal <dbiswal@us.ibm.com>

Closes #21963 from dilipbiswal/minus-all.
This commit is contained in:
Dilip Biswal 2018-08-02 22:45:10 -07:00 committed by Xiao Li
parent b0d6967d45
commit 19a4531913
4 changed files with 140 additions and 98 deletions

View file

@ -517,11 +517,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
* Connect two queries by a Set operator.
*
* Supported Set operators are:
* - UNION [DISTINCT]
* - UNION ALL
* - EXCEPT [DISTINCT]
* - MINUS [DISTINCT]
* - INTERSECT [DISTINCT]
* - UNION [ DISTINCT | ALL ]
* - EXCEPT [ DISTINCT | ALL ]
* - MINUS [ DISTINCT | ALL ]
* - INTERSECT [DISTINCT | ALL]
*/
override def visitSetOperation(ctx: SetOperationContext): LogicalPlan = withOrigin(ctx) {
val left = plan(ctx.left)
@ -541,7 +540,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
case SqlBaseParser.EXCEPT =>
Except(left, right)
case SqlBaseParser.SETMINUS if all =>
throw new ParseException("MINUS ALL is not supported.", ctx)
Except(left, right, isAll = true)
case SqlBaseParser.SETMINUS =>
Except(left, right)
}

View file

@ -67,11 +67,13 @@ class PlanParserSuite extends AnalysisTest {
assertEqual("select * from a union all select * from b", a.union(b))
assertEqual("select * from a except select * from b", a.except(b))
assertEqual("select * from a except distinct select * from b", a.except(b))
assertEqual("select * from a except all select * from b", a.except(b, isAll = true))
assertEqual("select * from a minus select * from b", a.except(b))
intercept("select * from a minus all select * from b", "MINUS ALL is not supported.")
assertEqual("select * from a minus all select * from b", a.except(b, isAll = true))
assertEqual("select * from a minus distinct select * from b", a.except(b))
assertEqual("select * from a intersect select * from b", a.intersect(b))
assertEqual("select * from a intersect distinct select * from b", a.intersect(b))
assertEqual("select * from a intersect all select * from b", a.intersect(b, isAll = true))
}
test("common table expressions") {

View file

@ -17,12 +17,17 @@ CREATE TEMPORARY VIEW tab4 AS SELECT * FROM VALUES
(2, 20)
AS tab4(k, v);
-- Basic ExceptAll
-- Basic EXCEPT ALL
SELECT * FROM tab1
EXCEPT ALL
SELECT * FROM tab2;
-- ExceptAll same table in both branches
-- MINUS ALL (synonym for EXCEPT)
SELECT * FROM tab1
MINUS ALL
SELECT * FROM tab2;
-- EXCEPT ALL same table in both branches
SELECT * FROM tab1
EXCEPT ALL
SELECT * FROM tab2 WHERE c1 IS NOT NULL;
@ -57,14 +62,14 @@ SELECT * FROM tab4
EXCEPT ALL
SELECT * FROM tab3;
-- ExceptAll + Intersect
-- EXCEPT ALL + INTERSECT
SELECT * FROM tab4
EXCEPT ALL
SELECT * FROM tab3
INTERSECT DISTINCT
SELECT * FROM tab4;
-- ExceptAll + Except
-- EXCEPT ALL + EXCEPT
SELECT * FROM tab4
EXCEPT ALL
SELECT * FROM tab3
@ -94,6 +99,15 @@ SELECT * FROM tab3
EXCEPT DISTINCT
SELECT * FROM tab4;
-- Using MINUS ALL
SELECT * FROM tab3
MINUS ALL
SELECT * FROM tab4
UNION
SELECT * FROM tab3
MINUS DISTINCT
SELECT * FROM tab4;
-- Chain of set operations
SELECT * FROM tab3
EXCEPT ALL

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 25
-- Number of queries: 27
-- !query 0
@ -63,8 +63,8 @@ NULL
-- !query 5
SELECT * FROM tab1
EXCEPT ALL
SELECT * FROM tab2 WHERE c1 IS NOT NULL
MINUS ALL
SELECT * FROM tab2
-- !query 5 schema
struct<c1:int>
-- !query 5 output
@ -72,45 +72,41 @@ struct<c1:int>
2
2
NULL
NULL
-- !query 6
SELECT * FROM tab1 WHERE c1 > 5
SELECT * FROM tab1
EXCEPT ALL
SELECT * FROM tab2
SELECT * FROM tab2 WHERE c1 IS NOT NULL
-- !query 6 schema
struct<c1:int>
-- !query 6 output
0
2
2
NULL
NULL
-- !query 7
SELECT * FROM tab1
SELECT * FROM tab1 WHERE c1 > 5
EXCEPT ALL
SELECT * FROM tab2 WHERE c1 > 6
SELECT * FROM tab2
-- !query 7 schema
struct<c1:int>
-- !query 7 output
0
1
2
2
2
2
3
NULL
NULL
-- !query 8
SELECT * FROM tab1
EXCEPT ALL
SELECT CAST(1 AS BIGINT)
SELECT * FROM tab2 WHERE c1 > 6
-- !query 8 schema
struct<c1:bigint>
struct<c1:int>
-- !query 8 output
0
1
2
2
2
@ -123,42 +119,46 @@ NULL
-- !query 9
SELECT * FROM tab1
EXCEPT ALL
SELECT array(1)
SELECT CAST(1 AS BIGINT)
-- !query 9 schema
struct<>
struct<c1:bigint>
-- !query 9 output
0
2
2
2
2
3
NULL
NULL
-- !query 10
SELECT * FROM tab1
EXCEPT ALL
SELECT array(1)
-- !query 10 schema
struct<>
-- !query 10 output
org.apache.spark.sql.AnalysisException
ExceptAll can only be performed on tables with the compatible column types. array<int> <> int at the first column of the second table;
-- !query 10
SELECT * FROM tab3
EXCEPT ALL
SELECT * FROM tab4
-- !query 10 schema
struct<k:int,v:int>
-- !query 10 output
1 2
1 3
-- !query 11
SELECT * FROM tab4
EXCEPT ALL
SELECT * FROM tab3
EXCEPT ALL
SELECT * FROM tab4
-- !query 11 schema
struct<k:int,v:int>
-- !query 11 output
2 2
2 20
1 2
1 3
-- !query 12
SELECT * FROM tab4
EXCEPT ALL
SELECT * FROM tab3
INTERSECT DISTINCT
SELECT * FROM tab4
-- !query 12 schema
struct<k:int,v:int>
-- !query 12 output
@ -170,15 +170,28 @@ struct<k:int,v:int>
SELECT * FROM tab4
EXCEPT ALL
SELECT * FROM tab3
EXCEPT DISTINCT
INTERSECT DISTINCT
SELECT * FROM tab4
-- !query 13 schema
struct<k:int,v:int>
-- !query 13 output
2 2
2 20
-- !query 14
SELECT * FROM tab4
EXCEPT ALL
SELECT * FROM tab3
EXCEPT DISTINCT
SELECT * FROM tab4
-- !query 14 schema
struct<k:int,v:int>
-- !query 14 output
-- !query 15
SELECT * FROM tab3
EXCEPT ALL
SELECT * FROM tab4
@ -186,24 +199,24 @@ UNION ALL
SELECT * FROM tab3
EXCEPT DISTINCT
SELECT * FROM tab4
-- !query 14 schema
-- !query 15 schema
struct<k:int,v:int>
-- !query 14 output
-- !query 15 output
1 3
-- !query 15
-- !query 16
SELECT k FROM tab3
EXCEPT ALL
SELECT k, v FROM tab4
-- !query 15 schema
-- !query 16 schema
struct<>
-- !query 15 output
-- !query 16 output
org.apache.spark.sql.AnalysisException
ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns;
-- !query 16
-- !query 17
SELECT * FROM tab3
EXCEPT ALL
SELECT * FROM tab4
@ -211,13 +224,27 @@ UNION
SELECT * FROM tab3
EXCEPT DISTINCT
SELECT * FROM tab4
-- !query 16 schema
-- !query 17 schema
struct<k:int,v:int>
-- !query 16 output
-- !query 17 output
1 3
-- !query 17
-- !query 18
SELECT * FROM tab3
MINUS ALL
SELECT * FROM tab4
UNION
SELECT * FROM tab3
MINUS DISTINCT
SELECT * FROM tab4
-- !query 18 schema
struct<k:int,v:int>
-- !query 18 output
1 3
-- !query 19
SELECT * FROM tab3
EXCEPT ALL
SELECT * FROM tab4
@ -225,13 +252,13 @@ EXCEPT DISTINCT
SELECT * FROM tab3
EXCEPT DISTINCT
SELECT * FROM tab4
-- !query 17 schema
-- !query 19 schema
struct<k:int,v:int>
-- !query 17 output
-- !query 19 output
-- !query 18
-- !query 20
SELECT *
FROM (SELECT tab3.k,
tab4.v
@ -245,13 +272,13 @@ FROM (SELECT tab3.k,
FROM tab3
JOIN tab4
ON tab3.k = tab4.k)
-- !query 18 schema
-- !query 20 schema
struct<k:int,v:int>
-- !query 18 output
-- !query 20 output
-- !query 19
-- !query 21
SELECT *
FROM (SELECT tab3.k,
tab4.v
@ -265,46 +292,30 @@ FROM (SELECT tab4.v AS k,
FROM tab3
JOIN tab4
ON tab3.k = tab4.k)
-- !query 19 schema
struct<k:int,v:int>
-- !query 19 output
1 2
1 2
1 2
2 20
2 20
2 3
2 3
-- !query 20
SELECT v FROM tab3 GROUP BY v
EXCEPT ALL
SELECT k FROM tab4 GROUP BY k
-- !query 20 schema
struct<v:int>
-- !query 20 output
3
-- !query 21
DROP VIEW IF EXISTS tab1
-- !query 21 schema
struct<>
struct<k:int,v:int>
-- !query 21 output
1 2
1 2
1 2
2 20
2 20
2 3
2 3
-- !query 22
DROP VIEW IF EXISTS tab2
SELECT v FROM tab3 GROUP BY v
EXCEPT ALL
SELECT k FROM tab4 GROUP BY k
-- !query 22 schema
struct<>
struct<v:int>
-- !query 22 output
3
-- !query 23
DROP VIEW IF EXISTS tab3
DROP VIEW IF EXISTS tab1
-- !query 23 schema
struct<>
-- !query 23 output
@ -312,8 +323,24 @@ struct<>
-- !query 24
DROP VIEW IF EXISTS tab4
DROP VIEW IF EXISTS tab2
-- !query 24 schema
struct<>
-- !query 24 output
-- !query 25
DROP VIEW IF EXISTS tab3
-- !query 25 schema
struct<>
-- !query 25 output
-- !query 26
DROP VIEW IF EXISTS tab4
-- !query 26 schema
struct<>
-- !query 26 output