[SPARK-18094][SQL][TESTS] Move group analytics test cases from SQLQuerySuite into a query file test.

## What changes were proposed in this pull request?

Currently we have several test cases for group analytics(ROLLUP/CUBE/GROUPING SETS) in `SQLQuerySuite`, should better move them into a query file test.
The following test cases are moved to `group-analytics.sql`:
```
test("rollup")
test("grouping sets when aggregate functions containing groupBy columns")
test("cube")
test("grouping sets")
test("grouping and grouping_id")
test("grouping and grouping_id in having")
test("grouping and grouping_id in sort")
```

This is followup work of #15582

## How was this patch tested?

Modified query file `group-analytics.sql`, which will be tested by `SQLQueryTestSuite`.

Author: jiangxingbo <jiangxb1987@gmail.com>

Closes #15624 from jiangxb1987/group-analytics-test.
This commit is contained in:
jiangxingbo 2016-10-26 23:51:16 +02:00 committed by Reynold Xin
parent dcdda19785
commit 5b7d403c18
3 changed files with 290 additions and 192 deletions

View file

@ -10,4 +10,48 @@ SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE;
-- ROLLUP on overlapping columns
SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP;
SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP;
SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP;
CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
AS courseSales(course, year, earnings);
-- ROLLUP
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year;
-- CUBE
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year;
-- GROUPING SETS
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year);
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course);
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year);
-- GROUPING SETS with aggregate functions containing groupBy columns
SELECT course, SUM(earnings) AS sum FROM courseSales
GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;
SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;
-- GROUPING/GROUPING_ID
SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
GROUP BY CUBE(course, year);
SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year;
SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year;
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year);
-- GROUPING/GROUPING_ID in having clause
SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0;
SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0;
SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0;
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0;
-- GROUPING/GROUPING_ID in orderBy clause
SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
ORDER BY GROUPING(course), GROUPING(year), course, year;
SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
ORDER BY GROUPING(course), GROUPING(year), course, year;
SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course);
SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course);
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id;

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 5
-- Number of queries: 26
-- !query 0
@ -32,7 +32,6 @@ NULL 2 0
NULL NULL 3
-- !query 2
SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE
-- !query 2 schema
@ -85,3 +84,247 @@ struct<a:int,b:int,sum(b):bigint>
3 2 2
3 NULL 3
NULL NULL 9
-- !query 5
CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
AS courseSales(course, year, earnings)
-- !query 5 schema
struct<>
-- !query 5 output
-- !query 6
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year
-- !query 6 schema
struct<course:string,year:int,sum(earnings):bigint>
-- !query 6 output
NULL NULL 113000
Java NULL 50000
Java 2012 20000
Java 2013 30000
dotNET NULL 63000
dotNET 2012 15000
dotNET 2013 48000
-- !query 7
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year
-- !query 7 schema
struct<course:string,year:int,sum(earnings):bigint>
-- !query 7 output
NULL NULL 113000
NULL 2012 35000
NULL 2013 78000
Java NULL 50000
Java 2012 20000
Java 2013 30000
dotNET NULL 63000
dotNET 2012 15000
dotNET 2013 48000
-- !query 8
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
-- !query 8 schema
struct<course:string,year:int,sum(earnings):bigint>
-- !query 8 output
Java NULL 50000
NULL 2012 35000
NULL 2013 78000
dotNET NULL 63000
-- !query 9
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course)
-- !query 9 schema
struct<course:string,year:int,sum(earnings):bigint>
-- !query 9 output
Java NULL 50000
dotNET NULL 63000
-- !query 10
SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
-- !query 10 schema
struct<course:string,year:int,sum(earnings):bigint>
-- !query 10 output
NULL 2012 35000
NULL 2013 78000
-- !query 11
SELECT course, SUM(earnings) AS sum FROM courseSales
GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
-- !query 11 schema
struct<course:string,sum:bigint>
-- !query 11 output
NULL 113000
Java 20000
Java 30000
Java 50000
dotNET 5000
dotNET 10000
dotNET 48000
dotNET 63000
-- !query 12
SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
-- !query 12 schema
struct<course:string,sum:bigint,grouping_id(course, earnings):int>
-- !query 12 output
NULL 113000 3
Java 20000 0
Java 30000 0
Java 50000 1
dotNET 5000 0
dotNET 10000 0
dotNET 48000 0
dotNET 63000 1
-- !query 13
SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
GROUP BY CUBE(course, year)
-- !query 13 schema
struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course, year):int>
-- !query 13 output
Java 2012 0 0 0
Java 2013 0 0 0
Java NULL 0 1 1
NULL 2012 1 0 2
NULL 2013 1 0 2
NULL NULL 1 1 3
dotNET 2012 0 0 0
dotNET 2013 0 0 0
dotNET NULL 0 1 1
-- !query 14
SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year
-- !query 14 schema
struct<>
-- !query 14 output
org.apache.spark.sql.AnalysisException
grouping() can only be used with GroupingSets/Cube/Rollup;
-- !query 15
SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year
-- !query 15 schema
struct<>
-- !query 15 output
org.apache.spark.sql.AnalysisException
grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 16
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year)
-- !query 16 schema
struct<>
-- !query 16 output
org.apache.spark.sql.AnalysisException
grouping__id is deprecated; use grouping_id() instead;
-- !query 17
SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0
-- !query 17 schema
struct<course:string,year:int>
-- !query 17 output
Java NULL
NULL NULL
dotNET NULL
-- !query 18
SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0
-- !query 18 schema
struct<>
-- !query 18 output
org.apache.spark.sql.AnalysisException
grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 19
SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0
-- !query 19 schema
struct<>
-- !query 19 output
org.apache.spark.sql.AnalysisException
grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 20
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
-- !query 20 schema
struct<>
-- !query 20 output
org.apache.spark.sql.AnalysisException
grouping__id is deprecated; use grouping_id() instead;
-- !query 21
SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
ORDER BY GROUPING(course), GROUPING(year), course, year
-- !query 21 schema
struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint>
-- !query 21 output
Java 2012 0 0
Java 2013 0 0
dotNET 2012 0 0
dotNET 2013 0 0
Java NULL 0 1
dotNET NULL 0 1
NULL 2012 1 0
NULL 2013 1 0
NULL NULL 1 1
-- !query 22
SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
ORDER BY GROUPING(course), GROUPING(year), course, year
-- !query 22 schema
struct<course:string,year:int,grouping_id(course, year):int>
-- !query 22 output
Java 2012 0
Java 2013 0
dotNET 2012 0
dotNET 2013 0
Java NULL 1
dotNET NULL 1
NULL 2012 2
NULL 2013 2
NULL NULL 3
-- !query 23
SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course)
-- !query 23 schema
struct<>
-- !query 23 output
org.apache.spark.sql.AnalysisException
grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 24
SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course)
-- !query 24 schema
struct<>
-- !query 24 output
org.apache.spark.sql.AnalysisException
grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 25
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id
-- !query 25 schema
struct<>
-- !query 25 output
org.apache.spark.sql.AnalysisException
grouping__id is deprecated; use grouping_id() instead;

View file

@ -2005,195 +2005,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
Row(false) :: Row(true) :: Nil)
}
test("rollup") {
checkAnswer(
sql("select course, year, sum(earnings) from courseSales group by rollup(course, year)" +
" order by course, year"),
Row(null, null, 113000.0) ::
Row("Java", null, 50000.0) ::
Row("Java", 2012, 20000.0) ::
Row("Java", 2013, 30000.0) ::
Row("dotNET", null, 63000.0) ::
Row("dotNET", 2012, 15000.0) ::
Row("dotNET", 2013, 48000.0) :: Nil
)
}
test("grouping sets when aggregate functions containing groupBy columns") {
checkAnswer(
sql("select course, sum(earnings) as sum from courseSales group by course, earnings " +
"grouping sets((), (course), (course, earnings)) " +
"order by course, sum"),
Row(null, 113000.0) ::
Row("Java", 20000.0) ::
Row("Java", 30000.0) ::
Row("Java", 50000.0) ::
Row("dotNET", 5000.0) ::
Row("dotNET", 10000.0) ::
Row("dotNET", 48000.0) ::
Row("dotNET", 63000.0) :: Nil
)
checkAnswer(
sql("select course, sum(earnings) as sum, grouping_id(course, earnings) from courseSales " +
"group by course, earnings grouping sets((), (course), (course, earnings)) " +
"order by course, sum"),
Row(null, 113000.0, 3) ::
Row("Java", 20000.0, 0) ::
Row("Java", 30000.0, 0) ::
Row("Java", 50000.0, 1) ::
Row("dotNET", 5000.0, 0) ::
Row("dotNET", 10000.0, 0) ::
Row("dotNET", 48000.0, 0) ::
Row("dotNET", 63000.0, 1) :: Nil
)
}
test("cube") {
checkAnswer(
sql("select course, year, sum(earnings) from courseSales group by cube(course, year)"),
Row("Java", 2012, 20000.0) ::
Row("Java", 2013, 30000.0) ::
Row("Java", null, 50000.0) ::
Row("dotNET", 2012, 15000.0) ::
Row("dotNET", 2013, 48000.0) ::
Row("dotNET", null, 63000.0) ::
Row(null, 2012, 35000.0) ::
Row(null, 2013, 78000.0) ::
Row(null, null, 113000.0) :: Nil
)
}
test("grouping sets") {
checkAnswer(
sql("select course, year, sum(earnings) from courseSales group by course, year " +
"grouping sets(course, year)"),
Row("Java", null, 50000.0) ::
Row("dotNET", null, 63000.0) ::
Row(null, 2012, 35000.0) ::
Row(null, 2013, 78000.0) :: Nil
)
checkAnswer(
sql("select course, year, sum(earnings) from courseSales group by course, year " +
"grouping sets(course)"),
Row("Java", null, 50000.0) ::
Row("dotNET", null, 63000.0) :: Nil
)
checkAnswer(
sql("select course, year, sum(earnings) from courseSales group by course, year " +
"grouping sets(year)"),
Row(null, 2012, 35000.0) ::
Row(null, 2013, 78000.0) :: Nil
)
}
test("grouping and grouping_id") {
checkAnswer(
sql("select course, year, grouping(course), grouping(year), grouping_id(course, year)" +
" from courseSales group by cube(course, year)"),
Row("Java", 2012, 0, 0, 0) ::
Row("Java", 2013, 0, 0, 0) ::
Row("Java", null, 0, 1, 1) ::
Row("dotNET", 2012, 0, 0, 0) ::
Row("dotNET", 2013, 0, 0, 0) ::
Row("dotNET", null, 0, 1, 1) ::
Row(null, 2012, 1, 0, 2) ::
Row(null, 2013, 1, 0, 2) ::
Row(null, null, 1, 1, 3) :: Nil
)
var error = intercept[AnalysisException] {
sql("select course, year, grouping(course) from courseSales group by course, year")
}
assert(error.getMessage contains "grouping() can only be used with GroupingSets/Cube/Rollup")
error = intercept[AnalysisException] {
sql("select course, year, grouping_id(course, year) from courseSales group by course, year")
}
assert(error.getMessage contains "grouping_id() can only be used with GroupingSets/Cube/Rollup")
error = intercept[AnalysisException] {
sql("select course, year, grouping__id from courseSales group by cube(course, year)")
}
assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead")
}
test("grouping and grouping_id in having") {
checkAnswer(
sql("select course, year from courseSales group by cube(course, year)" +
" having grouping(year) = 1 and grouping_id(course, year) > 0"),
Row("Java", null) ::
Row("dotNET", null) ::
Row(null, null) :: Nil
)
var error = intercept[AnalysisException] {
sql("select course, year from courseSales group by course, year" +
" having grouping(course) > 0")
}
assert(error.getMessage contains
"grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup")
error = intercept[AnalysisException] {
sql("select course, year from courseSales group by course, year" +
" having grouping_id(course, year) > 0")
}
assert(error.getMessage contains
"grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup")
error = intercept[AnalysisException] {
sql("select course, year from courseSales group by cube(course, year)" +
" having grouping__id > 0")
}
assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead")
}
test("grouping and grouping_id in sort") {
checkAnswer(
sql("select course, year, grouping(course), grouping(year) from courseSales" +
" group by cube(course, year) order by grouping_id(course, year), course, year"),
Row("Java", 2012, 0, 0) ::
Row("Java", 2013, 0, 0) ::
Row("dotNET", 2012, 0, 0) ::
Row("dotNET", 2013, 0, 0) ::
Row("Java", null, 0, 1) ::
Row("dotNET", null, 0, 1) ::
Row(null, 2012, 1, 0) ::
Row(null, 2013, 1, 0) ::
Row(null, null, 1, 1) :: Nil
)
checkAnswer(
sql("select course, year, grouping_id(course, year) from courseSales" +
" group by cube(course, year) order by grouping(course), grouping(year), course, year"),
Row("Java", 2012, 0) ::
Row("Java", 2013, 0) ::
Row("dotNET", 2012, 0) ::
Row("dotNET", 2013, 0) ::
Row("Java", null, 1) ::
Row("dotNET", null, 1) ::
Row(null, 2012, 2) ::
Row(null, 2013, 2) ::
Row(null, null, 3) :: Nil
)
var error = intercept[AnalysisException] {
sql("select course, year from courseSales group by course, year" +
" order by grouping(course)")
}
assert(error.getMessage contains
"grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup")
error = intercept[AnalysisException] {
sql("select course, year from courseSales group by course, year" +
" order by grouping_id(course, year)")
}
assert(error.getMessage contains
"grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup")
error = intercept[AnalysisException] {
sql("select course, year from courseSales group by cube(course, year)" +
" order by grouping__id")
}
assert(error.getMessage contains "grouping__id is deprecated; use grouping_id() instead")
}
test("filter on a grouping column that is not presented in SELECT") {
checkAnswer(
sql("select count(1) from (select 1 as a) t group by a having a > 0"),