From 8bfb4f1e72f33205b94957f7dacf298b0c8bde17 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 17 Aug 2021 20:23:49 +0800 Subject: [PATCH] Revert "[SPARK-35028][SQL] ANSI mode: disallow group by aliases" ### What changes were proposed in this pull request? Revert [[SPARK-35028][SQL] ANSI mode: disallow group by aliases ](https://github.com/apache/spark/pull/32129) ### Why are the changes needed? It turns out that many users are using the group by alias feature. Spark has its precedence rule when alias names conflict with column names in Group by clause: always use the table column. This should be reasonable and acceptable. Also, external DBMS such as PostgreSQL and MySQL allow grouping by alias, too. As we are going to announce ANSI mode GA in Spark 3.2, I suggest allowing the group by alias in ANSI mode. ### Does this PR introduce _any_ user-facing change? No, the feature is not released yet. ### How was this patch tested? Unit tests Closes #33758 from gengliangwang/revertGroupByAlias. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- docs/sql-ref-ansi-compliance.md | 1 - .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../apache/spark/sql/internal/SQLConf.scala | 27 +- .../sql-tests/inputs/ansi/group-analytics.sql | 1 - .../results/ansi/group-analytics.sql.out | 1293 ----------------- 5 files changed, 14 insertions(+), 1310 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql delete mode 100644 sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index a647abc9e9..f0e1066528 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -255,7 +255,6 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql. The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map. - - `GROUP BY`: aliases in a select list can not be used in GROUP BY clauses. Each column referenced in a GROUP BY clause shall unambiguously reference a column of the table resulting from the FROM clause. ### Useful Functions for ANSI Mode diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a727fa0f20..af9ff0d980 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1948,7 +1948,7 @@ class Analyzer(override val catalogManager: CatalogManager) // mayResolveAttrByAggregateExprs requires the TreePattern UNRESOLVED_ATTRIBUTE. _.containsAllPatterns(AGGREGATE, UNRESOLVED_ATTRIBUTE), ruleId) { case agg @ Aggregate(groups, aggs, child) - if allowGroupByAlias && child.resolved && aggs.forall(_.resolved) && + if conf.groupByAliases && child.resolved && aggs.forall(_.resolved) && groups.exists(!_.resolved) => agg.copy(groupingExpressions = mayResolveAttrByAggregateExprs(groups, aggs, child)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index a930f638ca..982076df79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -240,17 +240,6 @@ object SQLConf { .intConf .createWithDefault(100) - val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled") - .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + - "For example, Spark will throw an exception at runtime instead of returning null results " + - "when the inputs to a SQL operator/function are invalid." + - "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " + - "Spark's documentation. Some ANSI dialect features may be not from the ANSI SQL " + - "standard directly, but their behaviors align with ANSI SQL's style") - .version("3.0.0") - .booleanConf - .createWithDefault(false) - val OPTIMIZER_EXCLUDED_RULES = buildConf("spark.sql.optimizer.excludedRules") .doc("Configures a list of rules to be disabled in the optimizer, in which the rules are " + "specified by their rule names and separated by comma. It is not guaranteed that all the " + @@ -1221,9 +1210,8 @@ object SQLConf { .createWithDefault(true) val GROUP_BY_ALIASES = buildConf("spark.sql.groupByAliases") - .doc("This configuration is only effective when ANSI mode is disabled. When it is true and " + - s"${ANSI_ENABLED.key} is false, aliases in a select list can be used in group by clauses. " + - "Otherwise, an analysis exception is thrown in the case.") + .doc("When true, aliases in a select list can be used in group by clauses. When false, " + + "an analysis exception is thrown in the case.") .version("2.2.0") .booleanConf .createWithDefault(true) @@ -2547,6 +2535,17 @@ object SQLConf { .checkValues(StoreAssignmentPolicy.values.map(_.toString)) .createWithDefault(StoreAssignmentPolicy.ANSI.toString) + val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled") + .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + + "For example, Spark will throw an exception at runtime instead of returning null results " + + "when the inputs to a SQL operator/function are invalid." + + "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " + + "Spark's documentation. Some ANSI dialect features may be not from the ANSI SQL " + + "standard directly, but their behaviors align with ANSI SQL's style") + .version("3.0.0") + .booleanConf + .createWithDefault(false) + val SORT_BEFORE_REPARTITION = buildConf("spark.sql.execution.sortBeforeRepartition") .internal() diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql deleted file mode 100644 index d78689233d..0000000000 --- a/sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql +++ /dev/null @@ -1 +0,0 @@ ---IMPORT group-analytics.sql \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out deleted file mode 100644 index 9dbfc4cf4f..0000000000 --- a/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out +++ /dev/null @@ -1,1293 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 52 - - --- !query -CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES -(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) -AS testData(a, b) --- !query schema -struct<> --- !query output - - - --- !query -SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE --- !query schema -struct<(a + b):int,b:int,sum((a - b)):bigint> --- !query output -2 1 0 -2 NULL 0 -3 1 1 -3 2 -1 -3 NULL 0 -4 1 2 -4 2 0 -4 NULL 2 -5 2 1 -5 NULL 1 -NULL 1 3 -NULL 2 0 -NULL NULL 3 - - --- !query -SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE --- !query schema -struct --- !query output -1 1 1 -1 2 2 -1 NULL 3 -2 1 1 -2 2 2 -2 NULL 3 -3 1 1 -3 2 2 -3 NULL 3 -NULL 1 3 -NULL 2 6 -NULL NULL 9 - - --- !query -SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP --- !query schema -struct<(a + b):int,b:int,sum((a - b)):bigint> --- !query output -2 1 0 -2 NULL 0 -3 1 1 -3 2 -1 -3 NULL 0 -4 1 2 -4 2 0 -4 NULL 2 -5 2 1 -5 NULL 1 -NULL NULL 3 - - --- !query -SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP --- !query schema -struct --- !query output -1 1 1 -1 2 2 -1 NULL 3 -2 1 1 -2 2 2 -2 NULL 3 -3 1 1 -3 2 2 -3 NULL 3 -NULL NULL 9 - - --- !query -CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES -("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) -AS courseSales(course, year, earnings) --- !query schema -struct<> --- !query output - - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year --- !query schema -struct --- !query output -NULL NULL 113000 -Java NULL 50000 -Java 2012 20000 -Java 2013 30000 -dotNET NULL 63000 -dotNET 2012 15000 -dotNET 2013 48000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year --- !query schema -struct --- !query output -NULL NULL 113000 -Java NULL 50000 -Java 2012 20000 -Java 2012 20000 -Java 2013 30000 -Java 2013 30000 -dotNET NULL 63000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2013 48000 -dotNET 2013 48000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -Empty set in ROLLUP grouping sets is not supported.(line 1, pos 61) - -== SQL == -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year --------------------------------------------------------------^^^ - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year --- !query schema -struct --- !query output -NULL NULL 113000 -NULL 2012 35000 -NULL 2013 78000 -Java NULL 50000 -Java 2012 20000 -Java 2013 30000 -dotNET NULL 63000 -dotNET 2012 15000 -dotNET 2013 48000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year --- !query schema -struct --- !query output -NULL NULL 113000 -NULL 2012 35000 -NULL 2013 78000 -Java NULL 50000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -dotNET NULL 63000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -Empty set in CUBE grouping sets is not supported.(line 1, pos 61) - -== SQL == -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year --------------------------------------------------------------^^^ - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year) --- !query schema -struct --- !query output -Java NULL 50000 -NULL 2012 35000 -NULL 2013 78000 -dotNET NULL 63000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ()) --- !query schema -struct --- !query output -Java NULL 50000 -NULL 2012 35000 -NULL 2013 78000 -NULL NULL 113000 -dotNET NULL 63000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course) --- !query schema -struct --- !query output -Java NULL 50000 -dotNET NULL 63000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year) --- !query schema -struct --- !query output -NULL 2012 35000 -NULL 2013 78000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year --- !query schema -struct --- !query output -Java NULL 50000 -Java NULL 50000 -Java 2012 20000 -Java 2012 20000 -Java 2013 30000 -Java 2013 30000 -dotNET NULL 63000 -dotNET NULL 63000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2013 48000 -dotNET 2013 48000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year --- !query schema -struct --- !query output -NULL NULL 113000 -NULL 2012 35000 -NULL 2013 78000 -Java NULL 50000 -Java NULL 50000 -Java NULL 50000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -dotNET NULL 63000 -dotNET NULL 63000 -dotNET NULL 63000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 - - --- !query -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year --- !query schema -struct --- !query output -NULL 2012 35000 -NULL 2012 35000 -NULL 2013 78000 -NULL 2013 78000 -Java NULL 50000 -Java NULL 50000 -Java NULL 50000 -Java NULL 50000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2012 20000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -Java 2013 30000 -dotNET NULL 63000 -dotNET NULL 63000 -dotNET NULL 63000 -dotNET NULL 63000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2012 15000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 -dotNET 2013 48000 - - --- !query -SELECT course, SUM(earnings) AS sum FROM courseSales -GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum --- !query schema -struct --- !query output -NULL 113000 -Java 20000 -Java 30000 -Java 50000 -dotNET 5000 -dotNET 10000 -dotNET 48000 -dotNET 63000 - - --- !query -SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales -GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum --- !query schema -struct --- !query output -NULL 113000 3 -Java 20000 0 -Java 30000 0 -Java 50000 1 -dotNET 5000 0 -dotNET 10000 0 -dotNET 48000 0 -dotNET 63000 1 - - --- !query -SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales -GROUP BY CUBE(course, year) --- !query schema -struct --- !query output -Java 2012 0 0 0 -Java 2013 0 0 0 -Java NULL 0 1 1 -NULL 2012 1 0 2 -NULL 2013 1 0 2 -NULL NULL 1 1 3 -dotNET 2012 0 0 0 -dotNET 2013 0 0 0 -dotNET NULL 0 1 1 - - --- !query -SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -grouping() can only be used with GroupingSets/Cube/Rollup - - --- !query -SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -grouping_id() can only be used with GroupingSets/Cube/Rollup - - --- !query -SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year --- !query schema -struct --- !query output -Java 2012 0 -Java 2013 0 -dotNET 2012 0 -dotNET 2013 0 -Java NULL 1 -dotNET NULL 1 -NULL 2012 2 -NULL 2013 2 -NULL NULL 3 - - --- !query -SELECT course, year FROM courseSales GROUP BY CUBE(course, year) -HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year --- !query schema -struct --- !query output -NULL NULL -Java NULL -dotNET NULL - - --- !query -SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0 --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup - - --- !query -SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0 --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup - - --- !query -SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0 --- !query schema -struct --- !query output -Java NULL -NULL 2012 -NULL 2013 -NULL NULL -dotNET NULL - - --- !query -SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year) -ORDER BY GROUPING(course), GROUPING(year), course, year --- !query schema -struct --- !query output -Java 2012 0 0 -Java 2013 0 0 -dotNET 2012 0 0 -dotNET 2013 0 0 -Java NULL 0 1 -dotNET NULL 0 1 -NULL 2012 1 0 -NULL 2013 1 0 -NULL NULL 1 1 - - --- !query -SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year) -ORDER BY GROUPING(course), GROUPING(year), course, year --- !query schema -struct --- !query output -Java 2012 0 -Java 2013 0 -dotNET 2012 0 -dotNET 2013 0 -Java NULL 1 -dotNET NULL 1 -NULL 2012 2 -NULL 2013 2 -NULL NULL 3 - - --- !query -SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course) --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup - - --- !query -SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course) --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup - - --- !query -SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year --- !query schema -struct --- !query output -Java 2012 -Java 2013 -dotNET 2012 -dotNET 2013 -Java NULL -dotNET NULL -NULL 2012 -NULL 2013 -NULL NULL - - --- !query -SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2) --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'k1' given input columns: [testdata.a, testdata.b]; line 1 pos 68 - - --- !query -SELECT a + b AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b) --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'k' given input columns: [testdata.a, testdata.b]; line 1 pos 63 - - --- !query -SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k) --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'k' given input columns: [testdata.a, testdata.b]; line 1 pos 79 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, b, CUBE(a, b) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, b, ROLLUP(a, b) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 NULL 2 -1 NULL 2 -1 NULL 2 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 NULL 2 -2 NULL 2 -2 NULL 2 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 NULL 2 -3 NULL 2 -3 NULL 2 -NULL 1 3 -NULL 2 3 -NULL NULL 6 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 NULL 2 -1 NULL 2 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 NULL 2 -2 NULL 2 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 NULL 2 -3 NULL 2 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ()) --- !query schema -struct --- !query output -1 1 1 -1 2 1 -1 NULL 2 -1 NULL 2 -2 1 1 -2 2 1 -2 NULL 2 -2 NULL 2 -3 1 1 -3 2 1 -3 NULL 2 -3 NULL 2 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ()) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 NULL 2 -1 NULL 2 -1 NULL 2 -1 NULL 2 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 NULL 2 -2 NULL 2 -2 NULL 2 -2 NULL 2 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 NULL 2 -3 NULL 2 -3 NULL 2 -3 NULL 2 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ()) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 2 1 -1 NULL 2 -1 NULL 2 -1 NULL 2 -1 NULL 2 -1 NULL 2 -1 NULL 2 -1 NULL 2 -1 NULL 2 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 2 1 -2 NULL 2 -2 NULL 2 -2 NULL 2 -2 NULL 2 -2 NULL 2 -2 NULL 2 -2 NULL 2 -2 NULL 2 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 2 1 -3 NULL 2 -3 NULL 2 -3 NULL 2 -3 NULL 2 -3 NULL 2 -3 NULL 2 -3 NULL 2 -3 NULL 2 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b)) --- !query schema -struct --- !query output -1 1 1 -1 2 1 -1 NULL 2 -1 NULL 2 -2 1 1 -2 2 1 -2 NULL 2 -2 NULL 2 -3 1 1 -3 2 1 -3 NULL 2 -3 NULL 2 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ())) --- !query schema -struct --- !query output -1 1 1 -1 2 1 -1 NULL 2 -1 NULL 2 -2 1 1 -2 2 1 -2 NULL 2 -2 NULL 2 -3 1 1 -3 2 1 -3 NULL 2 -3 NULL 2 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b))) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 NULL 2 -1 NULL 2 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 NULL 2 -2 NULL 2 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 NULL 2 -3 NULL 2 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b)) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b))) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b)) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -1 NULL 2 -1 NULL 2 -1 NULL 2 -1 NULL 2 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -2 NULL 2 -2 NULL 2 -2 NULL 2 -2 NULL 2 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 -3 NULL 2 -3 NULL 2 -3 NULL 2 -3 NULL 2 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ())) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -1 NULL 2 -1 NULL 2 -1 NULL 2 -1 NULL 2 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -2 NULL 2 -2 NULL 2 -2 NULL 2 -2 NULL 2 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 -3 NULL 2 -3 NULL 2 -3 NULL 2 -3 NULL 2 - - --- !query -SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ()) --- !query schema -struct --- !query output -1 1 1 -1 1 1 -1 1 1 -1 2 1 -1 2 1 -1 2 1 -1 NULL 2 -1 NULL 2 -1 NULL 2 -1 NULL 2 -2 1 1 -2 1 1 -2 1 1 -2 2 1 -2 2 1 -2 2 1 -2 NULL 2 -2 NULL 2 -2 NULL 2 -2 NULL 2 -3 1 1 -3 1 1 -3 1 1 -3 2 1 -3 2 1 -3 2 1 -3 NULL 2 -3 NULL 2 -3 NULL 2 -3 NULL 2