[SPARK-27883][SQL] Port AGGREGATES.sql [Part 2]
## What changes were proposed in this pull request? This PR is to port AGGREGATES.sql from PostgreSQL regression tests. https://github.com/postgres/postgres/blob/REL_12_BETA1/src/test/regress/sql/aggregates.sql#L145-L350 The expected results can be found in the link: https://github.com/postgres/postgres/blob/REL_12_BETA1/src/test/regress/expected/aggregates.out#L499-L984 When porting the test cases, found four PostgreSQL specific features that do not exist in Spark SQL: - [SPARK-27877](https://issues.apache.org/jira/browse/SPARK-27877): Implement SQL-standard LATERAL subqueries - [SPARK-27878](https://issues.apache.org/jira/browse/SPARK-27878): Support ARRAY(sub-SELECT) expressions - [SPARK-27879](https://issues.apache.org/jira/browse/SPARK-27879): Implement bitwise integer aggregates(BIT_AND and BIT_OR) - [SPARK-27880](https://issues.apache.org/jira/browse/SPARK-27880): Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY) ## How was this patch tested? N/A Closes #24743 from wangyum/SPARK-27883. Authored-by: Yuming Wang <yumwang@ebay.com> Signed-off-by: gatorsmile <gatorsmile@gmail.com>
This commit is contained in:
parent
d1371a2dad
commit
4de96493ae
|
@ -3,7 +3,7 @@
|
|||
--
|
||||
--
|
||||
-- AGGREGATES [Part 1]
|
||||
-- https://github.com/postgres/postgres/blob/02ddd499322ab6f2f0d58692955dc9633c2150fc/src/test/regress/sql/aggregates.sql#L1-L143
|
||||
-- https://github.com/postgres/postgres/blob/REL_12_BETA1/src/test/regress/sql/aggregates.sql#L1-L143
|
||||
|
||||
-- avoid bit-exact output here because operations may not be bit-exact.
|
||||
-- SET extra_float_digits = 0;
|
||||
|
|
|
@ -0,0 +1,228 @@
|
|||
--
|
||||
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
||||
--
|
||||
--
|
||||
-- AGGREGATES [Part 2]
|
||||
-- https://github.com/postgres/postgres/blob/REL_12_BETA1/src/test/regress/sql/aggregates.sql#L145-L350
|
||||
|
||||
create temporary view int4_tbl as select * from values
|
||||
(0),
|
||||
(123456),
|
||||
(-123456),
|
||||
(2147483647),
|
||||
(-2147483647)
|
||||
as int4_tbl(f1);
|
||||
|
||||
-- Test handling of Params within aggregate arguments in hashed aggregation.
|
||||
-- Per bug report from Jeevan Chalke.
|
||||
-- [SPARK-27877] Implement SQL-standard LATERAL subqueries
|
||||
-- explain (verbose, costs off)
|
||||
-- select s1, s2, sm
|
||||
-- from generate_series(1, 3) s1,
|
||||
-- lateral (select s2, sum(s1 + s2) sm
|
||||
-- from generate_series(1, 3) s2 group by s2) ss
|
||||
-- order by 1, 2;
|
||||
-- select s1, s2, sm
|
||||
-- from generate_series(1, 3) s1,
|
||||
-- lateral (select s2, sum(s1 + s2) sm
|
||||
-- from generate_series(1, 3) s2 group by s2) ss
|
||||
-- order by 1, 2;
|
||||
|
||||
-- [SPARK-27878] Support ARRAY(sub-SELECT) expressions
|
||||
-- explain (verbose, costs off)
|
||||
-- select array(select sum(x+y) s
|
||||
-- from generate_series(1,3) y group by y order by s)
|
||||
-- from generate_series(1,3) x;
|
||||
-- select array(select sum(x+y) s
|
||||
-- from generate_series(1,3) y group by y order by s)
|
||||
-- from generate_series(1,3) x;
|
||||
|
||||
-- [SPARK-27879] Implement bitwise integer aggregates(BIT_AND and BIT_OR)
|
||||
--
|
||||
-- test for bitwise integer aggregates
|
||||
--
|
||||
-- CREATE TEMPORARY TABLE bitwise_test(
|
||||
-- i2 INT2,
|
||||
-- i4 INT4,
|
||||
-- i8 INT8,
|
||||
-- i INTEGER,
|
||||
-- x INT2,
|
||||
-- y BIT(4)
|
||||
-- );
|
||||
|
||||
-- empty case
|
||||
-- SELECT
|
||||
-- BIT_AND(i2) AS "?",
|
||||
-- BIT_OR(i4) AS "?"
|
||||
-- FROM bitwise_test;
|
||||
|
||||
-- COPY bitwise_test FROM STDIN NULL 'null';
|
||||
-- 1 1 1 1 1 B0101
|
||||
-- 3 3 3 null 2 B0100
|
||||
-- 7 7 7 3 4 B1100
|
||||
-- \.
|
||||
|
||||
-- SELECT
|
||||
-- BIT_AND(i2) AS "1",
|
||||
-- BIT_AND(i4) AS "1",
|
||||
-- BIT_AND(i8) AS "1",
|
||||
-- BIT_AND(i) AS "?",
|
||||
-- BIT_AND(x) AS "0",
|
||||
-- BIT_AND(y) AS "0100",
|
||||
--
|
||||
-- BIT_OR(i2) AS "7",
|
||||
-- BIT_OR(i4) AS "7",
|
||||
-- BIT_OR(i8) AS "7",
|
||||
-- BIT_OR(i) AS "?",
|
||||
-- BIT_OR(x) AS "7",
|
||||
-- BIT_OR(y) AS "1101"
|
||||
-- FROM bitwise_test;
|
||||
|
||||
--
|
||||
-- test boolean aggregates
|
||||
--
|
||||
-- first test all possible transition and final states
|
||||
|
||||
-- The result is inconsistent with PostgreSQL because our AND does not have strict mode
|
||||
SELECT
|
||||
-- boolean and transitions
|
||||
-- null because strict
|
||||
(NULL AND NULL) IS NULL AS `t`,
|
||||
(TRUE AND NULL) IS NULL AS `t`,
|
||||
(FALSE AND NULL) IS NULL AS `t`,
|
||||
(NULL AND TRUE) IS NULL AS `t`,
|
||||
(NULL AND FALSE) IS NULL AS `t`,
|
||||
-- and actual computations
|
||||
(TRUE AND TRUE) AS `t`,
|
||||
NOT (TRUE AND FALSE) AS `t`,
|
||||
NOT (FALSE AND TRUE) AS `t`,
|
||||
NOT (FALSE AND FALSE) AS `t`;
|
||||
|
||||
-- The result is inconsistent with PostgreSQL because our OR does not have strict mode
|
||||
SELECT
|
||||
-- boolean or transitions
|
||||
-- null because strict
|
||||
(NULL OR NULL) IS NULL AS `t`,
|
||||
(TRUE OR NULL) IS NULL AS `t`,
|
||||
(FALSE OR NULL) IS NULL AS `t`,
|
||||
(NULL OR TRUE) IS NULL AS `t`,
|
||||
(NULL OR FALSE) IS NULL AS `t`,
|
||||
-- actual computations
|
||||
(TRUE OR TRUE) AS `t`,
|
||||
(TRUE OR FALSE) AS `t`,
|
||||
(FALSE OR TRUE) AS `t`,
|
||||
NOT (FALSE OR FALSE) AS `t`;
|
||||
|
||||
-- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY)
|
||||
-- CREATE TEMPORARY TABLE bool_test(
|
||||
-- b1 BOOL,
|
||||
-- b2 BOOL,
|
||||
-- b3 BOOL,
|
||||
-- b4 BOOL);
|
||||
|
||||
-- empty case
|
||||
-- SELECT
|
||||
-- BOOL_AND(b1) AS "n",
|
||||
-- BOOL_OR(b3) AS "n"
|
||||
-- FROM bool_test;
|
||||
|
||||
-- COPY bool_test FROM STDIN NULL 'null';
|
||||
-- TRUE null FALSE null
|
||||
-- FALSE TRUE null null
|
||||
-- null TRUE FALSE null
|
||||
-- \.
|
||||
|
||||
-- SELECT
|
||||
-- BOOL_AND(b1) AS "f",
|
||||
-- BOOL_AND(b2) AS "t",
|
||||
-- BOOL_AND(b3) AS "f",
|
||||
-- BOOL_AND(b4) AS "n",
|
||||
-- BOOL_AND(NOT b2) AS "f",
|
||||
-- BOOL_AND(NOT b3) AS "t"
|
||||
-- FROM bool_test;
|
||||
|
||||
-- SELECT
|
||||
-- EVERY(b1) AS "f",
|
||||
-- EVERY(b2) AS "t",
|
||||
-- EVERY(b3) AS "f",
|
||||
-- EVERY(b4) AS "n",
|
||||
-- EVERY(NOT b2) AS "f",
|
||||
-- EVERY(NOT b3) AS "t"
|
||||
-- FROM bool_test;
|
||||
|
||||
-- SELECT
|
||||
-- BOOL_OR(b1) AS "t",
|
||||
-- BOOL_OR(b2) AS "t",
|
||||
-- BOOL_OR(b3) AS "f",
|
||||
-- BOOL_OR(b4) AS "n",
|
||||
-- BOOL_OR(NOT b2) AS "f",
|
||||
-- BOOL_OR(NOT b3) AS "t"
|
||||
-- FROM bool_test;
|
||||
|
||||
--
|
||||
-- Test cases that should be optimized into indexscans instead of
|
||||
-- the generic aggregate implementation.
|
||||
--
|
||||
|
||||
-- Basic cases
|
||||
-- explain
|
||||
-- select min(unique1) from tenk1;
|
||||
select min(unique1) from tenk1;
|
||||
-- explain
|
||||
-- select max(unique1) from tenk1;
|
||||
select max(unique1) from tenk1;
|
||||
-- explain
|
||||
-- select max(unique1) from tenk1 where unique1 < 42;
|
||||
select max(unique1) from tenk1 where unique1 < 42;
|
||||
-- explain
|
||||
-- select max(unique1) from tenk1 where unique1 > 42;
|
||||
select max(unique1) from tenk1 where unique1 > 42;
|
||||
|
||||
-- the planner may choose a generic aggregate here if parallel query is
|
||||
-- enabled, since that plan will be parallel safe and the "optimized"
|
||||
-- plan, which has almost identical cost, will not be. we want to test
|
||||
-- the optimized plan, so temporarily disable parallel query.
|
||||
-- begin;
|
||||
-- set local max_parallel_workers_per_gather = 0;
|
||||
-- explain
|
||||
-- select max(unique1) from tenk1 where unique1 > 42000;
|
||||
select max(unique1) from tenk1 where unique1 > 42000;
|
||||
-- rollback;
|
||||
|
||||
-- multi-column index (uses tenk1_thous_tenthous)
|
||||
-- explain
|
||||
-- select max(tenthous) from tenk1 where thousand = 33;
|
||||
select max(tenthous) from tenk1 where thousand = 33;
|
||||
-- explain
|
||||
-- select min(tenthous) from tenk1 where thousand = 33;
|
||||
select min(tenthous) from tenk1 where thousand = 33;
|
||||
|
||||
-- [SPARK-17348] Correlated column is not allowed in a non-equality predicate
|
||||
-- check parameter propagation into an indexscan subquery
|
||||
-- explain
|
||||
-- select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
|
||||
-- from int4_tbl;
|
||||
-- select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
|
||||
-- from int4_tbl;
|
||||
|
||||
-- check some cases that were handled incorrectly in 8.3.0
|
||||
-- explain
|
||||
-- select distinct max(unique2) from tenk1;
|
||||
select distinct max(unique2) from tenk1;
|
||||
-- explain
|
||||
-- select max(unique2) from tenk1 order by 1;
|
||||
select max(unique2) from tenk1 order by 1;
|
||||
-- explain
|
||||
-- select max(unique2) from tenk1 order by max(unique2);
|
||||
select max(unique2) from tenk1 order by max(unique2);
|
||||
-- explain
|
||||
-- select max(unique2) from tenk1 order by max(unique2)+1;
|
||||
select max(unique2) from tenk1 order by max(unique2)+1;
|
||||
-- explain
|
||||
-- select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
|
||||
select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc;
|
||||
|
||||
-- interesting corner case: constant gets optimized into a seqscan
|
||||
-- explain
|
||||
-- select max(100) from tenk1;
|
||||
select max(100) from tenk1;
|
|
@ -0,0 +1,162 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 16
|
||||
|
||||
|
||||
-- !query 0
|
||||
create temporary view int4_tbl as select * from values
|
||||
(0),
|
||||
(123456),
|
||||
(-123456),
|
||||
(2147483647),
|
||||
(-2147483647)
|
||||
as int4_tbl(f1)
|
||||
-- !query 0 schema
|
||||
struct<>
|
||||
-- !query 0 output
|
||||
|
||||
|
||||
|
||||
-- !query 1
|
||||
SELECT
|
||||
-- boolean and transitions
|
||||
-- null because strict
|
||||
(NULL AND NULL) IS NULL AS `t`,
|
||||
(TRUE AND NULL) IS NULL AS `t`,
|
||||
(FALSE AND NULL) IS NULL AS `t`,
|
||||
(NULL AND TRUE) IS NULL AS `t`,
|
||||
(NULL AND FALSE) IS NULL AS `t`,
|
||||
-- and actual computations
|
||||
(TRUE AND TRUE) AS `t`,
|
||||
NOT (TRUE AND FALSE) AS `t`,
|
||||
NOT (FALSE AND TRUE) AS `t`,
|
||||
NOT (FALSE AND FALSE) AS `t`
|
||||
-- !query 1 schema
|
||||
struct<t:boolean,t:boolean,t:boolean,t:boolean,t:boolean,t:boolean,t:boolean,t:boolean,t:boolean>
|
||||
-- !query 1 output
|
||||
true true false true false true true true true
|
||||
|
||||
|
||||
-- !query 2
|
||||
SELECT
|
||||
-- boolean or transitions
|
||||
-- null because strict
|
||||
(NULL OR NULL) IS NULL AS `t`,
|
||||
(TRUE OR NULL) IS NULL AS `t`,
|
||||
(FALSE OR NULL) IS NULL AS `t`,
|
||||
(NULL OR TRUE) IS NULL AS `t`,
|
||||
(NULL OR FALSE) IS NULL AS `t`,
|
||||
-- actual computations
|
||||
(TRUE OR TRUE) AS `t`,
|
||||
(TRUE OR FALSE) AS `t`,
|
||||
(FALSE OR TRUE) AS `t`,
|
||||
NOT (FALSE OR FALSE) AS `t`
|
||||
-- !query 2 schema
|
||||
struct<t:boolean,t:boolean,t:boolean,t:boolean,t:boolean,t:boolean,t:boolean,t:boolean,t:boolean>
|
||||
-- !query 2 output
|
||||
true false true false true true true true true
|
||||
|
||||
|
||||
-- !query 3
|
||||
select min(unique1) from tenk1
|
||||
-- !query 3 schema
|
||||
struct<min(unique1):int>
|
||||
-- !query 3 output
|
||||
0
|
||||
|
||||
|
||||
-- !query 4
|
||||
select max(unique1) from tenk1
|
||||
-- !query 4 schema
|
||||
struct<max(unique1):int>
|
||||
-- !query 4 output
|
||||
9999
|
||||
|
||||
|
||||
-- !query 5
|
||||
select max(unique1) from tenk1 where unique1 < 42
|
||||
-- !query 5 schema
|
||||
struct<max(unique1):int>
|
||||
-- !query 5 output
|
||||
41
|
||||
|
||||
|
||||
-- !query 6
|
||||
select max(unique1) from tenk1 where unique1 > 42
|
||||
-- !query 6 schema
|
||||
struct<max(unique1):int>
|
||||
-- !query 6 output
|
||||
9999
|
||||
|
||||
|
||||
-- !query 7
|
||||
select max(unique1) from tenk1 where unique1 > 42000
|
||||
-- !query 7 schema
|
||||
struct<max(unique1):int>
|
||||
-- !query 7 output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query 8
|
||||
select max(tenthous) from tenk1 where thousand = 33
|
||||
-- !query 8 schema
|
||||
struct<max(tenthous):int>
|
||||
-- !query 8 output
|
||||
9033
|
||||
|
||||
|
||||
-- !query 9
|
||||
select min(tenthous) from tenk1 where thousand = 33
|
||||
-- !query 9 schema
|
||||
struct<min(tenthous):int>
|
||||
-- !query 9 output
|
||||
33
|
||||
|
||||
|
||||
-- !query 10
|
||||
select distinct max(unique2) from tenk1
|
||||
-- !query 10 schema
|
||||
struct<max(unique2):int>
|
||||
-- !query 10 output
|
||||
9999
|
||||
|
||||
|
||||
-- !query 11
|
||||
select max(unique2) from tenk1 order by 1
|
||||
-- !query 11 schema
|
||||
struct<max(unique2):int>
|
||||
-- !query 11 output
|
||||
9999
|
||||
|
||||
|
||||
-- !query 12
|
||||
select max(unique2) from tenk1 order by max(unique2)
|
||||
-- !query 12 schema
|
||||
struct<max(unique2):int>
|
||||
-- !query 12 output
|
||||
9999
|
||||
|
||||
|
||||
-- !query 13
|
||||
select max(unique2) from tenk1 order by max(unique2)+1
|
||||
-- !query 13 schema
|
||||
struct<max(unique2):int>
|
||||
-- !query 13 output
|
||||
9999
|
||||
|
||||
|
||||
-- !query 14
|
||||
select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc
|
||||
-- !query 14 schema
|
||||
struct<max_unique2:int,g:int>
|
||||
-- !query 14 output
|
||||
9999 3
|
||||
9999 2
|
||||
9999 1
|
||||
|
||||
|
||||
-- !query 15
|
||||
select max(100) from tenk1
|
||||
-- !query 15 schema
|
||||
struct<max(100):int>
|
||||
-- !query 15 output
|
||||
100
|
Loading…
Reference in a new issue