[SPARK-28384][SQL][TEST] Port select_distinct.sql
## What changes were proposed in this pull request? This PR ports select_distinct.sql from the PostgreSQL regression tests: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_distinct.sql The expected results can be found at: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/expected/select_distinct.out While porting the test cases, we found one PostgreSQL-specific feature that does not exist in Spark SQL: [SPARK-28010](https://issues.apache.org/jira/browse/SPARK-28010): Support ORDER BY ... USING syntax ## How was this patch tested? N/A Closes #25150 from wangyum/SPARK-28384. Authored-by: Yuming Wang <yumwang@ebay.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
e238ebe9b0
commit
72cc853092
|
@ -0,0 +1,86 @@
|
|||
--
|
||||
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
||||
--
|
||||
--
|
||||
-- SELECT_DISTINCT
|
||||
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_distinct.sql
|
||||
--
|
||||
|
||||
-- Temporary view over onek that keeps only the columns two, stringu1, ten
-- and string4; the SELECT DISTINCT queries below read from tmp.
CREATE OR REPLACE TEMPORARY VIEW tmp AS
|
||||
SELECT two, stringu1, ten, string4
|
||||
FROM onek;
|
||||
|
||||
--
|
||||
-- awk '{print $3;}' onek.data | sort -n | uniq
|
||||
--
|
||||
-- Distinct values of two, sorted by the first (and only) output column.
SELECT DISTINCT two FROM tmp ORDER BY 1;
|
||||
|
||||
--
|
||||
-- awk '{print $5;}' onek.data | sort -n | uniq
|
||||
--
|
||||
-- Distinct values of ten, sorted by the first (and only) output column.
SELECT DISTINCT ten FROM tmp ORDER BY 1;
|
||||
|
||||
--
|
||||
-- awk '{print $16;}' onek.data | sort -d | uniq
|
||||
--
|
||||
-- Distinct values of string4, sorted by the first (and only) output column.
SELECT DISTINCT string4 FROM tmp ORDER BY 1;
|
||||
|
||||
-- [SPARK-28010] Support ORDER BY ... USING syntax
|
||||
--
|
||||
-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
|
||||
-- sort +0n -1 +1d -2 +2n -3
|
||||
--
|
||||
-- SELECT DISTINCT two, string4, ten
|
||||
-- FROM tmp
|
||||
-- ORDER BY two using <, string4 using <, ten using <;
|
||||
-- Stand-in for the commented-out query above: each `using <` sort key is
-- written as ASC instead.
SELECT DISTINCT two, string4, ten
|
||||
FROM tmp
|
||||
ORDER BY two ASC, string4 ASC, ten ASC;
|
||||
|
||||
-- Skip the person table because there is a point data type that we don't support.
|
||||
--
|
||||
-- awk '{print $2;}' person.data |
|
||||
-- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
|
||||
-- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
|
||||
-- awk 'BEGIN{FS=" ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
|
||||
-- sort -n -r | uniq
|
||||
--
|
||||
-- SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
|
||||
|
||||
--
|
||||
-- Check mentioning same column more than once
|
||||
--
|
||||
|
||||
-- EXPLAIN (VERBOSE, COSTS OFF)
|
||||
-- SELECT count(*) FROM
|
||||
-- (SELECT DISTINCT two, four, two FROM tenk1) ss;
|
||||
|
||||
-- Same statement as the commented-out one above, run without the EXPLAIN
-- wrapper; the DISTINCT list names column two twice.
SELECT count(*) FROM
|
||||
(SELECT DISTINCT two, four, two FROM tenk1) ss;
|
||||
|
||||
--
|
||||
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
|
||||
-- very own regression file.
|
||||
--
|
||||
|
||||
-- Temporary view with a single column f1 holding 1, 2, 3 and NULL; it is the
-- input for the column-based IS DISTINCT FROM queries below.
CREATE OR REPLACE TEMPORARY VIEW disttable AS SELECT * FROM
|
||||
(VALUES (1), (2), (3), (NULL))
|
||||
AS v(f1);
|
||||
|
||||
-- basic cases
|
||||
-- Each query returns f1 next to a boolean column named by the backquoted
-- alias; the right-hand operand is, in turn, a literal, NULL, f1 itself and
-- the expression f1+1.
SELECT f1, f1 IS DISTINCT FROM 2 as `not 2` FROM disttable;
|
||||
SELECT f1, f1 IS DISTINCT FROM NULL as `not null` FROM disttable;
|
||||
SELECT f1, f1 IS DISTINCT FROM f1 as `false` FROM disttable;
|
||||
SELECT f1, f1 IS DISTINCT FROM f1+1 as `not null` FROM disttable;
|
||||
|
||||
-- check that optimizer constant-folds it properly
|
||||
-- Literal-only comparisons with no FROM clause. The alias spells out the
-- expected answer: `yes` for true, `no` for false.
SELECT 1 IS DISTINCT FROM 2 as `yes`;
|
||||
SELECT 2 IS DISTINCT FROM 2 as `no`;
|
||||
SELECT 2 IS DISTINCT FROM null as `yes`;
|
||||
SELECT null IS DISTINCT FROM null as `no`;
|
||||
|
||||
-- negated form
|
||||
-- The same four operand pairs as the IS DISTINCT FROM literal queries above,
-- with the predicate negated, so the `yes` / `no` aliases are swapped.
SELECT 1 IS NOT DISTINCT FROM 2 as `no`;
|
||||
SELECT 2 IS NOT DISTINCT FROM 2 as `yes`;
|
||||
SELECT 2 IS NOT DISTINCT FROM null as `no`;
|
||||
SELECT null IS NOT DISTINCT FROM null as `yes`;
|
|
@ -0,0 +1,225 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 19
|
||||
|
||||
|
||||
-- !query 0
|
||||
CREATE OR REPLACE TEMPORARY VIEW tmp AS
|
||||
SELECT two, stringu1, ten, string4
|
||||
FROM onek
|
||||
-- !query 0 schema
|
||||
struct<>
|
||||
-- !query 0 output
|
||||
|
||||
|
||||
|
||||
-- !query 1
|
||||
SELECT DISTINCT two FROM tmp ORDER BY 1
|
||||
-- !query 1 schema
|
||||
struct<two:int>
|
||||
-- !query 1 output
|
||||
0
|
||||
1
|
||||
|
||||
|
||||
-- !query 2
|
||||
SELECT DISTINCT ten FROM tmp ORDER BY 1
|
||||
-- !query 2 schema
|
||||
struct<ten:int>
|
||||
-- !query 2 output
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
|
||||
|
||||
-- !query 3
|
||||
SELECT DISTINCT string4 FROM tmp ORDER BY 1
|
||||
-- !query 3 schema
|
||||
struct<string4:string>
|
||||
-- !query 3 output
|
||||
AAAAxx
|
||||
HHHHxx
|
||||
OOOOxx
|
||||
VVVVxx
|
||||
|
||||
|
||||
-- !query 4
|
||||
SELECT DISTINCT two, string4, ten
|
||||
FROM tmp
|
||||
ORDER BY two ASC, string4 ASC, ten ASC
|
||||
-- !query 4 schema
|
||||
struct<two:int,string4:string,ten:int>
|
||||
-- !query 4 output
|
||||
0 AAAAxx 0
|
||||
0 AAAAxx 2
|
||||
0 AAAAxx 4
|
||||
0 AAAAxx 6
|
||||
0 AAAAxx 8
|
||||
0 HHHHxx 0
|
||||
0 HHHHxx 2
|
||||
0 HHHHxx 4
|
||||
0 HHHHxx 6
|
||||
0 HHHHxx 8
|
||||
0 OOOOxx 0
|
||||
0 OOOOxx 2
|
||||
0 OOOOxx 4
|
||||
0 OOOOxx 6
|
||||
0 OOOOxx 8
|
||||
0 VVVVxx 0
|
||||
0 VVVVxx 2
|
||||
0 VVVVxx 4
|
||||
0 VVVVxx 6
|
||||
0 VVVVxx 8
|
||||
1 AAAAxx 1
|
||||
1 AAAAxx 3
|
||||
1 AAAAxx 5
|
||||
1 AAAAxx 7
|
||||
1 AAAAxx 9
|
||||
1 HHHHxx 1
|
||||
1 HHHHxx 3
|
||||
1 HHHHxx 5
|
||||
1 HHHHxx 7
|
||||
1 HHHHxx 9
|
||||
1 OOOOxx 1
|
||||
1 OOOOxx 3
|
||||
1 OOOOxx 5
|
||||
1 OOOOxx 7
|
||||
1 OOOOxx 9
|
||||
1 VVVVxx 1
|
||||
1 VVVVxx 3
|
||||
1 VVVVxx 5
|
||||
1 VVVVxx 7
|
||||
1 VVVVxx 9
|
||||
|
||||
|
||||
-- !query 5
|
||||
SELECT count(*) FROM
|
||||
(SELECT DISTINCT two, four, two FROM tenk1) ss
|
||||
-- !query 5 schema
|
||||
struct<count(1):bigint>
|
||||
-- !query 5 output
|
||||
4
|
||||
|
||||
|
||||
-- !query 6
|
||||
CREATE OR REPLACE TEMPORARY VIEW disttable AS SELECT * FROM
|
||||
(VALUES (1), (2), (3), (NULL))
|
||||
AS v(f1)
|
||||
-- !query 6 schema
|
||||
struct<>
|
||||
-- !query 6 output
|
||||
|
||||
|
||||
|
||||
-- !query 7
|
||||
SELECT f1, f1 IS DISTINCT FROM 2 as `not 2` FROM disttable
|
||||
-- !query 7 schema
|
||||
struct<f1:int,not 2:boolean>
|
||||
-- !query 7 output
|
||||
1 true
|
||||
2 false
|
||||
3 true
|
||||
NULL true
|
||||
|
||||
|
||||
-- !query 8
|
||||
SELECT f1, f1 IS DISTINCT FROM NULL as `not null` FROM disttable
|
||||
-- !query 8 schema
|
||||
struct<f1:int,not null:boolean>
|
||||
-- !query 8 output
|
||||
1 true
|
||||
2 true
|
||||
3 true
|
||||
NULL false
|
||||
|
||||
|
||||
-- !query 9
|
||||
SELECT f1, f1 IS DISTINCT FROM f1 as `false` FROM disttable
|
||||
-- !query 9 schema
|
||||
struct<f1:int,false:boolean>
|
||||
-- !query 9 output
|
||||
1 false
|
||||
2 false
|
||||
3 false
|
||||
NULL false
|
||||
|
||||
|
||||
-- !query 10
|
||||
SELECT f1, f1 IS DISTINCT FROM f1+1 as `not null` FROM disttable
|
||||
-- !query 10 schema
|
||||
struct<f1:int,not null:boolean>
|
||||
-- !query 10 output
|
||||
1 true
|
||||
2 true
|
||||
3 true
|
||||
NULL false
|
||||
|
||||
|
||||
-- !query 11
|
||||
SELECT 1 IS DISTINCT FROM 2 as `yes`
|
||||
-- !query 11 schema
|
||||
struct<yes:boolean>
|
||||
-- !query 11 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 12
|
||||
SELECT 2 IS DISTINCT FROM 2 as `no`
|
||||
-- !query 12 schema
|
||||
struct<no:boolean>
|
||||
-- !query 12 output
|
||||
false
|
||||
|
||||
|
||||
-- !query 13
|
||||
SELECT 2 IS DISTINCT FROM null as `yes`
|
||||
-- !query 13 schema
|
||||
struct<yes:boolean>
|
||||
-- !query 13 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 14
|
||||
SELECT null IS DISTINCT FROM null as `no`
|
||||
-- !query 14 schema
|
||||
struct<no:boolean>
|
||||
-- !query 14 output
|
||||
false
|
||||
|
||||
|
||||
-- !query 15
|
||||
SELECT 1 IS NOT DISTINCT FROM 2 as `no`
|
||||
-- !query 15 schema
|
||||
struct<no:boolean>
|
||||
-- !query 15 output
|
||||
false
|
||||
|
||||
|
||||
-- !query 16
|
||||
SELECT 2 IS NOT DISTINCT FROM 2 as `yes`
|
||||
-- !query 16 schema
|
||||
struct<yes:boolean>
|
||||
-- !query 16 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 17
|
||||
SELECT 2 IS NOT DISTINCT FROM null as `no`
|
||||
-- !query 17 schema
|
||||
struct<no:boolean>
|
||||
-- !query 17 output
|
||||
false
|
||||
|
||||
|
||||
-- !query 18
|
||||
SELECT null IS NOT DISTINCT FROM null as `yes`
|
||||
-- !query 18 schema
|
||||
struct<yes:boolean>
|
||||
-- !query 18 output
|
||||
true
|
Loading…
Reference in a new issue