[SPARK-28384][SQL][TEST] Port select_distinct.sql

## What changes were proposed in this pull request?

This PR ports select_distinct.sql from the PostgreSQL regression tests: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_distinct.sql

The expected results can be found in the link: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/expected/select_distinct.out

While porting the test cases, I found one PostgreSQL-specific feature that does not exist in Spark SQL:
[SPARK-28010](https://issues.apache.org/jira/browse/SPARK-28010): Support ORDER BY ... USING syntax

## How was this patch tested?

N/A

Closes #25150 from wangyum/SPARK-28384.

Authored-by: Yuming Wang <yumwang@ebay.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
Yuming Wang 2019-07-14 21:55:11 -07:00 committed by Dongjoon Hyun
parent e238ebe9b0
commit 72cc853092
2 changed files with 311 additions and 0 deletions

View file

@ -0,0 +1,86 @@
--
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
--
--
-- SELECT_DISTINCT
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_distinct.sql
--
-- Test fixture: a temporary view named tmp over four columns of onek.
-- The queries in this file read two, ten and string4 from it; stringu1 is
-- selected here but not referenced by any later statement in this file.
-- NOTE(review): onek is not created in this file; presumably the test
-- harness registers it before this script runs; confirm.
CREATE OR REPLACE TEMPORARY VIEW tmp AS
SELECT two, stringu1, ten, string4
FROM onek;
-- Single-column SELECT DISTINCT checks.
-- Each awk pipeline below is a shell description of the expected result:
-- print one field of onek.data, sort it, and drop duplicates.
-- ORDER BY 1 sorts by the first (and only) select-list column, so the
-- row order of each result is deterministic.
--
-- awk '{print $3;}' onek.data | sort -n | uniq
--
SELECT DISTINCT two FROM tmp ORDER BY 1;
--
-- awk '{print $5;}' onek.data | sort -n | uniq
--
SELECT DISTINCT ten FROM tmp ORDER BY 1;
--
-- awk '{print $16;}' onek.data | sort -d | uniq
--
SELECT DISTINCT string4 FROM tmp ORDER BY 1;
-- Multi-column SELECT DISTINCT, ordered by all three output columns.
-- [SPARK-28010] Support ORDER BY ... USING syntax
-- The upstream form of the query (ORDER BY <col> using <) is kept below as a
-- comment for reference because that syntax is not available here; the
-- statement that actually runs orders the same columns with ASC instead.
--
-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
-- sort +0n -1 +1d -2 +2n -3
--
-- SELECT DISTINCT two, string4, ten
-- FROM tmp
-- ORDER BY two using <, string4 using <, ten using <;
SELECT DISTINCT two, string4, ten
FROM tmp
ORDER BY two ASC, string4 ASC, ten ASC;
-- The person-table query below is skipped (left commented out): the person table uses the point data type, which is not supported here.
--
-- awk '{print $2;}' person.data |
-- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
-- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
-- awk 'BEGIN{FS=" ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
-- sort -n -r | uniq
--
-- SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
--
-- Check mentioning same column more than once
--
-- The inner query lists column two twice in the DISTINCT select list; the
-- outer count(*) reports how many distinct rows that produces.
-- Only the plain query is executed. The EXPLAIN variant is kept below as a
-- comment for reference.
-- NOTE(review): presumably it is disabled because the
-- EXPLAIN (VERBOSE, COSTS OFF) option syntax is PostgreSQL-specific; confirm.
-- NOTE(review): tenk1 is not created in this file; presumably provided by
-- the test harness; confirm.
-- EXPLAIN (VERBOSE, COSTS OFF)
-- SELECT count(*) FROM
-- (SELECT DISTINCT two, four, two FROM tenk1) ss;
SELECT count(*) FROM
(SELECT DISTINCT two, four, two FROM tenk1) ss;
--
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
-- very own regression file.
--
-- Test fixture: temporary view disttable with a single column f1 holding
-- the rows 1, 2, 3 and NULL. The NULL row is what lets the queries that
-- follow exercise the NULL handling of IS DISTINCT FROM.
CREATE OR REPLACE TEMPORARY VIEW disttable AS SELECT * FROM
(VALUES (1), (2), (3), (NULL))
AS v(f1);
-- IS DISTINCT FROM / IS NOT DISTINCT FROM checks.
-- IS DISTINCT FROM is a NULL-aware inequality: NULL compared with NULL is
-- "not distinct" (false), NULL compared with a non-NULL value is "distinct"
-- (true), and the result itself is never NULL.
-- The backquoted column aliases name the output column; in the constant
-- queries (`yes` / `no`) the alias also states the expected answer.
-- basic cases
-- Compare every f1 (1, 2, 3, NULL) with the constant 2: only the row f1 = 2
-- is not distinct.
SELECT f1, f1 IS DISTINCT FROM 2 as `not 2` FROM disttable;
-- Compare every f1 with NULL: only the NULL row is not distinct.
SELECT f1, f1 IS DISTINCT FROM NULL as `not null` FROM disttable;
-- Compare f1 with itself: never distinct, including for the NULL row.
SELECT f1, f1 IS DISTINCT FROM f1 as `false` FROM disttable;
-- Compare f1 with f1+1: f1+1 is NULL only when f1 is NULL, so only the NULL
-- row is not distinct.
SELECT f1, f1 IS DISTINCT FROM f1+1 as `not null` FROM disttable;
-- check that optimizer constant-folds it properly
-- These statements have no FROM clause; both operands are literals.
SELECT 1 IS DISTINCT FROM 2 as `yes`;
SELECT 2 IS DISTINCT FROM 2 as `no`;
SELECT 2 IS DISTINCT FROM null as `yes`;
SELECT null IS DISTINCT FROM null as `no`;
-- negated form
-- Same four literal pairs as above with IS NOT DISTINCT FROM, so each
-- expected answer is the opposite of its counterpart.
SELECT 1 IS NOT DISTINCT FROM 2 as `no`;
SELECT 2 IS NOT DISTINCT FROM 2 as `yes`;
SELECT 2 IS NOT DISTINCT FROM null as `no`;
SELECT null IS NOT DISTINCT FROM null as `yes`;

View file

@ -0,0 +1,225 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 19
-- !query 0
CREATE OR REPLACE TEMPORARY VIEW tmp AS
SELECT two, stringu1, ten, string4
FROM onek
-- !query 0 schema
struct<>
-- !query 0 output
-- !query 1
SELECT DISTINCT two FROM tmp ORDER BY 1
-- !query 1 schema
struct<two:int>
-- !query 1 output
0
1
-- !query 2
SELECT DISTINCT ten FROM tmp ORDER BY 1
-- !query 2 schema
struct<ten:int>
-- !query 2 output
0
1
2
3
4
5
6
7
8
9
-- !query 3
SELECT DISTINCT string4 FROM tmp ORDER BY 1
-- !query 3 schema
struct<string4:string>
-- !query 3 output
AAAAxx
HHHHxx
OOOOxx
VVVVxx
-- !query 4
SELECT DISTINCT two, string4, ten
FROM tmp
ORDER BY two ASC, string4 ASC, ten ASC
-- !query 4 schema
struct<two:int,string4:string,ten:int>
-- !query 4 output
0 AAAAxx 0
0 AAAAxx 2
0 AAAAxx 4
0 AAAAxx 6
0 AAAAxx 8
0 HHHHxx 0
0 HHHHxx 2
0 HHHHxx 4
0 HHHHxx 6
0 HHHHxx 8
0 OOOOxx 0
0 OOOOxx 2
0 OOOOxx 4
0 OOOOxx 6
0 OOOOxx 8
0 VVVVxx 0
0 VVVVxx 2
0 VVVVxx 4
0 VVVVxx 6
0 VVVVxx 8
1 AAAAxx 1
1 AAAAxx 3
1 AAAAxx 5
1 AAAAxx 7
1 AAAAxx 9
1 HHHHxx 1
1 HHHHxx 3
1 HHHHxx 5
1 HHHHxx 7
1 HHHHxx 9
1 OOOOxx 1
1 OOOOxx 3
1 OOOOxx 5
1 OOOOxx 7
1 OOOOxx 9
1 VVVVxx 1
1 VVVVxx 3
1 VVVVxx 5
1 VVVVxx 7
1 VVVVxx 9
-- !query 5
SELECT count(*) FROM
(SELECT DISTINCT two, four, two FROM tenk1) ss
-- !query 5 schema
struct<count(1):bigint>
-- !query 5 output
4
-- !query 6
CREATE OR REPLACE TEMPORARY VIEW disttable AS SELECT * FROM
(VALUES (1), (2), (3), (NULL))
AS v(f1)
-- !query 6 schema
struct<>
-- !query 6 output
-- !query 7
SELECT f1, f1 IS DISTINCT FROM 2 as `not 2` FROM disttable
-- !query 7 schema
struct<f1:int,not 2:boolean>
-- !query 7 output
1 true
2 false
3 true
NULL true
-- !query 8
SELECT f1, f1 IS DISTINCT FROM NULL as `not null` FROM disttable
-- !query 8 schema
struct<f1:int,not null:boolean>
-- !query 8 output
1 true
2 true
3 true
NULL false
-- !query 9
SELECT f1, f1 IS DISTINCT FROM f1 as `false` FROM disttable
-- !query 9 schema
struct<f1:int,false:boolean>
-- !query 9 output
1 false
2 false
3 false
NULL false
-- !query 10
SELECT f1, f1 IS DISTINCT FROM f1+1 as `not null` FROM disttable
-- !query 10 schema
struct<f1:int,not null:boolean>
-- !query 10 output
1 true
2 true
3 true
NULL false
-- !query 11
SELECT 1 IS DISTINCT FROM 2 as `yes`
-- !query 11 schema
struct<yes:boolean>
-- !query 11 output
true
-- !query 12
SELECT 2 IS DISTINCT FROM 2 as `no`
-- !query 12 schema
struct<no:boolean>
-- !query 12 output
false
-- !query 13
SELECT 2 IS DISTINCT FROM null as `yes`
-- !query 13 schema
struct<yes:boolean>
-- !query 13 output
true
-- !query 14
SELECT null IS DISTINCT FROM null as `no`
-- !query 14 schema
struct<no:boolean>
-- !query 14 output
false
-- !query 15
SELECT 1 IS NOT DISTINCT FROM 2 as `no`
-- !query 15 schema
struct<no:boolean>
-- !query 15 output
false
-- !query 16
SELECT 2 IS NOT DISTINCT FROM 2 as `yes`
-- !query 16 schema
struct<yes:boolean>
-- !query 16 output
true
-- !query 17
SELECT 2 IS NOT DISTINCT FROM null as `no`
-- !query 17 schema
struct<no:boolean>
-- !query 17 output
false
-- !query 18
SELECT null IS NOT DISTINCT FROM null as `yes`
-- !query 18 schema
struct<yes:boolean>
-- !query 18 output
true