[SPARK-28384][SQL][TEST] Port select_distinct.sql

## What changes were proposed in this pull request? This PR ports select_distinct.sql from the PostgreSQL regression tests. https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_distinct.sql The expected results can be found at: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/expected/select_distinct.out When porting the test cases, we found one PostgreSQL-specific feature that does not exist in Spark SQL: [SPARK-28010](https://issues.apache.org/jira/browse/SPARK-28010): Support ORDER BY ... USING syntax ## How was this patch tested? N/A Closes #25150 from wangyum/SPARK-28384. Authored-by: Yuming Wang <yumwang@ebay.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
2019-07-14 21:55:11 -07:00 · 2019-07-14 21:55:11 -07:00 · 72cc853092
parent e238ebe9b0
commit 72cc853092
2 changed files with 311 additions and 0 deletions
--- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_distinct.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_distinct.sql
@ -0,0 +1,86 @@
+--
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+--
+-- SELECT_DISTINCT
+-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_distinct.sql
+--
+-- Temporary view tmp: the onek columns two, stringu1, ten and string4.
+CREATE OR REPLACE TEMPORARY VIEW tmp AS
+SELECT two, stringu1, ten, string4
+FROM onek;
+
+-- Distinct values of two, sorted ascending. Shell pipeline given upstream:
+-- awk '{print $3;}' onek.data | sort -n | uniq
+--
+SELECT DISTINCT two FROM tmp ORDER BY 1;
+
+-- Distinct values of ten, sorted ascending. Shell pipeline given upstream:
+-- awk '{print $5;}' onek.data | sort -n | uniq
+--
+SELECT DISTINCT ten FROM tmp ORDER BY 1;
+
+-- Distinct values of string4, sorted ascending. Shell pipeline given upstream:
+-- awk '{print $16;}' onek.data | sort -d | uniq
+--
+SELECT DISTINCT string4 FROM tmp ORDER BY 1;
+
+-- [SPARK-28010] ORDER BY ... USING is not supported, so the upstream query is
+-- kept commented out and run with plain ASC ordering instead. Upstream pipeline:
+-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
+-- sort +0n -1 +1d -2 +2n -3
+--
+-- SELECT DISTINCT two, string4, ten
+--    FROM tmp
+--    ORDER BY two using <, string4 using <, ten using <;
+SELECT DISTINCT two, string4, ten
+   FROM tmp
+   ORDER BY two ASC, string4 ASC, ten ASC;
+
+-- Skip the query on the person table: it involves the point data type, which Spark SQL does not support.
+--
+-- awk '{print $2;}' person.data |
+-- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
+-- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
+-- awk 'BEGIN{FS="      ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
+-- sort -n -r | uniq
+--
+-- SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
+
+--
+-- Check mentioning the same column more than once in a DISTINCT select list
+-- (the upstream EXPLAIN form of the query is kept below, commented out)
+
+-- EXPLAIN (VERBOSE, COSTS OFF)
+-- SELECT count(*) FROM
+--   (SELECT DISTINCT two, four, two FROM tenk1) ss;
+
+SELECT count(*) FROM
+  (SELECT DISTINCT two, four, two FROM tenk1) ss;
+
+--
+-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
+-- very own regression file.
+--
+-- disttable: one column f1 holding 1, 2, 3 and NULL, built inline from VALUES.
+CREATE OR REPLACE TEMPORARY VIEW disttable AS SELECT * FROM
+  (VALUES (1), (2), (3), (NULL))
+  AS v(f1);
+
+-- basic cases: f1 compared with a literal, NULL, itself, and f1+1
+SELECT f1, f1 IS DISTINCT FROM 2 as `not 2` FROM disttable;
+SELECT f1, f1 IS DISTINCT FROM NULL as `not null` FROM disttable;
+SELECT f1, f1 IS DISTINCT FROM f1 as `false` FROM disttable;
+SELECT f1, f1 IS DISTINCT FROM f1+1 as `not null` FROM disttable;
+
+-- check that optimizer constant-folds it properly (both operands are literals)
+SELECT 1 IS DISTINCT FROM 2 as `yes`;
+SELECT 2 IS DISTINCT FROM 2 as `no`;
+SELECT 2 IS DISTINCT FROM null as `yes`;
+SELECT null IS DISTINCT FROM null as `no`;
+
+-- negated form (IS NOT DISTINCT FROM) over the same literal pairs
+SELECT 1 IS NOT DISTINCT FROM 2 as `no`;
+SELECT 2 IS NOT DISTINCT FROM 2 as `yes`;
+SELECT 2 IS NOT DISTINCT FROM null as `no`;
+SELECT null IS NOT DISTINCT FROM null as `yes`;
--- a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_distinct.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/select_distinct.sql.out
@ -0,0 +1,225 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 19
+
+
+-- !query 0
+CREATE OR REPLACE TEMPORARY VIEW tmp AS
+SELECT two, stringu1, ten, string4
+FROM onek
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+SELECT DISTINCT two FROM tmp ORDER BY 1
+-- !query 1 schema
+struct<two:int>
+-- !query 1 output
+0
+1
+
+
+-- !query 2
+SELECT DISTINCT ten FROM tmp ORDER BY 1
+-- !query 2 schema
+struct<ten:int>
+-- !query 2 output
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+
+-- !query 3
+SELECT DISTINCT string4 FROM tmp ORDER BY 1
+-- !query 3 schema
+struct<string4:string>
+-- !query 3 output
+AAAAxx
+HHHHxx
+OOOOxx
+VVVVxx
+
+
+-- !query 4
+SELECT DISTINCT two, string4, ten
+   FROM tmp
+   ORDER BY two ASC, string4 ASC, ten ASC
+-- !query 4 schema
+struct<two:int,string4:string,ten:int>
+-- !query 4 output
+0	AAAAxx	0
+0	AAAAxx	2
+0	AAAAxx	4
+0	AAAAxx	6
+0	AAAAxx	8
+0	HHHHxx	0
+0	HHHHxx	2
+0	HHHHxx	4
+0	HHHHxx	6
+0	HHHHxx	8
+0	OOOOxx	0
+0	OOOOxx	2
+0	OOOOxx	4
+0	OOOOxx	6
+0	OOOOxx	8
+0	VVVVxx	0
+0	VVVVxx	2
+0	VVVVxx	4
+0	VVVVxx	6
+0	VVVVxx	8
+1	AAAAxx	1
+1	AAAAxx	3
+1	AAAAxx	5
+1	AAAAxx	7
+1	AAAAxx	9
+1	HHHHxx	1
+1	HHHHxx	3
+1	HHHHxx	5
+1	HHHHxx	7
+1	HHHHxx	9
+1	OOOOxx	1
+1	OOOOxx	3
+1	OOOOxx	5
+1	OOOOxx	7
+1	OOOOxx	9
+1	VVVVxx	1
+1	VVVVxx	3
+1	VVVVxx	5
+1	VVVVxx	7
+1	VVVVxx	9
+
+
+-- !query 5
+SELECT count(*) FROM
+  (SELECT DISTINCT two, four, two FROM tenk1) ss
+-- !query 5 schema
+struct<count(1):bigint>
+-- !query 5 output
+4
+
+
+-- !query 6
+CREATE OR REPLACE TEMPORARY VIEW disttable AS SELECT * FROM
+  (VALUES (1), (2), (3), (NULL))
+  AS v(f1)
+-- !query 6 schema
+struct<>
+-- !query 6 output
+
+
+
+-- !query 7
+SELECT f1, f1 IS DISTINCT FROM 2 as `not 2` FROM disttable
+-- !query 7 schema
+struct<f1:int,not 2:boolean>
+-- !query 7 output
+1	true
+2	false
+3	true
+NULL	true
+
+
+-- !query 8
+SELECT f1, f1 IS DISTINCT FROM NULL as `not null` FROM disttable
+-- !query 8 schema
+struct<f1:int,not null:boolean>
+-- !query 8 output
+1	true
+2	true
+3	true
+NULL	false
+
+
+-- !query 9
+SELECT f1, f1 IS DISTINCT FROM f1 as `false` FROM disttable
+-- !query 9 schema
+struct<f1:int,false:boolean>
+-- !query 9 output
+1	false
+2	false
+3	false
+NULL	false
+
+
+-- !query 10
+SELECT f1, f1 IS DISTINCT FROM f1+1 as `not null` FROM disttable
+-- !query 10 schema
+struct<f1:int,not null:boolean>
+-- !query 10 output
+1	true
+2	true
+3	true
+NULL	false
+
+
+-- !query 11
+SELECT 1 IS DISTINCT FROM 2 as `yes`
+-- !query 11 schema
+struct<yes:boolean>
+-- !query 11 output
+true
+
+
+-- !query 12
+SELECT 2 IS DISTINCT FROM 2 as `no`
+-- !query 12 schema
+struct<no:boolean>
+-- !query 12 output
+false
+
+
+-- !query 13
+SELECT 2 IS DISTINCT FROM null as `yes`
+-- !query 13 schema
+struct<yes:boolean>
+-- !query 13 output
+true
+
+
+-- !query 14
+SELECT null IS DISTINCT FROM null as `no`
+-- !query 14 schema
+struct<no:boolean>
+-- !query 14 output
+false
+
+
+-- !query 15
+SELECT 1 IS NOT DISTINCT FROM 2 as `no`
+-- !query 15 schema
+struct<no:boolean>
+-- !query 15 output
+false
+
+
+-- !query 16
+SELECT 2 IS NOT DISTINCT FROM 2 as `yes`
+-- !query 16 schema
+struct<yes:boolean>
+-- !query 16 output
+true
+
+
+-- !query 17
+SELECT 2 IS NOT DISTINCT FROM null as `no`
+-- !query 17 schema
+struct<no:boolean>
+-- !query 17 output
+false
+
+
+-- !query 18
+SELECT null IS NOT DISTINCT FROM null as `yes`
+-- !query 18 schema
+struct<yes:boolean>
+-- !query 18 output
+true