[SPARK-28388][SQL][TEST] Port select_implicit.sql

## What changes were proposed in this pull request?

This PR is to port numeric.sql from PostgreSQL regression tests. https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_implicit.sql

The expected results can be found in the link: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/expected/select_implicit.out

When porting the test cases, found one PostgreSQL specific features that do not exist in Spark SQL:
[SPARK-28329](https://issues.apache.org/jira/browse/SPARK-28329): SELECT INTO syntax

## How was this patch tested?

N/A

Closes #25152 from wangyum/SPARK-28388.

Authored-by: Yuming Wang <yumwang@ebay.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
Yuming Wang 2019-07-18 08:58:27 -07:00 committed by Dongjoon Hyun
parent 8acc22ca64
commit 2cf0491a97
2 changed files with 576 additions and 0 deletions

View file

@ -0,0 +1,160 @@
--
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
--
--
-- SELECT_IMPLICIT
-- Test cases for queries with ordering terms missing from the target list.
-- This used to be called "junkfilter.sql".
-- The parser uses the term "resjunk" to handle these cases.
-- - thomas 1998-07-09
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_implicit.sql
--
-- load test data
CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet;
INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A');
INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b');
INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c');
INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D');
INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e');
INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F');
INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g');
INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h');
INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');
INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
-- w/ existing GROUP BY target
SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
-- w/o existing GROUP BY target and w/o existing a different ORDER BY target
-- failure expected
SELECT count(*) FROM test_missing_target GROUP BY a ORDER BY b;
-- w/o existing GROUP BY target and w/o existing same ORDER BY target
SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b;
-- w/ existing GROUP BY target using a relation name in target
SELECT test_missing_target.b, count(*)
FROM test_missing_target GROUP BY b ORDER BY b;
-- w/o existing GROUP BY target
SELECT c FROM test_missing_target ORDER BY a;
-- w/o existing ORDER BY target
SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc;
-- group using reference number
SELECT count(*) FROM test_missing_target ORDER BY 1 desc;
-- order using reference number
SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;
-- group using reference number out of range
-- failure expected
SELECT c, count(*) FROM test_missing_target GROUP BY 3;
-- group w/o existing GROUP BY and ORDER BY target under ambiguous condition
-- failure expected
SELECT count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY b ORDER BY b;
-- order w/ target under ambiguous condition
-- failure NOT expected
SELECT a, a FROM test_missing_target
ORDER BY a;
-- order expression w/ target under ambiguous condition
-- failure NOT expected
SELECT a/2, a/2 FROM test_missing_target
ORDER BY a/2;
-- group expression w/ target under ambiguous condition
-- failure NOT expected
SELECT a/2, a/2 FROM test_missing_target
GROUP BY a/2 ORDER BY a/2;
-- group w/ existing GROUP BY target under ambiguous condition
SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY x.b ORDER BY x.b;
-- group w/o existing GROUP BY target under ambiguous condition
SELECT count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY x.b ORDER BY x.b;
-- [SPARK-28329] SELECT INTO syntax
-- group w/o existing GROUP BY target under ambiguous condition
-- into a table
-- SELECT count(*) INTO TABLE test_missing_target2
-- FROM test_missing_target x, test_missing_target y
-- WHERE x.a = y.a
-- GROUP BY x.b ORDER BY x.b;
-- SELECT * FROM test_missing_target2;
-- Functions and expressions
-- w/ existing GROUP BY target
SELECT a%2, count(b) FROM test_missing_target
GROUP BY test_missing_target.a%2
ORDER BY test_missing_target.a%2;
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
SELECT count(c) FROM test_missing_target
GROUP BY lower(test_missing_target.c)
ORDER BY lower(test_missing_target.c);
-- w/o existing GROUP BY target and w/o existing a different ORDER BY target
-- failure expected
SELECT count(a) FROM test_missing_target GROUP BY a ORDER BY b;
-- w/o existing GROUP BY target and w/o existing same ORDER BY target
SELECT count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2;
-- w/ existing GROUP BY target using a relation name in target
SELECT lower(test_missing_target.c), count(c)
FROM test_missing_target GROUP BY lower(c) ORDER BY lower(c);
-- w/o existing GROUP BY target
SELECT a FROM test_missing_target ORDER BY upper(d);
-- w/o existing ORDER BY target
SELECT count(b) FROM test_missing_target
GROUP BY (b + 1) / 2 ORDER BY (b + 1) / 2 desc;
-- group w/o existing GROUP BY and ORDER BY target under ambiguous condition
-- failure expected
SELECT count(x.a) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY b/2 ORDER BY b/2;
-- group w/ existing GROUP BY target under ambiguous condition
SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY x.b/2 ORDER BY x.b/2;
-- group w/o existing GROUP BY target under ambiguous condition
-- failure expected due to ambiguous b in count(b)
SELECT count(b) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY x.b/2;
-- [SPARK-28329] SELECT INTO syntax
-- group w/o existing GROUP BY target under ambiguous condition
-- into a table
-- SELECT count(x.b) INTO TABLE test_missing_target3
-- FROM test_missing_target x, test_missing_target y
-- WHERE x.a = y.a
-- GROUP BY x.b/2 ORDER BY x.b/2;
-- SELECT * FROM test_missing_target3;
-- Cleanup
DROP TABLE test_missing_target;
-- DROP TABLE test_missing_target2;
-- DROP TABLE test_missing_target3;

View file

@ -0,0 +1,416 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 38
-- !query 0
CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet
-- !query 0 schema
struct<>
-- !query 0 output
-- !query 1
INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A')
-- !query 1 schema
struct<>
-- !query 1 output
-- !query 2
INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b')
-- !query 2 schema
struct<>
-- !query 2 output
-- !query 3
INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c')
-- !query 3 schema
struct<>
-- !query 3 output
-- !query 4
INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D')
-- !query 4 schema
struct<>
-- !query 4 output
-- !query 5
INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e')
-- !query 5 schema
struct<>
-- !query 5 output
-- !query 6
INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F')
-- !query 6 schema
struct<>
-- !query 6 output
-- !query 7
INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g')
-- !query 7 schema
struct<>
-- !query 7 output
-- !query 8
INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h')
-- !query 8 schema
struct<>
-- !query 8 output
-- !query 9
INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I')
-- !query 9 schema
struct<>
-- !query 9 output
-- !query 10
INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j')
-- !query 10 schema
struct<>
-- !query 10 output
-- !query 11
SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c
-- !query 11 schema
struct<c:string,count(1):bigint>
-- !query 11 output
ABAB 2
BBBB 2
CCCC 2
XXXX 1
bbbb 1
cccc 2
-- !query 12
SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c
-- !query 12 schema
struct<count(1):bigint>
-- !query 12 output
2
2
2
1
1
2
-- !query 13
SELECT count(*) FROM test_missing_target GROUP BY a ORDER BY b
-- !query 13 schema
struct<>
-- !query 13 output
org.apache.spark.sql.AnalysisException
cannot resolve '`b`' given input columns: [count(1)]; line 1 pos 61
-- !query 14
SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b
-- !query 14 schema
struct<count(1):bigint>
-- !query 14 output
1
2
3
4
-- !query 15
SELECT test_missing_target.b, count(*)
FROM test_missing_target GROUP BY b ORDER BY b
-- !query 15 schema
struct<b:int,count(1):bigint>
-- !query 15 output
1 1
2 2
3 3
4 4
-- !query 16
SELECT c FROM test_missing_target ORDER BY a
-- !query 16 schema
struct<c:string>
-- !query 16 output
XXXX
ABAB
ABAB
BBBB
BBBB
bbbb
cccc
cccc
CCCC
CCCC
-- !query 17
SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc
-- !query 17 schema
struct<count(1):bigint>
-- !query 17 output
4
3
2
1
-- !query 18
SELECT count(*) FROM test_missing_target ORDER BY 1 desc
-- !query 18 schema
struct<count(1):bigint>
-- !query 18 output
10
-- !query 19
SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1
-- !query 19 schema
struct<c:string,count(1):bigint>
-- !query 19 output
ABAB 2
BBBB 2
CCCC 2
XXXX 1
bbbb 1
cccc 2
-- !query 20
SELECT c, count(*) FROM test_missing_target GROUP BY 3
-- !query 20 schema
struct<>
-- !query 20 output
org.apache.spark.sql.AnalysisException
GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53
-- !query 21
SELECT count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY b ORDER BY b
-- !query 21 schema
struct<>
-- !query 21 output
org.apache.spark.sql.AnalysisException
Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
-- !query 22
SELECT a, a FROM test_missing_target
ORDER BY a
-- !query 22 schema
struct<a:int,a:int>
-- !query 22 output
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
-- !query 23
SELECT a/2, a/2 FROM test_missing_target
ORDER BY a/2
-- !query 23 schema
struct<(a div 2):int,(a div 2):int>
-- !query 23 output
0 0
0 0
1 1
1 1
2 2
2 2
3 3
3 3
4 4
4 4
-- !query 24
SELECT a/2, a/2 FROM test_missing_target
GROUP BY a/2 ORDER BY a/2
-- !query 24 schema
struct<(a div 2):int,(a div 2):int>
-- !query 24 output
0 0
1 1
2 2
3 3
4 4
-- !query 25
SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY x.b ORDER BY x.b
-- !query 25 schema
struct<b:int,count(1):bigint>
-- !query 25 output
1 1
2 2
3 3
4 4
-- !query 26
SELECT count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY x.b ORDER BY x.b
-- !query 26 schema
struct<count(1):bigint>
-- !query 26 output
1
2
3
4
-- !query 27
SELECT a%2, count(b) FROM test_missing_target
GROUP BY test_missing_target.a%2
ORDER BY test_missing_target.a%2
-- !query 27 schema
struct<(a % 2):int,count(b):bigint>
-- !query 27 output
0 5
1 5
-- !query 28
SELECT count(c) FROM test_missing_target
GROUP BY lower(test_missing_target.c)
ORDER BY lower(test_missing_target.c)
-- !query 28 schema
struct<count(c):bigint>
-- !query 28 output
2
3
4
1
-- !query 29
SELECT count(a) FROM test_missing_target GROUP BY a ORDER BY b
-- !query 29 schema
struct<>
-- !query 29 output
org.apache.spark.sql.AnalysisException
cannot resolve '`b`' given input columns: [count(a)]; line 1 pos 61
-- !query 30
SELECT count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2
-- !query 30 schema
struct<count(b):bigint>
-- !query 30 output
1
5
4
-- !query 31
SELECT lower(test_missing_target.c), count(c)
FROM test_missing_target GROUP BY lower(c) ORDER BY lower(c)
-- !query 31 schema
struct<lower(c):string,count(c):bigint>
-- !query 31 output
abab 2
bbbb 3
cccc 4
xxxx 1
-- !query 32
SELECT a FROM test_missing_target ORDER BY upper(d)
-- !query 32 schema
struct<a:int>
-- !query 32 output
0
1
2
3
4
5
6
7
8
9
-- !query 33
SELECT count(b) FROM test_missing_target
GROUP BY (b + 1) / 2 ORDER BY (b + 1) / 2 desc
-- !query 33 schema
struct<count(b):bigint>
-- !query 33 output
7
3
-- !query 34
SELECT count(x.a) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY b/2 ORDER BY b/2
-- !query 34 schema
struct<>
-- !query 34 output
org.apache.spark.sql.AnalysisException
Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
-- !query 35
SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY x.b/2 ORDER BY x.b/2
-- !query 35 schema
struct<(b div 2):int,count(b):bigint>
-- !query 35 output
0 1
1 5
2 4
-- !query 36
SELECT count(b) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
GROUP BY x.b/2
-- !query 36 schema
struct<>
-- !query 36 output
org.apache.spark.sql.AnalysisException
Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13
-- !query 37
DROP TABLE test_missing_target
-- !query 37 schema
struct<>
-- !query 37 output