[SPARK-28391][SQL][PYTHON][TESTS] Convert and port 'pgSQL/select_implicit.sql' into UDF test base

## What changes were proposed in this pull request?
This PR adds tests converted from 'pgSQL/select_implicit.sql' to exercise UDFs.
<details><summary>Diff comparing to 'pgSQL/select_implicit.sql'</summary>
<p>

```diff
... diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out
index 0675820..e6a5995 100755
--- a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out
 -91,9 +91,11  struct<>

 -- !query 11
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c
+SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY
+test_missing_target.c
+ORDER BY udf(c)
 -- !query 11 schema
-struct<c:string,count(1):bigint>
+struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 -- !query 11 output
 ABAB	2
 BBBB	2
 -104,9 +106,10  cccc	2

 -- !query 12
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c
+SELECT udf(count(*)) FROM test_missing_target GROUP BY test_missing_target.c
+ORDER BY udf(c)
 -- !query 12 schema
-struct<count(1):bigint>
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 -- !query 12 output
 2
 2
 -117,18 +120,18  struct<count(1):bigint>

 -- !query 13
-SELECT count(*) FROM test_missing_target GROUP BY a ORDER BY b
+SELECT udf(count(*)) FROM test_missing_target GROUP BY a ORDER BY udf(b)
 -- !query 13 schema
 struct<>
 -- !query 13 output
 org.apache.spark.sql.AnalysisException
-cannot resolve '`b`' given input columns: [count(1)]; line 1 pos 61
+cannot resolve '`b`' given input columns: [CAST(udf(cast(count(1) as string)) AS BIGINT)]; line 1 pos 70

 -- !query 14
-SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b
+SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b)
 -- !query 14 schema
-struct<count(1):bigint>
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 -- !query 14 output
 1
 2
 -137,10 +140,10  struct<count(1):bigint>

 -- !query 15
-SELECT test_missing_target.b, count(*)
-  FROM test_missing_target GROUP BY b ORDER BY b
+SELECT udf(test_missing_target.b), udf(count(*))
+  FROM test_missing_target GROUP BY b ORDER BY udf(b)
 -- !query 15 schema
-struct<b:int,count(1):bigint>
+struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 -- !query 15 output
 1	1
 2	2
 -149,9 +152,9  struct<b:int,count(1):bigint>

 -- !query 16
-SELECT c FROM test_missing_target ORDER BY a
+SELECT udf(c) FROM test_missing_target ORDER BY udf(a)
 -- !query 16 schema
-struct<c:string>
+struct<CAST(udf(cast(c as string)) AS STRING):string>
 -- !query 16 output
 XXXX
 ABAB
 -166,9 +169,9  CCCC

 -- !query 17
-SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc
+SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b) desc
 -- !query 17 schema
-struct<count(1):bigint>
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 -- !query 17 output
 4
 3
 -177,17 +180,17  struct<count(1):bigint>

 -- !query 18
-SELECT count(*) FROM test_missing_target ORDER BY 1 desc
+SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc
 -- !query 18 schema
-struct<count(1):bigint>
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 -- !query 18 output
 10

 -- !query 19
-SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1
+SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 1 ORDER BY 1
 -- !query 19 schema
-struct<c:string,count(1):bigint>
+struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 -- !query 19 output
 ABAB	2
 BBBB	2
 -198,18 +201,18  cccc	2

 -- !query 20
-SELECT c, count(*) FROM test_missing_target GROUP BY 3
+SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3
 -- !query 20 schema
 struct<>
 -- !query 20 output
 org.apache.spark.sql.AnalysisException
-GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53
+GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63

 -- !query 21
-SELECT count(*) FROM test_missing_target x, test_missing_target y
-	WHERE x.a = y.a
-	GROUP BY b ORDER BY b
+SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY b ORDER BY udf(b)
 -- !query 21 schema
 struct<>
 -- !query 21 output
 -218,10 +221,10  Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10

 -- !query 22
-SELECT a, a FROM test_missing_target
-	ORDER BY a
+SELECT udf(a), udf(a) FROM test_missing_target
+	ORDER BY udf(a)
 -- !query 22 schema
-struct<a:int,a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
 -- !query 22 output
 0	0
 1	1
 -236,10 +239,10  struct<a:int,a:int>

 -- !query 23
-SELECT a/2, a/2 FROM test_missing_target
-	ORDER BY a/2
+SELECT udf(udf(a)/2), udf(udf(a)/2) FROM test_missing_target
+	ORDER BY udf(udf(a)/2)
 -- !query 23 schema
-struct<(a div 2):int,(a div 2):int>
+struct<CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int,CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int>
 -- !query 23 output
 0	0
 0	0
 -254,10 +257,10  struct<(a div 2):int,(a div 2):int>

 -- !query 24
-SELECT a/2, a/2 FROM test_missing_target
-	GROUP BY a/2 ORDER BY a/2
+SELECT udf(a/2), udf(a/2) FROM test_missing_target
+	GROUP BY a/2 ORDER BY udf(a/2)
 -- !query 24 schema
-struct<(a div 2):int,(a div 2):int>
+struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) as string)) AS INT):int>
 -- !query 24 output
 0	0
 1	1
 -267,11 +270,11  struct<(a div 2):int,(a div 2):int>

 -- !query 25
-SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
-	WHERE x.a = y.a
-	GROUP BY x.b ORDER BY x.b
+SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b ORDER BY udf(x.b)
 -- !query 25 schema
-struct<b:int,count(1):bigint>
+struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 -- !query 25 output
 1	1
 2	2
 -280,11 +283,11  struct<b:int,count(1):bigint>

 -- !query 26
-SELECT count(*) FROM test_missing_target x, test_missing_target y
-	WHERE x.a = y.a
-	GROUP BY x.b ORDER BY x.b
+SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b ORDER BY udf(x.b)
 -- !query 26 schema
-struct<count(1):bigint>
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 -- !query 26 output
 1
 2
 -293,22 +296,22  struct<count(1):bigint>

 -- !query 27
-SELECT a%2, count(b) FROM test_missing_target
+SELECT a%2, udf(count(udf(b))) FROM test_missing_target
 GROUP BY test_missing_target.a%2
-ORDER BY test_missing_target.a%2
+ORDER BY udf(test_missing_target.a%2)
 -- !query 27 schema
-struct<(a % 2):int,count(b):bigint>
+struct<(a % 2):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
 -- !query 27 output
 0	5
 1	5

 -- !query 28
-SELECT count(c) FROM test_missing_target
+SELECT udf(count(c)) FROM test_missing_target
 GROUP BY lower(test_missing_target.c)
-ORDER BY lower(test_missing_target.c)
+ORDER BY udf(lower(test_missing_target.c))
 -- !query 28 schema
-struct<count(c):bigint>
+struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint>
 -- !query 28 output
 2
 3
 -317,18 +320,18  struct<count(c):bigint>

 -- !query 29
-SELECT count(a) FROM test_missing_target GROUP BY a ORDER BY b
+SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY a ORDER BY udf(b)
 -- !query 29 schema
 struct<>
 -- !query 29 output
 org.apache.spark.sql.AnalysisException
-cannot resolve '`b`' given input columns: [count(a)]; line 1 pos 61
+cannot resolve '`b`' given input columns: [CAST(udf(cast(count(cast(udf(cast(a as string)) as int)) as string)) AS BIGINT)]; line 1 pos 75

 -- !query 30
-SELECT count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2
+SELECT udf(count(b)) FROM test_missing_target GROUP BY b/2 ORDER BY udf(b/2)
 -- !query 30 schema
-struct<count(b):bigint>
+struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
 -- !query 30 output
 1
 5
 -336,10 +339,10  struct<count(b):bigint>

 -- !query 31
-SELECT lower(test_missing_target.c), count(c)
-  FROM test_missing_target GROUP BY lower(c) ORDER BY lower(c)
+SELECT udf(lower(test_missing_target.c)), udf(count(udf(c)))
+  FROM test_missing_target GROUP BY lower(c) ORDER BY udf(lower(c))
 -- !query 31 schema
-struct<lower(c):string,count(c):bigint>
+struct<CAST(udf(cast(lower(c) as string)) AS STRING):string,CAST(udf(cast(count(cast(udf(cast(c as string)) as string)) as string)) AS BIGINT):bigint>
 -- !query 31 output
 abab	2
 bbbb	3
 -348,9 +351,9  xxxx	1

 -- !query 32
-SELECT a FROM test_missing_target ORDER BY upper(d)
+SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d)))
 -- !query 32 schema
-struct<a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int>
 -- !query 32 output
 0
 1
 -365,19 +368,19  struct<a:int>

 -- !query 33
-SELECT count(b) FROM test_missing_target
-	GROUP BY (b + 1) / 2 ORDER BY (b + 1) / 2 desc
+SELECT udf(count(b)) FROM test_missing_target
+	GROUP BY (b + 1) / 2 ORDER BY udf((b + 1) / 2) desc
 -- !query 33 schema
-struct<count(b):bigint>
+struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
 -- !query 33 output
 7
 3

 -- !query 34
-SELECT count(x.a) FROM test_missing_target x, test_missing_target y
-	WHERE x.a = y.a
-	GROUP BY b/2 ORDER BY b/2
+SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY b/2 ORDER BY udf(b/2)
 -- !query 34 schema
 struct<>
 -- !query 34 output
 -386,11 +389,12  Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10

 -- !query 35
-SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y
-	WHERE x.a = y.a
-	GROUP BY x.b/2 ORDER BY x.b/2
+SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x,
+test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b/2 ORDER BY udf(x.b/2)
 -- !query 35 schema
-struct<(b div 2):int,count(b):bigint>
+struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
 -- !query 35 output
 0	1
 1	5
 -398,14 +402,14  struct<(b div 2):int,count(b):bigint>

 -- !query 36
-SELECT count(b) FROM test_missing_target x, test_missing_target y
-	WHERE x.a = y.a
+SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
 	GROUP BY x.b/2
 -- !query 36 schema
 struct<>
 -- !query 36 output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13
+Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21

 -- !query 37
```

</p>
</details>

## How was this patch tested?
Tested as guided in SPARK-27921.

Closes #25233 from Udbhav30/master.

Authored-by: Udbhav30 <u.agrawal30@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Udbhav30 2019-07-24 09:47:08 +09:00 committed by HyukjinKwon
parent 8fc5cb6285
commit 86dad404bd
2 changed files with 587 additions and 0 deletions

View file

@ -0,0 +1,167 @@
--
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
--
--
-- SELECT_IMPLICIT
-- Test cases for queries with ordering terms missing from the target list.
-- This used to be called "junkfilter.sql".
-- The parser uses the term "resjunk" to handle these cases.
-- - thomas 1998-07-09
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_implicit.sql
--
-- This test file was converted from pgSQL/select_implicit.sql
-- [SPARK-28445] Inconsistency between Scala and Python/Pandas UDFs when GROUP BY is used with udf()
-- TODO: Add UDFs to the GROUP BY clauses once [SPARK-28445] is resolved.
-- load test data
-- Fixture shared by every query in this file: a 10-row parquet table.
--   a: unique ints 0..9
--   b: repeating ints (1, 2, 2, 3, 3, 3, 4, 4, 4, 4)
--   c: strings that differ only by case in places ('BBBB' vs 'bbbb',
--      'cccc' vs 'CCCC'); the lower(c) queries later in this file fold them
--   d: single letters in mixed case, used by the upper(d) ordering query
-- NOTE(review): the statement text below is echoed verbatim in the generated
-- .sql.out file, so the golden file must be regenerated if it is edited.
CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet;
INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A');
INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b');
INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c');
INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D');
INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e');
INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F');
INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g');
INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h');
INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');
INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
-- Section: plain column references.
-- "Target" below means an entry in the select (target) list. In this section
-- the select-list expressions are wrapped in udf(), and so are the ORDER BY
-- expressions except for the positional `ORDER BY 1`; GROUP BY expressions are
-- left unwrapped (see the SPARK-28445 TODO in the file header).

-- w/ existing GROUP BY target
-- (the grouping column c also appears in the select list, as udf(c))
SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY
test_missing_target.c
ORDER BY udf(c);
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
-- (c is grouped and ordered on, but is not in the select list)
SELECT udf(count(*)) FROM test_missing_target GROUP BY test_missing_target.c
ORDER BY udf(c);
-- w/o existing GROUP BY target and w/o existing a different ORDER BY target
-- failure expected
-- (groups by a but orders by b, which is neither grouped nor selected; the
-- recorded result is an AnalysisException "cannot resolve '`b`'")
SELECT udf(count(*)) FROM test_missing_target GROUP BY a ORDER BY udf(b);
-- w/o existing GROUP BY target and w/o existing same ORDER BY target
-- (groups and orders by the same column b, which is not in the select list)
SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b);
-- w/ existing GROUP BY target using a relation name in target
-- (b is selected as test_missing_target.b but grouped/ordered as bare b)
SELECT udf(test_missing_target.b), udf(count(*))
FROM test_missing_target GROUP BY b ORDER BY udf(b);
-- w/o existing GROUP BY target
-- (no GROUP BY at all: orders by a, which is not in the select list)
SELECT udf(c) FROM test_missing_target ORDER BY udf(a);
-- w/o existing ORDER BY target
-- (descending order on b, which is grouped but not selected)
SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b) desc;
-- group using reference number
-- NOTE(review): despite the comment above, this query has no GROUP BY; it is a
-- global aggregate ordered by udf(1). Presumably udf(1) is evaluated as a
-- constant expression rather than as select-list position 1 -- confirm that
-- the positional ORDER BY case is still meant to be covered here.
SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc;
-- order using reference number
-- (both GROUP BY 1 and ORDER BY 1 refer to select-list position 1, udf(c))
SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 1 ORDER BY 1;
-- group using reference number out of range
-- failure expected
-- (the select list has only two entries, so position 3 is rejected)
SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3;
-- group w/o existing GROUP BY and ORDER BY target under ambiguous condition
-- failure expected
-- (self-join: unqualified b could be x.b or y.b)
SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY b ORDER BY udf(b);
-- order w/ target under ambiguous condition
-- failure NOT expected
-- (udf(a) appears twice in the select list; ordering by udf(a) must still work)
SELECT udf(a), udf(a) FROM test_missing_target
ORDER BY udf(a);
-- order expression w/ target under ambiguous condition
-- failure NOT expected
-- (same as above, with a duplicated expression instead of a duplicated column)
SELECT udf(udf(a)/2), udf(udf(a)/2) FROM test_missing_target
ORDER BY udf(udf(a)/2);
-- group expression w/ target under ambiguous condition
-- failure NOT expected
-- (duplicated select expression whose inner a/2 is also the grouping key)
SELECT udf(a/2), udf(a/2) FROM test_missing_target
GROUP BY a/2 ORDER BY udf(a/2);
-- group w/ existing GROUP BY target under ambiguous condition
-- (self-join, but b is qualified as x.b everywhere, so no ambiguity)
SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY x.b ORDER BY udf(x.b);
-- group w/o existing GROUP BY target under ambiguous condition
-- (as above, but x.b is not in the select list)
SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY x.b ORDER BY udf(x.b);
-- [SPARK-28329] SELECT INTO syntax
-- The case below is kept commented out, in its original non-udf form, under
-- the SPARK-28329 reference above.
-- group w/o existing GROUP BY target under ambiguous condition
-- into a table
-- SELECT count(*) INTO TABLE test_missing_target2
-- FROM test_missing_target x, test_missing_target y
-- WHERE x.a = y.a
-- GROUP BY x.b ORDER BY x.b;
-- SELECT * FROM test_missing_target2;
-- Functions and expressions
-- This section repeats the scenarios of the column-reference queries earlier
-- in this file, but with expressions (a%2, b/2, lower(c), upper(d)) as the
-- grouping/ordering keys instead of bare columns.

-- w/ existing GROUP BY target
-- NOTE(review): a%2 is the only select-list item in this file that is not
-- wrapped in udf(); presumably deliberate because it is also the GROUP BY
-- key (see the SPARK-28445 TODO in the file header) -- confirm.
SELECT a%2, udf(count(udf(b))) FROM test_missing_target
GROUP BY test_missing_target.a%2
ORDER BY udf(test_missing_target.a%2);
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
-- (lower(c) is grouped and ordered on, but is not in the select list)
SELECT udf(count(c)) FROM test_missing_target
GROUP BY lower(test_missing_target.c)
ORDER BY udf(lower(test_missing_target.c));
-- w/o existing GROUP BY target and w/o existing a different ORDER BY target
-- failure expected
-- (groups by a but orders by b, which is neither grouped nor selected)
SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY a ORDER BY udf(b);
-- w/o existing GROUP BY target and w/o existing same ORDER BY target
-- (groups and orders by the same expression b/2, which is not selected)
SELECT udf(count(b)) FROM test_missing_target GROUP BY b/2 ORDER BY udf(b/2);
-- w/ existing GROUP BY target using a relation name in target
-- (c is qualified in the select list but bare in GROUP BY / ORDER BY)
SELECT udf(lower(test_missing_target.c)), udf(count(udf(c)))
FROM test_missing_target GROUP BY lower(c) ORDER BY udf(lower(c));
-- w/o existing GROUP BY target
-- (no GROUP BY: orders by an expression over d, which is not selected)
SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d)));
-- w/o existing ORDER BY target
-- (descending order on the grouping expression, which is not selected)
SELECT udf(count(b)) FROM test_missing_target
GROUP BY (b + 1) / 2 ORDER BY udf((b + 1) / 2) desc;
-- group w/o existing GROUP BY and ORDER BY target under ambiguous condition
-- failure expected
-- (self-join: unqualified b in b/2 could be x.b or y.b)
SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY b/2 ORDER BY udf(b/2);
-- group w/ existing GROUP BY target under ambiguous condition
-- (self-join, but every reference to b is qualified as x.b)
SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x,
test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY x.b/2 ORDER BY udf(x.b/2);
-- group w/o existing GROUP BY target under ambiguous condition
-- failure expected due to ambiguous b in count(udf(b))
SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY x.b/2;
-- [SPARK-28329] SELECT INTO syntax
-- The case below is kept commented out, in its original non-udf form, under
-- the SPARK-28329 reference above.
-- group w/o existing GROUP BY target under ambiguous condition
-- into a table
-- SELECT count(x.b) INTO TABLE test_missing_target3
-- FROM test_missing_target x, test_missing_target y
-- WHERE x.a = y.a
-- GROUP BY x.b/2 ORDER BY x.b/2;
-- SELECT * FROM test_missing_target3;
-- Cleanup
-- Only test_missing_target is created by this file. The other two drops stay
-- commented out because the SELECT ... INTO TABLE statements that would
-- create those tables are themselves commented out earlier in this file.
DROP TABLE test_missing_target;
-- DROP TABLE test_missing_target2;
-- DROP TABLE test_missing_target3;

View file

@ -0,0 +1,420 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 38
-- !query 0
CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet
-- !query 0 schema
struct<>
-- !query 0 output
-- !query 1
INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A')
-- !query 1 schema
struct<>
-- !query 1 output
-- !query 2
INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b')
-- !query 2 schema
struct<>
-- !query 2 output
-- !query 3
INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c')
-- !query 3 schema
struct<>
-- !query 3 output
-- !query 4
INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D')
-- !query 4 schema
struct<>
-- !query 4 output
-- !query 5
INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e')
-- !query 5 schema
struct<>
-- !query 5 output
-- !query 6
INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F')
-- !query 6 schema
struct<>
-- !query 6 output
-- !query 7
INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g')
-- !query 7 schema
struct<>
-- !query 7 output
-- !query 8
INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h')
-- !query 8 schema
struct<>
-- !query 8 output
-- !query 9
INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I')
-- !query 9 schema
struct<>
-- !query 9 output
-- !query 10
INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j')
-- !query 10 schema
struct<>
-- !query 10 output
-- !query 11
SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY
test_missing_target.c
ORDER BY udf(c)
-- !query 11 schema
struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
-- !query 11 output
ABAB 2
BBBB 2
CCCC 2
XXXX 1
bbbb 1
cccc 2
-- !query 12
SELECT udf(count(*)) FROM test_missing_target GROUP BY test_missing_target.c
ORDER BY udf(c)
-- !query 12 schema
struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
-- !query 12 output
2
2
2
1
1
2
-- !query 13
SELECT udf(count(*)) FROM test_missing_target GROUP BY a ORDER BY udf(b)
-- !query 13 schema
struct<>
-- !query 13 output
org.apache.spark.sql.AnalysisException
cannot resolve '`b`' given input columns: [CAST(udf(cast(count(1) as string)) AS BIGINT)]; line 1 pos 70
-- !query 14
SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b)
-- !query 14 schema
struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
-- !query 14 output
1
2
3
4
-- !query 15
SELECT udf(test_missing_target.b), udf(count(*))
FROM test_missing_target GROUP BY b ORDER BY udf(b)
-- !query 15 schema
struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
-- !query 15 output
1 1
2 2
3 3
4 4
-- !query 16
SELECT udf(c) FROM test_missing_target ORDER BY udf(a)
-- !query 16 schema
struct<CAST(udf(cast(c as string)) AS STRING):string>
-- !query 16 output
XXXX
ABAB
ABAB
BBBB
BBBB
bbbb
cccc
cccc
CCCC
CCCC
-- !query 17
SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b) desc
-- !query 17 schema
struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
-- !query 17 output
4
3
2
1
-- !query 18
SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc
-- !query 18 schema
struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
-- !query 18 output
10
-- !query 19
SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 1 ORDER BY 1
-- !query 19 schema
struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
-- !query 19 output
ABAB 2
BBBB 2
CCCC 2
XXXX 1
bbbb 1
cccc 2
-- !query 20
SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3
-- !query 20 schema
struct<>
-- !query 20 output
org.apache.spark.sql.AnalysisException
GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63
-- !query 21
SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY b ORDER BY udf(b)
-- !query 21 schema
struct<>
-- !query 21 output
org.apache.spark.sql.AnalysisException
Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
-- !query 22
SELECT udf(a), udf(a) FROM test_missing_target
ORDER BY udf(a)
-- !query 22 schema
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
-- !query 22 output
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
-- !query 23
SELECT udf(udf(a)/2), udf(udf(a)/2) FROM test_missing_target
ORDER BY udf(udf(a)/2)
-- !query 23 schema
struct<CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int,CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int>
-- !query 23 output
0 0
0 0
1 1
1 1
2 2
2 2
3 3
3 3
4 4
4 4
-- !query 24
SELECT udf(a/2), udf(a/2) FROM test_missing_target
GROUP BY a/2 ORDER BY udf(a/2)
-- !query 24 schema
struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) as string)) AS INT):int>
-- !query 24 output
0 0
1 1
2 2
3 3
4 4
-- !query 25
SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY x.b ORDER BY udf(x.b)
-- !query 25 schema
struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
-- !query 25 output
1 1
2 2
3 3
4 4
-- !query 26
SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY x.b ORDER BY udf(x.b)
-- !query 26 schema
struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
-- !query 26 output
1
2
3
4
-- !query 27
SELECT a%2, udf(count(udf(b))) FROM test_missing_target
GROUP BY test_missing_target.a%2
ORDER BY udf(test_missing_target.a%2)
-- !query 27 schema
struct<(a % 2):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
-- !query 27 output
0 5
1 5
-- !query 28
SELECT udf(count(c)) FROM test_missing_target
GROUP BY lower(test_missing_target.c)
ORDER BY udf(lower(test_missing_target.c))
-- !query 28 schema
struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint>
-- !query 28 output
2
3
4
1
-- !query 29
SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY a ORDER BY udf(b)
-- !query 29 schema
struct<>
-- !query 29 output
org.apache.spark.sql.AnalysisException
cannot resolve '`b`' given input columns: [CAST(udf(cast(count(cast(udf(cast(a as string)) as int)) as string)) AS BIGINT)]; line 1 pos 75
-- !query 30
SELECT udf(count(b)) FROM test_missing_target GROUP BY b/2 ORDER BY udf(b/2)
-- !query 30 schema
struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
-- !query 30 output
1
5
4
-- !query 31
SELECT udf(lower(test_missing_target.c)), udf(count(udf(c)))
FROM test_missing_target GROUP BY lower(c) ORDER BY udf(lower(c))
-- !query 31 schema
struct<CAST(udf(cast(lower(c) as string)) AS STRING):string,CAST(udf(cast(count(cast(udf(cast(c as string)) as string)) as string)) AS BIGINT):bigint>
-- !query 31 output
abab 2
bbbb 3
cccc 4
xxxx 1
-- !query 32
SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d)))
-- !query 32 schema
struct<CAST(udf(cast(a as string)) AS INT):int>
-- !query 32 output
0
1
2
3
4
5
6
7
8
9
-- !query 33
SELECT udf(count(b)) FROM test_missing_target
GROUP BY (b + 1) / 2 ORDER BY udf((b + 1) / 2) desc
-- !query 33 schema
struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
-- !query 33 output
7
3
-- !query 34
SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY b/2 ORDER BY udf(b/2)
-- !query 34 schema
struct<>
-- !query 34 output
org.apache.spark.sql.AnalysisException
Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
-- !query 35
SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x,
test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY x.b/2 ORDER BY udf(x.b/2)
-- !query 35 schema
struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
-- !query 35 output
0 1
1 5
2 4
-- !query 36
SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y
WHERE udf(x.a) = udf(y.a)
GROUP BY x.b/2
-- !query 36 schema
struct<>
-- !query 36 output
org.apache.spark.sql.AnalysisException
Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21
-- !query 37
DROP TABLE test_missing_target
-- !query 37 schema
struct<>
-- !query 37 output