[SPARK-28391][SQL][PYTHON][TESTS] Convert and port 'pgSQL/select_implicit.sql' into UDF test base

## What changes were proposed in this pull request? This PR adds some tests converted from 'pgSQL/select_implicit.sql' to test UDFs <details><summary>Diff comparing to 'pgSQL/select_implicit.sql'</summary> <p> ```diff ... diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out index 0675820..e6a5995 100755 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out -91,9 +91,11 struct<> -- !query 11 -SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c +SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY +test_missing_target.c +ORDER BY udf(c) -- !query 11 schema -struct<c:string,count(1):bigint> +struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 11 output ABAB 2 BBBB 2 -104,9 +106,10 cccc 2 -- !query 12 -SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c +SELECT udf(count(*)) FROM test_missing_target GROUP BY test_missing_target.c +ORDER BY udf(c) -- !query 12 schema -struct<count(1):bigint> +struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 12 output 2 2 -117,18 +120,18 struct<count(1):bigint> -- !query 13 -SELECT count(*) FROM test_missing_target GROUP BY a ORDER BY b +SELECT udf(count(*)) FROM test_missing_target GROUP BY a ORDER BY udf(b) -- !query 13 schema struct<> -- !query 13 output org.apache.spark.sql.AnalysisException -cannot resolve '`b`' given input columns: [count(1)]; line 1 pos 61 +cannot resolve '`b`' given input columns: [CAST(udf(cast(count(1) as string)) AS BIGINT)]; line 1 pos 70 -- !query 14 -SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b +SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b) -- !query 14 schema 
-struct<count(1):bigint> +struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 14 output 1 2 -137,10 +140,10 struct<count(1):bigint> -- !query 15 -SELECT test_missing_target.b, count(*) - FROM test_missing_target GROUP BY b ORDER BY b +SELECT udf(test_missing_target.b), udf(count(*)) + FROM test_missing_target GROUP BY b ORDER BY udf(b) -- !query 15 schema -struct<b:int,count(1):bigint> +struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 15 output 1 1 2 2 -149,9 +152,9 struct<b:int,count(1):bigint> -- !query 16 -SELECT c FROM test_missing_target ORDER BY a +SELECT udf(c) FROM test_missing_target ORDER BY udf(a) -- !query 16 schema -struct<c:string> +struct<CAST(udf(cast(c as string)) AS STRING):string> -- !query 16 output XXXX ABAB -166,9 +169,9 CCCC -- !query 17 -SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc +SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b) desc -- !query 17 schema -struct<count(1):bigint> +struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 17 output 4 3 -177,17 +180,17 struct<count(1):bigint> -- !query 18 -SELECT count(*) FROM test_missing_target ORDER BY 1 desc +SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc -- !query 18 schema -struct<count(1):bigint> +struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 18 output 10 -- !query 19 -SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1 +SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 1 ORDER BY 1 -- !query 19 schema -struct<c:string,count(1):bigint> +struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 19 output ABAB 2 BBBB 2 -198,18 +201,18 cccc 2 -- !query 20 -SELECT c, count(*) FROM test_missing_target GROUP BY 3 +SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3 -- !query 20 schema struct<> -- !query 20 output 
org.apache.spark.sql.AnalysisException -GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53 +GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63 -- !query 21 -SELECT count(*) FROM test_missing_target x, test_missing_target y - WHERE x.a = y.a - GROUP BY b ORDER BY b +SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y + WHERE udf(x.a) = udf(y.a) + GROUP BY b ORDER BY udf(b) -- !query 21 schema struct<> -- !query 21 output -218,10 +221,10 Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 -- !query 22 -SELECT a, a FROM test_missing_target - ORDER BY a +SELECT udf(a), udf(a) FROM test_missing_target + ORDER BY udf(a) -- !query 22 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int> -- !query 22 output 0 0 1 1 -236,10 +239,10 struct<a:int,a:int> -- !query 23 -SELECT a/2, a/2 FROM test_missing_target - ORDER BY a/2 +SELECT udf(udf(a)/2), udf(udf(a)/2) FROM test_missing_target + ORDER BY udf(udf(a)/2) -- !query 23 schema -struct<(a div 2):int,(a div 2):int> +struct<CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int,CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int> -- !query 23 output 0 0 0 0 -254,10 +257,10 struct<(a div 2):int,(a div 2):int> -- !query 24 -SELECT a/2, a/2 FROM test_missing_target - GROUP BY a/2 ORDER BY a/2 +SELECT udf(a/2), udf(a/2) FROM test_missing_target + GROUP BY a/2 ORDER BY udf(a/2) -- !query 24 schema -struct<(a div 2):int,(a div 2):int> +struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) as string)) AS INT):int> -- !query 24 output 0 0 1 1 -267,11 +270,11 struct<(a div 2):int,(a div 2):int> -- !query 25 -SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y - WHERE x.a = y.a - GROUP BY x.b ORDER BY x.b +SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y + 
WHERE udf(x.a) = udf(y.a) + GROUP BY x.b ORDER BY udf(x.b) -- !query 25 schema -struct<b:int,count(1):bigint> +struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 25 output 1 1 2 2 -280,11 +283,11 struct<b:int,count(1):bigint> -- !query 26 -SELECT count(*) FROM test_missing_target x, test_missing_target y - WHERE x.a = y.a - GROUP BY x.b ORDER BY x.b +SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y + WHERE udf(x.a) = udf(y.a) + GROUP BY x.b ORDER BY udf(x.b) -- !query 26 schema -struct<count(1):bigint> +struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 26 output 1 2 -293,22 +296,22 struct<count(1):bigint> -- !query 27 -SELECT a%2, count(b) FROM test_missing_target +SELECT a%2, udf(count(udf(b))) FROM test_missing_target GROUP BY test_missing_target.a%2 -ORDER BY test_missing_target.a%2 +ORDER BY udf(test_missing_target.a%2) -- !query 27 schema -struct<(a % 2):int,count(b):bigint> +struct<(a % 2):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint> -- !query 27 output 0 5 1 5 -- !query 28 -SELECT count(c) FROM test_missing_target +SELECT udf(count(c)) FROM test_missing_target GROUP BY lower(test_missing_target.c) -ORDER BY lower(test_missing_target.c) +ORDER BY udf(lower(test_missing_target.c)) -- !query 28 schema -struct<count(c):bigint> +struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint> -- !query 28 output 2 3 -317,18 +320,18 struct<count(c):bigint> -- !query 29 -SELECT count(a) FROM test_missing_target GROUP BY a ORDER BY b +SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY a ORDER BY udf(b) -- !query 29 schema struct<> -- !query 29 output org.apache.spark.sql.AnalysisException -cannot resolve '`b`' given input columns: [count(a)]; line 1 pos 61 +cannot resolve '`b`' given input columns: [CAST(udf(cast(count(cast(udf(cast(a as string)) as int)) as string)) AS BIGINT)]; line 1 pos 75 -- !query 30 -SELECT 
count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2 +SELECT udf(count(b)) FROM test_missing_target GROUP BY b/2 ORDER BY udf(b/2) -- !query 30 schema -struct<count(b):bigint> +struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint> -- !query 30 output 1 5 -336,10 +339,10 struct<count(b):bigint> -- !query 31 -SELECT lower(test_missing_target.c), count(c) - FROM test_missing_target GROUP BY lower(c) ORDER BY lower(c) +SELECT udf(lower(test_missing_target.c)), udf(count(udf(c))) + FROM test_missing_target GROUP BY lower(c) ORDER BY udf(lower(c)) -- !query 31 schema -struct<lower(c):string,count(c):bigint> +struct<CAST(udf(cast(lower(c) as string)) AS STRING):string,CAST(udf(cast(count(cast(udf(cast(c as string)) as string)) as string)) AS BIGINT):bigint> -- !query 31 output abab 2 bbbb 3 -348,9 +351,9 xxxx 1 -- !query 32 -SELECT a FROM test_missing_target ORDER BY upper(d) +SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d))) -- !query 32 schema -struct<a:int> +struct<CAST(udf(cast(a as string)) AS INT):int> -- !query 32 output 0 1 -365,19 +368,19 struct<a:int> -- !query 33 -SELECT count(b) FROM test_missing_target - GROUP BY (b + 1) / 2 ORDER BY (b + 1) / 2 desc +SELECT udf(count(b)) FROM test_missing_target + GROUP BY (b + 1) / 2 ORDER BY udf((b + 1) / 2) desc -- !query 33 schema -struct<count(b):bigint> +struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint> -- !query 33 output 7 3 -- !query 34 -SELECT count(x.a) FROM test_missing_target x, test_missing_target y - WHERE x.a = y.a - GROUP BY b/2 ORDER BY b/2 +SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y + WHERE udf(x.a) = udf(y.a) + GROUP BY b/2 ORDER BY udf(b/2) -- !query 34 schema struct<> -- !query 34 output -386,11 +389,12 Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 -- !query 35 -SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y - WHERE x.a = y.a - GROUP BY x.b/2 ORDER BY x.b/2 +SELECT udf(x.b/2), 
udf(count(udf(x.b))) FROM test_missing_target x, +test_missing_target y + WHERE udf(x.a) = udf(y.a) + GROUP BY x.b/2 ORDER BY udf(x.b/2) -- !query 35 schema -struct<(b div 2):int,count(b):bigint> +struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint> -- !query 35 output 0 1 1 5 -398,14 +402,14 struct<(b div 2):int,count(b):bigint> -- !query 36 -SELECT count(b) FROM test_missing_target x, test_missing_target y - WHERE x.a = y.a +SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y + WHERE udf(x.a) = udf(y.a) GROUP BY x.b/2 -- !query 36 schema struct<> -- !query 36 output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13 +Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21 -- !query 37 ``` </p> </details> ## How was this patch tested? Tested as Guided in SPARK-27921 Closes #25233 from Udbhav30/master. Authored-by: Udbhav30 <u.agrawal30@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
2019-07-24 09:47:08 +09:00 · 2019-07-24 09:47:08 +09:00 · 86dad404bd
parent 8fc5cb6285
commit 86dad404bd
2 changed files with 587 additions and 0 deletions
--- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_implicit.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_implicit.sql
@ -0,0 +1,167 @@
+--
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+--
+-- SELECT_IMPLICIT
+-- Test cases for queries with ordering terms missing from the target list.
+-- This used to be called "junkfilter.sql".
+-- The parser uses the term "resjunk" to handle these cases.
+-- - thomas 1998-07-09
+-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_implicit.sql
+--
+-- This test file was converted from pgSQL/select_implicit.sql
+-- [SPARK-28445] Inconsistency between Scala and Python/Pandas UDFs when GROUP BY is used with udf()
+-- TODO: We should add UDFs in GROUP BY clause when [SPARK-28445] is resolved.
+
+-- load test data: a is unique (0-9), b and c repeat across rows, c and d mix upper and lower case
+CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet;
+INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A');
+INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b');
+INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c');
+INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D');
+INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e');
+INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F');
+INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g');
+INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h');
+INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');
+INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
+
+
+--   w/ existing GROUP BY target (the grouped column c is also in the select list)
+SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY
+test_missing_target.c
+ORDER BY udf(c);
+
+--   w/o existing GROUP BY target using a relation name in GROUP BY clause
+SELECT udf(count(*)) FROM test_missing_target GROUP BY test_missing_target.c
+ORDER BY udf(c);
+
+--   w/o existing GROUP BY target and w/ a different ORDER BY target that is also missing
+--   failure expected: after grouping by a, the ORDER BY column b cannot be resolved
+SELECT udf(count(*)) FROM test_missing_target GROUP BY a ORDER BY udf(b);
+
+--   w/o existing GROUP BY target, ordering by the same (also missing) column
+SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b);
+
+--   w/ existing GROUP BY target using a relation name in target
+SELECT udf(test_missing_target.b), udf(count(*))
+  FROM test_missing_target GROUP BY b ORDER BY udf(b);
+
+--   w/o existing ORDER BY target (this query has no GROUP BY clause)
+SELECT udf(c) FROM test_missing_target ORDER BY udf(a);
+
+--   w/o existing ORDER BY target, descending order
+SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b) desc;
+
+--   order using reference number (NOTE(review): wrapped as udf(1) this may be a constant expression, not an ordinal - confirm)
+SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc;
+
+--   group and order using reference number
+SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 1 ORDER BY 1;
+
+--   group using reference number out of range
+--   failure expected: position 3 is outside the two-column select list
+SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3;
+
+--   group w/o existing GROUP BY and ORDER BY target under ambiguous condition
+--   failure expected: unqualified b could refer to either x.b or y.b
+SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY b ORDER BY udf(b);
+
+--   order w/ target under ambiguous condition (the same expression appears twice in the select list)
+--   failure NOT expected
+SELECT udf(a), udf(a) FROM test_missing_target
+	ORDER BY udf(a);
+
+--   order expression w/ target under ambiguous condition
+--   failure NOT expected
+SELECT udf(udf(a)/2), udf(udf(a)/2) FROM test_missing_target
+	ORDER BY udf(udf(a)/2);
+
+--   group expression w/ target under ambiguous condition
+--   failure NOT expected
+SELECT udf(a/2), udf(a/2) FROM test_missing_target
+	GROUP BY a/2 ORDER BY udf(a/2);
+
+--   group w/ existing GROUP BY target under ambiguous condition (b is qualified as x.b, so it resolves)
+SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b ORDER BY udf(x.b);
+
+--   group w/o existing GROUP BY target under ambiguous condition (b is qualified as x.b, so it resolves)
+SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b ORDER BY udf(x.b);
+
+-- [SPARK-28329] SELECT INTO syntax
+--   group w/o existing GROUP BY target under ambiguous condition
+--   into a table
+-- SELECT count(*) INTO TABLE test_missing_target2
+-- FROM test_missing_target x, test_missing_target y
+-- 	WHERE x.a = y.a
+-- 	GROUP BY x.b ORDER BY x.b;
+-- SELECT * FROM test_missing_target2;
+
+
+--  Functions and expressions: the same scenarios as above, using expressions instead of plain columns
+
+--   w/ existing GROUP BY target (NOTE(review): a%2 in the select list is not wrapped in udf() - confirm intent)
+SELECT a%2, udf(count(udf(b))) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY udf(test_missing_target.a%2);
+
+--   w/o existing GROUP BY target using a relation name in GROUP BY clause
+SELECT udf(count(c)) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY udf(lower(test_missing_target.c));
+
+--   w/o existing GROUP BY target and w/ a different ORDER BY target that is also missing
+--   failure expected: after grouping by a, the ORDER BY column b cannot be resolved
+SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY a ORDER BY udf(b);
+
+--   w/o existing GROUP BY target, ordering by the same (also missing) expression
+SELECT udf(count(b)) FROM test_missing_target GROUP BY b/2 ORDER BY udf(b/2);
+
+--   w/ existing GROUP BY target using a relation name in target
+SELECT udf(lower(test_missing_target.c)), udf(count(udf(c)))
+  FROM test_missing_target GROUP BY lower(c) ORDER BY udf(lower(c));
+
+--   w/o existing ORDER BY target (this query has no GROUP BY clause)
+SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d)));
+
+--   w/o existing ORDER BY target, descending order
+SELECT udf(count(b)) FROM test_missing_target
+	GROUP BY (b + 1) / 2 ORDER BY udf((b + 1) / 2) desc;
+
+--   group w/o existing GROUP BY and ORDER BY target under ambiguous condition
+--   failure expected: unqualified b could refer to either x.b or y.b
+SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY b/2 ORDER BY udf(b/2);
+
+--   group w/ existing GROUP BY target under ambiguous condition (b is qualified as x.b, so it resolves)
+SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x,
+test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b/2 ORDER BY udf(x.b/2);
+
+--   group w/o existing GROUP BY target under ambiguous condition
+--   failure expected due to ambiguous b in count(udf(b))
+SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b/2;
+
+-- [SPARK-28329] SELECT INTO syntax
+--   group w/o existing GROUP BY target under ambiguous condition
+--   into a table
+-- SELECT count(x.b) INTO TABLE test_missing_target3
+-- FROM test_missing_target x, test_missing_target y
+-- 	WHERE x.a = y.a
+-- 	GROUP BY x.b/2 ORDER BY x.b/2;
+-- SELECT * FROM test_missing_target3;
+
+--   Cleanup: only the fixture table exists, the other two tables belong to the disabled SELECT INTO cases
+DROP TABLE test_missing_target;
+-- DROP TABLE test_missing_target2;
+-- DROP TABLE test_missing_target3;
--- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out
@ -0,0 +1,420 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 38
+
+
+-- !query 0
+CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A')
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b')
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c')
+-- !query 3 schema
+struct<>
+-- !query 3 output
+
+
+
+-- !query 4
+INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D')
+-- !query 4 schema
+struct<>
+-- !query 4 output
+
+
+
+-- !query 5
+INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e')
+-- !query 5 schema
+struct<>
+-- !query 5 output
+
+
+
+-- !query 6
+INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F')
+-- !query 6 schema
+struct<>
+-- !query 6 output
+
+
+
+-- !query 7
+INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g')
+-- !query 7 schema
+struct<>
+-- !query 7 output
+
+
+
+-- !query 8
+INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h')
+-- !query 8 schema
+struct<>
+-- !query 8 output
+
+
+
+-- !query 9
+INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I')
+-- !query 9 schema
+struct<>
+-- !query 9 output
+
+
+
+-- !query 10
+INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j')
+-- !query 10 schema
+struct<>
+-- !query 10 output
+
+
+
+-- !query 11
+SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY
+test_missing_target.c
+ORDER BY udf(c)
+-- !query 11 schema
+struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+-- !query 11 output
+ABAB	2
+BBBB	2
+CCCC	2
+XXXX	1
+bbbb	1
+cccc	2
+
+
+-- !query 12
+SELECT udf(count(*)) FROM test_missing_target GROUP BY test_missing_target.c
+ORDER BY udf(c)
+-- !query 12 schema
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+-- !query 12 output
+2
+2
+2
+1
+1
+2
+
+
+-- !query 13
+SELECT udf(count(*)) FROM test_missing_target GROUP BY a ORDER BY udf(b)
+-- !query 13 schema
+struct<>
+-- !query 13 output
+org.apache.spark.sql.AnalysisException
+cannot resolve '`b`' given input columns: [CAST(udf(cast(count(1) as string)) AS BIGINT)]; line 1 pos 70
+
+
+-- !query 14
+SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b)
+-- !query 14 schema
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+-- !query 14 output
+1
+2
+3
+4
+
+
+-- !query 15
+SELECT udf(test_missing_target.b), udf(count(*))
+  FROM test_missing_target GROUP BY b ORDER BY udf(b)
+-- !query 15 schema
+struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+-- !query 15 output
+1	1
+2	2
+3	3
+4	4
+
+
+-- !query 16
+SELECT udf(c) FROM test_missing_target ORDER BY udf(a)
+-- !query 16 schema
+struct<CAST(udf(cast(c as string)) AS STRING):string>
+-- !query 16 output
+XXXX
+ABAB
+ABAB
+BBBB
+BBBB
+bbbb
+cccc
+cccc
+CCCC
+CCCC
+
+
+-- !query 17
+SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b) desc
+-- !query 17 schema
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+-- !query 17 output
+4
+3
+2
+1
+
+
+-- !query 18
+SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc
+-- !query 18 schema
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+-- !query 18 output
+10
+
+
+-- !query 19
+SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 1 ORDER BY 1
+-- !query 19 schema
+struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+-- !query 19 output
+ABAB	2
+BBBB	2
+CCCC	2
+XXXX	1
+bbbb	1
+cccc	2
+
+
+-- !query 20
+SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3
+-- !query 20 schema
+struct<>
+-- !query 20 output
+org.apache.spark.sql.AnalysisException
+GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63
+
+
+-- !query 21
+SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY b ORDER BY udf(b)
+-- !query 21 schema
+struct<>
+-- !query 21 output
+org.apache.spark.sql.AnalysisException
+Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
+
+
+-- !query 22
+SELECT udf(a), udf(a) FROM test_missing_target
+	ORDER BY udf(a)
+-- !query 22 schema
+struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
+-- !query 22 output
+0	0
+1	1
+2	2
+3	3
+4	4
+5	5
+6	6
+7	7
+8	8
+9	9
+
+
+-- !query 23
+SELECT udf(udf(a)/2), udf(udf(a)/2) FROM test_missing_target
+	ORDER BY udf(udf(a)/2)
+-- !query 23 schema
+struct<CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int,CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int>
+-- !query 23 output
+0	0
+0	0
+1	1
+1	1
+2	2
+2	2
+3	3
+3	3
+4	4
+4	4
+
+
+-- !query 24
+SELECT udf(a/2), udf(a/2) FROM test_missing_target
+	GROUP BY a/2 ORDER BY udf(a/2)
+-- !query 24 schema
+struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) as string)) AS INT):int>
+-- !query 24 output
+0	0
+1	1
+2	2
+3	3
+4	4
+
+
+-- !query 25
+SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b ORDER BY udf(x.b)
+-- !query 25 schema
+struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+-- !query 25 output
+1	1
+2	2
+3	3
+4	4
+
+
+-- !query 26
+SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b ORDER BY udf(x.b)
+-- !query 26 schema
+struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+-- !query 26 output
+1
+2
+3
+4
+
+
+-- !query 27
+SELECT a%2, udf(count(udf(b))) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY udf(test_missing_target.a%2)
+-- !query 27 schema
+struct<(a % 2):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
+-- !query 27 output
+0	5
+1	5
+
+
+-- !query 28
+SELECT udf(count(c)) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY udf(lower(test_missing_target.c))
+-- !query 28 schema
+struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint>
+-- !query 28 output
+2
+3
+4
+1
+
+
+-- !query 29
+SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY a ORDER BY udf(b)
+-- !query 29 schema
+struct<>
+-- !query 29 output
+org.apache.spark.sql.AnalysisException
+cannot resolve '`b`' given input columns: [CAST(udf(cast(count(cast(udf(cast(a as string)) as int)) as string)) AS BIGINT)]; line 1 pos 75
+
+
+-- !query 30
+SELECT udf(count(b)) FROM test_missing_target GROUP BY b/2 ORDER BY udf(b/2)
+-- !query 30 schema
+struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
+-- !query 30 output
+1
+5
+4
+
+
+-- !query 31
+SELECT udf(lower(test_missing_target.c)), udf(count(udf(c)))
+  FROM test_missing_target GROUP BY lower(c) ORDER BY udf(lower(c))
+-- !query 31 schema
+struct<CAST(udf(cast(lower(c) as string)) AS STRING):string,CAST(udf(cast(count(cast(udf(cast(c as string)) as string)) as string)) AS BIGINT):bigint>
+-- !query 31 output
+abab	2
+bbbb	3
+cccc	4
+xxxx	1
+
+
+-- !query 32
+SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d)))
+-- !query 32 schema
+struct<CAST(udf(cast(a as string)) AS INT):int>
+-- !query 32 output
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+
+-- !query 33
+SELECT udf(count(b)) FROM test_missing_target
+	GROUP BY (b + 1) / 2 ORDER BY udf((b + 1) / 2) desc
+-- !query 33 schema
+struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
+-- !query 33 output
+7
+3
+
+
+-- !query 34
+SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY b/2 ORDER BY udf(b/2)
+-- !query 34 schema
+struct<>
+-- !query 34 output
+org.apache.spark.sql.AnalysisException
+Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
+
+
+-- !query 35
+SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x,
+test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b/2 ORDER BY udf(x.b/2)
+-- !query 35 schema
+struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
+-- !query 35 output
+0	1
+1	5
+2	4
+
+
+-- !query 36
+SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y
+	WHERE udf(x.a) = udf(y.a)
+	GROUP BY x.b/2
+-- !query 36 schema
+struct<>
+-- !query 36 output
+org.apache.spark.sql.AnalysisException
+Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21
+
+
+-- !query 37
+DROP TABLE test_missing_target
+-- !query 37 schema
+struct<>
+-- !query 37 output
+