[SPARK-28391][PYTHON][SQL][TESTS][FOLLOW-UP] Add UDF cases into GROUP BY clause in 'pgSQL/select_implicit.sql'
## What changes were proposed in this pull request? This PR adds UDF cases into group by clause in 'pgSQL/select_implicit.sql' <details><summary>Diff comparing to 'pgSQL/select_implicit.sql'</summary> <p> ```diff diff --git a/home/root1/src/spark/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out b/home/root1/src/spark/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out index 17303b2..0675820 100755 --- a/home/root1/src/spark/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out +++ b/home/root1/src/spark/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out -91,11 +91,9 struct<> -- !query 11 -SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY -udf(test_missing_target.c) -ORDER BY udf(c) +SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c -- !query 11 schema -struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> +struct<c:string,count(1):bigint> -- !query 11 output ABAB 2 BBBB 2 -106,10 +104,9 cccc 2 -- !query 12 -SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(test_missing_target.c) -ORDER BY udf(c) +SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c -- !query 12 schema -struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> +struct<count(1):bigint> -- !query 12 output 2 2 -120,18 +117,18 struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 13 -SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b) +SELECT count(*) FROM test_missing_target GROUP BY a ORDER BY b -- !query 13 schema struct<> -- !query 13 output org.apache.spark.sql.AnalysisException -cannot resolve '`b`' given input columns: [CAST(udf(cast(count(1) as string)) AS BIGINT)]; line 1 pos 75 +cannot resolve '`b`' given input columns: [count(1)]; line 1 pos 61 -- !query 14 -SELECT udf(count(*)) FROM 
test_missing_target GROUP BY udf(b) ORDER BY udf(b) +SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b -- !query 14 schema -struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> +struct<count(1):bigint> -- !query 14 output 1 2 -140,10 +137,10 struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 15 -SELECT udf(test_missing_target.b), udf(count(*)) - FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b) +SELECT test_missing_target.b, count(*) + FROM test_missing_target GROUP BY b ORDER BY b -- !query 15 schema -struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> +struct<b:int,count(1):bigint> -- !query 15 output 1 1 2 2 -152,9 +149,9 struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string) -- !query 16 -SELECT udf(c) FROM test_missing_target ORDER BY udf(a) +SELECT c FROM test_missing_target ORDER BY a -- !query 16 schema -struct<CAST(udf(cast(c as string)) AS STRING):string> +struct<c:string> -- !query 16 output XXXX ABAB -169,10 +166,9 CCCC -- !query 17 -SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b) -desc +SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc -- !query 17 schema -struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> +struct<count(1):bigint> -- !query 17 output 4 3 -181,17 +177,17 struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 18 -SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc +SELECT count(*) FROM test_missing_target ORDER BY 1 desc -- !query 18 schema -struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> +struct<count(1):bigint> -- !query 18 output 10 -- !query 19 -SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 1 ORDER BY 1 +SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1 -- !query 19 schema -struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> 
+struct<c:string,count(1):bigint> -- !query 19 output ABAB 2 BBBB 2 -202,30 +198,30 cccc 2 -- !query 20 -SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3 +SELECT c, count(*) FROM test_missing_target GROUP BY 3 -- !query 20 schema struct<> -- !query 20 output org.apache.spark.sql.AnalysisException -GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63 +GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53 -- !query 21 -SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y - WHERE udf(x.a) = udf(y.a) - GROUP BY udf(b) ORDER BY udf(b) +SELECT count(*) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY b ORDER BY b -- !query 21 schema struct<> -- !query 21 output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14 +Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 -- !query 22 -SELECT udf(a), udf(a) FROM test_missing_target - ORDER BY udf(a) +SELECT a, a FROM test_missing_target + ORDER BY a -- !query 22 schema -struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int> +struct<a:int,a:int> -- !query 22 output 0 0 1 1 -240,10 +236,10 struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS IN -- !query 23 -SELECT udf(udf(a)/2), udf(udf(a)/2) FROM test_missing_target - ORDER BY udf(udf(a)/2) +SELECT a/2, a/2 FROM test_missing_target + ORDER BY a/2 -- !query 23 schema -struct<CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int,CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int> +struct<(a div 2):int,(a div 2):int> -- !query 23 output 0 0 0 0 -258,10 +254,10 struct<CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS -- !query 24 -SELECT udf(a/2), udf(a/2) FROM test_missing_target - GROUP BY udf(a/2) ORDER BY udf(a/2) +SELECT a/2, a/2 FROM test_missing_target + GROUP 
BY a/2 ORDER BY a/2 -- !query 24 schema -struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) as string)) AS INT):int> +struct<(a div 2):int,(a div 2):int> -- !query 24 output 0 0 1 1 -271,11 +267,11 struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) a -- !query 25 -SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y - WHERE udf(x.a) = udf(y.a) - GROUP BY udf(x.b) ORDER BY udf(x.b) +SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b ORDER BY x.b -- !query 25 schema -struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> +struct<b:int,count(1):bigint> -- !query 25 output 1 1 2 2 -284,11 +280,11 struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string) -- !query 26 -SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y - WHERE udf(x.a) = udf(y.a) - GROUP BY udf(x.b) ORDER BY udf(x.b) +SELECT count(*) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b ORDER BY x.b -- !query 26 schema -struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> +struct<count(1):bigint> -- !query 26 output 1 2 -297,22 +293,22 struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint> -- !query 27 -SELECT udf(a%2), udf(count(udf(b))) FROM test_missing_target -GROUP BY udf(test_missing_target.a%2) -ORDER BY udf(test_missing_target.a%2) +SELECT a%2, count(b) FROM test_missing_target +GROUP BY test_missing_target.a%2 +ORDER BY test_missing_target.a%2 -- !query 27 schema -struct<CAST(udf(cast((a % 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint> +struct<(a % 2):int,count(b):bigint> -- !query 27 output 0 5 1 5 -- !query 28 -SELECT udf(count(c)) FROM test_missing_target -GROUP BY udf(lower(test_missing_target.c)) -ORDER BY udf(lower(test_missing_target.c)) +SELECT count(c) 
FROM test_missing_target +GROUP BY lower(test_missing_target.c) +ORDER BY lower(test_missing_target.c) -- !query 28 schema -struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint> +struct<count(c):bigint> -- !query 28 output 2 3 -321,18 +317,18 struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint> -- !query 29 -SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b) +SELECT count(a) FROM test_missing_target GROUP BY a ORDER BY b -- !query 29 schema struct<> -- !query 29 output org.apache.spark.sql.AnalysisException -cannot resolve '`b`' given input columns: [CAST(udf(cast(count(cast(udf(cast(a as string)) as int)) as string)) AS BIGINT)]; line 1 pos 80 +cannot resolve '`b`' given input columns: [count(a)]; line 1 pos 61 -- !query 30 -SELECT udf(count(b)) FROM test_missing_target GROUP BY udf(b/2) ORDER BY udf(b/2) +SELECT count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2 -- !query 30 schema -struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint> +struct<count(b):bigint> -- !query 30 output 1 5 -340,10 +336,10 struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint> -- !query 31 -SELECT udf(lower(test_missing_target.c)), udf(count(udf(c))) - FROM test_missing_target GROUP BY udf(lower(c)) ORDER BY udf(lower(c)) +SELECT lower(test_missing_target.c), count(c) + FROM test_missing_target GROUP BY lower(c) ORDER BY lower(c) -- !query 31 schema -struct<CAST(udf(cast(lower(c) as string)) AS STRING):string,CAST(udf(cast(count(cast(udf(cast(c as string)) as string)) as string)) AS BIGINT):bigint> +struct<lower(c):string,count(c):bigint> -- !query 31 output abab 2 bbbb 3 -352,9 +348,9 xxxx 1 -- !query 32 -SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d))) +SELECT a FROM test_missing_target ORDER BY upper(d) -- !query 32 schema -struct<CAST(udf(cast(a as string)) AS INT):int> +struct<a:int> -- !query 32 output 0 1 -369,33 +365,32 struct<CAST(udf(cast(a as string)) AS INT):int> -- !query 33 -SELECT 
udf(count(b)) FROM test_missing_target - GROUP BY udf((b + 1) / 2) ORDER BY udf((b + 1) / 2) desc +SELECT count(b) FROM test_missing_target + GROUP BY (b + 1) / 2 ORDER BY (b + 1) / 2 desc -- !query 33 schema -struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint> +struct<count(b):bigint> -- !query 33 output 7 3 -- !query 34 -SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y - WHERE udf(x.a) = udf(y.a) - GROUP BY udf(b/2) ORDER BY udf(b/2) +SELECT count(x.a) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY b/2 ORDER BY b/2 -- !query 34 schema struct<> -- !query 34 output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14 +Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 -- !query 35 -SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x, -test_missing_target y - WHERE udf(x.a) = udf(y.a) - GROUP BY udf(x.b/2) ORDER BY udf(x.b/2) +SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b/2 ORDER BY x.b/2 -- !query 35 schema -struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint> +struct<(b div 2):int,count(b):bigint> -- !query 35 output 0 1 1 5 -403,14 +398,14 struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast( -- !query 36 -SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y - WHERE udf(x.a) = udf(y.a) - GROUP BY udf(x.b/2) +SELECT count(b) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b/2 -- !query 36 schema struct<> -- !query 36 output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21 +Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13 -- !query 37 ``` </p> </details> ## How was this patch tested? 
Tested as guided in SPARK-27921. Closes #25350 from Udbhav30/master. Authored-by: Udbhav30 <u.agrawal30@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
da3d4b6a35
commit
150dbc5dc2
|
@ -10,8 +10,6 @@
|
|||
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_implicit.sql
|
||||
--
|
||||
-- This test file was converted from pgSQL/select_implicit.sql
|
||||
-- [SPARK-28445] Inconsistency between Scala and Python/Panda udfs when groupby with udf() is used
|
||||
-- TODO: We should add UDFs in GROUP BY clause when [SPARK-28445] is resolved.
|
||||
|
||||
-- load test data
|
||||
CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet;
|
||||
|
@ -29,29 +27,29 @@ INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
|
|||
|
||||
-- w/ existing GROUP BY target
|
||||
SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY
|
||||
test_missing_target.c
|
||||
udf(test_missing_target.c)
|
||||
ORDER BY udf(c);
|
||||
|
||||
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY test_missing_target.c
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(test_missing_target.c)
|
||||
ORDER BY udf(c);
|
||||
|
||||
-- w/o existing GROUP BY target and w/o existing a different ORDER BY target
|
||||
-- failure expected
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY a ORDER BY udf(b);
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b);
|
||||
|
||||
-- w/o existing GROUP BY target and w/o existing same ORDER BY target
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b);
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b);
|
||||
|
||||
-- w/ existing GROUP BY target using a relation name in target
|
||||
SELECT udf(test_missing_target.b), udf(count(*))
|
||||
FROM test_missing_target GROUP BY b ORDER BY udf(b);
|
||||
FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b);
|
||||
|
||||
-- w/o existing GROUP BY target
|
||||
SELECT udf(c) FROM test_missing_target ORDER BY udf(a);
|
||||
|
||||
-- w/o existing ORDER BY target
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b) desc;
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b) desc;
|
||||
|
||||
-- group using reference number
|
||||
SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc;
|
||||
|
@ -67,7 +65,7 @@ SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3;
|
|||
-- failure expected
|
||||
SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY b ORDER BY udf(b);
|
||||
GROUP BY udf(b) ORDER BY udf(b);
|
||||
|
||||
-- order w/ target under ambiguous condition
|
||||
-- failure NOT expected
|
||||
|
@ -82,17 +80,17 @@ SELECT udf(udf(a)/2), udf(udf(a)/2) FROM test_missing_target
|
|||
-- group expression w/ target under ambiguous condition
|
||||
-- failure NOT expected
|
||||
SELECT udf(a/2), udf(a/2) FROM test_missing_target
|
||||
GROUP BY a/2 ORDER BY udf(a/2);
|
||||
GROUP BY udf(a/2) ORDER BY udf(a/2);
|
||||
|
||||
-- group w/ existing GROUP BY target under ambiguous condition
|
||||
SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY x.b ORDER BY udf(x.b);
|
||||
GROUP BY udf(x.b) ORDER BY udf(x.b);
|
||||
|
||||
-- group w/o existing GROUP BY target under ambiguous condition
|
||||
SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY x.b ORDER BY udf(x.b);
|
||||
GROUP BY udf(x.b) ORDER BY udf(x.b);
|
||||
|
||||
-- [SPARK-28329] SELECT INTO syntax
|
||||
-- group w/o existing GROUP BY target under ambiguous condition
|
||||
|
@ -107,50 +105,50 @@ SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
|
|||
-- Functions and expressions
|
||||
|
||||
-- w/ existing GROUP BY target
|
||||
SELECT a%2, udf(count(udf(b))) FROM test_missing_target
|
||||
GROUP BY test_missing_target.a%2
|
||||
SELECT udf(a%2), udf(count(udf(b))) FROM test_missing_target
|
||||
GROUP BY udf(test_missing_target.a%2)
|
||||
ORDER BY udf(test_missing_target.a%2);
|
||||
|
||||
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
|
||||
SELECT udf(count(c)) FROM test_missing_target
|
||||
GROUP BY lower(test_missing_target.c)
|
||||
GROUP BY udf(lower(test_missing_target.c))
|
||||
ORDER BY udf(lower(test_missing_target.c));
|
||||
|
||||
-- w/o existing GROUP BY target and w/o existing a different ORDER BY target
|
||||
-- failure expected
|
||||
SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY a ORDER BY udf(b);
|
||||
SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b);
|
||||
|
||||
-- w/o existing GROUP BY target and w/o existing same ORDER BY target
|
||||
SELECT udf(count(b)) FROM test_missing_target GROUP BY b/2 ORDER BY udf(b/2);
|
||||
SELECT udf(count(b)) FROM test_missing_target GROUP BY udf(b/2) ORDER BY udf(b/2);
|
||||
|
||||
-- w/ existing GROUP BY target using a relation name in target
|
||||
SELECT udf(lower(test_missing_target.c)), udf(count(udf(c)))
|
||||
FROM test_missing_target GROUP BY lower(c) ORDER BY udf(lower(c));
|
||||
FROM test_missing_target GROUP BY udf(lower(c)) ORDER BY udf(lower(c));
|
||||
|
||||
-- w/o existing GROUP BY target
|
||||
SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d)));
|
||||
|
||||
-- w/o existing ORDER BY target
|
||||
SELECT udf(count(b)) FROM test_missing_target
|
||||
GROUP BY (b + 1) / 2 ORDER BY udf((b + 1) / 2) desc;
|
||||
GROUP BY udf((b + 1) / 2) ORDER BY udf((b + 1) / 2) desc;
|
||||
|
||||
-- group w/o existing GROUP BY and ORDER BY target under ambiguous condition
|
||||
-- failure expected
|
||||
SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY b/2 ORDER BY udf(b/2);
|
||||
GROUP BY udf(b/2) ORDER BY udf(b/2);
|
||||
|
||||
-- group w/ existing GROUP BY target under ambiguous condition
|
||||
SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x,
|
||||
test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY x.b/2 ORDER BY udf(x.b/2);
|
||||
GROUP BY udf(x.b/2) ORDER BY udf(x.b/2);
|
||||
|
||||
-- group w/o existing GROUP BY target under ambiguous condition
|
||||
-- failure expected due to ambiguous b in count(b)
|
||||
SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY x.b/2;
|
||||
GROUP BY udf(x.b/2);
|
||||
|
||||
-- [SPARK-28329] SELECT INTO syntax
|
||||
-- group w/o existing GROUP BY target under ambiguous condition
|
||||
|
|
|
@ -92,7 +92,7 @@ struct<>
|
|||
|
||||
-- !query 11
|
||||
SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY
|
||||
test_missing_target.c
|
||||
udf(test_missing_target.c)
|
||||
ORDER BY udf(c)
|
||||
-- !query 11 schema
|
||||
struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
||||
|
@ -106,7 +106,7 @@ cccc 2
|
|||
|
||||
|
||||
-- !query 12
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY test_missing_target.c
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(test_missing_target.c)
|
||||
ORDER BY udf(c)
|
||||
-- !query 12 schema
|
||||
struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
||||
|
@ -120,16 +120,16 @@ struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
|||
|
||||
|
||||
-- !query 13
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY a ORDER BY udf(b)
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b)
|
||||
-- !query 13 schema
|
||||
struct<>
|
||||
-- !query 13 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve '`b`' given input columns: [CAST(udf(cast(count(1) as string)) AS BIGINT)]; line 1 pos 70
|
||||
cannot resolve '`b`' given input columns: [CAST(udf(cast(count(1) as string)) AS BIGINT)]; line 1 pos 75
|
||||
|
||||
|
||||
-- !query 14
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b)
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b)
|
||||
-- !query 14 schema
|
||||
struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
||||
-- !query 14 output
|
||||
|
@ -141,7 +141,7 @@ struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
|||
|
||||
-- !query 15
|
||||
SELECT udf(test_missing_target.b), udf(count(*))
|
||||
FROM test_missing_target GROUP BY b ORDER BY udf(b)
|
||||
FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b)
|
||||
-- !query 15 schema
|
||||
struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
||||
-- !query 15 output
|
||||
|
@ -169,7 +169,7 @@ CCCC
|
|||
|
||||
|
||||
-- !query 17
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY b ORDER BY udf(b) desc
|
||||
SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b) desc
|
||||
-- !query 17 schema
|
||||
struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
||||
-- !query 17 output
|
||||
|
@ -212,12 +212,12 @@ GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63
|
|||
-- !query 21
|
||||
SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY b ORDER BY udf(b)
|
||||
GROUP BY udf(b) ORDER BY udf(b)
|
||||
-- !query 21 schema
|
||||
struct<>
|
||||
-- !query 21 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
|
||||
Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14
|
||||
|
||||
|
||||
-- !query 22
|
||||
|
@ -258,7 +258,7 @@ struct<CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS
|
|||
|
||||
-- !query 24
|
||||
SELECT udf(a/2), udf(a/2) FROM test_missing_target
|
||||
GROUP BY a/2 ORDER BY udf(a/2)
|
||||
GROUP BY udf(a/2) ORDER BY udf(a/2)
|
||||
-- !query 24 schema
|
||||
struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) as string)) AS INT):int>
|
||||
-- !query 24 output
|
||||
|
@ -272,7 +272,7 @@ struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) a
|
|||
-- !query 25
|
||||
SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY x.b ORDER BY udf(x.b)
|
||||
GROUP BY udf(x.b) ORDER BY udf(x.b)
|
||||
-- !query 25 schema
|
||||
struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
||||
-- !query 25 output
|
||||
|
@ -285,7 +285,7 @@ struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)
|
|||
-- !query 26
|
||||
SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY x.b ORDER BY udf(x.b)
|
||||
GROUP BY udf(x.b) ORDER BY udf(x.b)
|
||||
-- !query 26 schema
|
||||
struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
||||
-- !query 26 output
|
||||
|
@ -296,11 +296,11 @@ struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
|
|||
|
||||
|
||||
-- !query 27
|
||||
SELECT a%2, udf(count(udf(b))) FROM test_missing_target
|
||||
GROUP BY test_missing_target.a%2
|
||||
SELECT udf(a%2), udf(count(udf(b))) FROM test_missing_target
|
||||
GROUP BY udf(test_missing_target.a%2)
|
||||
ORDER BY udf(test_missing_target.a%2)
|
||||
-- !query 27 schema
|
||||
struct<(a % 2):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
|
||||
struct<CAST(udf(cast((a % 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
|
||||
-- !query 27 output
|
||||
0 5
|
||||
1 5
|
||||
|
@ -308,7 +308,7 @@ struct<(a % 2):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as s
|
|||
|
||||
-- !query 28
|
||||
SELECT udf(count(c)) FROM test_missing_target
|
||||
GROUP BY lower(test_missing_target.c)
|
||||
GROUP BY udf(lower(test_missing_target.c))
|
||||
ORDER BY udf(lower(test_missing_target.c))
|
||||
-- !query 28 schema
|
||||
struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint>
|
||||
|
@ -320,16 +320,16 @@ struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint>
|
|||
|
||||
|
||||
-- !query 29
|
||||
SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY a ORDER BY udf(b)
|
||||
SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b)
|
||||
-- !query 29 schema
|
||||
struct<>
|
||||
-- !query 29 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve '`b`' given input columns: [CAST(udf(cast(count(cast(udf(cast(a as string)) as int)) as string)) AS BIGINT)]; line 1 pos 75
|
||||
cannot resolve '`b`' given input columns: [CAST(udf(cast(count(cast(udf(cast(a as string)) as int)) as string)) AS BIGINT)]; line 1 pos 80
|
||||
|
||||
|
||||
-- !query 30
|
||||
SELECT udf(count(b)) FROM test_missing_target GROUP BY b/2 ORDER BY udf(b/2)
|
||||
SELECT udf(count(b)) FROM test_missing_target GROUP BY udf(b/2) ORDER BY udf(b/2)
|
||||
-- !query 30 schema
|
||||
struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
|
||||
-- !query 30 output
|
||||
|
@ -340,7 +340,7 @@ struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
|
|||
|
||||
-- !query 31
|
||||
SELECT udf(lower(test_missing_target.c)), udf(count(udf(c)))
|
||||
FROM test_missing_target GROUP BY lower(c) ORDER BY udf(lower(c))
|
||||
FROM test_missing_target GROUP BY udf(lower(c)) ORDER BY udf(lower(c))
|
||||
-- !query 31 schema
|
||||
struct<CAST(udf(cast(lower(c) as string)) AS STRING):string,CAST(udf(cast(count(cast(udf(cast(c as string)) as string)) as string)) AS BIGINT):bigint>
|
||||
-- !query 31 output
|
||||
|
@ -369,7 +369,7 @@ struct<CAST(udf(cast(a as string)) AS INT):int>
|
|||
|
||||
-- !query 33
|
||||
SELECT udf(count(b)) FROM test_missing_target
|
||||
GROUP BY (b + 1) / 2 ORDER BY udf((b + 1) / 2) desc
|
||||
GROUP BY udf((b + 1) / 2) ORDER BY udf((b + 1) / 2) desc
|
||||
-- !query 33 schema
|
||||
struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
|
||||
-- !query 33 output
|
||||
|
@ -380,19 +380,19 @@ struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
|
|||
-- !query 34
|
||||
SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY b/2 ORDER BY udf(b/2)
|
||||
GROUP BY udf(b/2) ORDER BY udf(b/2)
|
||||
-- !query 34 schema
|
||||
struct<>
|
||||
-- !query 34 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
|
||||
Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14
|
||||
|
||||
|
||||
-- !query 35
|
||||
SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x,
|
||||
test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY x.b/2 ORDER BY udf(x.b/2)
|
||||
GROUP BY udf(x.b/2) ORDER BY udf(x.b/2)
|
||||
-- !query 35 schema
|
||||
struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
|
||||
-- !query 35 output
|
||||
|
@ -404,7 +404,7 @@ struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast(
|
|||
-- !query 36
|
||||
SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y
|
||||
WHERE udf(x.a) = udf(y.a)
|
||||
GROUP BY x.b/2
|
||||
GROUP BY udf(x.b/2)
|
||||
-- !query 36 schema
|
||||
struct<>
|
||||
-- !query 36 output
|
||||
|
|
Loading…
Reference in a new issue