[SPARK-28275][SQL][PYTHON][TESTS] Convert and port 'count.sql' into UDF test base
## What changes were proposed in this pull request?

This PR adds some tests converted from 'count.sql' to test UDFs.

<details><summary>Diff comparing to 'count.sql'</summary>
<p>

```diff
diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out
index b8a86d4c44..9476937abd 100644
--- a/sql/core/src/test/resources/sql-tests/results/count.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out
@@ -14,42 +14,42 @@ struct<>
 -- !query 1
 SELECT
-  count(*), count(1), count(null), count(a), count(b), count(a + b), count((a, b))
+  udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b)))
 FROM testData
 -- !query 1 schema
-struct<count(1):bigint,count(1):bigint,count(NULL):bigint,count(a):bigint,count(b):bigint,count((a + b)):bigint,count(named_struct(a, a, b, b)):bigint>
+struct<udf(count(1)):string,udf(count(1)):string,udf(count(null)):string,udf(count(a)):string,udf(count(b)):string,udf(count((a + b))):string,udf(count(named_struct(a, a, b, b))):string>
 -- !query 1 output
 7 7 0 5 5 4 7


 -- !query 2
 SELECT
-  count(DISTINCT 1),
-  count(DISTINCT null),
-  count(DISTINCT a),
-  count(DISTINCT b),
-  count(DISTINCT (a + b)),
-  count(DISTINCT (a, b))
+  udf(count(DISTINCT 1)),
+  udf(count(DISTINCT null)),
+  udf(count(DISTINCT a)),
+  udf(count(DISTINCT b)),
+  udf(count(DISTINCT (a + b))),
+  udf(count(DISTINCT (a, b)))
 FROM testData
 -- !query 2 schema
-struct<count(DISTINCT 1):bigint,count(DISTINCT NULL):bigint,count(DISTINCT a):bigint,count(DISTINCT b):bigint,count(DISTINCT (a + b)):bigint,count(DISTINCT named_struct(a, a, b, b)):bigint>
+struct<udf(count(distinct 1)):string,udf(count(distinct null)):string,udf(count(distinct a)):string,udf(count(distinct b)):string,udf(count(distinct (a + b))):string,udf(count(distinct named_struct(a, a, b, b))):string>
 -- !query 2 output
 1 0 2 2 2 6


 -- !query 3
-SELECT count(a, b), count(b, a), count(testData.*) FROM testData
+SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData
 -- !query 3 schema
-struct<count(a, b):bigint,count(b, a):bigint,count(a, b):bigint>
+struct<udf(count(a, b)):string,udf(count(b, a)):string,udf(count(a, b)):string>
 -- !query 3 output
 4 4 4


 -- !query 4
 SELECT
-  count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*)
+  udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*))
 FROM testData
 -- !query 4 schema
-struct<count(DISTINCT a, b):bigint,count(DISTINCT b, a):bigint,count(DISTINCT a, b):bigint,count(DISTINCT a, b):bigint>
+struct<udf(count(distinct a, b)):string,udf(count(distinct b, a)):string,udf(count(distinct a, b)):string,udf(count(distinct a, b)):string>
 -- !query 4 output
 3 3 3 3
```

</p>
</details>

## How was this patch tested?

Tested as guided in SPARK-27921.

Closes #25089 from vinodkc/br_Fix_SPARK-28275.

Authored-by: Vinod KC <vinod.kc.in@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
06ac7d5966
commit
b598dfd5b4
|
@ -0,0 +1,28 @@
|
|||
-- This test file was converted from count.sql
|
||||
-- Test data: a temporary view `testData` with two columns (a, b) and seven
-- literal rows. The rows deliberately include a duplicated pair (1, 1), a row
-- with NULL only in a, a row with NULL only in b, and a row with NULL in both,
-- so the queries below can exercise count()'s NULL and DISTINCT handling.
|
||||
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
|
||||
(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null)
|
||||
AS testData(a, b);
|
||||
|
||||
-- count with single expression
-- Each aggregate takes exactly one argument: *, a literal, NULL, a column,
-- an arithmetic expression (a + b), and a parenthesized pair (a, b), which the
-- recorded schema shows as named_struct(a, a, b, b). Every aggregate result is
-- wrapped in udf(); udf is not defined in this file (presumably registered by
-- the test harness -- confirm there).
-- Recorded expected output for this query: 7 7 0 5 5 4 7.
|
||||
SELECT
|
||||
udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b)))
|
||||
FROM testData;
|
||||
|
||||
-- distinct count with single expression
-- Same argument shapes as the non-distinct query (literal, NULL, column,
-- arithmetic expression, and the pair (a, b)), but each count uses DISTINCT.
-- Every aggregate result is wrapped in udf().
-- Recorded expected output for this query: 1 0 2 2 2 6.
|
||||
SELECT
|
||||
udf(count(DISTINCT 1)),
|
||||
udf(count(DISTINCT null)),
|
||||
udf(count(DISTINCT a)),
|
||||
udf(count(DISTINCT b)),
|
||||
udf(count(DISTINCT (a + b))),
|
||||
udf(count(DISTINCT (a, b)))
|
||||
FROM testData;
|
||||
|
||||
-- count with multiple expressions
-- count() is given two arguments in both orders, plus testData.*, which the
-- recorded schema shows expanded to (a, b). Every aggregate result is wrapped
-- in udf().
-- Recorded expected output for this query: 4 4 4.
|
||||
SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData;
|
||||
|
||||
-- distinct count with multiple expressions
-- DISTINCT counts over two columns in both orders, plus the star forms
-- (* and testData.*), both of which the recorded schema shows expanded to
-- (a, b). Every aggregate result is wrapped in udf().
-- Recorded expected output for this query: 3 3 3 3.
|
||||
SELECT
|
||||
udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*))
|
||||
FROM testData;
|
|
@ -0,0 +1,55 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 5
|
||||
|
||||
|
||||
-- !query 0
|
||||
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
|
||||
(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null)
|
||||
AS testData(a, b)
|
||||
-- !query 0 schema
|
||||
struct<>
|
||||
-- !query 0 output
|
||||
|
||||
|
||||
|
||||
-- !query 1
|
||||
SELECT
|
||||
udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b)))
|
||||
FROM testData
|
||||
-- !query 1 schema
|
||||
struct<udf(count(1)):string,udf(count(1)):string,udf(count(null)):string,udf(count(a)):string,udf(count(b)):string,udf(count((a + b))):string,udf(count(named_struct(a, a, b, b))):string>
|
||||
-- !query 1 output
|
||||
7 7 0 5 5 4 7
|
||||
|
||||
|
||||
-- !query 2
|
||||
SELECT
|
||||
udf(count(DISTINCT 1)),
|
||||
udf(count(DISTINCT null)),
|
||||
udf(count(DISTINCT a)),
|
||||
udf(count(DISTINCT b)),
|
||||
udf(count(DISTINCT (a + b))),
|
||||
udf(count(DISTINCT (a, b)))
|
||||
FROM testData
|
||||
-- !query 2 schema
|
||||
struct<udf(count(distinct 1)):string,udf(count(distinct null)):string,udf(count(distinct a)):string,udf(count(distinct b)):string,udf(count(distinct (a + b))):string,udf(count(distinct named_struct(a, a, b, b))):string>
|
||||
-- !query 2 output
|
||||
1 0 2 2 2 6
|
||||
|
||||
|
||||
-- !query 3
|
||||
SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData
|
||||
-- !query 3 schema
|
||||
struct<udf(count(a, b)):string,udf(count(b, a)):string,udf(count(a, b)):string>
|
||||
-- !query 3 output
|
||||
4 4 4
|
||||
|
||||
|
||||
-- !query 4
|
||||
SELECT
|
||||
udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*))
|
||||
FROM testData
|
||||
-- !query 4 schema
|
||||
struct<udf(count(distinct a, b)):string,udf(count(distinct b, a)):string,udf(count(distinct a, b)):string,udf(count(distinct a, b)):string>
|
||||
-- !query 4 output
|
||||
3 3 3 3
|
Loading…
Reference in a new issue