[SPARK-28275][SQL][PYTHON][TESTS] Convert and port 'count.sql' into UDF test base

## What changes were proposed in this pull request?

This PR adds tests converted from 'count.sql' to exercise UDFs: each `count(...)` aggregate in the original queries is wrapped in `udf(...)`.

<details><summary>Diff comparing to 'count.sql'</summary>
<p>

```diff
diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out
index b8a86d4c44..9476937abd 100644
--- a/sql/core/src/test/resources/sql-tests/results/count.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out
@@ -14,42 +14,42 @@ struct<>

 -- !query 1
 SELECT
-  count(*), count(1), count(null), count(a), count(b), count(a + b), count((a, b))
+  udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b)))
 FROM testData
 -- !query 1 schema
-struct<count(1):bigint,count(1):bigint,count(NULL):bigint,count(a):bigint,count(b):bigint,count((a + b)):bigint,count(named_struct(a, a, b, b)):bigint>
+struct<udf(count(1)):string,udf(count(1)):string,udf(count(null)):string,udf(count(a)):string,udf(count(b)):string,udf(count((a + b))):string,udf(count(named_struct(a, a, b, b))):string>
 -- !query 1 output
 7	7	0	5	5	4	7

 -- !query 2
 SELECT
-  count(DISTINCT 1),
-  count(DISTINCT null),
-  count(DISTINCT a),
-  count(DISTINCT b),
-  count(DISTINCT (a + b)),
-  count(DISTINCT (a, b))
+  udf(count(DISTINCT 1)),
+  udf(count(DISTINCT null)),
+  udf(count(DISTINCT a)),
+  udf(count(DISTINCT b)),
+  udf(count(DISTINCT (a + b))),
+  udf(count(DISTINCT (a, b)))
 FROM testData
 -- !query 2 schema
-struct<count(DISTINCT 1):bigint,count(DISTINCT NULL):bigint,count(DISTINCT a):bigint,count(DISTINCT b):bigint,count(DISTINCT (a + b)):bigint,count(DISTINCT named_struct(a, a, b, b)):bigint>
+struct<udf(count(distinct 1)):string,udf(count(distinct null)):string,udf(count(distinct a)):string,udf(count(distinct b)):string,udf(count(distinct (a + b))):string,udf(count(distinct named_struct(a, a, b, b))):string>
 -- !query 2 output
 1	0	2	2	2	6

 -- !query 3
-SELECT count(a, b), count(b, a), count(testData.*) FROM testData
+SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData
 -- !query 3 schema
-struct<count(a, b):bigint,count(b, a):bigint,count(a, b):bigint>
+struct<udf(count(a, b)):string,udf(count(b, a)):string,udf(count(a, b)):string>
 -- !query 3 output
 4	4	4

 -- !query 4
 SELECT
-  count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*)
+  udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*))
 FROM testData
 -- !query 4 schema
-struct<count(DISTINCT a, b):bigint,count(DISTINCT b, a):bigint,count(DISTINCT a, b):bigint,count(DISTINCT a, b):bigint>
+struct<udf(count(distinct a, b)):string,udf(count(distinct b, a)):string,udf(count(distinct a, b)):string,udf(count(distinct a, b)):string>
 -- !query 4 output
 3	3	3	3

```

</p>
</details>

## How was this patch tested?

Tested as guided in SPARK-27921.

Closes #25089 from vinodkc/br_Fix_SPARK-28275.

Authored-by: Vinod KC <vinod.kc.in@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Vinod KC 2019-07-11 09:39:53 +09:00 committed by HyukjinKwon
parent 06ac7d5966
commit b598dfd5b4
2 changed files with 83 additions and 0 deletions

View file

@@ -0,0 +1,28 @@
-- This test file was converted from count.sql.
-- Each count(...) aggregate of the original queries is wrapped in udf(...), so every
-- aggregate result passes through a UDF call before it is returned.
-- NOTE(review): `udf` is not defined in this file. Presumably the test harness
-- registers it (see SPARK-27921). Confirm before relying on its semantics.
-- NOTE(review): the golden result file repeats each statement's text line for line,
-- so reformatting a statement here likely requires regenerating that file. Confirm.
--
-- Test data: seven (a, b) rows. The set contains a duplicated row (1, 1), one row
-- where only a is NULL, one row where only b is NULL, and one row where both are NULL,
-- so NULL handling and de-duplication are both exercised.
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null)
AS testData(a, b);
-- count with single expression
-- Columns, in order: all rows, a constant, a NULL literal, each column on its own,
-- an arithmetic expression over both columns, and a tuple of both columns.
-- Recorded result for this data: 7 7 0 5 5 4 7. The tuple (a, b) is counted for
-- every row, including the row where both a and b are NULL.
SELECT
udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b)))
FROM testData;
-- distinct count with single expression
-- Same expression shapes as the previous query (minus count(*)), with DISTINCT applied.
-- Recorded result for this data: 1 0 2 2 2 6.
SELECT
udf(count(DISTINCT 1)),
udf(count(DISTINCT null)),
udf(count(DISTINCT a)),
udf(count(DISTINCT b)),
udf(count(DISTINCT (a + b))),
udf(count(DISTINCT (a, b)))
FROM testData;
-- count with multiple expressions
-- Covers both argument orders plus the qualified star, which the recorded schema
-- shows as count(a, b). Recorded result for this data: 4 4 4, i.e. the rows in
-- which neither a nor b is NULL.
SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData;
-- distinct count with multiple expressions
-- Covers both argument orders plus the bare and qualified star, which the recorded
-- schema shows as count(distinct a, b). Recorded result for this data: 3 3 3 3.
SELECT
udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*))
FROM testData;

View file

@@ -0,0 +1,55 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 5
-- !query 0
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null)
AS testData(a, b)
-- !query 0 schema
struct<>
-- !query 0 output
-- !query 1
SELECT
udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b)))
FROM testData
-- !query 1 schema
struct<udf(count(1)):string,udf(count(1)):string,udf(count(null)):string,udf(count(a)):string,udf(count(b)):string,udf(count((a + b))):string,udf(count(named_struct(a, a, b, b))):string>
-- !query 1 output
7 7 0 5 5 4 7
-- !query 2
SELECT
udf(count(DISTINCT 1)),
udf(count(DISTINCT null)),
udf(count(DISTINCT a)),
udf(count(DISTINCT b)),
udf(count(DISTINCT (a + b))),
udf(count(DISTINCT (a, b)))
FROM testData
-- !query 2 schema
struct<udf(count(distinct 1)):string,udf(count(distinct null)):string,udf(count(distinct a)):string,udf(count(distinct b)):string,udf(count(distinct (a + b))):string,udf(count(distinct named_struct(a, a, b, b))):string>
-- !query 2 output
1 0 2 2 2 6
-- !query 3
SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData
-- !query 3 schema
struct<udf(count(a, b)):string,udf(count(b, a)):string,udf(count(a, b)):string>
-- !query 3 output
4 4 4
-- !query 4
SELECT
udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*))
FROM testData
-- !query 4 schema
struct<udf(count(distinct a, b)):string,udf(count(distinct b, a)):string,udf(count(distinct a, b)):string,udf(count(distinct a, b)):string>
-- !query 4 output
3 3 3 3