[SPARK-25736][SQL][TEST] add tests to verify the behavior of multi-column count
## What changes were proposed in this pull request? AFAIK, multi-column count is not widely supported by mainstream databases (PostgreSQL, for example, doesn't support it), and, as near as I can tell, the SQL standard doesn't define it clearly. Since Spark supports it, we should clearly document the current behavior and add tests to verify it. ## How was this patch tested? N/A Closes #22728 from cloud-fan/doc. Authored-by: Wenchen Fan <wenchen@databricks.com> Signed-off-by: hyukjinkwon <gurwls223@apache.org>
This commit is contained in:
parent
5c7f6b6636
commit
e028fd3aed
|
@ -52,7 +52,7 @@ abstract class CountLike extends DeclarativeAggregate {
|
|||
usage = """
|
||||
_FUNC_(*) - Returns the total number of retrieved rows, including rows containing null.
|
||||
|
||||
_FUNC_(expr) - Returns the number of rows for which the supplied expression is non-null.
|
||||
_FUNC_(expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are all non-null.
|
||||
|
||||
_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-null.
|
||||
""")
|
||||
|
|
27
sql/core/src/test/resources/sql-tests/inputs/count.sql
Normal file
27
sql/core/src/test/resources/sql-tests/inputs/count.sql
Normal file
|
@ -0,0 +1,27 @@
|
|||
-- Test data: includes a duplicate row, rows with exactly one null column, and an all-null row.
|
||||
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
|
||||
(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null)
|
||||
AS testData(a, b);
|
||||
|
||||
-- count with single expression
|
||||
SELECT
|
||||
count(*), count(1), count(null), count(a), count(b), count(a + b), count((a, b))
|
||||
FROM testData;
|
||||
|
||||
-- distinct count with single expression
|
||||
SELECT
|
||||
count(DISTINCT 1),
|
||||
count(DISTINCT null),
|
||||
count(DISTINCT a),
|
||||
count(DISTINCT b),
|
||||
count(DISTINCT (a + b)),
|
||||
count(DISTINCT (a, b))
|
||||
FROM testData;
|
||||
|
||||
-- count with multiple expressions
|
||||
SELECT count(a, b), count(b, a), count(testData.*) FROM testData;
|
||||
|
||||
-- distinct count with multiple expressions
|
||||
SELECT
|
||||
count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*)
|
||||
FROM testData;
|
55
sql/core/src/test/resources/sql-tests/results/count.sql.out
Normal file
55
sql/core/src/test/resources/sql-tests/results/count.sql.out
Normal file
|
@ -0,0 +1,55 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 5
|
||||
|
||||
|
||||
-- !query 0
|
||||
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
|
||||
(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null)
|
||||
AS testData(a, b)
|
||||
-- !query 0 schema
|
||||
struct<>
|
||||
-- !query 0 output
|
||||
|
||||
|
||||
|
||||
-- !query 1
|
||||
SELECT
|
||||
count(*), count(1), count(null), count(a), count(b), count(a + b), count((a, b))
|
||||
FROM testData
|
||||
-- !query 1 schema
|
||||
struct<count(1):bigint,count(1):bigint,count(NULL):bigint,count(a):bigint,count(b):bigint,count((a + b)):bigint,count(named_struct(a, a, b, b)):bigint>
|
||||
-- !query 1 output
|
||||
7 7 0 5 5 4 7
|
||||
|
||||
|
||||
-- !query 2
|
||||
SELECT
|
||||
count(DISTINCT 1),
|
||||
count(DISTINCT null),
|
||||
count(DISTINCT a),
|
||||
count(DISTINCT b),
|
||||
count(DISTINCT (a + b)),
|
||||
count(DISTINCT (a, b))
|
||||
FROM testData
|
||||
-- !query 2 schema
|
||||
struct<count(DISTINCT 1):bigint,count(DISTINCT NULL):bigint,count(DISTINCT a):bigint,count(DISTINCT b):bigint,count(DISTINCT (a + b)):bigint,count(DISTINCT named_struct(a, a, b, b)):bigint>
|
||||
-- !query 2 output
|
||||
1 0 2 2 2 6
|
||||
|
||||
|
||||
-- !query 3
|
||||
SELECT count(a, b), count(b, a), count(testData.*) FROM testData
|
||||
-- !query 3 schema
|
||||
struct<count(a, b):bigint,count(b, a):bigint,count(a, b):bigint>
|
||||
-- !query 3 output
|
||||
4 4 4
|
||||
|
||||
|
||||
-- !query 4
|
||||
SELECT
|
||||
count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*)
|
||||
FROM testData
|
||||
-- !query 4 schema
|
||||
struct<count(DISTINCT a, b):bigint,count(DISTINCT b, a):bigint,count(DISTINCT a, b):bigint,count(DISTINCT a, b):bigint>
|
||||
-- !query 4 output
|
||||
3 3 3 3
|
Loading…
Reference in a new issue