Revert "[SPARK-29107][SQL][TESTS] Port window.sql (Part 1)"

This reverts commit 81915dacc4.
Author: Dongjoon Hyun
Date: 2019-10-14 15:15:32 -07:00
parent bfa09cf049
commit ff9fcd501c
2 changed files with 0 additions and 1250 deletions


@@ -1,348 +0,0 @@
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
--
-- Window Functions Testing
-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql#L1-L319
CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1;
CREATE TABLE empsalary (
depname string,
empno integer,
salary int,
enroll_date date
) USING parquet;
INSERT INTO empsalary VALUES
('develop', 10, 5200, '2007-08-01'),
('sales', 1, 5000, '2006-10-01'),
('personnel', 5, 3500, '2007-12-10'),
('sales', 4, 4800, '2007-08-08'),
('personnel', 2, 3900, '2006-12-23'),
('develop', 7, 4200, '2008-01-01'),
('develop', 9, 4500, '2008-01-01'),
('sales', 3, 4800, '2007-08-01'),
('develop', 8, 6000, '2006-10-01'),
('develop', 11, 5200, '2007-08-15');
SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary;
SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary;
-- with GROUP BY
SELECT four, ten, SUM(SUM(four)) OVER (PARTITION BY four), AVG(ten) FROM tenk1
GROUP BY four, ten ORDER BY four, ten;
SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname);
-- [SPARK-28064] Order by does not accept a call to rank()
-- SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w;
-- empty window specification
SELECT COUNT(*) OVER () FROM tenk1 WHERE unique2 < 10;
SELECT COUNT(*) OVER w FROM tenk1 WHERE unique2 < 10 WINDOW w AS ();
-- no window operation
SELECT four FROM tenk1 WHERE FALSE WINDOW w AS (PARTITION BY ten);
-- cumulative aggregate
SELECT sum(four) OVER (PARTITION BY ten ORDER BY unique2) AS sum_1, ten, four FROM tenk1 WHERE unique2 < 10;
SELECT row_number() OVER (ORDER BY unique2) FROM tenk1 WHERE unique2 < 10;
SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10;
SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
SELECT percent_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
SELECT cume_dist() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
SELECT ntile(3) OVER (ORDER BY ten, four), ten, four FROM tenk1 WHERE unique2 < 10;
-- [SPARK-28065] ntile does not accept NULL as input
-- SELECT ntile(NULL) OVER (ORDER BY ten, four), ten, four FROM tenk1 LIMIT 2;
SELECT lag(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
-- [SPARK-28068] `lag` second argument must be a literal in Spark
-- SELECT lag(ten, four) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
-- [SPARK-28068] `lag` second argument must be a literal in Spark
-- SELECT lag(ten, four, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
SELECT lead(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
SELECT lead(ten * 2, 1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
SELECT first(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
-- last returns the last row of the frame, which is the CURRENT ROW in an ORDER BY window.
SELECT last(four) OVER (ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
SELECT last(ten) OVER (PARTITION BY four), ten, four FROM
(SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s
ORDER BY four, ten;
-- [SPARK-27951] ANSI SQL: NTH_VALUE function
-- SELECT nth_value(ten, four + 1) OVER (PARTITION BY four), ten, four
-- FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s;
SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER (PARTITION BY two ORDER BY ten) AS wsum
FROM tenk1 GROUP BY ten, two;
SELECT count(*) OVER (PARTITION BY four), four FROM (SELECT * FROM tenk1 WHERE two = 1)s WHERE unique2 < 10;
SELECT (count(*) OVER (PARTITION BY four ORDER BY ten) +
sum(hundred) OVER (PARTITION BY four ORDER BY ten)) AS cntsum
FROM tenk1 WHERE unique2 < 10;
-- opexpr with different windows evaluation.
SELECT * FROM(
SELECT count(*) OVER (PARTITION BY four ORDER BY ten) +
sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS total,
count(*) OVER (PARTITION BY four ORDER BY ten) AS fourcount,
sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS twosum
FROM tenk1
)sub WHERE total <> fourcount + twosum;
SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM tenk1 WHERE unique2 < 10;
SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER win AS wsum
FROM tenk1 GROUP BY ten, two WINDOW win AS (PARTITION BY two ORDER BY ten);
-- more than one window with GROUP BY
SELECT sum(salary),
row_number() OVER (ORDER BY depname),
sum(sum(salary)) OVER (ORDER BY depname DESC)
FROM empsalary GROUP BY depname;
-- identical windows with different names
SELECT sum(salary) OVER w1, count(*) OVER w2
FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary);
-- subplan
-- [SPARK-28379] Correlated scalar subqueries must be aggregated
-- SELECT lead(ten, (SELECT two FROM tenk1 WHERE s.unique2 = unique2)) OVER (PARTITION BY four ORDER BY ten)
-- FROM tenk1 s WHERE unique2 < 10;
-- empty table
SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s;
-- mixture of agg/wfunc in the same window
SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);
-- strict aggs
-- Temporarily turn off ANSI mode because of compatibility issues with
-- date-related keywords (in this case, year)
SET spark.sql.parser.ansi.enabled=false;
SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM(
SELECT *,
CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(year FROM enroll_date) END * 500 AS bonus,
CASE WHEN
AVG(salary) OVER (PARTITION BY depname) < salary
THEN 200 END AS depadj FROM empsalary
)s;
SET spark.sql.parser.ansi.enabled=true;
create temporary view int4_tbl as select * from values
(0),
(123456),
(-123456),
(2147483647),
(-2147483647)
as int4_tbl(f1);
-- window function over ungrouped agg over empty row set (bug before 9.1)
SELECT SUM(COUNT(f1)) OVER () FROM int4_tbl WHERE f1=42;
-- window function with ORDER BY an expression involving aggregates (9.1 bug)
select ten,
sum(unique1) + sum(unique2) as res,
rank() over (order by sum(unique1) + sum(unique2)) as rank
from tenk1
group by ten order by ten;
-- window and aggregate with GROUP BY expression (9.2 bug)
-- explain
-- select first(max(x)) over (), y
-- from (select unique1 as x, ten+four as y from tenk1) ss
-- group by y;
-- test non-default frame specifications
SELECT four, ten,
sum(ten) over (partition by four order by ten),
last(ten) over (partition by four order by ten)
FROM (select distinct ten, four from tenk1) ss;
SELECT four, ten,
sum(ten) over (partition by four order by ten range between unbounded preceding and current row),
last(ten) over (partition by four order by ten range between unbounded preceding and current row)
FROM (select distinct ten, four from tenk1) ss;
SELECT four, ten,
sum(ten) over (partition by four order by ten range between unbounded preceding and unbounded following),
last(ten) over (partition by four order by ten range between unbounded preceding and unbounded following)
FROM (select distinct ten, four from tenk1) ss;
-- [SPARK-29451] Some queries with divisions in SQL windows are failing in Thrift
-- SELECT four, ten/4 as two,
-- sum(ten/4) over (partition by four order by ten/4 range between unbounded preceding and current row),
-- last(ten/4) over (partition by four order by ten/4 range between unbounded preceding and current row)
-- FROM (select distinct ten, four from tenk1) ss;
-- [SPARK-29451] Some queries with divisions in SQL windows are failing in Thrift
-- SELECT four, ten/4 as two,
-- sum(ten/4) over (partition by four order by ten/4 rows between unbounded preceding and current row),
-- last(ten/4) over (partition by four order by ten/4 rows between unbounded preceding and current row)
-- FROM (select distinct ten, four from tenk1) ss;
SELECT sum(unique1) over (order by four range between current row and unbounded following),
unique1, four
FROM tenk1 WHERE unique1 < 10;
SELECT sum(unique1) over (rows between current row and unbounded following),
unique1, four
FROM tenk1 WHERE unique1 < 10;
SELECT sum(unique1) over (rows between 2 preceding and 2 following),
unique1, four
FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude no others),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude current row),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude group),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude ties),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude current row),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude group),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude ties),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude current row),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude group),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude ties),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;
SELECT sum(unique1) over (rows between 2 preceding and 1 preceding),
unique1, four
FROM tenk1 WHERE unique1 < 10;
SELECT sum(unique1) over (rows between 1 following and 3 following),
unique1, four
FROM tenk1 WHERE unique1 < 10;
SELECT sum(unique1) over (rows between unbounded preceding and 1 following),
unique1, four
FROM tenk1 WHERE unique1 < 10;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (w range between current row and unbounded following),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude current row),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude group),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude ties),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
-- [SPARK-27951] ANSI SQL: NTH_VALUE function
-- SELECT first_value(unique1) over w,
-- nth_value(unique1, 2) over w AS nth_2,
-- last_value(unique1) over w, unique1, four
-- FROM tenk1 WHERE unique1 < 10
-- WINDOW w AS (order by four range between current row and unbounded following);
-- [SPARK-28501] Frame bound value must be a literal.
-- SELECT sum(unique1) over
-- (order by unique1
-- rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
-- unique1
-- FROM tenk1 WHERE unique1 < 10;
CREATE TEMP VIEW v_window AS
SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following) as sum_rows
FROM range(1, 11) i;
SELECT * FROM v_window;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- CREATE OR REPLACE TEMP VIEW v_window AS
-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following
-- exclude current row) as sum_rows FROM range(1, 10) i;
-- SELECT * FROM v_window;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- CREATE OR REPLACE TEMP VIEW v_window AS
-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following
-- exclude group) as sum_rows FROM range(1, 10) i;
-- SELECT * FROM v_window;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- CREATE OR REPLACE TEMP VIEW v_window AS
-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following
-- exclude ties) as sum_rows FROM generate_series(1, 10) i;
-- [SPARK-28428] Spark `exclude` always expecting `()`
-- CREATE OR REPLACE TEMP VIEW v_window AS
-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following
-- exclude no others) as sum_rows FROM generate_series(1, 10) i;
-- SELECT * FROM v_window;
-- [SPARK-28648] Adds support to `groups` unit type in window clauses
-- CREATE OR REPLACE TEMP VIEW v_window AS
-- SELECT i.id, sum(i.id) over (order by i.id groups between 1 preceding and 1 following) as sum_rows FROM range(1, 11) i;
-- SELECT * FROM v_window;
DROP VIEW v_window;
DROP TABLE empsalary;
DROP VIEW tenk2;
DROP VIEW int4_tbl;


@@ -1,902 +0,0 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 55
-- !query 0
CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1
-- !query 0 schema
struct<>
-- !query 0 output
-- !query 1
CREATE TABLE empsalary (
depname string,
empno integer,
salary int,
enroll_date date
) USING parquet
-- !query 1 schema
struct<>
-- !query 1 output
-- !query 2
INSERT INTO empsalary VALUES
('develop', 10, 5200, '2007-08-01'),
('sales', 1, 5000, '2006-10-01'),
('personnel', 5, 3500, '2007-12-10'),
('sales', 4, 4800, '2007-08-08'),
('personnel', 2, 3900, '2006-12-23'),
('develop', 7, 4200, '2008-01-01'),
('develop', 9, 4500, '2008-01-01'),
('sales', 3, 4800, '2007-08-01'),
('develop', 8, 6000, '2006-10-01'),
('develop', 11, 5200, '2007-08-15')
-- !query 2 schema
struct<>
-- !query 2 output
-- !query 3
SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary
-- !query 3 schema
struct<depname:string,empno:int,salary:int,sum(CAST(salary AS BIGINT)) OVER (PARTITION BY depname ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
-- !query 3 output
develop 7 4200 25100
develop 9 4500 25100
develop 10 5200 25100
develop 11 5200 25100
develop 8 6000 25100
personnel 5 3500 7400
personnel 2 3900 7400
sales 4 4800 14600
sales 3 4800 14600
sales 1 5000 14600
-- !query 4
SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary
-- !query 4 schema
struct<depname:string,empno:int,salary:int,RANK() OVER (PARTITION BY depname ORDER BY salary ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int>
-- !query 4 output
develop 10 5200 3
develop 11 5200 3
develop 7 4200 1
develop 8 6000 5
develop 9 4500 2
personnel 2 3900 2
personnel 5 3500 1
sales 1 5000 3
sales 3 4800 1
sales 4 4800 1
-- !query 5
SELECT four, ten, SUM(SUM(four)) OVER (PARTITION BY four), AVG(ten) FROM tenk1
GROUP BY four, ten ORDER BY four, ten
-- !query 5 schema
struct<four:int,ten:int,sum(sum(CAST(four AS BIGINT))) OVER (PARTITION BY four ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,avg(ten):double>
-- !query 5 output
0 0 0 0.0
0 2 0 2.0
0 4 0 4.0
0 6 0 6.0
0 8 0 8.0
1 1 2500 1.0
1 3 2500 3.0
1 5 2500 5.0
1 7 2500 7.0
1 9 2500 9.0
2 0 5000 0.0
2 2 5000 2.0
2 4 5000 4.0
2 6 5000 6.0
2 8 5000 8.0
3 1 7500 1.0
3 3 7500 3.0
3 5 7500 5.0
3 7 7500 7.0
3 9 7500 9.0
-- !query 6
SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname)
-- !query 6 schema
struct<depname:string,empno:int,salary:int,sum(CAST(salary AS BIGINT)) OVER (PARTITION BY depname ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
-- !query 6 output
develop 10 5200 25100
develop 11 5200 25100
develop 7 4200 25100
develop 8 6000 25100
develop 9 4500 25100
personnel 2 3900 7400
personnel 5 3500 7400
sales 1 5000 14600
sales 3 4800 14600
sales 4 4800 14600
-- !query 7
SELECT COUNT(*) OVER () FROM tenk1 WHERE unique2 < 10
-- !query 7 schema
struct<count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
-- !query 7 output
10
10
10
10
10
10
10
10
10
10
-- !query 8
SELECT COUNT(*) OVER w FROM tenk1 WHERE unique2 < 10 WINDOW w AS ()
-- !query 8 schema
struct<count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
-- !query 8 output
10
10
10
10
10
10
10
10
10
10
-- !query 9
SELECT four FROM tenk1 WHERE FALSE WINDOW w AS (PARTITION BY ten)
-- !query 9 schema
struct<four:int>
-- !query 9 output
-- !query 10
SELECT sum(four) OVER (PARTITION BY ten ORDER BY unique2) AS sum_1, ten, four FROM tenk1 WHERE unique2 < 10
-- !query 10 schema
struct<sum_1:bigint,ten:int,four:int>
-- !query 10 output
0 0 0
0 0 0
0 4 0
1 7 1
1 9 1
2 0 2
3 1 3
3 3 3
4 1 1
5 1 1
-- !query 11
SELECT row_number() OVER (ORDER BY unique2) FROM tenk1 WHERE unique2 < 10
-- !query 11 schema
struct<row_number() OVER (ORDER BY unique2 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int>
-- !query 11 output
1
10
2
3
4
5
6
7
8
9
-- !query 12
SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10
-- !query 12 schema
struct<rank_1:int,ten:int,four:int>
-- !query 12 output
1 0 0
1 0 0
1 0 2
1 1 1
1 1 1
1 1 3
2 3 3
3 4 0
3 7 1
4 9 1
-- !query 13
SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 13 schema
struct<DENSE_RANK() OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,ten:int,four:int>
-- !query 13 output
1 0 0
1 0 0
1 0 2
1 1 1
1 1 1
1 1 3
2 3 3
2 4 0
2 7 1
3 9 1
-- !query 14
SELECT percent_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 14 schema
struct<PERCENT_RANK() OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double,ten:int,four:int>
-- !query 14 output
0.0 0 0
0.0 0 0
0.0 0 2
0.0 1 1
0.0 1 1
0.0 1 3
0.6666666666666666 7 1
1.0 3 3
1.0 4 0
1.0 9 1
-- !query 15
SELECT cume_dist() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 15 schema
struct<cume_dist() OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double,ten:int,four:int>
-- !query 15 output
0.5 1 1
0.5 1 1
0.5 1 3
0.6666666666666666 0 0
0.6666666666666666 0 0
0.75 7 1
1.0 0 2
1.0 3 3
1.0 4 0
1.0 9 1
-- !query 16
SELECT ntile(3) OVER (ORDER BY ten, four), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 16 schema
struct<ntile(3) OVER (ORDER BY ten ASC NULLS FIRST, four ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,ten:int,four:int>
-- !query 16 output
1 0 0
1 0 0
1 0 2
1 1 1
2 1 1
2 1 3
2 3 3
3 4 0
3 7 1
3 9 1
-- !query 17
SELECT lag(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 17 schema
struct<lag(ten, 1, NULL) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST ROWS BETWEEN -1 FOLLOWING AND -1 FOLLOWING):int,ten:int,four:int>
-- !query 17 output
0 0 0
0 4 0
1 1 1
1 3 3
1 7 1
7 9 1
NULL 0 0
NULL 0 2
NULL 1 1
NULL 1 3
-- !query 18
SELECT lead(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 18 schema
struct<lead(ten, 1, NULL) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING):int,ten:int,four:int>
-- !query 18 output
0 0 0
1 1 1
3 1 3
4 0 0
7 1 1
9 7 1
NULL 0 2
NULL 3 3
NULL 4 0
NULL 9 1
-- !query 19
SELECT lead(ten * 2, 1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 19 schema
struct<lead((ten * 2), 1, NULL) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING):int,ten:int,four:int>
-- !query 19 output
0 0 0
14 1 1
18 7 1
2 1 1
6 1 3
8 0 0
NULL 0 2
NULL 3 3
NULL 4 0
NULL 9 1
-- !query 20
SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 20 schema
struct<lead((ten * 2), 1, -1) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING):int,ten:int,four:int>
-- !query 20 output
-1 0 2
-1 3 3
-1 4 0
-1 9 1
0 0 0
14 1 1
18 7 1
2 1 1
6 1 3
8 0 0
-- !query 21
SELECT first(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 21 schema
struct<first(ten, false) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,ten:int,four:int>
-- !query 21 output
0 0 0
0 0 0
0 0 2
0 4 0
1 1 1
1 1 1
1 1 3
1 3 3
1 7 1
1 9 1
-- !query 22
SELECT last(four) OVER (ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10
-- !query 22 schema
struct<last(four, false) OVER (ORDER BY ten ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,ten:int,four:int>
-- !query 22 output
0 4 0
1 1 1
1 1 1
1 1 3
1 7 1
1 9 1
2 0 0
2 0 0
2 0 2
3 3 3
-- !query 23
SELECT last(ten) OVER (PARTITION BY four), ten, four FROM
(SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s
ORDER BY four, ten
-- !query 23 schema
struct<last(ten, false) OVER (PARTITION BY four ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):int,ten:int,four:int>
-- !query 23 output
4 0 0
4 0 0
4 4 0
9 1 1
9 1 1
9 7 1
9 9 1
0 0 2
3 1 3
3 3 3
-- !query 24
SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER (PARTITION BY two ORDER BY ten) AS wsum
FROM tenk1 GROUP BY ten, two
-- !query 24 schema
struct<ten:int,two:int,gsum:bigint,wsum:bigint>
-- !query 24 output
0 0 45000 45000
1 1 46000 46000
2 0 47000 92000
3 1 48000 94000
4 0 49000 141000
5 1 50000 144000
6 0 51000 192000
7 1 52000 196000
8 0 53000 245000
9 1 54000 250000
-- !query 25
SELECT count(*) OVER (PARTITION BY four), four FROM (SELECT * FROM tenk1 WHERE two = 1)s WHERE unique2 < 10
-- !query 25 schema
struct<count(1) OVER (PARTITION BY four ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,four:int>
-- !query 25 output
2 3
2 3
4 1
4 1
4 1
4 1
-- !query 26
SELECT (count(*) OVER (PARTITION BY four ORDER BY ten) +
sum(hundred) OVER (PARTITION BY four ORDER BY ten)) AS cntsum
FROM tenk1 WHERE unique2 < 10
-- !query 26 schema
struct<cntsum:bigint>
-- !query 26 output
136
22
22
24
24
51
82
87
92
92
-- !query 27
SELECT * FROM(
SELECT count(*) OVER (PARTITION BY four ORDER BY ten) +
sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS total,
count(*) OVER (PARTITION BY four ORDER BY ten) AS fourcount,
sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS twosum
FROM tenk1
)sub WHERE total <> fourcount + twosum
-- !query 27 schema
struct<total:bigint,fourcount:bigint,twosum:bigint>
-- !query 27 output
-- !query 28
SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM tenk1 WHERE unique2 < 10
-- !query 28 schema
struct<avg(CAST(four AS BIGINT)) OVER (PARTITION BY four ORDER BY (CAST(thousand AS DOUBLE) / CAST(100 AS DOUBLE)) ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double>
-- !query 28 output
0.0
0.0
0.0
1.0
1.0
1.0
1.0
2.0
3.0
3.0
-- !query 29
SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER win AS wsum
FROM tenk1 GROUP BY ten, two WINDOW win AS (PARTITION BY two ORDER BY ten)
-- !query 29 schema
struct<ten:int,two:int,gsum:bigint,wsum:bigint>
-- !query 29 output
0 0 45000 45000
1 1 46000 46000
2 0 47000 92000
3 1 48000 94000
4 0 49000 141000
5 1 50000 144000
6 0 51000 192000
7 1 52000 196000
8 0 53000 245000
9 1 54000 250000
-- !query 30
SELECT sum(salary),
row_number() OVER (ORDER BY depname),
sum(sum(salary)) OVER (ORDER BY depname DESC)
FROM empsalary GROUP BY depname
-- !query 30 schema
struct<sum(salary):bigint,row_number() OVER (ORDER BY depname ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,sum(sum(CAST(salary AS BIGINT))) OVER (ORDER BY depname DESC NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint>
-- !query 30 output
14600 3 14600
25100 1 47100
7400 2 22000
-- !query 31
SELECT sum(salary) OVER w1, count(*) OVER w2
FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary)
-- !query 31 schema
struct<sum(CAST(salary AS BIGINT)) OVER (ORDER BY salary ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint,count(1) OVER (ORDER BY salary ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint>
-- !query 31 output
11600 3
16100 4
25700 6
25700 6
30700 7
3500 1
41100 9
41100 9
47100 10
7400 2
-- !query 32
SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s
-- !query 32 schema
struct<count(1) OVER (PARTITION BY four ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
-- !query 32 output
-- !query 33
SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC)
-- !query 33 schema
struct<sum(CAST(salary AS BIGINT)) OVER (PARTITION BY depname ORDER BY salary DESC NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint,RANK() OVER (PARTITION BY depname ORDER BY salary DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int>
-- !query 33 output
14600 2
14600 2
16400 2
16400 2
20900 4
25100 5
3900 1
5000 1
6000 1
7400 2
-- !query 34
SET spark.sql.parser.ansi.enabled=false
-- !query 34 schema
struct<key:string,value:string>
-- !query 34 output
spark.sql.parser.ansi.enabled false
-- !query 35
SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM(
SELECT *,
CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(year FROM enroll_date) END * 500 AS bonus,
CASE WHEN
AVG(salary) OVER (PARTITION BY depname) < salary
THEN 200 END AS depadj FROM empsalary
)s
-- !query 35 schema
struct<empno:int,depname:string,salary:int,bonus:int,depadj:int,min(bonus) OVER (ORDER BY empno ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,max(depadj) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):int>
-- !query 35 output
1 sales 5000 1000 200 1000 200
10 develop 5200 500 200 500 200
11 develop 5200 500 200 500 200
2 personnel 3900 1000 200 1000 200
3 sales 4800 500 NULL 500 200
4 sales 4800 500 NULL 500 200
5 personnel 3500 500 NULL 500 200
7 develop 4200 NULL NULL 500 200
8 develop 6000 1000 200 500 200
9 develop 4500 NULL NULL 500 200
-- !query 36
SET spark.sql.parser.ansi.enabled=true
-- !query 36 schema
struct<key:string,value:string>
-- !query 36 output
spark.sql.parser.ansi.enabled true
-- !query 37
create temporary view int4_tbl as select * from values
(0),
(123456),
(-123456),
(2147483647),
(-2147483647)
as int4_tbl(f1)
-- !query 37 schema
struct<>
-- !query 37 output
-- !query 38
SELECT SUM(COUNT(f1)) OVER () FROM int4_tbl WHERE f1=42
-- !query 38 schema
struct<sum(count(f1)) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
-- !query 38 output
0
-- !query 39
select ten,
sum(unique1) + sum(unique2) as res,
rank() over (order by sum(unique1) + sum(unique2)) as rank
from tenk1
group by ten order by ten
-- !query 39 schema
struct<ten:int,res:bigint,rank:int>
-- !query 39 output
0 9976146 4
1 10114187 9
2 10059554 8
3 9878541 1
4 9881005 2
5 9981670 5
6 9947099 3
7 10120309 10
8 9991305 6
9 10040184 7
-- !query 40
SELECT four, ten,
sum(ten) over (partition by four order by ten),
last(ten) over (partition by four order by ten)
FROM (select distinct ten, four from tenk1) ss
-- !query 40 schema
struct<four:int,ten:int,sum(CAST(ten AS BIGINT)) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint,last(ten, false) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int>
-- !query 40 output
0 0 0 0
0 2 2 2
0 4 6 4
0 6 12 6
0 8 20 8
1 1 1 1
1 3 4 3
1 5 9 5
1 7 16 7
1 9 25 9
2 0 0 0
2 2 2 2
2 4 6 4
2 6 12 6
2 8 20 8
3 1 1 1
3 3 4 3
3 5 9 5
3 7 16 7
3 9 25 9
-- !query 41
SELECT four, ten,
sum(ten) over (partition by four order by ten range between unbounded preceding and current row),
last(ten) over (partition by four order by ten range between unbounded preceding and current row)
FROM (select distinct ten, four from tenk1) ss
-- !query 41 schema
struct<four:int,ten:int,sum(ten) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint,last(ten, false) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int>
-- !query 41 output
0 0 0 0
0 2 2 2
0 4 6 4
0 6 12 6
0 8 20 8
1 1 1 1
1 3 4 3
1 5 9 5
1 7 16 7
1 9 25 9
2 0 0 0
2 2 2 2
2 4 6 4
2 6 12 6
2 8 20 8
3 1 1 1
3 3 4 3
3 5 9 5
3 7 16 7
3 9 25 9
-- !query 42
SELECT four, ten,
sum(ten) over (partition by four order by ten range between unbounded preceding and unbounded following),
last(ten) over (partition by four order by ten range between unbounded preceding and unbounded following)
FROM (select distinct ten, four from tenk1) ss
-- !query 42 schema
struct<four:int,ten:int,sum(ten) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,last(ten, false) OVER (PARTITION BY four ORDER BY ten ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):int>
-- !query 42 output
0 0 20 8
0 2 20 8
0 4 20 8
0 6 20 8
0 8 20 8
1 1 25 9
1 3 25 9
1 5 25 9
1 7 25 9
1 9 25 9
2 0 20 8
2 2 20 8
2 4 20 8
2 6 20 8
2 8 20 8
3 1 25 9
3 3 25 9
3 5 25 9
3 7 25 9
3 9 25 9
-- !query 43
SELECT sum(unique1) over (order by four range between current row and unbounded following),
unique1, four
FROM tenk1 WHERE unique1 < 10
-- !query 43 schema
struct<sum(unique1) OVER (ORDER BY four ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING):bigint,unique1:int,four:int>
-- !query 43 output
10 3 3
10 7 3
18 2 2
18 6 2
33 1 1
33 5 1
33 9 1
45 0 0
45 4 0
45 8 0
-- !query 44
SELECT sum(unique1) over (rows between current row and unbounded following),
unique1, four
FROM tenk1 WHERE unique1 < 10
-- !query 44 schema
struct<sum(unique1) OVER (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING):bigint,unique1:int,four:int>
-- !query 44 output
0 0 0
10 3 3
15 5 1
23 8 0
32 9 1
38 6 2
39 1 1
41 2 2
45 4 0
7 7 3
-- !query 45
SELECT sum(unique1) over (rows between 2 preceding and 2 following),
unique1, four
FROM tenk1 WHERE unique1 < 10
-- !query 45 schema
struct<sum(unique1) OVER (ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING):bigint,unique1:int,four:int>
-- !query 45 output
10 0 0
13 2 2
15 7 3
22 1 1
23 3 3
26 6 2
29 9 1
31 8 0
32 5 1
7 4 0
-- !query 46
SELECT sum(unique1) over (rows between 2 preceding and 1 preceding),
unique1, four
FROM tenk1 WHERE unique1 < 10
-- !query 46 schema
struct<sum(unique1) OVER (ROWS BETWEEN 2 PRECEDING AND 1 PRECEDING):bigint,unique1:int,four:int>
-- !query 46 output
10 0 0
13 3 3
15 8 0
17 5 1
3 6 2
4 2 2
6 1 1
7 9 1
8 7 3
NULL 4 0
-- !query 47
SELECT sum(unique1) over (rows between 1 following and 3 following),
unique1, four
FROM tenk1 WHERE unique1 < 10
-- !query 47 schema
struct<sum(unique1) OVER (ROWS BETWEEN 1 FOLLOWING AND 3 FOLLOWING):bigint,unique1:int,four:int>
-- !query 47 output
0 7 3
10 5 1
15 8 0
16 2 2
16 9 1
22 6 2
23 1 1
7 3 3
9 4 0
NULL 0 0
-- !query 48
SELECT sum(unique1) over (rows between unbounded preceding and 1 following),
unique1, four
FROM tenk1 WHERE unique1 < 10
-- !query 48 schema
struct<sum(unique1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING):bigint,unique1:int,four:int>
-- !query 48 output
13 1 1
22 6 2
30 9 1
35 8 0
38 5 1
45 0 0
45 3 3
45 7 3
6 4 0
7 2 2
-- !query 49
CREATE TEMP VIEW v_window AS
SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following) as sum_rows
FROM range(1, 11) i
-- !query 49 schema
struct<>
-- !query 49 output
-- !query 50
SELECT * FROM v_window
-- !query 50 schema
struct<id:bigint,sum_rows:bigint>
-- !query 50 output
1 3
10 19
2 6
3 9
4 12
5 15
6 18
7 21
8 24
9 27
-- !query 51
DROP VIEW v_window
-- !query 51 schema
struct<>
-- !query 51 output
-- !query 52
DROP TABLE empsalary
-- !query 52 schema
struct<>
-- !query 52 output
-- !query 53
DROP VIEW tenk2
-- !query 53 schema
struct<>
-- !query 53 output
-- !query 54
DROP VIEW int4_tbl
-- !query 54 schema
struct<>
-- !query 54 output