[SPARK-28228][SQL][TESTS] Refactoring for nested CTE tests

### What changes were proposed in this pull request?
Move the nested CTE test cases into a dedicated file, `cte-nested.sql`, which is then imported by both `cte-legacy.sql` and `cte-nonlegacy.sql`.

### Why are the changes needed?
Keeping the nested CTE cases in one place removes the duplicated copies and makes them easier to maintain.

### Does this PR introduce any user-facing change?
No.

### How was this patch tested?
Existing unit tests (the SQL golden-file tests run by `SQLQueryTestSuite`).

Closes #27667 from xuanyuanking/SPARK-28228-test.

Authored-by: Yuanjian Li <xyliyuanjian@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Yuanjian Li 2020-02-25 17:37:34 +09:00 committed by HyukjinKwon
parent f152d2a0a8
commit e45f2c7fc0
8 changed files with 285 additions and 612 deletions

View file

@ -1,115 +1,2 @@
create temporary view t as select * from values 0, 1, 2 as t(id);
create temporary view t2 as select * from values 0, 1 as t(id);
-- CTE legacy substitution
SET spark.sql.legacy.ctePrecedencePolicy=legacy;
-- CTE in CTE definition
WITH t as (
WITH t2 AS (SELECT 1)
SELECT * FROM t2
)
SELECT * FROM t;
-- CTE in subquery
SELECT max(c) FROM (
WITH t(c) AS (SELECT 1)
SELECT * FROM t
);
-- CTE in subquery expression
SELECT (
WITH t AS (SELECT 1)
SELECT * FROM t
);
-- CTE in CTE definition shadows outer
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2)
SELECT * FROM t
)
SELECT * FROM t2;
-- CTE in CTE definition shadows outer 2
WITH
t(c) AS (SELECT 1),
t2 AS (
SELECT (
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
)
)
SELECT * FROM t2;
-- CTE in CTE definition shadows outer 3
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2),
t2 AS (
WITH t AS (SELECT 3)
SELECT * FROM t
)
SELECT * FROM t2
)
SELECT * FROM t2;
-- CTE in subquery shadows outer
WITH t(c) AS (SELECT 1)
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
);
-- CTE in subquery shadows outer 2
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
);
-- CTE in subquery shadows outer 3
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 3)
SELECT * FROM t
)
);
-- CTE in subquery expression shadows outer
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
);
-- CTE in subquery expression shadows outer 2
WITH t AS (SELECT 1)
SELECT (
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
)
);
-- CTE in subquery expression shadows outer 3
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT (
WITH t AS (SELECT 3)
SELECT * FROM t
)
);
-- Clean up
DROP VIEW IF EXISTS t;
DROP VIEW IF EXISTS t2;
--SET spark.sql.legacy.ctePrecedencePolicy = legacy
--IMPORT cte-nested.sql

View file

@ -0,0 +1,105 @@
-- CTE in CTE definition
WITH t as (
WITH t2 AS (SELECT 1)
SELECT * FROM t2
)
SELECT * FROM t;
-- CTE in subquery
SELECT max(c) FROM (
WITH t(c) AS (SELECT 1)
SELECT * FROM t
);
-- CTE in subquery expression
SELECT (
WITH t AS (SELECT 1)
SELECT * FROM t
);
-- CTE in CTE definition shadows outer
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2)
SELECT * FROM t
)
SELECT * FROM t2;
-- CTE in CTE definition shadows outer 2
WITH
t(c) AS (SELECT 1),
t2 AS (
SELECT (
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
)
)
SELECT * FROM t2;
-- CTE in CTE definition shadows outer 3
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2),
t2 AS (
WITH t AS (SELECT 3)
SELECT * FROM t
)
SELECT * FROM t2
)
SELECT * FROM t2;
-- CTE in subquery shadows outer
WITH t(c) AS (SELECT 1)
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
);
-- CTE in subquery shadows outer 2
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
);
-- CTE in subquery shadows outer 3
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 3)
SELECT * FROM t
)
);
-- CTE in subquery expression shadows outer
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
);
-- CTE in subquery expression shadows outer 2
WITH t AS (SELECT 1)
SELECT (
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
)
);
-- CTE in subquery expression shadows outer 3
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT (
WITH t AS (SELECT 3)
SELECT * FROM t
)
);

View file

@ -1,2 +1,2 @@
--SET spark.sql.legacy.ctePrecedencePolicy = corrected
--IMPORT cte.sql
--IMPORT cte-nested.sql

View file

@ -49,112 +49,6 @@ WITH
t(x) AS (SELECT 2)
SELECT * FROM t;
-- CTE in CTE definition
WITH t as (
WITH t2 AS (SELECT 1)
SELECT * FROM t2
)
SELECT * FROM t;
-- CTE in subquery
SELECT max(c) FROM (
WITH t(c) AS (SELECT 1)
SELECT * FROM t
);
-- CTE in subquery expression
SELECT (
WITH t AS (SELECT 1)
SELECT * FROM t
);
-- CTE in CTE definition shadows outer
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2)
SELECT * FROM t
)
SELECT * FROM t2;
-- CTE in CTE definition shadows outer 2
WITH
t(c) AS (SELECT 1),
t2 AS (
SELECT (
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
)
)
SELECT * FROM t2;
-- CTE in CTE definition shadows outer 3
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2),
t2 AS (
WITH t AS (SELECT 3)
SELECT * FROM t
)
SELECT * FROM t2
)
SELECT * FROM t2;
-- CTE in subquery shadows outer
WITH t(c) AS (SELECT 1)
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
);
-- CTE in subquery shadows outer 2
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
);
-- CTE in subquery shadows outer 3
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 3)
SELECT * FROM t
)
);
-- CTE in subquery expression shadows outer
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
);
-- CTE in subquery expression shadows outer 2
WITH t AS (SELECT 1)
SELECT (
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
)
);
-- CTE in subquery expression shadows outer 3
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT (
WITH t AS (SELECT 3)
SELECT * FROM t
)
);
-- Clean up
DROP VIEW IF EXISTS t;
DROP VIEW IF EXISTS t2;

View file

@ -1,29 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 17
-- !query
create temporary view t as select * from values 0, 1, 2 as t(id)
-- !query schema
struct<>
-- !query output
-- !query
create temporary view t2 as select * from values 0, 1 as t(id)
-- !query schema
struct<>
-- !query output
-- !query
SET spark.sql.legacy.ctePrecedencePolicy=legacy
-- !query schema
struct<key:string,value:string>
-- !query output
spark.sql.legacy.ctePrecedencePolicy legacy
-- Number of queries: 12
-- !query
@ -190,19 +166,3 @@ SELECT (
struct<scalarsubquery():int>
-- !query output
1
-- !query
DROP VIEW IF EXISTS t
-- !query schema
struct<>
-- !query output
-- !query
DROP VIEW IF EXISTS t2
-- !query schema
struct<>
-- !query output

View file

@ -0,0 +1,174 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 12
-- !query
WITH t as (
WITH t2 AS (SELECT 1)
SELECT * FROM t2
)
SELECT * FROM t
-- !query schema
struct<1:int>
-- !query output
1
-- !query
SELECT max(c) FROM (
WITH t(c) AS (SELECT 1)
SELECT * FROM t
)
-- !query schema
struct<max(c):int>
-- !query output
1
-- !query
SELECT (
WITH t AS (SELECT 1)
SELECT * FROM t
)
-- !query schema
struct<scalarsubquery():int>
-- !query output
1
-- !query
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2)
SELECT * FROM t
)
SELECT * FROM t2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH
t(c) AS (SELECT 1),
t2 AS (
SELECT (
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
)
)
SELECT * FROM t2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2),
t2 AS (
WITH t AS (SELECT 3)
SELECT * FROM t
)
SELECT * FROM t2
)
SELECT * FROM t2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH t(c) AS (SELECT 1)
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
-- !query schema
struct<max(c):int>
-- !query output
2
-- !query
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
)
-- !query schema
struct<sum(c):bigint>
-- !query output
2
-- !query
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 3)
SELECT * FROM t
)
)
-- !query schema
struct<sum(c):bigint>
-- !query output
3
-- !query
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH t AS (SELECT 1)
SELECT (
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
)
)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT (
WITH t AS (SELECT 3)
SELECT * FROM t
)
)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;

View file

@ -1,164 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 27
-- !query
create temporary view t as select * from values 0, 1, 2 as t(id)
-- !query schema
struct<>
-- !query output
-- !query
create temporary view t2 as select * from values 0, 1 as t(id)
-- !query schema
struct<>
-- !query output
-- !query
WITH s AS (SELECT 1 FROM s) SELECT * FROM s
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Table or view not found: s; line 1 pos 25
-- !query
WITH r AS (SELECT (SELECT * FROM r))
SELECT * FROM r
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Table or view not found: r; line 1 pos 33
-- !query
WITH t AS (SELECT 1 FROM t) SELECT * FROM t
-- !query schema
struct<1:int>
-- !query output
1
1
1
-- !query
WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Table or view not found: s2; line 1 pos 26
-- !query
WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1 cross join t2
-- !query schema
struct<id:int,2:int>
-- !query output
0 2
0 2
1 2
1 2
-- !query
WITH CTE1 AS (
SELECT b.id AS id
FROM T2 a
CROSS JOIN (SELECT id AS id FROM T2) b
)
SELECT t1.id AS c1,
t2.id AS c2
FROM CTE1 t1
CROSS JOIN CTE1 t2
-- !query schema
struct<c1:int,c2:int>
-- !query output
0 0
0 0
0 0
0 0
0 1
0 1
0 1
0 1
1 0
1 0
1 0
1 0
1 1
1 1
1 1
1 1
-- !query
WITH t(x) AS (SELECT 1)
SELECT * FROM t WHERE x = 1
-- !query schema
struct<x:int>
-- !query output
1
-- !query
WITH t(x, y) AS (SELECT 1, 2)
SELECT * FROM t WHERE x = 1 AND y = 2
-- !query schema
struct<x:int,y:int>
-- !query output
1 2
-- !query
WITH t(x, x) AS (SELECT 1, 2)
SELECT * FROM t
-- !query schema
struct<x:int,x:int>
-- !query output
1 2
-- !query
WITH t() AS (SELECT 1)
SELECT * FROM t
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
no viable alternative at input 'WITH t()'(line 1, pos 7)
== SQL ==
WITH t() AS (SELECT 1)
-------^^^
SELECT * FROM t
-- !query
WITH
t(x) AS (SELECT 1),
t(x) AS (SELECT 2)
SELECT * FROM t
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
CTE definition can't have duplicate names: 't'.(line 1, pos 0)
== SQL ==
WITH
^^^
t(x) AS (SELECT 1),
t(x) AS (SELECT 2)
SELECT * FROM t
-- Number of queries: 12
-- !query
@ -325,19 +166,3 @@ SELECT (
struct<scalarsubquery():int>
-- !query output
3
-- !query
DROP VIEW IF EXISTS t
-- !query schema
struct<>
-- !query output
-- !query
DROP VIEW IF EXISTS t2
-- !query schema
struct<>
-- !query output

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 27
-- Number of queries: 15
-- !query
@ -161,178 +161,6 @@ WITH
SELECT * FROM t
-- !query
WITH t as (
WITH t2 AS (SELECT 1)
SELECT * FROM t2
)
SELECT * FROM t
-- !query schema
struct<1:int>
-- !query output
1
-- !query
SELECT max(c) FROM (
WITH t(c) AS (SELECT 1)
SELECT * FROM t
)
-- !query schema
struct<max(c):int>
-- !query output
1
-- !query
SELECT (
WITH t AS (SELECT 1)
SELECT * FROM t
)
-- !query schema
struct<scalarsubquery():int>
-- !query output
1
-- !query
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2)
SELECT * FROM t
)
SELECT * FROM t2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH
t(c) AS (SELECT 1),
t2 AS (
SELECT (
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
)
)
SELECT * FROM t2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2),
t2 AS (
WITH t AS (SELECT 3)
SELECT * FROM t
)
SELECT * FROM t2
)
SELECT * FROM t2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH t(c) AS (SELECT 1)
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
-- !query schema
struct<max(c):int>
-- !query output
2
-- !query
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
)
-- !query schema
struct<sum(c):bigint>
-- !query output
2
-- !query
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT max(c) AS c FROM (
WITH t(c) AS (SELECT 3)
SELECT * FROM t
)
)
-- !query schema
struct<sum(c):bigint>
-- !query output
3
-- !query
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH t AS (SELECT 1)
SELECT (
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
)
)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT (
WITH t AS (SELECT 3)
SELECT * FROM t
)
)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.;
-- !query
DROP VIEW IF EXISTS t
-- !query schema