[SPARK-19799][SQL] Support WITH clause in subqueries
## What changes were proposed in this pull request? This PR adds support of `WITH` clause within a subquery so this query becomes valid: ``` SELECT max(c) FROM ( WITH t AS (SELECT 1 AS c) SELECT * FROM t ) ``` ## How was this patch tested? Added new UTs. Closes #24831 from peter-toth/SPARK-19799-2. Authored-by: Peter Toth <peter.toth@gmail.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
cec6a32904
commit
cad440d1f5
|
@ -278,7 +278,7 @@ locationSpec
|
|||
;
|
||||
|
||||
query
|
||||
: ctes? queryNoWith
|
||||
: ctes? queryTerm queryOrganization
|
||||
;
|
||||
|
||||
insertInto
|
||||
|
@ -380,10 +380,6 @@ dmlStatementNoWith
|
|||
| fromClause multiInsertQueryBody+ #multiInsertQuery
|
||||
;
|
||||
|
||||
queryNoWith
|
||||
: queryTerm queryOrganization
|
||||
;
|
||||
|
||||
queryOrganization
|
||||
: (ORDER BY order+=sortItem (',' order+=sortItem)*)?
|
||||
(CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)?
|
||||
|
@ -412,7 +408,7 @@ queryPrimary
|
|||
| fromStatement #fromStmt
|
||||
| TABLE multipartIdentifier #table
|
||||
| inlineTable #inlineTableDefault1
|
||||
| '(' queryNoWith ')' #subquery
|
||||
| '(' query ')' #subquery
|
||||
;
|
||||
|
||||
sortItem
|
||||
|
@ -583,7 +579,7 @@ identifierComment
|
|||
|
||||
relationPrimary
|
||||
: multipartIdentifier sample? tableAlias #tableName
|
||||
| '(' queryNoWith ')' sample? tableAlias #aliasedQuery
|
||||
| '(' query ')' sample? tableAlias #aliasedQuery
|
||||
| '(' relation ')' sample? tableAlias #aliasedRelation
|
||||
| inlineTable #inlineTableDefault2
|
||||
| functionTable #tableValuedFunction
|
||||
|
|
|
@ -224,12 +224,13 @@ class Analyzer(
|
|||
case other => other
|
||||
}
|
||||
|
||||
def substituteCTE(plan: LogicalPlan, cteName: String, ctePlan: LogicalPlan): LogicalPlan = {
|
||||
private def substituteCTE(
|
||||
plan: LogicalPlan,
|
||||
cteName: String,
|
||||
ctePlan: LogicalPlan): LogicalPlan = {
|
||||
plan resolveOperatorsUp {
|
||||
case UnresolvedRelation(Seq(table)) if resolver(cteName, table) =>
|
||||
ctePlan
|
||||
case u: UnresolvedRelation =>
|
||||
u
|
||||
case other =>
|
||||
// This cannot be done in ResolveSubquery because ResolveSubquery does not know the CTE.
|
||||
other transformExpressions {
|
||||
|
|
|
@ -111,7 +111,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
|
|||
* Create a top-level plan with Common Table Expressions.
|
||||
*/
|
||||
override def visitQuery(ctx: QueryContext): LogicalPlan = withOrigin(ctx) {
|
||||
val query = plan(ctx.queryNoWith)
|
||||
val query = plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
|
||||
|
||||
// Apply CTEs
|
||||
query.optionalMap(ctx.ctes)(withCTE)
|
||||
|
@ -175,10 +175,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
|
|||
}
|
||||
}
|
||||
|
||||
override def visitQueryNoWith(ctx: QueryNoWithContext): LogicalPlan = withOrigin(ctx) {
|
||||
plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a named logical plan.
|
||||
*
|
||||
|
@ -890,7 +886,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
|
|||
* Create a logical plan for a sub-query.
|
||||
*/
|
||||
override def visitSubquery(ctx: SubqueryContext): LogicalPlan = withOrigin(ctx) {
|
||||
plan(ctx.queryNoWith)
|
||||
plan(ctx.query)
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -978,7 +974,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
|
|||
* }}}
|
||||
*/
|
||||
override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
|
||||
val relation = plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample)
|
||||
val relation = plan(ctx.query).optionalMap(ctx.sample)(withSample)
|
||||
if (ctx.tableAlias.strictIdentifier == null) {
|
||||
// For un-aliased subqueries, use a default alias name that is not likely to conflict with
|
||||
// normal subquery names, so that parent operators can only access the columns in subquery by
|
||||
|
|
|
@ -4,6 +4,9 @@ create temporary view t2 as select * from values 0, 1 as t(id);
|
|||
-- WITH clause should not fall into infinite loop by referencing self
|
||||
WITH s AS (SELECT 1 FROM s) SELECT * FROM s;
|
||||
|
||||
WITH r AS (SELECT (SELECT * FROM r))
|
||||
SELECT * FROM r;
|
||||
|
||||
-- WITH clause should reference the base table
|
||||
WITH t AS (SELECT 1 FROM t) SELECT * FROM t;
|
||||
|
||||
|
@ -28,6 +31,112 @@ FROM CTE1 t1
|
|||
WITH t(x) AS (SELECT 1)
|
||||
SELECT * FROM t WHERE x = 1;
|
||||
|
||||
-- CTE in CTE definition
|
||||
WITH t as (
|
||||
WITH t2 AS (SELECT 1)
|
||||
SELECT * FROM t2
|
||||
)
|
||||
SELECT * FROM t;
|
||||
|
||||
-- CTE in subquery
|
||||
SELECT max(c) FROM (
|
||||
WITH t(c) AS (SELECT 1)
|
||||
SELECT * FROM t
|
||||
);
|
||||
|
||||
-- CTE in subquery expression
|
||||
SELECT (
|
||||
WITH t AS (SELECT 1)
|
||||
SELECT * FROM t
|
||||
);
|
||||
|
||||
-- CTE in CTE definition shadows outer
|
||||
WITH
|
||||
t AS (SELECT 1),
|
||||
t2 AS (
|
||||
WITH t AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
SELECT * FROM t2;
|
||||
|
||||
-- CTE in CTE definition shadows outer 2
|
||||
WITH
|
||||
t(c) AS (SELECT 1),
|
||||
t2 AS (
|
||||
SELECT (
|
||||
SELECT max(c) FROM (
|
||||
WITH t(c) AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
)
|
||||
)
|
||||
SELECT * FROM t2;
|
||||
|
||||
-- CTE in CTE definition shadows outer 3
|
||||
WITH
|
||||
t AS (SELECT 1),
|
||||
t2 AS (
|
||||
WITH t AS (SELECT 2),
|
||||
t2 AS (
|
||||
WITH t AS (SELECT 3)
|
||||
SELECT * FROM t
|
||||
)
|
||||
SELECT * FROM t2
|
||||
)
|
||||
SELECT * FROM t2;
|
||||
|
||||
-- CTE in subquery shadows outer
|
||||
WITH t(c) AS (SELECT 1)
|
||||
SELECT max(c) FROM (
|
||||
WITH t(c) AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
);
|
||||
|
||||
-- CTE in subquery shadows outer 2
|
||||
WITH t(c) AS (SELECT 1)
|
||||
SELECT sum(c) FROM (
|
||||
SELECT max(c) AS c FROM (
|
||||
WITH t(c) AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
);
|
||||
|
||||
-- CTE in subquery shadows outer 3
|
||||
WITH t(c) AS (SELECT 1)
|
||||
SELECT sum(c) FROM (
|
||||
WITH t(c) AS (SELECT 2)
|
||||
SELECT max(c) AS c FROM (
|
||||
WITH t(c) AS (SELECT 3)
|
||||
SELECT * FROM t
|
||||
)
|
||||
);
|
||||
|
||||
-- CTE in subquery expression shadows outer
|
||||
WITH t AS (SELECT 1)
|
||||
SELECT (
|
||||
WITH t AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
);
|
||||
|
||||
-- CTE in subquery expression shadows outer 2
|
||||
WITH t AS (SELECT 1)
|
||||
SELECT (
|
||||
SELECT (
|
||||
WITH t AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
);
|
||||
|
||||
-- CTE in subquery expression shadows outer 3
|
||||
WITH t AS (SELECT 1)
|
||||
SELECT (
|
||||
WITH t AS (SELECT 2)
|
||||
SELECT (
|
||||
WITH t AS (SELECT 3)
|
||||
SELECT * FROM t
|
||||
)
|
||||
);
|
||||
|
||||
-- Clean up
|
||||
DROP VIEW IF EXISTS t;
|
||||
DROP VIEW IF EXISTS t2;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 10
|
||||
-- Number of queries: 23
|
||||
|
||||
|
||||
-- !query 0
|
||||
|
@ -28,36 +28,46 @@ Table or view not found: s; line 1 pos 25
|
|||
|
||||
|
||||
-- !query 3
|
||||
WITH t AS (SELECT 1 FROM t) SELECT * FROM t
|
||||
WITH r AS (SELECT (SELECT * FROM r))
|
||||
SELECT * FROM r
|
||||
-- !query 3 schema
|
||||
struct<1:int>
|
||||
struct<>
|
||||
-- !query 3 output
|
||||
1
|
||||
1
|
||||
1
|
||||
org.apache.spark.sql.AnalysisException
|
||||
Table or view not found: r; line 1 pos 33
|
||||
|
||||
|
||||
-- !query 4
|
||||
WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2
|
||||
WITH t AS (SELECT 1 FROM t) SELECT * FROM t
|
||||
-- !query 4 schema
|
||||
struct<>
|
||||
struct<1:int>
|
||||
-- !query 4 output
|
||||
1
|
||||
1
|
||||
1
|
||||
|
||||
|
||||
-- !query 5
|
||||
WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2
|
||||
-- !query 5 schema
|
||||
struct<>
|
||||
-- !query 5 output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
Table or view not found: s2; line 1 pos 26
|
||||
|
||||
|
||||
-- !query 5
|
||||
WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1 cross join t2
|
||||
-- !query 5 schema
|
||||
struct<id:int,2:int>
|
||||
-- !query 5 output
|
||||
0 2
|
||||
0 2
|
||||
1 2
|
||||
1 2
|
||||
|
||||
|
||||
-- !query 6
|
||||
WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1 cross join t2
|
||||
-- !query 6 schema
|
||||
struct<id:int,2:int>
|
||||
-- !query 6 output
|
||||
0 2
|
||||
0 2
|
||||
1 2
|
||||
1 2
|
||||
|
||||
|
||||
-- !query 7
|
||||
WITH CTE1 AS (
|
||||
SELECT b.id AS id
|
||||
FROM T2 a
|
||||
|
@ -67,47 +77,213 @@ SELECT t1.id AS c1,
|
|||
t2.id AS c2
|
||||
FROM CTE1 t1
|
||||
CROSS JOIN CTE1 t2
|
||||
-- !query 6 schema
|
||||
struct<c1:int,c2:int>
|
||||
-- !query 6 output
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 1
|
||||
0 1
|
||||
0 1
|
||||
0 1
|
||||
1 0
|
||||
1 0
|
||||
1 0
|
||||
1 0
|
||||
1 1
|
||||
1 1
|
||||
1 1
|
||||
1 1
|
||||
|
||||
|
||||
-- !query 7
|
||||
WITH t(x) AS (SELECT 1)
|
||||
SELECT * FROM t WHERE x = 1
|
||||
-- !query 7 schema
|
||||
struct<x:int>
|
||||
struct<c1:int,c2:int>
|
||||
-- !query 7 output
|
||||
1
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 1
|
||||
0 1
|
||||
0 1
|
||||
0 1
|
||||
1 0
|
||||
1 0
|
||||
1 0
|
||||
1 0
|
||||
1 1
|
||||
1 1
|
||||
1 1
|
||||
1 1
|
||||
|
||||
|
||||
-- !query 8
|
||||
DROP VIEW IF EXISTS t
|
||||
WITH t(x) AS (SELECT 1)
|
||||
SELECT * FROM t WHERE x = 1
|
||||
-- !query 8 schema
|
||||
struct<>
|
||||
struct<x:int>
|
||||
-- !query 8 output
|
||||
|
||||
1
|
||||
|
||||
|
||||
-- !query 9
|
||||
DROP VIEW IF EXISTS t2
|
||||
WITH t as (
|
||||
WITH t2 AS (SELECT 1)
|
||||
SELECT * FROM t2
|
||||
)
|
||||
SELECT * FROM t
|
||||
-- !query 9 schema
|
||||
struct<>
|
||||
struct<1:int>
|
||||
-- !query 9 output
|
||||
1
|
||||
|
||||
|
||||
-- !query 10
|
||||
SELECT max(c) FROM (
|
||||
WITH t(c) AS (SELECT 1)
|
||||
SELECT * FROM t
|
||||
)
|
||||
-- !query 10 schema
|
||||
struct<max(c):int>
|
||||
-- !query 10 output
|
||||
1
|
||||
|
||||
|
||||
-- !query 11
|
||||
SELECT (
|
||||
WITH t AS (SELECT 1)
|
||||
SELECT * FROM t
|
||||
)
|
||||
-- !query 11 schema
|
||||
struct<scalarsubquery():int>
|
||||
-- !query 11 output
|
||||
1
|
||||
|
||||
|
||||
-- !query 12
|
||||
WITH
|
||||
t AS (SELECT 1),
|
||||
t2 AS (
|
||||
WITH t AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
SELECT * FROM t2
|
||||
-- !query 12 schema
|
||||
struct<1:int>
|
||||
-- !query 12 output
|
||||
1
|
||||
|
||||
|
||||
-- !query 13
|
||||
WITH
|
||||
t(c) AS (SELECT 1),
|
||||
t2 AS (
|
||||
SELECT (
|
||||
SELECT max(c) FROM (
|
||||
WITH t(c) AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
)
|
||||
)
|
||||
SELECT * FROM t2
|
||||
-- !query 13 schema
|
||||
struct<scalarsubquery():int>
|
||||
-- !query 13 output
|
||||
1
|
||||
|
||||
|
||||
-- !query 14
|
||||
WITH
|
||||
t AS (SELECT 1),
|
||||
t2 AS (
|
||||
WITH t AS (SELECT 2),
|
||||
t2 AS (
|
||||
WITH t AS (SELECT 3)
|
||||
SELECT * FROM t
|
||||
)
|
||||
SELECT * FROM t2
|
||||
)
|
||||
SELECT * FROM t2
|
||||
-- !query 14 schema
|
||||
struct<2:int>
|
||||
-- !query 14 output
|
||||
2
|
||||
|
||||
|
||||
-- !query 15
|
||||
WITH t(c) AS (SELECT 1)
|
||||
SELECT max(c) FROM (
|
||||
WITH t(c) AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
-- !query 15 schema
|
||||
struct<max(c):int>
|
||||
-- !query 15 output
|
||||
2
|
||||
|
||||
|
||||
-- !query 16
|
||||
WITH t(c) AS (SELECT 1)
|
||||
SELECT sum(c) FROM (
|
||||
SELECT max(c) AS c FROM (
|
||||
WITH t(c) AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
)
|
||||
-- !query 16 schema
|
||||
struct<sum(c):bigint>
|
||||
-- !query 16 output
|
||||
2
|
||||
|
||||
|
||||
-- !query 17
|
||||
WITH t(c) AS (SELECT 1)
|
||||
SELECT sum(c) FROM (
|
||||
WITH t(c) AS (SELECT 2)
|
||||
SELECT max(c) AS c FROM (
|
||||
WITH t(c) AS (SELECT 3)
|
||||
SELECT * FROM t
|
||||
)
|
||||
)
|
||||
-- !query 17 schema
|
||||
struct<sum(c):bigint>
|
||||
-- !query 17 output
|
||||
3
|
||||
|
||||
|
||||
-- !query 18
|
||||
WITH t AS (SELECT 1)
|
||||
SELECT (
|
||||
WITH t AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
-- !query 18 schema
|
||||
struct<scalarsubquery():int>
|
||||
-- !query 18 output
|
||||
1
|
||||
|
||||
|
||||
-- !query 19
|
||||
WITH t AS (SELECT 1)
|
||||
SELECT (
|
||||
SELECT (
|
||||
WITH t AS (SELECT 2)
|
||||
SELECT * FROM t
|
||||
)
|
||||
)
|
||||
-- !query 19 schema
|
||||
struct<scalarsubquery():int>
|
||||
-- !query 19 output
|
||||
1
|
||||
|
||||
|
||||
-- !query 20
|
||||
WITH t AS (SELECT 1)
|
||||
SELECT (
|
||||
WITH t AS (SELECT 2)
|
||||
SELECT (
|
||||
WITH t AS (SELECT 3)
|
||||
SELECT * FROM t
|
||||
)
|
||||
)
|
||||
-- !query 20 schema
|
||||
struct<scalarsubquery():int>
|
||||
-- !query 20 output
|
||||
1
|
||||
|
||||
|
||||
-- !query 21
|
||||
DROP VIEW IF EXISTS t
|
||||
-- !query 21 schema
|
||||
struct<>
|
||||
-- !query 21 output
|
||||
|
||||
|
||||
|
||||
-- !query 22
|
||||
DROP VIEW IF EXISTS t2
|
||||
-- !query 22 schema
|
||||
struct<>
|
||||
-- !query 22 output
|
||||
|
||||
|
|
Loading…
Reference in a new issue