[SPARK-15232][SQL] Add subquery SQL building tests to LogicalPlanToSQLSuite

## What changes were proposed in this pull request?

We currently test subquery SQL building using the `HiveCompatibilitySuite`. This is not desired since SQL building is actually a part of `sql/core` and because we are slowly reducing our dependency on Hive. This PR adds the same tests from the whitelist of `HiveCompatibilitySuite` into `LogicalPlanToSQLSuite`.

## How was this patch tested?

This adds more testcases. Pass the Jenkins tests.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #14383 from dongjoon-hyun/SPARK-15232.
This commit is contained in:
Dongjoon Hyun 2016-07-27 23:29:26 -07:00 committed by Reynold Xin
parent 11d427c924
commit 5c2ae79bfc
15 changed files with 282 additions and 0 deletions

View file

@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
SELECT /*+ MAPJOIN(srcpart) */ subq.key1, z.value
FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
ORDER BY subq.key1, z.value
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key1`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_7` AS `gen_attr_6`, `gen_attr_9` AS `gen_attr_8`, `gen_attr_11` AS `gen_attr_10` FROM (SELECT `key` AS `gen_attr_5`, `value` AS `gen_attr_7` FROM `default`.`src1`) AS gen_subquery_0 INNER JOIN (SELECT `key` AS `gen_attr_9`, `value` AS `gen_attr_11` FROM `default`.`src`) AS gen_subquery_1 ON (`gen_attr_5` = `gen_attr_9`)) AS subq INNER JOIN (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1`, `ds` AS `gen_attr_3`, `hr` AS `gen_attr_4` FROM `default`.`srcpart`) AS gen_subquery_2 ON (((`gen_attr_0` = `gen_attr_2`) AND (`gen_attr_3` = "2008-04-08")) AND (CAST(`gen_attr_4` AS DOUBLE) = CAST(11 AS DOUBLE))) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_3

View file

@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
SELECT a.k, a.c
FROM (SELECT b.key as k, count(1) as c
FROM src b
GROUP BY b.key) a
WHERE a.k >= 90
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `k`, `gen_attr_1` AS `c` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_2` AS `gen_attr_0`, count(1) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_2`) AS a WHERE (`gen_attr_0` >= 90)) AS a

View file

@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
where exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_9')
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3)) AS b

View file

@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from (select *
from src b
where exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_9')) a
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3)) AS a) AS a

View file

@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select b.key, count(*)
from src b
group by b.key
having exists (select a.key
from src a
where a.key = b.key and a.value > 'val_9')
--------------------------------------------------------------------------------
SELECT `gen_attr_1` AS `key`, `gen_attr_2` AS `count(1)` FROM (SELECT `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_1`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_1` HAVING EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_0` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_5` > "val_9")) AS gen_subquery_1 WHERE (`gen_attr_0` = `gen_attr_1`)) AS gen_subquery_3)) AS b

View file

@ -0,0 +1,10 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from (select b.key, count(*)
from src b
group by b.key
having exists (select a.key
from src a
where a.key = b.key and a.value > 'val_9')) a
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_2` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_5` > "val_9")) AS gen_subquery_1 WHERE (`gen_attr_2` = `gen_attr_0`)) AS gen_subquery_3)) AS a) AS a

View file

@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select b.key, min(b.value)
from src b
group by b.key
having exists (select a.key
from src a
where a.value > 'val_9' and a.value = min(b.value))
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_4`) AS `gen_attr_1`, min(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING EXISTS(SELECT `gen_attr_5` AS `1` FROM (SELECT 1 AS `gen_attr_5` FROM (SELECT `gen_attr_6`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_2 WHERE (`gen_attr_2` = `gen_attr_3`)) AS gen_subquery_4)) AS gen_subquery_1) AS b

View file

@ -0,0 +1,6 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
SELECT key
FROM src
WHERE key in (SELECT max(key) FROM src)
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key` FROM (SELECT `gen_attr_0` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_0 WHERE (`gen_attr_0` IN (SELECT `gen_attr_3` AS `_c0` FROM (SELECT `gen_attr_1` AS `gen_attr_3` FROM (SELECT max(`gen_attr_4`) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2) AS gen_subquery_1) AS gen_subquery_3))) AS src

View file

@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select key, count(*)
from src
group by key
having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key)
order by key
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (`gen_attr_2` IN (SELECT `gen_attr_5` AS `_c0` FROM (SELECT `gen_attr_3` AS `gen_attr_5` FROM (SELECT count(1) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_7` FROM `default`.`src`) AS gen_subquery_3 WHERE (CAST(`gen_attr_6` AS DOUBLE) = CAST("90" AS DOUBLE)) GROUP BY `gen_attr_6`) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS src

View file

@ -0,0 +1,10 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select b.key, min(b.value)
from src b
group by b.key
having b.key in (select a.key
from src a
where a.value > 'val_9' and a.value = min(b.value))
order by b.key
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_5`) AS `gen_attr_1`, min(`gen_attr_5`) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (struct(`gen_attr_0`, `gen_attr_4`) IN (SELECT `gen_attr_6` AS `_c0`, `gen_attr_7` AS `_c1` FROM (SELECT `gen_attr_2` AS `gen_attr_6`, `gen_attr_3` AS `gen_attr_7` FROM (SELECT `gen_attr_2`, `gen_attr_3` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_3` > "val_9")) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS b

View file

@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
where not exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_2')
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE (NOT EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_2")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3))) AS b

View file

@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
where not exists (select a.key
from src a
where b.value = a.value and a.value > 'val_2')
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE (NOT EXISTS(SELECT `gen_attr_3` AS `1` FROM (SELECT 1 AS `gen_attr_3` FROM (SELECT `gen_attr_4`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_2")) AS gen_subquery_1 WHERE (`gen_attr_1` = `gen_attr_2`)) AS gen_subquery_3))) AS b

View file

@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
group by key, value
having not exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_12')
--------------------------------------------------------------------------------
SELECT `gen_attr_3` AS `key`, `gen_attr_0` AS `value` FROM (SELECT `gen_attr_3`, `gen_attr_0` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_0` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_3`, `gen_attr_0` HAVING (NOT EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_2`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_1` > "val_12")) AS gen_subquery_1 WHERE ((`gen_attr_0` = `gen_attr_1`) AND (`gen_attr_2` = `gen_attr_3`))) AS gen_subquery_3))) AS b

View file

@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
group by key, value
having not exists (select distinct a.key
from src a
where b.value = a.value and a.value > 'val_12')
--------------------------------------------------------------------------------
SELECT `gen_attr_2` AS `key`, `gen_attr_0` AS `value` FROM (SELECT `gen_attr_2`, `gen_attr_0` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_0` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_2`, `gen_attr_0` HAVING (NOT EXISTS(SELECT `gen_attr_3` AS `1` FROM (SELECT 1 AS `gen_attr_3` FROM (SELECT DISTINCT `gen_attr_4`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_1` > "val_12")) AS gen_subquery_1 WHERE (`gen_attr_0` = `gen_attr_1`)) AS gen_subquery_3))) AS b

View file

@ -934,6 +934,169 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
}
}
// Broadcast-join case: a MAPJOIN-hinted query whose left input is a derived table
// (src1 joined to src on key) that is then joined to srcpart and ordered.
// NOTE(review): checkSQL is defined outside this view; presumably its second argument
// names the golden answer file -- confirm against the helper.
test("broadcast join") {
  // The closing margin line strips to an empty tail, so the text ends with a newline.
  val hintedJoinQuery =
    """
      |SELECT /*+ MAPJOIN(srcpart) */ subq.key1, z.value
      |FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
      | FROM src1 x JOIN src y ON (x.key = y.key)) subq
      |JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
      |ORDER BY subq.key1, z.value
      |""".stripMargin
  checkSQL(hintedJoinQuery, "broadcast_join_subquery")
}
// Derived-table case: an aggregate over src (count per key) is aliased as `a`
// and filtered by the outer query on a.k.
// NOTE(review): checkSQL is defined outside this view; presumably its second argument
// names the golden answer file -- confirm against the helper.
test("subquery using single table") {
  // The closing margin line strips to an empty tail, so the text ends with a newline.
  val groupedSubquery =
    """
      |SELECT a.k, a.c
      |FROM (SELECT b.key as k, count(1) as c
      | FROM src b
      | GROUP BY b.key) a
      |WHERE a.k >= 90
      |""".stripMargin
  checkSQL(groupedSubquery, "subq2")
}
// Correlated EXISTS in a WHERE clause, checked twice: once as a top-level query and
// once with the same query wrapped in an outer `select *` over a derived table.
// NOTE(review): checkSQL is defined outside this view; presumably its second argument
// names the golden answer file -- confirm against the helper.
test("correlated subqueries using EXISTS on where clause") {
  // Each closing margin line strips to an empty tail, so the text ends with a newline.
  val topLevelExists =
    """
      |select *
      |from src b
      |where exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_9')
      |""".stripMargin
  checkSQL(topLevelExists, "subquery_exists_1")

  val nestedExists =
    """
      |select *
      |from (select *
      | from src b
      | where exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_9')) a
      |""".stripMargin
  checkSQL(nestedExists, "subquery_exists_2")
}
// Correlated EXISTS in a HAVING clause, three variants: a plain grouped query, the same
// query wrapped in an outer `select *`, and one whose subquery references the outer
// aggregate min(b.value).
// NOTE(review): checkSQL is defined outside this view; presumably its second argument
// names the golden answer file -- confirm against the helper.
test("correlated subqueries using EXISTS on having clause") {
  // Each closing margin line strips to an empty tail, so the text ends with a newline.
  val groupedExists =
    """
      |select b.key, count(*)
      |from src b
      |group by b.key
      |having exists (select a.key
      | from src a
      | where a.key = b.key and a.value > 'val_9')
      |""".stripMargin
  checkSQL(groupedExists, "subquery_exists_having_1")

  val nestedGroupedExists =
    """
      |select *
      |from (select b.key, count(*)
      | from src b
      | group by b.key
      | having exists (select a.key
      | from src a
      | where a.key = b.key and a.value > 'val_9')) a
      |""".stripMargin
  checkSQL(nestedGroupedExists, "subquery_exists_having_2")

  val existsOnOuterAggregate =
    """
      |select b.key, min(b.value)
      |from src b
      |group by b.key
      |having exists (select a.key
      | from src a
      | where a.value > 'val_9' and a.value = min(b.value))
      |""".stripMargin
  checkSQL(existsOnOuterAggregate, "subquery_exists_having_3")
}
// Correlated NOT EXISTS in a WHERE clause, two variants: correlation on both value and
// key, and correlation on value only.
// NOTE(review): checkSQL is defined outside this view; presumably its second argument
// names the golden answer file -- confirm against the helper.
test("correlated subqueries using NOT EXISTS on where clause") {
  // Each closing margin line strips to an empty tail, so the text ends with a newline.
  val correlatedOnValueAndKey =
    """
      |select *
      |from src b
      |where not exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_2')
      |""".stripMargin
  checkSQL(correlatedOnValueAndKey, "subquery_not_exists_1")

  val correlatedOnValueOnly =
    """
      |select *
      |from src b
      |where not exists (select a.key
      | from src a
      | where b.value = a.value and a.value > 'val_2')
      |""".stripMargin
  checkSQL(correlatedOnValueOnly, "subquery_not_exists_2")
}
// Correlated NOT EXISTS in a HAVING clause over `group by key, value`, two variants:
// correlation on value and key, and a `select distinct` subquery correlated on value only.
// NOTE(review): checkSQL is defined outside this view; presumably its second argument
// names the golden answer file -- confirm against the helper.
test("correlated subqueries using NOT EXISTS on having clause") {
  // Each closing margin line strips to an empty tail, so the text ends with a newline.
  val correlatedOnValueAndKey =
    """
      |select *
      |from src b
      |group by key, value
      |having not exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_12')
      |""".stripMargin
  checkSQL(correlatedOnValueAndKey, "subquery_not_exists_having_1")

  val distinctCorrelatedOnValue =
    """
      |select *
      |from src b
      |group by key, value
      |having not exists (select distinct a.key
      | from src a
      | where b.value = a.value and a.value > 'val_12')
      |""".stripMargin
  checkSQL(distinctCorrelatedOnValue, "subquery_not_exists_having_2")
}
// Uncorrelated IN subquery in a WHERE clause: key is matched against max(key) of src.
// NOTE(review): checkSQL is defined outside this view; presumably its second argument
// names the golden answer file -- confirm against the helper.
test("subquery using IN on where clause") {
  // The closing margin line strips to an empty tail, so the text ends with a newline.
  val inMaxKeyQuery =
    """
      |SELECT key
      |FROM src
      |WHERE key in (SELECT max(key) FROM src)
      |""".stripMargin
  checkSQL(inMaxKeyQuery, "subquery_in")
}
// IN subquery in a HAVING clause, two variants: an uncorrelated one comparing count(*)
// with a grouped count, and a correlated one whose subquery references the outer
// aggregate min(b.value). Both queries end with an ORDER BY on the key.
// NOTE(review): checkSQL is defined outside this view; presumably its second argument
// names the golden answer file -- confirm against the helper.
test("subquery using IN on having clause") {
  // Each closing margin line strips to an empty tail, so the text ends with a newline.
  val countInGroupedCounts =
    """
      |select key, count(*)
      |from src
      |group by key
      |having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key)
      |order by key
      |""".stripMargin
  checkSQL(countInGroupedCounts, "subquery_in_having_1")

  val keyInCorrelatedSubquery =
    """
      |select b.key, min(b.value)
      |from src b
      |group by b.key
      |having b.key in (select a.key
      | from src a
      | where a.value > 'val_9' and a.value = min(b.value))
      |order by b.key
      |""".stripMargin
  checkSQL(keyInCorrelatedSubquery, "subquery_in_having_2")
}
test("SPARK-14933 - select orc table") {
withTable("orc_t") {
sql("create table orc_t stored as orc as select 1 as c1, 'abc' as c2")