[SPARK-13205][SQL] SQL Generation Support for Self Join

This PR addresses two issues:
  - Self join does not work in SQL Generation
  - When creating new instances for `LogicalRelation`, `metastoreTableIdentifier` is lost.

liancheng Could you please review the code changes? Thank you!

Author: gatorsmile <gatorsmile@gmail.com>

Closes #11084 from gatorsmile/selfJoinInSQLGen.
This commit is contained in:
gatorsmile 2016-02-11 11:08:21 +08:00 committed by Cheng Lian
parent 663cc400f3
commit 0f09f02269
3 changed files with 22 additions and 2 deletions

View file

@ -76,7 +76,11 @@ case class LogicalRelation(
/**
 * Used to lookup original attribute capitalization.
 *
 * Built from `output`: every output attribute is registered with itself as both
 * key and value.
 */
val attributeMap: AttributeMap[AttributeReference] = AttributeMap(output.map(o => (o, o)))
// Pre-change version recorded by this diff: the copy is built from `relation` only,
// so `expectedOutputAttributes` and `metastoreTableIdentifier` are not carried over.
def newInstance(): this.type = LogicalRelation(relation).asInstanceOf[this.type]
// Post-change version: builds the copy from `relation`, `expectedOutputAttributes`
// and `metastoreTableIdentifier`, so the table identifier is no longer lost.
// NOTE(review): `expectedOutputAttributes` is forwarded unchanged; if it pins
// expression IDs, the copy may share them with this instance -- TODO confirm.
def newInstance(): this.type =
LogicalRelation(
relation,
expectedOutputAttributes,
metastoreTableIdentifier).asInstanceOf[this.type]
/** One-line description: `Relation[<output attributes joined by ",">] <relation>`. */
override def simpleString: String = s"Relation[${output.mkString(",")}] $relation"
}

View file

@ -142,7 +142,15 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
Some(s"`$database`.`$table`")
case Subquery(alias, child) =>
toSQL(child).map(childSQL => s"($childSQL) AS $alias")
toSQL(child).map( childSQL =>
child match {
// Parentheses are not used for persisted data source relations
// e.g., select x.c1 from (t1) as x inner join (t1) as y on x.c1 = y.c1
case Subquery(_, _: LogicalRelation | _: MetastoreRelation) =>
s"$childSQL AS $alias"
case _ =>
s"($childSQL) AS $alias"
})
case Join(left, right, joinType, condition) =>
for {

View file

@ -104,6 +104,14 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0")
}
// Self join: t1 is joined with itself under the aliases x and y on `key`,
// and the query is handed to checkHiveQl.
test("self join") {
checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key")
}
// Same self join of t1 (aliases x and y), with COUNT(*) grouped by x.key,
// handed to checkHiveQl.
test("self join with group by") {
checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key")
}
// Chains three SELECTs over t0 with UNION ALL and hands the query to checkHiveQl.
test("three-child union") {
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0")
}