[SPARK-15047][SQL] Cleanup SQL Parser

## What changes were proposed in this pull request?
This PR addresses a few minor issues in SQL parser:

- Removes some unused rules and keywords in the grammar.
- Removes the code path for fallback SQL parsing (this was needed for Hive native parsing).
- Uses `UnresolvedGenerator` instead of hard-coding `Explode` & `JsonTuple`.
- Adds a more generic way of creating error messages for unsupported Hive features.
- Uses `visitFunctionName` as much as possible.
- Interprets a `CatalogColumn`'s `DataType` directly instead of parsing it again.

## How was this patch tested?
Existing tests.

Author: Herman van Hovell <hvanhovell@questtec.nl>

Closes #12826 from hvanhovell/SPARK-15047.
This commit is contained in:
Herman van Hovell 2016-05-02 18:12:31 -07:00 committed by Reynold Xin
parent d37c7f7f04
commit 1c19c2769e
6 changed files with 31 additions and 77 deletions

View file

@@ -121,17 +121,13 @@ statement
| UNCACHE TABLE identifier #uncacheTable
| CLEAR CACHE #clearCache
| LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE
tableIdentifier partitionSpec? #loadData
tableIdentifier partitionSpec? #loadData
| TRUNCATE TABLE tableIdentifier partitionSpec?
(COLUMNS identifierList)? #truncateTable
| ADD identifier .*? #addResource
| SET ROLE .*? #failNativeCommand
| SET .*? #setConfiguration
| kws=unsupportedHiveNativeCommands .*? #failNativeCommand
| hiveNativeCommands #executeNativeCommand
;
hiveNativeCommands
: TRUNCATE TABLE tableIdentifier partitionSpec?
(COLUMNS identifierList)?
| unsupportedHiveNativeCommands .*? #failNativeCommand
;
unsupportedHiveNativeCommands
@@ -267,14 +263,6 @@ nestedConstantList
: '(' constantList (',' constantList)* ')'
;
skewedLocation
: (constant | constantList) EQ STRING
;
skewedLocationList
: '(' skewedLocation (',' skewedLocation)* ')'
;
createFileFormat
: STORED AS fileFormat
| STORED BY storageHandler
@@ -609,11 +597,6 @@ explainOption
: LOGICAL | FORMATTED | EXTENDED | CODEGEN
;
transactionMode
: ISOLATION LEVEL SNAPSHOT #isolationLevel
| READ accessMode=(ONLY | WRITE) #transactionAccessMode
;
qualifiedName
: identifier ('.' identifier)*
;
@@ -661,8 +644,7 @@ nonReserved
| VIEW | REPLACE
| IF
| NO | DATA
| START | TRANSACTION | COMMIT | ROLLBACK | WORK | ISOLATION | LEVEL
| SNAPSHOT | READ | WRITE | ONLY
| START | TRANSACTION | COMMIT | ROLLBACK
| SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
| EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST
| AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT
@@ -778,13 +760,6 @@ START: 'START';
TRANSACTION: 'TRANSACTION';
COMMIT: 'COMMIT';
ROLLBACK: 'ROLLBACK';
WORK: 'WORK';
ISOLATION: 'ISOLATION';
LEVEL: 'LEVEL';
SNAPSHOT: 'SNAPSHOT';
READ: 'READ';
WRITE: 'WRITE';
ONLY: 'ONLY';
MACRO: 'MACRO';
IF: 'IF';

View file

@@ -81,26 +81,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
* ******************************************************************************************** */
protected def plan(tree: ParserRuleContext): LogicalPlan = typedVisit(tree)
/**
* Make sure we do not try to create a plan for a native command.
*/
override def visitExecuteNativeCommand(ctx: ExecuteNativeCommandContext): LogicalPlan = null
/**
* Create a plan for a SHOW FUNCTIONS command.
*/
override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) {
import ctx._
if (qualifiedName != null) {
val names = qualifiedName().identifier().asScala.map(_.getText).toList
names match {
case db :: name :: Nil =>
ShowFunctions(Some(db), Some(name))
case name :: Nil =>
ShowFunctions(None, Some(name))
case _ =>
throw new ParseException("SHOW FUNCTIONS unsupported name", ctx)
}
val name = visitFunctionName(qualifiedName)
ShowFunctions(name.database, Some(name.funcName))
} else if (pattern != null) {
ShowFunctions(None, Some(string(pattern)))
} else {
@@ -117,7 +105,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
if (describeFuncName.STRING() != null) {
string(describeFuncName.STRING())
} else if (describeFuncName.qualifiedName() != null) {
describeFuncName.qualifiedName().identifier().asScala.map(_.getText).mkString(".")
visitFunctionName(describeFuncName.qualifiedName).unquotedString
} else {
describeFuncName.getText
}
@@ -554,19 +542,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
query: LogicalPlan,
ctx: LateralViewContext): LogicalPlan = withOrigin(ctx) {
val expressions = expressionList(ctx.expression)
// Create the generator.
val generator = ctx.qualifiedName.getText.toLowerCase match {
case "explode" if expressions.size == 1 =>
Explode(expressions.head)
case "json_tuple" =>
JsonTuple(expressions)
case name =>
UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions)
}
Generate(
generator,
UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions),
join = true,
outer = ctx.OUTER != null,
Some(ctx.tblName.getText.toLowerCase),

View file

@@ -53,19 +53,15 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
astBuilder.visitSingleStatement(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ => nativeCommand(sqlText)
case _ =>
val position = Origin(None, None)
throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position)
}
}
/** Get the builder (visitor) which converts a ParseTree into an AST. */
protected def astBuilder: AstBuilder
/** Create a native command, or fail when this is not supported. */
protected def nativeCommand(sqlText: String): LogicalPlan = {
val position = Origin(None, None)
throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position)
}
protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
logInfo(s"Parsing command: $command")

View file

@@ -53,7 +53,7 @@ class PlanParserSuite extends PlanTest {
assertEqual("show functions foo", ShowFunctions(None, Some("foo")))
assertEqual("show functions foo.bar", ShowFunctions(Some("foo"), Some("bar")))
assertEqual("show functions 'foo\\\\.*'", ShowFunctions(None, Some("foo\\.*")))
intercept("show functions foo.bar.baz", "SHOW FUNCTIONS unsupported name")
intercept("show functions foo.bar.baz", "Unsupported function name")
}
test("describe function") {
@@ -263,11 +263,14 @@ class PlanParserSuite extends PlanTest {
}
test("lateral view") {
val explode = UnresolvedGenerator(FunctionIdentifier("explode"), Seq('x))
val jsonTuple = UnresolvedGenerator(FunctionIdentifier("json_tuple"), Seq('x, 'y))
// Single lateral view
assertEqual(
"select * from t lateral view explode(x) expl as x",
table("t")
.generate(Explode('x), join = true, outer = false, Some("expl"), Seq("x"))
.generate(explode, join = true, outer = false, Some("expl"), Seq("x"))
.select(star()))
// Multiple lateral views
@@ -277,12 +280,12 @@
|lateral view explode(x) expl
|lateral view outer json_tuple(x, y) jtup q, z""".stripMargin,
table("t")
.generate(Explode('x), join = true, outer = false, Some("expl"), Seq.empty)
.generate(JsonTuple(Seq('x, 'y)), join = true, outer = true, Some("jtup"), Seq("q", "z"))
.generate(explode, join = true, outer = false, Some("expl"), Seq.empty)
.generate(jsonTuple, join = true, outer = true, Some("jtup"), Seq("q", "z"))
.select(star()))
// Multi-Insert lateral views.
val from = table("t1").generate(Explode('x), join = true, outer = false, Some("expl"), Seq("x"))
val from = table("t1").generate(explode, join = true, outer = false, Some("expl"), Seq("x"))
assertEqual(
"""from t1
|lateral view explode(x) expl as x
@@ -294,7 +297,7 @@
|where s < 10
""".stripMargin,
Union(from
.generate(JsonTuple(Seq('x, 'y)), join = true, outer = false, Some("jtup"), Seq("q", "z"))
.generate(jsonTuple, join = true, outer = false, Some("jtup"), Seq("q", "z"))
.select(star())
.insertInto("t2"),
from.where('s < 10).select(star()).insertInto("t3")))

View file

@@ -21,6 +21,7 @@ import scala.collection.JavaConverters._
import scala.util.Try
import org.antlr.v4.runtime.{ParserRuleContext, Token}
import org.antlr.v4.runtime.tree.TerminalNode
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.catalyst.TableIdentifier
@@ -31,7 +32,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation,
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution}
import org.apache.spark.sql.types.DataType
/**
* Concrete parser for Spark SQL statements.
@@ -780,9 +781,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
*/
override def visitFailNativeCommand(
ctx: FailNativeCommandContext): LogicalPlan = withOrigin(ctx) {
val keywords = if (ctx.kws != null) {
Seq(ctx.kws.kw1, ctx.kws.kw2, ctx.kws.kw3, ctx.kws.kw4, ctx.kws.kw5, ctx.kws.kw6)
.filter(_ != null).map(_.getText).mkString(" ")
val keywords = if (ctx.unsupportedHiveNativeCommands != null) {
ctx.unsupportedHiveNativeCommands.children.asScala.collect {
case n: TerminalNode => n.getText
}.mkString(" ")
} else {
// SET ROLE is the exception to the rule, because we handle this before other SET commands.
"SET ROLE"
@@ -1109,7 +1111,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
// just convert the whole type string to lower case, otherwise the struct field names
// will no longer be case sensitive. Instead, we rely on our parser to get the proper
// case before passing it to Hive.
CatalystSqlParser.parseDataType(col.dataType.getText).catalogString,
typedVisit[DataType](col.dataType).catalogString,
nullable = true,
Option(col.STRING).map(string))
}

View file

@@ -254,12 +254,13 @@ class HiveDDLCommandSuite extends PlanTest {
}
test("use native json_tuple instead of hive's UDTF in LATERAL VIEW") {
val plan = parser.parsePlan(
val analyzer = TestHive.sparkSession.sessionState.analyzer
val plan = analyzer.execute(parser.parsePlan(
"""
|SELECT *
|FROM (SELECT '{"f1": "value1", "f2": 12}' json) test
|LATERAL VIEW json_tuple(json, 'f1', 'f2') jt AS a, b
""".stripMargin)
""".stripMargin))
assert(plan.children.head.asInstanceOf[Generate].generator.isInstanceOf[JsonTuple])
}