[SPARK-15047][SQL] Cleanup SQL Parser
## What changes were proposed in this pull request? This PR addresses a few minor issues in SQL parser: - Removes some unused rules and keywords in the grammar. - Removes code path for fallback SQL parsing (was needed for Hive native parsing). - Use `UnresolvedGenerator` instead of hard-coding `Explode` & `JsonTuple`. - Adds a more generic way of creating error messages for unsupported Hive features. - Use `visitFunctionName` as much as possible. - Interpret a `CatalogColumn`'s `DataType` directly instead of parsing it again. ## How was this patch tested? Existing tests. Author: Herman van Hovell <hvanhovell@questtec.nl> Closes #12826 from hvanhovell/SPARK-15047.
This commit is contained in:
parent
d37c7f7f04
commit
1c19c2769e
|
@ -121,17 +121,13 @@ statement
|
|||
| UNCACHE TABLE identifier #uncacheTable
|
||||
| CLEAR CACHE #clearCache
|
||||
| LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE
|
||||
tableIdentifier partitionSpec? #loadData
|
||||
tableIdentifier partitionSpec? #loadData
|
||||
| TRUNCATE TABLE tableIdentifier partitionSpec?
|
||||
(COLUMNS identifierList)? #truncateTable
|
||||
| ADD identifier .*? #addResource
|
||||
| SET ROLE .*? #failNativeCommand
|
||||
| SET .*? #setConfiguration
|
||||
| kws=unsupportedHiveNativeCommands .*? #failNativeCommand
|
||||
| hiveNativeCommands #executeNativeCommand
|
||||
;
|
||||
|
||||
hiveNativeCommands
|
||||
: TRUNCATE TABLE tableIdentifier partitionSpec?
|
||||
(COLUMNS identifierList)?
|
||||
| unsupportedHiveNativeCommands .*? #failNativeCommand
|
||||
;
|
||||
|
||||
unsupportedHiveNativeCommands
|
||||
|
@ -267,14 +263,6 @@ nestedConstantList
|
|||
: '(' constantList (',' constantList)* ')'
|
||||
;
|
||||
|
||||
skewedLocation
|
||||
: (constant | constantList) EQ STRING
|
||||
;
|
||||
|
||||
skewedLocationList
|
||||
: '(' skewedLocation (',' skewedLocation)* ')'
|
||||
;
|
||||
|
||||
createFileFormat
|
||||
: STORED AS fileFormat
|
||||
| STORED BY storageHandler
|
||||
|
@ -609,11 +597,6 @@ explainOption
|
|||
: LOGICAL | FORMATTED | EXTENDED | CODEGEN
|
||||
;
|
||||
|
||||
transactionMode
|
||||
: ISOLATION LEVEL SNAPSHOT #isolationLevel
|
||||
| READ accessMode=(ONLY | WRITE) #transactionAccessMode
|
||||
;
|
||||
|
||||
qualifiedName
|
||||
: identifier ('.' identifier)*
|
||||
;
|
||||
|
@ -661,8 +644,7 @@ nonReserved
|
|||
| VIEW | REPLACE
|
||||
| IF
|
||||
| NO | DATA
|
||||
| START | TRANSACTION | COMMIT | ROLLBACK | WORK | ISOLATION | LEVEL
|
||||
| SNAPSHOT | READ | WRITE | ONLY
|
||||
| START | TRANSACTION | COMMIT | ROLLBACK
|
||||
| SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
|
||||
| EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST
|
||||
| AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT
|
||||
|
@ -778,13 +760,6 @@ START: 'START';
|
|||
TRANSACTION: 'TRANSACTION';
|
||||
COMMIT: 'COMMIT';
|
||||
ROLLBACK: 'ROLLBACK';
|
||||
WORK: 'WORK';
|
||||
ISOLATION: 'ISOLATION';
|
||||
LEVEL: 'LEVEL';
|
||||
SNAPSHOT: 'SNAPSHOT';
|
||||
READ: 'READ';
|
||||
WRITE: 'WRITE';
|
||||
ONLY: 'ONLY';
|
||||
MACRO: 'MACRO';
|
||||
|
||||
IF: 'IF';
|
||||
|
|
|
@ -81,26 +81,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
|
|||
* ******************************************************************************************** */
|
||||
protected def plan(tree: ParserRuleContext): LogicalPlan = typedVisit(tree)
|
||||
|
||||
/**
|
||||
* Make sure we do not try to create a plan for a native command.
|
||||
*/
|
||||
override def visitExecuteNativeCommand(ctx: ExecuteNativeCommandContext): LogicalPlan = null
|
||||
|
||||
/**
|
||||
* Create a plan for a SHOW FUNCTIONS command.
|
||||
*/
|
||||
override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) {
|
||||
import ctx._
|
||||
if (qualifiedName != null) {
|
||||
val names = qualifiedName().identifier().asScala.map(_.getText).toList
|
||||
names match {
|
||||
case db :: name :: Nil =>
|
||||
ShowFunctions(Some(db), Some(name))
|
||||
case name :: Nil =>
|
||||
ShowFunctions(None, Some(name))
|
||||
case _ =>
|
||||
throw new ParseException("SHOW FUNCTIONS unsupported name", ctx)
|
||||
}
|
||||
val name = visitFunctionName(qualifiedName)
|
||||
ShowFunctions(name.database, Some(name.funcName))
|
||||
} else if (pattern != null) {
|
||||
ShowFunctions(None, Some(string(pattern)))
|
||||
} else {
|
||||
|
@ -117,7 +105,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
|
|||
if (describeFuncName.STRING() != null) {
|
||||
string(describeFuncName.STRING())
|
||||
} else if (describeFuncName.qualifiedName() != null) {
|
||||
describeFuncName.qualifiedName().identifier().asScala.map(_.getText).mkString(".")
|
||||
visitFunctionName(describeFuncName.qualifiedName).unquotedString
|
||||
} else {
|
||||
describeFuncName.getText
|
||||
}
|
||||
|
@ -554,19 +542,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
|
|||
query: LogicalPlan,
|
||||
ctx: LateralViewContext): LogicalPlan = withOrigin(ctx) {
|
||||
val expressions = expressionList(ctx.expression)
|
||||
|
||||
// Create the generator.
|
||||
val generator = ctx.qualifiedName.getText.toLowerCase match {
|
||||
case "explode" if expressions.size == 1 =>
|
||||
Explode(expressions.head)
|
||||
case "json_tuple" =>
|
||||
JsonTuple(expressions)
|
||||
case name =>
|
||||
UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions)
|
||||
}
|
||||
|
||||
Generate(
|
||||
generator,
|
||||
UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions),
|
||||
join = true,
|
||||
outer = ctx.OUTER != null,
|
||||
Some(ctx.tblName.getText.toLowerCase),
|
||||
|
|
|
@ -53,19 +53,15 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
|
|||
override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
|
||||
astBuilder.visitSingleStatement(parser.singleStatement()) match {
|
||||
case plan: LogicalPlan => plan
|
||||
case _ => nativeCommand(sqlText)
|
||||
case _ =>
|
||||
val position = Origin(None, None)
|
||||
throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position)
|
||||
}
|
||||
}
|
||||
|
||||
/** Get the builder (visitor) which converts a ParseTree into a AST. */
|
||||
protected def astBuilder: AstBuilder
|
||||
|
||||
/** Create a native command, or fail when this is not supported. */
|
||||
protected def nativeCommand(sqlText: String): LogicalPlan = {
|
||||
val position = Origin(None, None)
|
||||
throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position)
|
||||
}
|
||||
|
||||
protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
|
||||
logInfo(s"Parsing command: $command")
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ class PlanParserSuite extends PlanTest {
|
|||
assertEqual("show functions foo", ShowFunctions(None, Some("foo")))
|
||||
assertEqual("show functions foo.bar", ShowFunctions(Some("foo"), Some("bar")))
|
||||
assertEqual("show functions 'foo\\\\.*'", ShowFunctions(None, Some("foo\\.*")))
|
||||
intercept("show functions foo.bar.baz", "SHOW FUNCTIONS unsupported name")
|
||||
intercept("show functions foo.bar.baz", "Unsupported function name")
|
||||
}
|
||||
|
||||
test("describe function") {
|
||||
|
@ -263,11 +263,14 @@ class PlanParserSuite extends PlanTest {
|
|||
}
|
||||
|
||||
test("lateral view") {
|
||||
val explode = UnresolvedGenerator(FunctionIdentifier("explode"), Seq('x))
|
||||
val jsonTuple = UnresolvedGenerator(FunctionIdentifier("json_tuple"), Seq('x, 'y))
|
||||
|
||||
// Single lateral view
|
||||
assertEqual(
|
||||
"select * from t lateral view explode(x) expl as x",
|
||||
table("t")
|
||||
.generate(Explode('x), join = true, outer = false, Some("expl"), Seq("x"))
|
||||
.generate(explode, join = true, outer = false, Some("expl"), Seq("x"))
|
||||
.select(star()))
|
||||
|
||||
// Multiple lateral views
|
||||
|
@ -277,12 +280,12 @@ class PlanParserSuite extends PlanTest {
|
|||
|lateral view explode(x) expl
|
||||
|lateral view outer json_tuple(x, y) jtup q, z""".stripMargin,
|
||||
table("t")
|
||||
.generate(Explode('x), join = true, outer = false, Some("expl"), Seq.empty)
|
||||
.generate(JsonTuple(Seq('x, 'y)), join = true, outer = true, Some("jtup"), Seq("q", "z"))
|
||||
.generate(explode, join = true, outer = false, Some("expl"), Seq.empty)
|
||||
.generate(jsonTuple, join = true, outer = true, Some("jtup"), Seq("q", "z"))
|
||||
.select(star()))
|
||||
|
||||
// Multi-Insert lateral views.
|
||||
val from = table("t1").generate(Explode('x), join = true, outer = false, Some("expl"), Seq("x"))
|
||||
val from = table("t1").generate(explode, join = true, outer = false, Some("expl"), Seq("x"))
|
||||
assertEqual(
|
||||
"""from t1
|
||||
|lateral view explode(x) expl as x
|
||||
|
@ -294,7 +297,7 @@ class PlanParserSuite extends PlanTest {
|
|||
|where s < 10
|
||||
""".stripMargin,
|
||||
Union(from
|
||||
.generate(JsonTuple(Seq('x, 'y)), join = true, outer = false, Some("jtup"), Seq("q", "z"))
|
||||
.generate(jsonTuple, join = true, outer = false, Some("jtup"), Seq("q", "z"))
|
||||
.select(star())
|
||||
.insertInto("t2"),
|
||||
from.where('s < 10).select(star()).insertInto("t3")))
|
||||
|
|
|
@ -21,6 +21,7 @@ import scala.collection.JavaConverters._
|
|||
import scala.util.Try
|
||||
|
||||
import org.antlr.v4.runtime.{ParserRuleContext, Token}
|
||||
import org.antlr.v4.runtime.tree.TerminalNode
|
||||
|
||||
import org.apache.spark.sql.SaveMode
|
||||
import org.apache.spark.sql.catalyst.TableIdentifier
|
||||
|
@ -31,7 +32,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation,
|
|||
import org.apache.spark.sql.execution.command._
|
||||
import org.apache.spark.sql.execution.datasources._
|
||||
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution}
|
||||
|
||||
import org.apache.spark.sql.types.DataType
|
||||
|
||||
/**
|
||||
* Concrete parser for Spark SQL statements.
|
||||
|
@ -780,9 +781,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
|
|||
*/
|
||||
override def visitFailNativeCommand(
|
||||
ctx: FailNativeCommandContext): LogicalPlan = withOrigin(ctx) {
|
||||
val keywords = if (ctx.kws != null) {
|
||||
Seq(ctx.kws.kw1, ctx.kws.kw2, ctx.kws.kw3, ctx.kws.kw4, ctx.kws.kw5, ctx.kws.kw6)
|
||||
.filter(_ != null).map(_.getText).mkString(" ")
|
||||
val keywords = if (ctx.unsupportedHiveNativeCommands != null) {
|
||||
ctx.unsupportedHiveNativeCommands.children.asScala.collect {
|
||||
case n: TerminalNode => n.getText
|
||||
}.mkString(" ")
|
||||
} else {
|
||||
// SET ROLE is the exception to the rule, because we handle this before other SET commands.
|
||||
"SET ROLE"
|
||||
|
@ -1109,7 +1111,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
|
|||
// just convert the whole type string to lower case, otherwise the struct field names
|
||||
// will no longer be case sensitive. Instead, we rely on our parser to get the proper
|
||||
// case before passing it to Hive.
|
||||
CatalystSqlParser.parseDataType(col.dataType.getText).catalogString,
|
||||
typedVisit[DataType](col.dataType).catalogString,
|
||||
nullable = true,
|
||||
Option(col.STRING).map(string))
|
||||
}
|
||||
|
|
|
@ -254,12 +254,13 @@ class HiveDDLCommandSuite extends PlanTest {
|
|||
}
|
||||
|
||||
test("use native json_tuple instead of hive's UDTF in LATERAL VIEW") {
|
||||
val plan = parser.parsePlan(
|
||||
val analyzer = TestHive.sparkSession.sessionState.analyzer
|
||||
val plan = analyzer.execute(parser.parsePlan(
|
||||
"""
|
||||
|SELECT *
|
||||
|FROM (SELECT '{"f1": "value1", "f2": 12}' json) test
|
||||
|LATERAL VIEW json_tuple(json, 'f1', 'f2') jt AS a, b
|
||||
""".stripMargin)
|
||||
""".stripMargin))
|
||||
|
||||
assert(plan.children.head.asInstanceOf[Generate].generator.isInstanceOf[JsonTuple])
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue