[SPARK-3058] [SQL] Support EXTENDED for EXPLAIN
Provide `extended` keyword support for the `explain` command in SQL, e.g. ``` explain extended select key as a1, value as a2 from src where key=1; == Parsed Logical Plan == Project ['key AS a1#3,'value AS a2#4] Filter ('key = 1) UnresolvedRelation None, src, None == Analyzed Logical Plan == Project [key#8 AS a1#3,value#9 AS a2#4] Filter (CAST(key#8, DoubleType) = CAST(1, DoubleType)) MetastoreRelation default, src, None == Optimized Logical Plan == Project [key#8 AS a1#3,value#9 AS a2#4] Filter (CAST(key#8, DoubleType) = 1.0) MetastoreRelation default, src, None == Physical Plan == Project [key#8 AS a1#3,value#9 AS a2#4] Filter (CAST(key#8, DoubleType) = 1.0) HiveTableScan [key#8,value#9], (MetastoreRelation default, src, None), None Code Generation: false == RDD == (2) MappedRDD[14] at map at HiveContext.scala:350 MapPartitionsRDD[13] at mapPartitions at basicOperators.scala:42 MapPartitionsRDD[12] at mapPartitions at basicOperators.scala:57 MapPartitionsRDD[11] at mapPartitions at TableReader.scala:112 MappedRDD[10] at map at TableReader.scala:240 HadoopRDD[9] at HadoopRDD at TableReader.scala:230 ``` This is a subtask of #1847, but it can be merged without any dependency on it. Author: Cheng Hao <hao.cheng@intel.com> Closes #1962 from chenghao-intel/explain_extended and squashes the following commits: 295db74 [Cheng Hao] Fix bug in printing the simple execution plan 48bc989 [Cheng Hao] Support EXTENDED for EXPLAIN
This commit is contained in:
parent
cae9414d38
commit
156eb39661
|
@ -50,7 +50,7 @@ case class SetCommand(key: Option[String], value: Option[String]) extends Comman
|
|||
* Returned by a parser when the users only wants to see what query plan would be executed, without
|
||||
* actually performing the execution.
|
||||
*/
|
||||
case class ExplainCommand(plan: LogicalPlan) extends Command {
|
||||
case class ExplainCommand(plan: LogicalPlan, extended: Boolean = false) extends Command {
|
||||
override def output =
|
||||
Seq(AttributeReference("plan", StringType, nullable = false)())
|
||||
}
|
||||
|
|
|
@ -408,10 +408,18 @@ class SQLContext(@transient val sparkContext: SparkContext)
|
|||
protected def stringOrError[A](f: => A): String =
|
||||
try f.toString catch { case e: Throwable => e.toString }
|
||||
|
||||
def simpleString: String = stringOrError(executedPlan)
|
||||
def simpleString: String =
|
||||
s"""== Physical Plan ==
|
||||
|${stringOrError(executedPlan)}
|
||||
"""
|
||||
|
||||
override def toString: String =
|
||||
s"""== Logical Plan ==
|
||||
// TODO previously will output RDD details by run (${stringOrError(toRdd.toDebugString)})
|
||||
// however, the `toRdd` will cause the real execution, which is not what we want.
|
||||
// We need to think about how to avoid the side effect.
|
||||
s"""== Parsed Logical Plan ==
|
||||
|${stringOrError(logical)}
|
||||
|== Analyzed Logical Plan ==
|
||||
|${stringOrError(analyzed)}
|
||||
|== Optimized Logical Plan ==
|
||||
|${stringOrError(optimizedPlan)}
|
||||
|
@ -419,7 +427,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
|
|||
|${stringOrError(executedPlan)}
|
||||
|Code Generation: ${executedPlan.codegenEnabled}
|
||||
|== RDD ==
|
||||
|${stringOrError(toRdd.toDebugString)}
|
||||
""".stripMargin.trim
|
||||
}
|
||||
|
||||
|
|
|
@ -301,8 +301,8 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
|
|||
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
|
||||
case logical.SetCommand(key, value) =>
|
||||
Seq(execution.SetCommand(key, value, plan.output)(context))
|
||||
case logical.ExplainCommand(logicalPlan) =>
|
||||
Seq(execution.ExplainCommand(logicalPlan, plan.output)(context))
|
||||
case logical.ExplainCommand(logicalPlan, extended) =>
|
||||
Seq(execution.ExplainCommand(logicalPlan, plan.output, extended)(context))
|
||||
case logical.CacheCommand(tableName, cache) =>
|
||||
Seq(execution.CacheCommand(tableName, cache)(context))
|
||||
case _ => Nil
|
||||
|
|
|
@ -108,15 +108,19 @@ case class SetCommand(
|
|||
*/
|
||||
@DeveloperApi
|
||||
case class ExplainCommand(
|
||||
logicalPlan: LogicalPlan, output: Seq[Attribute])(
|
||||
logicalPlan: LogicalPlan, output: Seq[Attribute], extended: Boolean)(
|
||||
@transient context: SQLContext)
|
||||
extends LeafNode with Command {
|
||||
|
||||
// Run through the optimizer to generate the physical plan.
|
||||
override protected[sql] lazy val sideEffectResult: Seq[String] = try {
|
||||
"Physical execution plan:" +: context.executePlan(logicalPlan).executedPlan.toString.split("\n")
|
||||
// TODO in Hive, the "extended" ExplainCommand prints the AST as well, and detailed properties.
|
||||
val queryExecution = context.executePlan(logicalPlan)
|
||||
val outputString = if (extended) queryExecution.toString else queryExecution.simpleString
|
||||
|
||||
outputString.split("\n")
|
||||
} catch { case cause: TreeNodeException[_] =>
|
||||
"Error occurred during query planning: " +: cause.getMessage.split("\n")
|
||||
("Error occurred during query planning: \n" + cause.getMessage).split("\n")
|
||||
}
|
||||
|
||||
def execute(): RDD[Row] = {
|
||||
|
|
|
@ -424,7 +424,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
|
|||
logical match {
|
||||
case _: NativeCommand => "<Native command: executed by Hive>"
|
||||
case _: SetCommand => "<SET command: executed by Hive, and noted by SQLContext>"
|
||||
case _ => executedPlan.toString
|
||||
case _ => super.simpleString
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -409,10 +409,9 @@ private[hive] object HiveQl {
|
|||
ExplainCommand(NoRelation)
|
||||
case Token("TOK_EXPLAIN", explainArgs) =>
|
||||
// Ignore FORMATTED if present.
|
||||
val Some(query) :: _ :: _ :: Nil =
|
||||
val Some(query) :: _ :: extended :: Nil =
|
||||
getClauses(Seq("TOK_QUERY", "FORMATTED", "EXTENDED"), explainArgs)
|
||||
// TODO: support EXTENDED?
|
||||
ExplainCommand(nodeToPlan(query))
|
||||
ExplainCommand(nodeToPlan(query), extended != None)
|
||||
|
||||
case Token("TOK_DESCTABLE", describeArgs) =>
|
||||
// Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.sql.hive.execution
|
||||
|
||||
import org.apache.spark.sql.QueryTest
|
||||
import org.apache.spark.sql.hive.test.TestHive
|
||||
import org.apache.spark.sql.hive.test.TestHive._
|
||||
import org.apache.spark.sql.Row
|
||||
|
||||
/**
 * A set of tests that validates support for the Hive EXPLAIN command, covering both the
 * plain form and the EXTENDED form.
 */
class HiveExplainSuite extends QueryTest {

  /**
   * Runs `sqlCmd` and asserts on the presence or absence of each keyword in its output.
   *
   * @param sqlCmd   the SQL statement to execute
   * @param exists   when true every keyword must appear in the output; when false none may
   * @param keywords the substrings to look for in the collected output
   */
  private def check(sqlCmd: String, exists: Boolean, keywords: String*): Unit = {
    // The first column of every result row is read as a string. Rows are joined with a
    // newline so that a keyword cannot match across the boundary of two adjacent rows.
    val outputs = sql(sqlCmd).collect().map(_.getString(0)).mkString("\n")
    for (key <- keywords) {
      if (exists) {
        assert(outputs.contains(key), s"Failed for $sqlCmd ($key doesn't exist in result)")
      } else {
        assert(!outputs.contains(key), s"Failed for $sqlCmd ($key existed in the result)")
      }
    }
  }

  test("explain extended command") {
    // Plain EXPLAIN must contain the physical plan header ...
    check(" explain select * from src where key=123 ", true,
      "== Physical Plan ==")
    // ... and none of the logical plan headers.
    check(" explain select * from src where key=123 ", false,
      "== Parsed Logical Plan ==",
      "== Analyzed Logical Plan ==",
      "== Optimized Logical Plan ==")
    // EXPLAIN EXTENDED must contain every section header.
    check(" explain extended select * from src where key=123 ", true,
      "== Parsed Logical Plan ==",
      "== Analyzed Logical Plan ==",
      "== Optimized Logical Plan ==",
      "== Physical Plan ==",
      "Code Generation", "== RDD ==")
  }
}
|
|
@ -327,7 +327,7 @@ class HiveQuerySuite extends HiveComparisonTest {
|
|||
|
||||
def isExplanation(result: SchemaRDD) = {
|
||||
val explanation = result.select('plan).collect().map { case Row(plan: String) => plan }
|
||||
explanation.size > 1 && explanation.head.startsWith("Physical execution plan")
|
||||
explanation.exists(_ == "== Physical Plan ==")
|
||||
}
|
||||
|
||||
test("SPARK-1704: Explain commands as a SchemaRDD") {
|
||||
|
|
Loading…
Reference in a new issue