[SPARK-3058] [SQL] Support EXTENDED for EXPLAIN

Provides `extended` keyword support for the `explain` command in SQL, e.g.
```
explain extended select key as a1, value as a2 from src where key=1;
== Parsed Logical Plan ==
Project ['key AS a1#3,'value AS a2#4]
 Filter ('key = 1)
  UnresolvedRelation None, src, None

== Analyzed Logical Plan ==
Project [key#8 AS a1#3,value#9 AS a2#4]
 Filter (CAST(key#8, DoubleType) = CAST(1, DoubleType))
  MetastoreRelation default, src, None

== Optimized Logical Plan ==
Project [key#8 AS a1#3,value#9 AS a2#4]
 Filter (CAST(key#8, DoubleType) = 1.0)
  MetastoreRelation default, src, None

== Physical Plan ==
Project [key#8 AS a1#3,value#9 AS a2#4]
 Filter (CAST(key#8, DoubleType) = 1.0)
  HiveTableScan [key#8,value#9], (MetastoreRelation default, src, None), None

Code Generation: false
== RDD ==
(2) MappedRDD[14] at map at HiveContext.scala:350
  MapPartitionsRDD[13] at mapPartitions at basicOperators.scala:42
  MapPartitionsRDD[12] at mapPartitions at basicOperators.scala:57
  MapPartitionsRDD[11] at mapPartitions at TableReader.scala:112
  MappedRDD[10] at map at TableReader.scala:240
  HadoopRDD[9] at HadoopRDD at TableReader.scala:230
```

This is a sub-task of #1847, but it can go in without depending on it.
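
As a quick usage sketch (not part of this patch, names and setup are illustrative): the new syntax can be exercised from a `HiveContext` and the plan lines read back as strings.

```scala
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext

// Illustrative setup: assumes a local SparkContext and the Hive test table `src`.
val sc = new SparkContext("local", "explain-extended-demo")
val hiveContext = new HiveContext(sc)

// Plain EXPLAIN prints only the physical plan; EXPLAIN EXTENDED also prints the
// parsed, analyzed, and optimized logical plans, as shown above.
val simple   = hiveContext.sql("EXPLAIN SELECT key, value FROM src WHERE key = 1")
val extended = hiveContext.sql("EXPLAIN EXTENDED SELECT key, value FROM src WHERE key = 1")

// The result is a single-column ("plan") SchemaRDD with one row per output line.
extended.collect().map(_.getString(0)).foreach(println)
```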

Author: Cheng Hao <hao.cheng@intel.com>

Closes #1962 from chenghao-intel/explain_extended and squashes the following commits:

295db74 [Cheng Hao] Fix bug in printing the simple execution plan
48bc989 [Cheng Hao] Support EXTENDED for EXPLAIN
Authored by Cheng Hao on 2014-08-25 17:43:56 -07:00; committed by Michael Armbrust
commit 156eb39661 (parent cae9414d38)
8 changed files with 78 additions and 14 deletions

```diff
@@ -50,7 +50,7 @@ case class SetCommand(key: Option[String], value: Option[String]) extends Comman
  * Returned by a parser when the users only wants to see what query plan would be executed, without
  * actually performing the execution.
  */
-case class ExplainCommand(plan: LogicalPlan) extends Command {
+case class ExplainCommand(plan: LogicalPlan, extended: Boolean = false) extends Command {
   override def output =
     Seq(AttributeReference("plan", StringType, nullable = false)())
 }
```

```diff
@@ -408,10 +408,18 @@ class SQLContext(@transient val sparkContext: SparkContext)
     protected def stringOrError[A](f: => A): String =
       try f.toString catch { case e: Throwable => e.toString }
-    def simpleString: String = stringOrError(executedPlan)
+    def simpleString: String =
+      s"""== Physical Plan ==
+         |${stringOrError(executedPlan)}
+      """
     override def toString: String =
-      s"""== Logical Plan ==
+      // TODO previously will output RDD details by run (${stringOrError(toRdd.toDebugString)})
+      // however, the `toRdd` will cause the real execution, which is not what we want.
+      // We need to think about how to avoid the side effect.
+      s"""== Parsed Logical Plan ==
+         |${stringOrError(logical)}
+         |== Analyzed Logical Plan ==
         |${stringOrError(analyzed)}
         |== Optimized Logical Plan ==
         |${stringOrError(optimizedPlan)}
@@ -419,7 +427,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
         |${stringOrError(executedPlan)}
         |Code Generation: ${executedPlan.codegenEnabled}
         |== RDD ==
-        |${stringOrError(toRdd.toDebugString)}
      """.stripMargin.trim
   }
```
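
With this split, `simpleString` covers only the physical plan while `toString` walks the whole planning pipeline. A small sketch of the difference, assuming the usual `queryExecution` handle on a `SchemaRDD` and reusing the `hiveContext` from the sketch above:

```scala
// Build a query but do not run it; queryExecution is derived lazily from the logical plan.
val rdd = hiveContext.sql("SELECT key, value FROM src WHERE key = 1")

// What plain EXPLAIN now prints: only "== Physical Plan ==" and the executed plan.
println(rdd.queryExecution.simpleString)

// What EXPLAIN EXTENDED prints: parsed, analyzed, and optimized logical plans,
// plus the physical plan and the codegen flag.
println(rdd.queryExecution.toString)
```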

```diff
@@ -301,8 +301,8 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case logical.SetCommand(key, value) =>
         Seq(execution.SetCommand(key, value, plan.output)(context))
-      case logical.ExplainCommand(logicalPlan) =>
-        Seq(execution.ExplainCommand(logicalPlan, plan.output)(context))
+      case logical.ExplainCommand(logicalPlan, extended) =>
+        Seq(execution.ExplainCommand(logicalPlan, plan.output, extended)(context))
       case logical.CacheCommand(tableName, cache) =>
         Seq(execution.CacheCommand(tableName, cache)(context))
       case _ => Nil
```

```diff
@@ -108,15 +108,19 @@ case class SetCommand(
  */
 @DeveloperApi
 case class ExplainCommand(
-    logicalPlan: LogicalPlan, output: Seq[Attribute])(
+    logicalPlan: LogicalPlan, output: Seq[Attribute], extended: Boolean)(
     @transient context: SQLContext)
   extends LeafNode with Command {
   // Run through the optimizer to generate the physical plan.
   override protected[sql] lazy val sideEffectResult: Seq[String] = try {
-    "Physical execution plan:" +: context.executePlan(logicalPlan).executedPlan.toString.split("\n")
+    // TODO in Hive, the "extended" ExplainCommand prints the AST as well, and detailed properties.
+    val queryExecution = context.executePlan(logicalPlan)
+    val outputString = if (extended) queryExecution.toString else queryExecution.simpleString
+    outputString.split("\n")
   } catch { case cause: TreeNodeException[_] =>
-    "Error occurred during query planning: " +: cause.getMessage.split("\n")
+    ("Error occurred during query planning: \n" + cause.getMessage).split("\n")
   }
   def execute(): RDD[Row] = {
```

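Downstream, `sideEffectResult` is what the physical command hands back to the caller: one row per plan line in the single `plan` string column declared on the logical node. A hedged sketch of how that looks from a caller's point of view, again reusing the `hiveContext` from the first sketch:

```scala
import org.apache.spark.sql.Row

// Each line of the (simple or extended) plan comes back as one Row with a single
// StringType column named "plan".
val planLines =
  hiveContext.sql("EXPLAIN EXTENDED SELECT * FROM src WHERE key = 123")
    .collect()
    .map { case Row(line: String) => line }

// The extended output carries the section headers emitted by QueryExecution.toString.
assert(planLines.exists(_ == "== Parsed Logical Plan =="))
assert(planLines.exists(_ == "== Physical Plan =="))
```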

```diff
@@ -424,7 +424,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
       logical match {
         case _: NativeCommand => "<Native command: executed by Hive>"
         case _: SetCommand => "<SET command: executed by Hive, and noted by SQLContext>"
-        case _ => executedPlan.toString
+        case _ => super.simpleString
       }
   }
 }
```

```diff
@@ -409,10 +409,9 @@ private[hive] object HiveQl {
         ExplainCommand(NoRelation)
       case Token("TOK_EXPLAIN", explainArgs) =>
         // Ignore FORMATTED if present.
-        val Some(query) :: _ :: _ :: Nil =
+        val Some(query) :: _ :: extended :: Nil =
           getClauses(Seq("TOK_QUERY", "FORMATTED", "EXTENDED"), explainArgs)
-        // TODO: support EXTENDED?
-        ExplainCommand(nodeToPlan(query))
+        ExplainCommand(nodeToPlan(query), extended != None)
       case Token("TOK_DESCTABLE", describeArgs) =>
         // Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
```

```diff
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.execution
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.TestHive._
+import org.apache.spark.sql.Row
+
+/**
+ * A set of tests that validates support for Hive Explain command.
+ */
+class HiveExplainSuite extends QueryTest {
+  private def check(sqlCmd: String, exists: Boolean, keywords: String*) {
+    val outputs = sql(sqlCmd).collect().map(_.getString(0)).mkString
+    for (key <- keywords) {
+      if (exists) {
+        assert(outputs.contains(key), s"Failed for $sqlCmd ($key doens't exist in result)")
+      } else {
+        assert(!outputs.contains(key), s"Failed for $sqlCmd ($key existed in the result)")
+      }
+    }
+  }
+
+  test("explain extended command") {
+    check(" explain select * from src where key=123 ", true,
+          "== Physical Plan ==")
+    check(" explain select * from src where key=123 ", false,
+          "== Parsed Logical Plan ==",
+          "== Analyzed Logical Plan ==",
+          "== Optimized Logical Plan ==")
+    check(" explain extended select * from src where key=123 ", true,
+          "== Parsed Logical Plan ==",
+          "== Analyzed Logical Plan ==",
+          "== Optimized Logical Plan ==",
+          "== Physical Plan ==",
+          "Code Generation", "== RDD ==")
+  }
+}
```

```diff
@@ -327,7 +327,7 @@ class HiveQuerySuite extends HiveComparisonTest {
   def isExplanation(result: SchemaRDD) = {
     val explanation = result.select('plan).collect().map { case Row(plan: String) => plan }
-    explanation.size > 1 && explanation.head.startsWith("Physical execution plan")
+    explanation.exists(_ == "== Physical Plan ==")
   }

   test("SPARK-1704: Explain commands as a SchemaRDD") {
```