[SPARK-11540][SQL] API audit for QueryExecutionListener.

Author: Reynold Xin <rxin@databricks.com>

Closes #9509 from rxin/SPARK-11540.
Committed by Reynold Xin on 2015-11-05 18:12:54 -08:00
parent 5e31db70bb
commit 3cc2c053b5
2 changed files with 88 additions and 75 deletions


sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala

@@ -17,6 +17,8 @@
 package org.apache.spark.sql.execution
 
+import com.google.common.annotations.VisibleForTesting
+
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.InternalRow
@@ -25,31 +27,33 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 
 /**
  * The primary workflow for executing relational queries using Spark. Designed to allow easy
  * access to the intermediate phases of query execution for developers.
+ *
+ * While this is not a public class, we should avoid changing the function names for the sake of
+ * changing them, because a lot of developers use the feature for debugging.
  */
 class QueryExecution(val sqlContext: SQLContext, val logical: LogicalPlan) {
-  val analyzer = sqlContext.analyzer
-  val optimizer = sqlContext.optimizer
-  val planner = sqlContext.planner
-  val cacheManager = sqlContext.cacheManager
-  val prepareForExecution = sqlContext.prepareForExecution
 
-  def assertAnalyzed(): Unit = analyzer.checkAnalysis(analyzed)
+  @VisibleForTesting
+  def assertAnalyzed(): Unit = sqlContext.analyzer.checkAnalysis(analyzed)
 
-  lazy val analyzed: LogicalPlan = analyzer.execute(logical)
+  lazy val analyzed: LogicalPlan = sqlContext.analyzer.execute(logical)
+
   lazy val withCachedData: LogicalPlan = {
     assertAnalyzed()
-    cacheManager.useCachedData(analyzed)
+    sqlContext.cacheManager.useCachedData(analyzed)
   }
 
-  lazy val optimizedPlan: LogicalPlan = optimizer.execute(withCachedData)
+  lazy val optimizedPlan: LogicalPlan = sqlContext.optimizer.execute(withCachedData)
 
   // TODO: Don't just pick the first one...
   lazy val sparkPlan: SparkPlan = {
     SparkPlan.currentContext.set(sqlContext)
-    planner.plan(optimizedPlan).next()
+    sqlContext.planner.plan(optimizedPlan).next()
   }
 
   // executedPlan should not be used to initialize any SparkPlan. It should be
   // only used for execution.
-  lazy val executedPlan: SparkPlan = prepareForExecution.execute(sparkPlan)
+  lazy val executedPlan: SparkPlan = sqlContext.prepareForExecution.execute(sparkPlan)
 
   /** Internal version of the RDD. Avoids copies and has no schema */
   lazy val toRdd: RDD[InternalRow] = executedPlan.execute()
@@ -57,11 +61,11 @@ class QueryExecution(val sqlContext: SQLContext, val logical: LogicalPlan) {
   protected def stringOrError[A](f: => A): String =
     try f.toString catch { case e: Throwable => e.toString }
 
-  def simpleString: String =
+  def simpleString: String = {
     s"""== Physical Plan ==
        |${stringOrError(executedPlan)}
       """.stripMargin.trim
+  }
 
   override def toString: String = {
     def output =
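The class comment added above notes that developers lean on these phases for debugging. As a rough illustration of that workflow (not part of this commit; it assumes a Spark 1.6-era shell with a `sqlContext` in scope and uses a made-up toy query):

    // Build a toy query; nothing runs yet, since every phase is a lazy val.
    val df = sqlContext.range(0, 1000).filter("id % 2 = 0")

    // QueryExecution exposes each intermediate phase of planning.
    val qe = df.queryExecution
    println(qe.analyzed)      // logical plan after analysis
    println(qe.optimizedPlan) // logical plan after the optimizer has run
    println(qe.executedPlan)  // physical plan that will actually execute

    // toString stitches all phases together; simpleString (above) builds
    // just the "== Physical Plan ==" section.
    println(qe.simpleString)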


sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala

@@ -19,36 +19,38 @@ package org.apache.spark.sql.util
 
 import java.util.concurrent.locks.ReentrantReadWriteLock
 
 import scala.collection.mutable.ListBuffer
+import scala.util.control.NonFatal
 
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.Logging
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.sql.execution.QueryExecution
 
 /**
+ * :: Experimental ::
  * The interface of query execution listener that can be used to analyze execution metrics.
  *
- * Note that implementations should guarantee thread-safety as they will be used in a non
- * thread-safe way.
+ * Note that implementations should guarantee thread-safety as they can be invoked by
+ * multiple different threads.
  */
 @Experimental
 trait QueryExecutionListener {
+
   /**
    * A callback function that will be called when a query executed successfully.
-   * Implementations should guarantee thread-safe.
+   * Note that this can be invoked by multiple different threads.
    *
-   * @param funcName the name of the action that triggered this query.
+   * @param funcName name of the action that triggered this query.
    * @param qe the QueryExecution object that carries detail information like logical plan,
    *           physical plan, etc.
-   * @param duration the execution time for this query in nanoseconds.
+   * @param durationNs the execution time for this query in nanoseconds.
    */
   @DeveloperApi
-  def onSuccess(funcName: String, qe: QueryExecution, duration: Long)
+  def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit
 
   /**
    * A callback function that will be called when a query execution failed.
-   * Implementations should guarantee thread-safe.
+   * Note that this can be invoked by multiple different threads.
    *
    * @param funcName the name of the action that triggered this query.
    * @param qe the QueryExecution object that carries detail information like logical plan,
@@ -56,14 +58,73 @@ trait QueryExecutionListener {
    * @param exception the exception that failed this query.
    */
   @DeveloperApi
-  def onFailure(funcName: String, qe: QueryExecution, exception: Exception)
+  def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit
 }
 
+
+/**
+ * :: Experimental ::
+ *
+ * Manager for [[QueryExecutionListener]]. See [[org.apache.spark.sql.SQLContext.listenerManager]].
+ */
 @Experimental
-class ExecutionListenerManager extends Logging {
+class ExecutionListenerManager private[sql] () extends Logging {
+
+  /**
+   * Registers the specified [[QueryExecutionListener]].
+   */
+  @DeveloperApi
+  def register(listener: QueryExecutionListener): Unit = writeLock {
+    listeners += listener
+  }
+
+  /**
+   * Unregisters the specified [[QueryExecutionListener]].
+   */
+  @DeveloperApi
+  def unregister(listener: QueryExecutionListener): Unit = writeLock {
+    listeners -= listener
+  }
+
+  /**
+   * Removes all the registered [[QueryExecutionListener]].
+   */
+  @DeveloperApi
+  def clear(): Unit = writeLock {
+    listeners.clear()
+  }
+
+  private[sql] def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = {
+    readLock {
+      withErrorHandling { listener =>
+        listener.onSuccess(funcName, qe, duration)
+      }
+    }
+  }
+
+  private[sql] def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {
+    readLock {
+      withErrorHandling { listener =>
+        listener.onFailure(funcName, qe, exception)
+      }
+    }
+  }
+
   private[this] val listeners = ListBuffer.empty[QueryExecutionListener]
+
+  /** A lock to prevent updating the list of listeners while we are traversing through them. */
   private[this] val lock = new ReentrantReadWriteLock()
 
+  private def withErrorHandling(f: QueryExecutionListener => Unit): Unit = {
+    for (listener <- listeners) {
+      try {
+        f(listener)
+      } catch {
+        case NonFatal(e) => logWarning("Error executing query execution listener", e)
+      }
+    }
+  }
+
   /** Acquires a read lock on the cache for the duration of `f`. */
   private def readLock[A](f: => A): A = {
     val rl = lock.readLock()
@@ -81,56 +142,4 @@ class ExecutionListenerManager extends Logging {
       wl.unlock()
     }
   }
-
-  /**
-   * Registers the specified QueryExecutionListener.
-   */
-  @DeveloperApi
-  def register(listener: QueryExecutionListener): Unit = writeLock {
-    listeners += listener
-  }
-
-  /**
-   * Unregisters the specified QueryExecutionListener.
-   */
-  @DeveloperApi
-  def unregister(listener: QueryExecutionListener): Unit = writeLock {
-    listeners -= listener
-  }
-
-  /**
-   * clears out all registered QueryExecutionListeners.
-   */
-  @DeveloperApi
-  def clear(): Unit = writeLock {
-    listeners.clear()
-  }
-
-  private[sql] def onSuccess(
-      funcName: String,
-      qe: QueryExecution,
-      duration: Long): Unit = readLock {
-    withErrorHandling { listener =>
-      listener.onSuccess(funcName, qe, duration)
-    }
-  }
-
-  private[sql] def onFailure(
-      funcName: String,
-      qe: QueryExecution,
-      exception: Exception): Unit = readLock {
-    withErrorHandling { listener =>
-      listener.onFailure(funcName, qe, exception)
-    }
-  }
-
-  private def withErrorHandling(f: QueryExecutionListener => Unit): Unit = {
-    for (listener <- listeners) {
-      try {
-        f(listener)
-      } catch {
-        case e: Exception => logWarning("error executing query execution listener", e)
-      }
-    }
-  }
 }
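For an end-to-end view of the audited API, here is a minimal sketch (not part of this commit) of a listener written against the new signatures. `TimingListener` and the sample job are made-up names, and the `AtomicLong` is one way to honor the thread-safety note in the docs above:

    import java.util.concurrent.atomic.AtomicLong

    import org.apache.spark.sql.execution.QueryExecution
    import org.apache.spark.sql.util.QueryExecutionListener

    // Tallies time spent in successful actions. Callbacks can arrive from
    // multiple threads concurrently, so the counter must be thread-safe.
    class TimingListener extends QueryExecutionListener {
      val totalNs = new AtomicLong(0L)

      override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = {
        totalNs.addAndGet(durationNs)
      }

      override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {
        // Ignored here; a real listener might log qe.logical along with the exception.
      }
    }

    // Registration goes through SQLContext.listenerManager, per the scaladoc above.
    val listener = new TimingListener
    sqlContext.listenerManager.register(listener)
    sqlContext.range(0, 1000).count()   // the count() action triggers onSuccess
    sqlContext.listenerManager.unregister(listener)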