[SPARK-11540][SQL] API audit for QueryExecutionListener.

Author: Reynold Xin <rxin@databricks.com> Closes #9509 from rxin/SPARK-11540.
2015-11-05 18:12:54 -08:00 · 2015-11-05 18:12:54 -08:00 · 3cc2c053b5
parent 5e31db70bb
commit 3cc2c053b5
2 changed files with 88 additions and 75 deletions
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@ -17,6 +17,8 @@

 package org.apache.spark.sql.execution

+import com.google.common.annotations.VisibleForTesting
+
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.InternalRow
@ -25,31 +27,33 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 /**
 * The primary workflow for executing relational queries using Spark.  Designed to allow easy
 * access to the intermediate phases of query execution for developers.
+ *
+ * While this is not a public class, we should avoid changing the function names for the sake of
+ * changing them, because a lot of developers use the feature for debugging.
 */
 class QueryExecution(val sqlContext: SQLContext, val logical: LogicalPlan) {
-  val analyzer = sqlContext.analyzer
-  val optimizer = sqlContext.optimizer
-  val planner = sqlContext.planner
-  val cacheManager = sqlContext.cacheManager
-  val prepareForExecution = sqlContext.prepareForExecution

-  def assertAnalyzed(): Unit = analyzer.checkAnalysis(analyzed)
+  @VisibleForTesting
+  def assertAnalyzed(): Unit = sqlContext.analyzer.checkAnalysis(analyzed)
+
+  lazy val analyzed: LogicalPlan = sqlContext.analyzer.execute(logical)

-  lazy val analyzed: LogicalPlan = analyzer.execute(logical)
  lazy val withCachedData: LogicalPlan = {
    assertAnalyzed()
-    cacheManager.useCachedData(analyzed)
+    sqlContext.cacheManager.useCachedData(analyzed)
  }
-  lazy val optimizedPlan: LogicalPlan = optimizer.execute(withCachedData)
+
+  lazy val optimizedPlan: LogicalPlan = sqlContext.optimizer.execute(withCachedData)

  // TODO: Don't just pick the first one...
  lazy val sparkPlan: SparkPlan = {
    SparkPlan.currentContext.set(sqlContext)
-    planner.plan(optimizedPlan).next()
+    sqlContext.planner.plan(optimizedPlan).next()
  }
+
  // executedPlan should not be used to initialize any SparkPlan. It should be
  // only used for execution.
-  lazy val executedPlan: SparkPlan = prepareForExecution.execute(sparkPlan)
+  lazy val executedPlan: SparkPlan = sqlContext.prepareForExecution.execute(sparkPlan)

  /** Internal version of the RDD. Avoids copies and has no schema */
  lazy val toRdd: RDD[InternalRow] = executedPlan.execute()
@ -57,11 +61,11 @@ class QueryExecution(val sqlContext: SQLContext, val logical: LogicalPlan) {
  protected def stringOrError[A](f: => A): String =
    try f.toString catch { case e: Throwable => e.toString }

-  def simpleString: String =
+  def simpleString: String = {
    s"""== Physical Plan ==
       |${stringOrError(executedPlan)}
      """.stripMargin.trim
-
+  }

  override def toString: String = {
    def output =
--- a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
@ -19,36 +19,38 @@ package org.apache.spark.sql.util

 import java.util.concurrent.locks.ReentrantReadWriteLock
 import scala.collection.mutable.ListBuffer
+import scala.util.control.NonFatal

-import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.Logging
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.sql.execution.QueryExecution


 /**
+ * :: Experimental ::
 * The interface of query execution listener that can be used to analyze execution metrics.
 *
- * Note that implementations should guarantee thread-safety as they will be used in a non
- * thread-safe way.
+ * Note that implementations should guarantee thread-safety as they can be invoked by
+ * multiple different threads.
 */
@Experimental
 trait QueryExecutionListener {

  /**
   * A callback function that will be called when a query executed successfully.
-   * Implementations should guarantee thread-safe.
+   * Note that this can be invoked by multiple different threads.
   *
-   * @param funcName the name of the action that triggered this query.
+   * @param funcName name of the action that triggered this query.
   * @param qe the QueryExecution object that carries detail information like logical plan,
   *           physical plan, etc.
-   * @param duration the execution time for this query in nanoseconds.
+   * @param durationNs the execution time for this query in nanoseconds.
   */
  @DeveloperApi
-  def onSuccess(funcName: String, qe: QueryExecution, duration: Long)
+  def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit

  /**
   * A callback function that will be called when a query execution failed.
-   * Implementations should guarantee thread-safe.
+   * Note that this can be invoked by multiple different threads.
   *
   * @param funcName the name of the action that triggered this query.
   * @param qe the QueryExecution object that carries detail information like logical plan,
@ -56,14 +58,73 @@ trait QueryExecutionListener {
   * @param exception the exception that failed this query.
   */
  @DeveloperApi
-  def onFailure(funcName: String, qe: QueryExecution, exception: Exception)
+  def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit
 }

+
+/**
+ * :: Experimental ::
+ *
+ * Manager for [[QueryExecutionListener]]. See [[org.apache.spark.sql.SQLContext.listenerManager]].
+ */
@Experimental
-class ExecutionListenerManager extends Logging {
+class ExecutionListenerManager private[sql] () extends Logging {
+
+  /**
+   * Registers the specified [[QueryExecutionListener]].
+   */
+  @DeveloperApi
+  def register(listener: QueryExecutionListener): Unit = writeLock {
+    listeners += listener
+  }
+
+  /**
+   * Unregisters the specified [[QueryExecutionListener]].
+   */
+  @DeveloperApi
+  def unregister(listener: QueryExecutionListener): Unit = writeLock {
+    listeners -= listener
+  }
+
+  /**
+   * Removes all the registered [[QueryExecutionListener]].
+   */
+  @DeveloperApi
+  def clear(): Unit = writeLock {
+    listeners.clear()
+  }
+
+  private[sql] def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = {
+    readLock {
+      withErrorHandling { listener =>
+        listener.onSuccess(funcName, qe, duration)
+      }
+    }
+  }
+
+  private[sql] def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {
+    readLock {
+      withErrorHandling { listener =>
+        listener.onFailure(funcName, qe, exception)
+      }
+    }
+  }
+
  private[this] val listeners = ListBuffer.empty[QueryExecutionListener]
+
+  /** A lock to prevent updating the list of listeners while we are traversing through them. */
  private[this] val lock = new ReentrantReadWriteLock()

+  private def withErrorHandling(f: QueryExecutionListener => Unit): Unit = {
+    for (listener <- listeners) {
+      try {
+        f(listener)
+      } catch {
+        case NonFatal(e) => logWarning("Error executing query execution listener", e)
+      }
+    }
+  }
+
  /** Acquires a read lock on the cache for the duration of `f`. */
  private def readLock[A](f: => A): A = {
    val rl = lock.readLock()
@ -81,56 +142,4 @@ class ExecutionListenerManager extends Logging {
      wl.unlock()
    }
  }
-
-  /**
-   * Registers the specified QueryExecutionListener.
-   */
-  @DeveloperApi
-  def register(listener: QueryExecutionListener): Unit = writeLock {
-    listeners += listener
-  }
-
-  /**
-   * Unregisters the specified QueryExecutionListener.
-   */
-  @DeveloperApi
-  def unregister(listener: QueryExecutionListener): Unit = writeLock {
-    listeners -= listener
-  }
-
-  /**
-   * clears out all registered QueryExecutionListeners.
-   */
-  @DeveloperApi
-  def clear(): Unit = writeLock {
-    listeners.clear()
-  }
-
-  private[sql] def onSuccess(
-      funcName: String,
-      qe: QueryExecution,
-      duration: Long): Unit = readLock {
-    withErrorHandling { listener =>
-      listener.onSuccess(funcName, qe, duration)
-    }
-  }
-
-  private[sql] def onFailure(
-      funcName: String,
-      qe: QueryExecution,
-      exception: Exception): Unit = readLock {
-    withErrorHandling { listener =>
-      listener.onFailure(funcName, qe, exception)
-    }
-  }
-
-  private def withErrorHandling(f: QueryExecutionListener => Unit): Unit = {
-    for (listener <- listeners) {
-      try {
-        f(listener)
-      } catch {
-        case e: Exception => logWarning("error executing query execution listener", e)
-      }
-    }
-  }
 }