diff --git a/docs/img/AllStagesPageDetail4.png b/docs/img/AllStagesPageDetail4.png index 7880f95034..2f038b3d61 100644 Binary files a/docs/img/AllStagesPageDetail4.png and b/docs/img/AllStagesPageDetail4.png differ diff --git a/docs/img/AllStagesPageDetail5.png b/docs/img/AllStagesPageDetail5.png index 0f90ffcb32..95d1f0e7f3 100644 Binary files a/docs/img/AllStagesPageDetail5.png and b/docs/img/AllStagesPageDetail5.png differ diff --git a/docs/img/JobPageDetail2.png b/docs/img/JobPageDetail2.png index ab6d7bdf15..5eb529eb7c 100644 Binary files a/docs/img/JobPageDetail2.png and b/docs/img/JobPageDetail2.png differ diff --git a/docs/web-ui.md b/docs/web-ui.md index 6f1afb2ed4..e28a689c8d 100644 --- a/docs/web-ui.md +++ b/docs/web-ui.md @@ -73,7 +73,8 @@ This page displays the details of a specific job identified by its job ID.

* DAG visualization: Visual representation of the directed acyclic graph of this job where vertices represent the RDDs or DataFrames and the edges represent an operation to be applied on RDD. - +* An example of DAG visualization for `sc.parallelize(1 to 100).toDF.count()` +

DAG

@@ -124,6 +125,8 @@ The stage detail page begins with information like total time across all tasks,

There is also a visual representation of the directed acyclic graph (DAG) of this stage, where vertices represent the RDDs or DataFrames and the edges represent an operation to be applied. +Nodes are grouped by operation scope in the DAG visualization and labelled with the operation scope name (BatchScan, WholeStageCodegen, Exchange, etc). +Notably, Whole Stage Code Generation operations are also annotated with the code generation id. For stages belonging to Spark DataFrame or SQL execution, this allows to cross-reference Stage execution details to the relevant details in the Web-UI SQL Tab page where SQL plan graphs and execution plans are reported.

Stage DAG diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala index ac66a71fe7..5b72ec058e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala @@ -62,19 +62,13 @@ private[execution] object SparkPlanInfo { new SQLMetricInfo(metric.name.getOrElse(key), metric.id, metric.metricType) } - val nodeName = plan match { - case physicalOperator: WholeStageCodegenExec => - s"${plan.nodeName} (${physicalOperator.codegenStageId})" - case _ => plan.nodeName - } - // dump the file scan metadata (e.g file path) to event log val metadata = plan match { case fileScan: FileSourceScanExec => fileScan.metadata case _ => Map[String, String]() } new SparkPlanInfo( - nodeName, + plan.nodeName, plan.simpleString(SQLConf.get.maxToStringFields), children.map(fromSparkPlan), metadata, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index f723fcfac6..10fe0f2523 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -54,6 +54,7 @@ trait CodegenSupport extends SparkPlan { case _: RDDScanExec => "rdd" case _: DataSourceScanExec => "scan" case _: InMemoryTableScanExec => "memoryScan" + case _: WholeStageCodegenExec => "wholestagecodegen" case _ => nodeName.toLowerCase(Locale.ROOT) } @@ -613,6 +614,8 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) "pipelineTime" -> SQLMetrics.createTimingMetric(sparkContext, WholeStageCodegenExec.PIPELINE_DURATION_METRIC)) + override def nodeName: String = s"WholeStageCodegen (${codegenStageId})" + def generatedClassName(): String = if (conf.wholeStageUseIdInClassName) { s"GeneratedIteratorForCodegenStage$codegenStageId" } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala index 3c187a2ed0..4cb845b248 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala @@ -86,7 +86,7 @@ class DebuggingSuite extends SharedSparkSession { """== BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, false])), [id=#x] == |Tuples output: 0 | id LongType: {} - |== WholeStageCodegen == + |== WholeStageCodegen (1) == |Tuples output: 10 | id LongType: {java.lang.Long} |== Range (0, 10, step=1, splits=2) ==