From d61c6219cd692c3e6c70cfcc0467d875201d4268 Mon Sep 17 00:00:00 2001
From: Kousuke Saruta
Date: Mon, 27 Apr 2020 05:14:46 +0900
Subject: [PATCH] [SPARK-31534][WEBUI] Text for tooltip should be escaped

### What changes were proposed in this pull request?

This PR escapes the text used for tooltips in DAG Viz and Timeline View.

### Why are the changes needed?

This is a bug. Normally, DAG Viz and Timeline View show tooltips like the following.

[screenshot: dag-viz-tooltip]
[screenshot: timeline-tooltip]

They contain the callsite properly. However, if a callsite contains characters that need HTML escaping and they are not escaped, the corresponding tooltips do not show the callsite, or the text that follows it, properly.

[screenshot: dag-viz-tooltip-before-fixed]
[screenshot: timeline-tooltip-before-fixed]

The cause of this issue is that the source text of the tooltips is not escaped.
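As an aside, the core of the fix is layered escaping: text rendered as a plain timeline label needs one round of HTML escaping, while the tooltip text needs a second round because the tooltip's `data-title` attribute is later re-interpreted as HTML, and both are additionally EcmaScript-escaped before being embedded in a JavaScript string literal. The sketch below illustrates the idea with the same helpers the patch uses; the object name and the example callsite are illustrative only, not taken from the patch.

```scala
import scala.xml.Utility
import org.apache.commons.text.StringEscapeUtils

// Hypothetical, self-contained sketch; not part of the patch itself.
object TooltipEscapingSketch {
  def main(args: Array[String]): Unit = {
    // Example callsite containing characters that must be escaped for HTML.
    val callsite = "collect at <console>:25"

    // One layer of HTML escaping is enough for text rendered directly
    // as element content (the visible timeline label).
    val escapedOnce = Utility.escape(callsite)      // collect at &lt;console&gt;:25

    // The tooltip text ends up in a data-title attribute that is later
    // re-rendered as HTML, so it gets a second layer of HTML escaping.
    val escapedTwice = Utility.escape(escapedOnce)  // collect at &amp;lt;console&amp;gt;:25

    // Both strings are embedded in a JavaScript string literal, so they
    // are EcmaScript-escaped as well.
    val forLabel = StringEscapeUtils.escapeEcmaScript(escapedOnce)
    val forTooltip = StringEscapeUtils.escapeEcmaScript(escapedTwice)

    println(s"label text   : $forLabel")
    println(s"tooltip text : $forTooltip")
  }
}
```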
### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

I tested manually. First, I ran a job `sc.parallelize(1 to 10).collect` in Spark Shell, then visited AllJobsPage and JobPage and confirmed the tooltip texts.

[screenshot: dag-viz-tooltip-fixed]
[screenshot: timeline-tooltip-fixed]

I also added a test case.

Closes #28317 from sarutak/fix-tooltip.

Authored-by: Kousuke Saruta
Signed-off-by: Kousuke Saruta
---
 .../apache/spark/ui/static/spark-dag-viz.js   | 19 ++-------
 .../apache/spark/ui/jobs/AllJobsPage.scala    |  7 ++--
 .../org/apache/spark/ui/jobs/JobPage.scala    |  7 ++--
 .../spark/ui/scope/RDDOperationGraph.scala    |  6 ++-
 .../org/apache/spark/ui/UISeleniumSuite.scala | 42 +++++++++++++++----
 5 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
index 25dec9d378..ae02defd9b 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
@@ -173,8 +173,8 @@ function renderDagViz(forJob) {
   });
 
   metadataContainer().selectAll(".barrier-rdd").each(function() {
-    var rddId = d3.select(this).text().trim()
-    var clusterId = VizConstants.clusterPrefix + rddId
+    var rddId = d3.select(this).text().trim();
+    var clusterId = VizConstants.clusterPrefix + rddId;
     svg.selectAll("g." + clusterId).classed("barrier", true)
   });
 
@@ -282,11 +282,7 @@ function renderDagVizForJob(svgContainer) {
 
 /* Render the dot file as an SVG in the given container. */
 function renderDot(dot, container, forJob) {
-  var escaped_dot = dot
-    .replace(/&lt;/g, "<")
-    .replace(/&gt;/g, ">")
-    .replace(/&quot;/g, "\"");
-  var g = graphlibDot.read(escaped_dot);
+  var g = graphlibDot.read(dot);
   var renderer = new dagreD3.render();
   preprocessGraphLayout(g, forJob);
   renderer(container, g);
@@ -498,18 +494,11 @@ function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) {
   edgesContainer.append("path").datum(points).attr("d", line);
 }
 
-/*
- * Replace `/n` with `<br/>`
- */
-function replaceLineBreak(str) {
-  return str.replace("\\n", "<br/>");
-}
-
 /* (Job page only) Helper function to add tooltips for RDDs. */
 function addTooltipsForRDDs(svgContainer) {
   svgContainer.selectAll("g.node").each(function() {
     var node = d3.select(this);
-    var tooltipText = replaceLineBreak(node.attr("name"));
+    var tooltipText = node.attr("name");
     if (tooltipText) {
       node.select("circle")
         .attr("data-toggle", "tooltip")
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
index 68c8d8260f..0b362201a7 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
@@ -88,7 +88,8 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
       // The timeline library treats contents as HTML, so we have to escape them. We need to add
       // extra layers of escaping in order to embed this in a Javascript string literal.
       val escapedDesc = Utility.escape(jobDescription)
-      val jsEscapedDesc = StringEscapeUtils.escapeEcmaScript(escapedDesc)
+      val jsEscapedDescForTooltip = StringEscapeUtils.escapeEcmaScript(Utility.escape(escapedDesc))
+      val jsEscapedDescForLabel = StringEscapeUtils.escapeEcmaScript(escapedDesc)
       val jobEventJsonAsStr =
         s"""
            |{
@@ -98,7 +99,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
            |  'end': new Date(${completionTime}),
            |  'content': '<div class="application-timeline-content"' +
            |     'data-html="true" data-placement="top" data-toggle="tooltip"' +
-           |     'data-title="${jsEscapedDesc} (Job ${jobId})<br>' +
+           |     'data-title="${jsEscapedDescForTooltip} (Job ${jobId})<br>' +
            |     'Status: ${status}<br>' +
            |     'Submitted: ${UIUtils.formatDate(new Date(submissionTime))}' +
            |     '${
@@ -108,7 +109,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
                    ""
                  }
               }">' +
-           |    '${jsEscapedDesc} (Job ${jobId})</div>'
+           |    '${jsEscapedDescForLabel} (Job ${jobId})</div>'
            |}
          """.stripMargin
       jobEventJsonAsStr
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
index eacc6ce527..9be7124adc 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
@@ -69,7 +69,8 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
       // The timeline library treats contents as HTML, so we have to escape them. We need to add
       // extra layers of escaping in order to embed this in a Javascript string literal.
       val escapedName = Utility.escape(name)
-      val jsEscapedName = StringEscapeUtils.escapeEcmaScript(escapedName)
+      val jsEscapedNameForTooltip = StringEscapeUtils.escapeEcmaScript(Utility.escape(escapedName))
+      val jsEscapedNameForLabel = StringEscapeUtils.escapeEcmaScript(escapedName)
       s"""
          |{
          |  'className': 'stage job-timeline-object ${status}',
@@ -78,7 +79,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
          |  'end': new Date(${completionTime}),
          |  'content': '<div class="job-timeline-content" data-toggle="tooltip"' +
          |    'data-placement="top" data-html="true"' +
-         |    'data-title="${jsEscapedName} (Stage ${stageId}.${attemptId})<br>' +
+         |    'data-title="${jsEscapedNameForTooltip} (Stage ${stageId}.${attemptId})<br>' +
          |    'Status: ${status.toUpperCase(Locale.ROOT)}<br>' +
          |    'Submitted: ${UIUtils.formatDate(submissionTime)}' +
          |    '${
@@ -88,7 +89,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
                  ""
                }
             }">' +
-         |    '${jsEscapedName} (Stage ${stageId}.${attemptId})</div>',
+         |    '${jsEscapedNameForLabel} (Stage ${stageId}.${attemptId})</div>',
          |}
        """.stripMargin
     }
diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
index 9ace324322..842ee7aaf4 100644
--- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
+++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
@@ -21,6 +21,7 @@ import java.util.Objects
 
 import scala.collection.mutable
 import scala.collection.mutable.{ListBuffer, StringBuilder}
+import scala.xml.Utility
 
 import org.apache.commons.text.StringEscapeUtils
 
@@ -245,8 +246,9 @@ private[spark] object RDDOperationGraph extends Logging {
     } else {
       ""
     }
-    val label = s"${node.name} [${node.id}]$isCached$isBarrier\n${node.callsite}"
-    s"""${node.id} [label="${StringEscapeUtils.escapeJava(label)}"]"""
+    val escapedCallsite = Utility.escape(node.callsite)
+    val label = s"${node.name} [${node.id}]$isCached$isBarrier<br>${escapedCallsite}"
+    s"""${node.id} [labelType="html" label="${StringEscapeUtils.escapeJava(label)}"]"""
   }
 
   /** Update the dot representation of the RDDOperationGraph in cluster to subgraph. */
diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala
index 757e03b2b5..3ec9385116 100644
--- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala
@@ -44,6 +44,7 @@ import org.apache.spark.internal.config.Status._
 import org.apache.spark.internal.config.UI._
 import org.apache.spark.shuffle.FetchFailedException
 import org.apache.spark.status.api.v1.{JacksonMessageWriter, RDDDataDistribution, StageStatus}
+import org.apache.spark.util.CallSite
 
 private[spark] class SparkUICssErrorHandler extends DefaultCssErrorHandler {
 
@@ -687,29 +688,29 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B
        assert(stage0.contains("digraph G {\n  subgraph clusterstage_0 {\n    " +
          "label=&quot;Stage 0&quot;;\n    subgraph "))
        assert(stage0.contains("{\n      label=&quot;parallelize&quot;;\n      " +
-         "0 [label=&quot;ParallelCollectionRDD [0]"))
+         "0 [labelType=&quot;html&quot; label=&quot;ParallelCollectionRDD [0]"))
        assert(stage0.contains("{\n      label=&quot;map&quot;;\n      " +
-         "1 [label=&quot;MapPartitionsRDD [1]"))
+         "1 [labelType=&quot;html&quot; label=&quot;MapPartitionsRDD [1]"))
        assert(stage0.contains("{\n      label=&quot;groupBy&quot;;\n      " +
-         "2 [label=&quot;MapPartitionsRDD [2]"))
+         "2 [labelType=&quot;html&quot; label=&quot;MapPartitionsRDD [2]"))
 
        val stage1 = Source.fromURL(sc.ui.get.webUrl +
          "/stages/stage/?id=1&attempt=0&expandDagViz=true").mkString
        assert(stage1.contains("digraph G {\n  subgraph clusterstage_1 {\n    " +
          "label=&quot;Stage 1&quot;;\n    subgraph "))
        assert(stage1.contains("{\n      label=&quot;groupBy&quot;;\n      " +
-         "3 [label=&quot;ShuffledRDD [3]"))
+         "3 [labelType=&quot;html&quot; label=&quot;ShuffledRDD [3]"))
        assert(stage1.contains("{\n      label=&quot;map&quot;;\n      " +
-         "4 [label=&quot;MapPartitionsRDD [4]"))
+         "4 [labelType=&quot;html&quot; label=&quot;MapPartitionsRDD [4]"))
        assert(stage1.contains("{\n      label=&quot;groupBy&quot;;\n      " +
-         "5 [label=&quot;MapPartitionsRDD [5]"))
+         "5 [labelType=&quot;html&quot; label=&quot;MapPartitionsRDD [5]"))
 
        val stage2 = Source.fromURL(sc.ui.get.webUrl +
          "/stages/stage/?id=2&attempt=0&expandDagViz=true").mkString
        assert(stage2.contains("digraph G {\n  subgraph clusterstage_2 {\n    " +
          "label=&quot;Stage 2&quot;;\n    subgraph "))
        assert(stage2.contains("{\n      label=&quot;groupBy&quot;;\n      " +
-         "6 [label=&quot;ShuffledRDD [6]"))
+         "6 [labelType=&quot;html&quot; label=&quot;ShuffledRDD [6]"))
       }
     }
   }
@@ -772,6 +773,33 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B
     }
   }
 
+  test("SPARK-31534: text for tooltip should be escaped") {
+    withSpark(newSparkContext()) { sc =>
+      sc.setLocalProperty(CallSite.LONG_FORM, "collect at <console>:25")
+      sc.setLocalProperty(CallSite.SHORT_FORM, "collect at <console>:25")
+      sc.parallelize(1 to 10).collect
+
+      val driver = webDriver.asInstanceOf[HtmlUnitDriver]
+      driver.setJavascriptEnabled(true)
+
+      eventually(timeout(10.seconds), interval(50.milliseconds)) {
+        goToUi(sc, "/jobs")
+        val jobDesc =
+          driver.findElement(By.cssSelector("div[class='application-timeline-content']"))
+        jobDesc.getAttribute("data-title") should include ("collect at &lt;console&gt;:25")
+
+        goToUi(sc, "/jobs/job/?id=0")
+        val stageDesc = driver.findElement(By.cssSelector("div[class='job-timeline-content']"))
+        stageDesc.getAttribute("data-title") should include ("collect at &lt;console&gt;:25")
+
+        // Open DAG Viz.
+        driver.findElement(By.id("job-dag-viz")).click()
+        val nodeDesc = driver.findElement(By.cssSelector("g[class='node_0 node']"))
+        nodeDesc.getAttribute("name") should include ("collect at &lt;console&gt;:25")
+      }
+    }
+  }
+
   def goToUi(sc: SparkContext, path: String): Unit = {
     goToUi(sc.ui.get, path)
   }