[SPARK-31534][WEBUI] Text for tooltip should be escaped

### What changes were proposed in this pull request?

This PR escapes text for tooltip for DAG Viz and Timeline View.

### Why are the changes needed?

This is a bug.
Normally, DAG Viz and Timeline View show tooltip like as follows.

<img width="278" alt="dag-viz-tooltip" src="https://user-images.githubusercontent.com/4736016/80127481-5a6c6880-85cf-11ea-8daf-cfd59aa3ba09.png">
<img width="477" alt="timeline-tooltip" src="https://user-images.githubusercontent.com/4736016/80127500-60624980-85cf-11ea-9b0f-cce301019e3a.png">

They contain a callsite properly.
However, if a callsite contains characters which should be escaped for HTML without escaping , the corresponding tooltips wouldn't show the callsite and its following text properly.
<img width="179" alt="dag-viz-tooltip-before-fixed" src="https://user-images.githubusercontent.com/4736016/80128480-b1267200-85d0-11ea-8035-ad68ae5fbcab.png">
<img width="261" alt="timeline-tooltip-before-fixed" src="https://user-images.githubusercontent.com/4736016/80128492-b5eb2600-85d0-11ea-9556-c48490110244.png">

The reason of this issue is that the source texts of the tooltip texts are not escaped.

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

I tested manually.
First, I ran a job `sc.parallelize(1 to 10).collect` in Spark Shell then, visited AllJobsPage and JobPage and confirmed tooltip texts.
<img width="196" alt="dag-viz-tooltip-fixed" src="https://user-images.githubusercontent.com/4736016/80128813-2db95080-85d1-11ea-82f8-90a1f4547f30.png">
<img width="363" alt="timeline-tooltip-fixed" src="https://user-images.githubusercontent.com/4736016/80128824-31e56e00-85d1-11ea-9818-492b72b1c56e.png">

I also added a testcase.

Closes #28317 from sarutak/fix-tooltip.

Authored-by: Kousuke Saruta <sarutak@oss.nttdata.com>
Signed-off-by: Kousuke Saruta <sarutak@oss.nttdata.com>
This commit is contained in:
Kousuke Saruta 2020-04-27 05:14:46 +09:00
parent e01125db0d
commit d61c6219cd
5 changed files with 51 additions and 30 deletions

View file

@ -173,8 +173,8 @@ function renderDagViz(forJob) {
});
metadataContainer().selectAll(".barrier-rdd").each(function() {
var rddId = d3.select(this).text().trim()
var clusterId = VizConstants.clusterPrefix + rddId
var rddId = d3.select(this).text().trim();
var clusterId = VizConstants.clusterPrefix + rddId;
svg.selectAll("g." + clusterId).classed("barrier", true)
});
@ -282,11 +282,7 @@ function renderDagVizForJob(svgContainer) {
/* Render the dot file as an SVG in the given container. */
function renderDot(dot, container, forJob) {
var escaped_dot = dot
.replace(/&lt;/g, "<")
.replace(/&gt;/g, ">")
.replace(/&quot;/g, "\"");
var g = graphlibDot.read(escaped_dot);
var g = graphlibDot.read(dot);
var renderer = new dagreD3.render();
preprocessGraphLayout(g, forJob);
renderer(container, g);
@ -498,18 +494,11 @@ function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) {
edgesContainer.append("path").datum(points).attr("d", line);
}
/*
* Replace `/n` with `<br/>`
*/
function replaceLineBreak(str) {
return str.replace("\\n", "<br/>");
}
/* (Job page only) Helper function to add tooltips for RDDs. */
function addTooltipsForRDDs(svgContainer) {
svgContainer.selectAll("g.node").each(function() {
var node = d3.select(this);
var tooltipText = replaceLineBreak(node.attr("name"));
var tooltipText = node.attr("name");
if (tooltipText) {
node.select("circle")
.attr("data-toggle", "tooltip")

View file

@ -88,7 +88,8 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
// The timeline library treats contents as HTML, so we have to escape them. We need to add
// extra layers of escaping in order to embed this in a Javascript string literal.
val escapedDesc = Utility.escape(jobDescription)
val jsEscapedDesc = StringEscapeUtils.escapeEcmaScript(escapedDesc)
val jsEscapedDescForTooltip = StringEscapeUtils.escapeEcmaScript(Utility.escape(escapedDesc))
val jsEscapedDescForLabel = StringEscapeUtils.escapeEcmaScript(escapedDesc)
val jobEventJsonAsStr =
s"""
|{
@ -98,7 +99,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
| 'end': new Date(${completionTime}),
| 'content': '<div class="application-timeline-content"' +
| 'data-html="true" data-placement="top" data-toggle="tooltip"' +
| 'data-title="${jsEscapedDesc} (Job ${jobId})<br>' +
| 'data-title="${jsEscapedDescForTooltip} (Job ${jobId})<br>' +
| 'Status: ${status}<br>' +
| 'Submitted: ${UIUtils.formatDate(new Date(submissionTime))}' +
| '${
@ -108,7 +109,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
""
}
}">' +
| '${jsEscapedDesc} (Job ${jobId})</div>'
| '${jsEscapedDescForLabel} (Job ${jobId})</div>'
|}
""".stripMargin
jobEventJsonAsStr

View file

@ -69,7 +69,8 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
// The timeline library treats contents as HTML, so we have to escape them. We need to add
// extra layers of escaping in order to embed this in a Javascript string literal.
val escapedName = Utility.escape(name)
val jsEscapedName = StringEscapeUtils.escapeEcmaScript(escapedName)
val jsEscapedNameForTooltip = StringEscapeUtils.escapeEcmaScript(Utility.escape(escapedName))
val jsEscapedNameForLabel = StringEscapeUtils.escapeEcmaScript(escapedName)
s"""
|{
| 'className': 'stage job-timeline-object ${status}',
@ -78,7 +79,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
| 'end': new Date(${completionTime}),
| 'content': '<div class="job-timeline-content" data-toggle="tooltip"' +
| 'data-placement="top" data-html="true"' +
| 'data-title="${jsEscapedName} (Stage ${stageId}.${attemptId})<br>' +
| 'data-title="${jsEscapedNameForTooltip} (Stage ${stageId}.${attemptId})<br>' +
| 'Status: ${status.toUpperCase(Locale.ROOT)}<br>' +
| 'Submitted: ${UIUtils.formatDate(submissionTime)}' +
| '${
@ -88,7 +89,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
""
}
}">' +
| '${jsEscapedName} (Stage ${stageId}.${attemptId})</div>',
| '${jsEscapedNameForLabel} (Stage ${stageId}.${attemptId})</div>',
|}
""".stripMargin
}

View file

@ -21,6 +21,7 @@ import java.util.Objects
import scala.collection.mutable
import scala.collection.mutable.{ListBuffer, StringBuilder}
import scala.xml.Utility
import org.apache.commons.text.StringEscapeUtils
@ -245,8 +246,9 @@ private[spark] object RDDOperationGraph extends Logging {
} else {
""
}
val label = s"${node.name} [${node.id}]$isCached$isBarrier\n${node.callsite}"
s"""${node.id} [label="${StringEscapeUtils.escapeJava(label)}"]"""
val escapedCallsite = Utility.escape(node.callsite)
val label = s"${node.name} [${node.id}]$isCached$isBarrier<br>${escapedCallsite}"
s"""${node.id} [labelType="html" label="${StringEscapeUtils.escapeJava(label)}"]"""
}
/** Update the dot representation of the RDDOperationGraph in cluster to subgraph. */

View file

@ -44,6 +44,7 @@ import org.apache.spark.internal.config.Status._
import org.apache.spark.internal.config.UI._
import org.apache.spark.shuffle.FetchFailedException
import org.apache.spark.status.api.v1.{JacksonMessageWriter, RDDDataDistribution, StageStatus}
import org.apache.spark.util.CallSite
private[spark] class SparkUICssErrorHandler extends DefaultCssErrorHandler {
@ -687,29 +688,29 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B
assert(stage0.contains("digraph G {\n subgraph clusterstage_0 {\n " +
"label=&quot;Stage 0&quot;;\n subgraph "))
assert(stage0.contains("{\n label=&quot;parallelize&quot;;\n " +
"0 [label=&quot;ParallelCollectionRDD [0]"))
"0 [labelType=&quot;html&quot; label=&quot;ParallelCollectionRDD [0]"))
assert(stage0.contains("{\n label=&quot;map&quot;;\n " +
"1 [label=&quot;MapPartitionsRDD [1]"))
"1 [labelType=&quot;html&quot; label=&quot;MapPartitionsRDD [1]"))
assert(stage0.contains("{\n label=&quot;groupBy&quot;;\n " +
"2 [label=&quot;MapPartitionsRDD [2]"))
"2 [labelType=&quot;html&quot; label=&quot;MapPartitionsRDD [2]"))
val stage1 = Source.fromURL(sc.ui.get.webUrl +
"/stages/stage/?id=1&attempt=0&expandDagViz=true").mkString
assert(stage1.contains("digraph G {\n subgraph clusterstage_1 {\n " +
"label=&quot;Stage 1&quot;;\n subgraph "))
assert(stage1.contains("{\n label=&quot;groupBy&quot;;\n " +
"3 [label=&quot;ShuffledRDD [3]"))
"3 [labelType=&quot;html&quot; label=&quot;ShuffledRDD [3]"))
assert(stage1.contains("{\n label=&quot;map&quot;;\n " +
"4 [label=&quot;MapPartitionsRDD [4]"))
"4 [labelType=&quot;html&quot; label=&quot;MapPartitionsRDD [4]"))
assert(stage1.contains("{\n label=&quot;groupBy&quot;;\n " +
"5 [label=&quot;MapPartitionsRDD [5]"))
"5 [labelType=&quot;html&quot; label=&quot;MapPartitionsRDD [5]"))
val stage2 = Source.fromURL(sc.ui.get.webUrl +
"/stages/stage/?id=2&attempt=0&expandDagViz=true").mkString
assert(stage2.contains("digraph G {\n subgraph clusterstage_2 {\n " +
"label=&quot;Stage 2&quot;;\n subgraph "))
assert(stage2.contains("{\n label=&quot;groupBy&quot;;\n " +
"6 [label=&quot;ShuffledRDD [6]"))
"6 [labelType=&quot;html&quot; label=&quot;ShuffledRDD [6]"))
}
}
}
@ -772,6 +773,33 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B
}
}
test("SPARK-31534: text for tooltip should be escaped") {
withSpark(newSparkContext()) { sc =>
sc.setLocalProperty(CallSite.LONG_FORM, "collect at <console>:25")
sc.setLocalProperty(CallSite.SHORT_FORM, "collect at <console>:25")
sc.parallelize(1 to 10).collect
val driver = webDriver.asInstanceOf[HtmlUnitDriver]
driver.setJavascriptEnabled(true)
eventually(timeout(10.seconds), interval(50.milliseconds)) {
goToUi(sc, "/jobs")
val jobDesc =
driver.findElement(By.cssSelector("div[class='application-timeline-content']"))
jobDesc.getAttribute("data-title") should include ("collect at &lt;console&gt;:25")
goToUi(sc, "/jobs/job/?id=0")
val stageDesc = driver.findElement(By.cssSelector("div[class='job-timeline-content']"))
stageDesc.getAttribute("data-title") should include ("collect at &lt;console&gt;:25")
// Open DAG Viz.
driver.findElement(By.id("job-dag-viz")).click()
val nodeDesc = driver.findElement(By.cssSelector("g[class='node_0 node']"))
nodeDesc.getAttribute("name") should include ("collect at &lt;console&gt;:25")
}
}
}
def goToUi(sc: SparkContext, path: String): Unit = {
goToUi(sc.ui.get, path)
}