[SPARK-33690][SQL] Escape meta-characters in showString
### What changes were proposed in this pull request? This PR intends to escape meta-characters (e.g., \n and \t) in `Dataset.showString`. Before this PR: ``` scala> Seq("aaa\nbbb\t\tccccc").toDF("value").show() +--------------+ | value| +--------------+ |aaa bbb ccccc| +--------------+ ``` After this PR: ``` +-----------------+ | value| +-----------------+ |aaa\nbbb\t\tccccc| +-----------------+ ``` ### Why are the changes needed? For better output. ### Does this PR introduce _any_ user-facing change? Yes, the `show()` output now escapes `\n` and `\t` as shown above. ### How was this patch tested? Added a unit test. Closes #30647 from maropu/EscapeMetaInShow. Authored-by: Takeshi Yamamuro <yamamuro@apache.org> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
45af3c9688
commit
8197ee3b15
|
@ -26,6 +26,8 @@ license: |
|
|||
|
||||
- In Spark 3.2, `spark.sql.adaptive.enabled` is enabled by default. To restore the behavior before Spark 3.2, you can set `spark.sql.adaptive.enabled` to `false`.
|
||||
|
||||
- In Spark 3.2, the meta-characters `\n` and `\t` are escaped in the `show()` action. In Spark 3.1 or earlier, the two meta-characters are output as they are.
|
||||
|
||||
## Upgrading from Spark SQL 3.0 to 3.1
|
||||
|
||||
- In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`.
|
||||
|
|
|
@ -308,7 +308,9 @@ class Dataset[T] private[sql](
|
|||
val str = cell match {
|
||||
case null => "null"
|
||||
case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]")
|
||||
case _ => cell.toString
|
||||
case _ =>
|
||||
// Escapes meta-characters not to break the `showString` format
|
||||
cell.toString.replaceAll("\n", "\\\\n").replaceAll("\t", "\\\\t")
|
||||
}
|
||||
if (truncate > 0 && str.length > truncate) {
|
||||
// do not show ellipses for strings shorter than 4 characters.
|
||||
|
|
|
@ -1235,6 +1235,44 @@ class DataFrameSuite extends QueryTest
|
|||
assert(df.showString(10, vertical = true) === expectedAnswer)
|
||||
}
|
||||
|
||||
test("SPARK-33690: showString: escape meta-characters") {
  // `showString` must render embedded \n and \t as the two-character escape
  // sequences, otherwise a cell containing them breaks the tabular layout.
  // (Triple-quoted strings do not process escapes, so `\n` below is a literal
  // backslash followed by 'n' — exactly what the escaped output should show.)

  // Top-level string column.
  val df1 = Seq("aaa\nbbb\tccc").toDF("value")
  assert(df1.showString(1, truncate = 0) ===
    """+-------------+
      ||value        |
      |+-------------+
      ||aaa\nbbb\tccc|
      |+-------------+
      |""".stripMargin)

  // String nested inside an array — escaping must apply to elements too.
  val df2 = Seq(Seq("aaa\nbbb\tccc")).toDF("value")
  assert(df2.showString(1, truncate = 0) ===
    """+---------------+
      ||value          |
      |+---------------+
      ||[aaa\nbbb\tccc]|
      |+---------------+
      |""".stripMargin)

  // Strings in both map keys and map values.
  val df3 = Seq(Map("aaa\nbbb\tccc" -> "aaa\nbbb\tccc")).toDF("value")
  assert(df3.showString(1, truncate = 0) ===
    """+--------------------------------+
      ||value                           |
      |+--------------------------------+
      ||{aaa\nbbb\tccc -> aaa\nbbb\tccc}|
      |+--------------------------------+
      |""".stripMargin)

  // String nested inside a struct field.
  val df4 = Seq("aaa\nbbb\tccc").toDF("value").selectExpr("named_struct('v', value)")
  assert(df4.showString(1, truncate = 0) ===
    """+----------------------+
      ||named_struct(v, value)|
      |+----------------------+
      ||{aaa\nbbb\tccc}       |
      |+----------------------+
      |""".stripMargin)
}
|
||||
|
||||
test("SPARK-7319 showString") {
|
||||
val expectedAnswer = """+---+-----+
|
||||
||key|value|
|
||||
|
|
|
@ -261,11 +261,11 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
|
|||
"PartitionFilters: \\[isnotnull\\(k#xL\\), dynamicpruningexpression\\(k#xL " +
|
||||
"IN subquery#x\\)\\]"
|
||||
val expected_pattern3 =
|
||||
"Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" +
|
||||
"/df2/.*, ... 99 entries\\]"
|
||||
"Location: InMemoryFileIndex \\[\\S*org.apache.spark.sql.ExplainSuite" +
|
||||
"/df2/\\S*, ... 99 entries\\]"
|
||||
val expected_pattern4 =
|
||||
"Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" +
|
||||
"/df1/.*, ... 999 entries\\]"
|
||||
"Location: InMemoryFileIndex \\[\\S*org.apache.spark.sql.ExplainSuite" +
|
||||
"/df1/\\S*, ... 999 entries\\]"
|
||||
withNormalizedExplain(sqlText) { normalizedOutput =>
|
||||
assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1)
|
||||
assert(expected_pattern2.r.findAllMatchIn(normalizedOutput).length == 1)
|
||||
|
|
Loading…
Reference in a new issue