diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 164bfd42d6..484823b7c0 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -26,6 +26,8 @@ license: |
 
   - In Spark 3.2, `spark.sql.adaptive.enabled` is enabled by default. To restore the behavior before Spark 3.2, you can set `spark.sql.adaptive.enabled` to `false`.
 
+  - In Spark 3.2, the meta-characters `\n` and `\t` are escaped in the `show()` action. In Spark 3.1 or earlier, the two meta-characters are output as they are.
+
 ## Upgrading from Spark SQL 3.0 to 3.1
 
   - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 6afbbce3ff..5c27359136 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -308,7 +308,9 @@ class Dataset[T] private[sql](
       val str = cell match {
         case null => "null"
         case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]")
-        case _ => cell.toString
+        case _ =>
+          // Escapes meta-characters so they do not break the `showString` format
+          cell.toString.replaceAll("\n", "\\\\n").replaceAll("\t", "\\\\t")
       }
       if (truncate > 0 && str.length > truncate) {
         // do not show ellipses for strings shorter than 4 characters.
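For illustration, here is a minimal, self-contained sketch of the escaping the new `case _` branch performs (the `escape` helper and demo object are hypothetical, not part of the patch). Note that `String.replaceAll` treats its second argument as a regex replacement string in which the backslash is special, so the four-backslash literal `"\\\\n"` denotes the two-character replacement `\n`:

```scala
// Hypothetical standalone demo of the escaping applied by the patched
// `showString` branch; only the body of `escape` mirrors the patch code.
object ShowStringEscapeDemo {
  // Same two replaceAll calls as in Dataset.scala: a real newline or tab
  // in the cell becomes the two-character sequence `\n` or `\t`.
  private def escape(cell: Any): String =
    cell.toString.replaceAll("\n", "\\\\n").replaceAll("\t", "\\\\t")

  def main(args: Array[String]): Unit = {
    // Without escaping, the embedded newline would split the cell across
    // two output lines and break the ASCII table drawn by show().
    println(escape("aaa\nbbb\tccc")) // prints: aaa\nbbb\tccc
  }
}
```

Because the replacement happens inside `showString`, it affects only the console rendering produced by `show()`; the underlying data is untouched, which is the behavior change the migration-guide entry above documents.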
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 4fecd62503..d777cd45b6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -1235,6 +1235,44 @@ class DataFrameSuite extends QueryTest
     assert(df.showString(10, vertical = true) === expectedAnswer)
   }
 
+  test("SPARK-33690: showString: escape meta-characters") {
+    val df1 = Seq("aaa\nbbb\tccc").toDF("value")
+    assert(df1.showString(1, truncate = 0) ===
+      """+-------------+
+        ||value        |
+        |+-------------+
+        ||aaa\nbbb\tccc|
+        |+-------------+
+        |""".stripMargin)
+
+    val df2 = Seq(Seq("aaa\nbbb\tccc")).toDF("value")
+    assert(df2.showString(1, truncate = 0) ===
+      """+---------------+
+        ||value          |
+        |+---------------+
+        ||[aaa\nbbb\tccc]|
+        |+---------------+
+        |""".stripMargin)
+
+    val df3 = Seq(Map("aaa\nbbb\tccc" -> "aaa\nbbb\tccc")).toDF("value")
+    assert(df3.showString(1, truncate = 0) ===
+      """+--------------------------------+
+        ||value                           |
+        |+--------------------------------+
+        ||{aaa\nbbb\tccc -> aaa\nbbb\tccc}|
+        |+--------------------------------+
+        |""".stripMargin)
+
+    val df4 = Seq("aaa\nbbb\tccc").toDF("value").selectExpr("named_struct('v', value)")
+    assert(df4.showString(1, truncate = 0) ===
+      """+----------------------+
+        ||named_struct(v, value)|
+        |+----------------------+
+        ||{aaa\nbbb\tccc}       |
+        |+----------------------+
+        |""".stripMargin)
+  }
+
   test("SPARK-7319 showString") {
     val expectedAnswer = """+---+-----+
                            ||key|value|
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index ddc4f1dab8..7d3285da25 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -261,11 +261,11 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
       "PartitionFilters: \\[isnotnull\\(k#xL\\), dynamicpruningexpression\\(k#xL " +
         "IN subquery#x\\)\\]"
     val expected_pattern3 =
-      "Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" +
-        "/df2/.*, ... 99 entries\\]"
+      "Location: InMemoryFileIndex \\[\\S*org.apache.spark.sql.ExplainSuite" +
+        "/df2/\\S*, ... 99 entries\\]"
     val expected_pattern4 =
-      "Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" +
-        "/df1/.*, ... 999 entries\\]"
+      "Location: InMemoryFileIndex \\[\\S*org.apache.spark.sql.ExplainSuite" +
+        "/df1/\\S*, ... 999 entries\\]"
     withNormalizedExplain(sqlText) { normalizedOutput =>
       assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1)
       assert(expected_pattern2.r.findAllMatchIn(normalizedOutput).length == 1)
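The `ExplainSuite` changes tighten `.*` to `\S*` in the location patterns. Since `\S*` cannot match whitespace, each pattern is now pinned to a single contiguous path token inside the `[...]` listing rather than letting a greedy `.*` run across other comma-separated entries on the same line. The sketch below (with made-up explain lines, not actual Spark output) illustrates the difference:

```scala
// Made-up input lines demonstrating why `\S*` is stricter than `.*` here.
object LocationPatternDemo {
  def main(args: Array[String]): Unit = {
    val oneEntry =
      "Location: InMemoryFileIndex [file:/tmp/ExplainSuite/df2/p0, ... 99 entries]"
    val twoEntries =
      "Location: InMemoryFileIndex [file:/other, file:/tmp/ExplainSuite/df2/p0, ... 99 entries]"

    val loose = "InMemoryFileIndex \\[.*ExplainSuite/df2/.*, ... 99 entries\\]".r
    val strict = "InMemoryFileIndex \\[\\S*ExplainSuite/df2/\\S*, ... 99 entries\\]".r

    println(loose.findFirstIn(oneEntry).isDefined)    // true
    println(strict.findFirstIn(oneEntry).isDefined)   // true
    // `.*` happily spans "file:/other, " to reach the df2 path ...
    println(loose.findFirstIn(twoEntries).isDefined)  // true
    // ... while `\S*` stops at the first space, so the match must be a
    // single path token immediately after the opening bracket.
    println(strict.findFirstIn(twoEntries).isDefined) // false
  }
}
```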