[SPARK-8434][SQL] Add a "truncate" parameter to the "show" method to display long strings

Sometimes the user wants to see the complete content of every cell, but `show` truncates long strings. For example, `sql("set -v").show()` currently displays:

![screen shot 2015-06-18 at 4 34 51 pm](https://cloud.githubusercontent.com/assets/1000778/8227339/14d3c5ea-15d9-11e5-99b9-f00b7e93beef.png)

To see the complete content, the user currently has to write something like `sql("set -v").collect().foreach(r => println(r.toSeq.mkString("\t")))`.

This PR adds a `truncate` parameter to `show` (originally named `pretty`; renamed during review). If `truncate` is false, `show` won't truncate strings or right-align cells.

![screen shot 2015-06-18 at 4 21 44 pm](https://cloud.githubusercontent.com/assets/1000778/8227407/b6f8dcac-15d9-11e5-8219-8079280d76fc.png)

Author: zsxwing <zsxwing@gmail.com>

Closes #6877 from zsxwing/show and squashes the following commits:

22e28e9 [zsxwing] pretty -> truncate
e582628 [zsxwing] Add pretty parameter to the show method in R
a3cd55b [zsxwing] Fix calling showString in R
923cee4 [zsxwing] Add a "pretty" parameter to show to display long strings
This commit is contained in:
zsxwing 2015-06-29 23:44:11 -07:00 committed by Reynold Xin
parent 6c5a6db4d5
commit 12671dd5e4
4 changed files with 76 additions and 11 deletions

View file

@ -169,8 +169,8 @@ setMethod("isLocal",
#'}
setMethod("showDF",
signature(x = "DataFrame"),
function(x, numRows = 20) {
s <- callJMethod(x@sdf, "showString", numToInt(numRows))
function(x, numRows = 20, truncate = TRUE) {
s <- callJMethod(x@sdf, "showString", numToInt(numRows), truncate)
cat(s)
})

View file

@ -247,9 +247,12 @@ class DataFrame(object):
return self._jdf.isLocal()
@since(1.3)
def show(self, n=20):
def show(self, n=20, truncate=True):
"""Prints the first ``n`` rows to the console.
:param n: Number of rows to show.
:param truncate: Whether to truncate long strings and right-align cells.
>>> df
DataFrame[age: int, name: string]
>>> df.show()
@ -260,7 +263,7 @@ class DataFrame(object):
| 5| Bob|
+---+-----+
"""
print(self._jdf.showString(n))
print(self._jdf.showString(n, truncate))
def __repr__(self):
return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))

View file

@ -169,8 +169,9 @@ class DataFrame private[sql](
/**
* Internal API for Python
* @param _numRows Number of rows to show
* @param truncate Whether to truncate long strings and right-align cells
*/
private[sql] def showString(_numRows: Int): String = {
private[sql] def showString(_numRows: Int, truncate: Boolean = true): String = {
val numRows = _numRows.max(0)
val sb = new StringBuilder
val takeResult = take(numRows + 1)
@ -188,7 +189,7 @@ class DataFrame private[sql](
case seq: Seq[_] => seq.mkString("[", ", ", "]")
case _ => cell.toString
}
if (str.length > 20) str.substring(0, 17) + "..." else str
if (truncate && str.length > 20) str.substring(0, 17) + "..." else str
}: Seq[String]
}
@ -207,7 +208,11 @@ class DataFrame private[sql](
// column names
rows.head.zipWithIndex.map { case (cell, i) =>
if (truncate) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")
sb.append(sep)
@ -215,7 +220,11 @@ class DataFrame private[sql](
// data
rows.tail.map {
_.zipWithIndex.map { case (cell, i) =>
if (truncate) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")
}
@ -331,7 +340,8 @@ class DataFrame private[sql](
def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation]
/**
* Displays the [[DataFrame]] in a tabular form. For example:
* Displays the [[DataFrame]] in a tabular form. Strings longer than 20 characters will be
* truncated, and all cells will be right-aligned. For example:
* {{{
* year month AVG('Adj Close) MAX('Adj Close)
* 1980 12 0.503218 0.595103
@ -345,15 +355,46 @@ class DataFrame private[sql](
* @group action
* @since 1.3.0
*/
def show(numRows: Int): Unit = println(showString(numRows))
def show(numRows: Int): Unit = show(numRows, true)
/**
* Displays the top 20 rows of [[DataFrame]] in a tabular form.
* Displays the top 20 rows of [[DataFrame]] in a tabular form. Strings longer than 20
* characters will be truncated, and all cells will be right-aligned.
* @group action
* @since 1.3.0
*/
def show(): Unit = show(20)
/**
 * Prints the first 20 rows of this [[DataFrame]] as a table.
 *
 * @param truncate Whether to truncate long strings. When true, strings longer than 20
 *                 characters are truncated and every cell is right-aligned.
 *
 * @group action
 * @since 1.5.0
 */
def show(truncate: Boolean): Unit = {
  // Same default row count as the no-argument `show()`.
  val defaultNumRows = 20
  show(defaultNumRows, truncate)
}
/**
 * Prints this [[DataFrame]] to standard output as a table. For example:
 * {{{
 *   year  month AVG('Adj Close) MAX('Adj Close)
 *   1980  12    0.503218        0.595103
 *   1981  01    0.523289        0.570307
 *   1982  02    0.436504        0.475256
 *   1983  03    0.410516        0.442194
 *   1984  04    0.450090        0.483521
 * }}}
 * @param numRows Number of rows to show
 * @param truncate Whether to truncate long strings. When true, strings longer than 20
 *                 characters are truncated and every cell is right-aligned.
 *
 * @group action
 * @since 1.5.0
 */
def show(numRows: Int, truncate: Boolean): Unit = {
  // Build the whole table first, then emit it with a single println.
  val rendered = showString(numRows, truncate)
  println(rendered)
}
/**
* Returns a [[DataFrameNaFunctions]] for working with missing data.
* {{{

View file

@ -492,6 +492,27 @@ class DataFrameSuite extends QueryTest {
testData.select($"*").show(1000)
}
test("showString: truncate = [true, false]") {
  // 21 characters: exactly one more than the 20-character truncation threshold.
  val longString = Array.fill(21)("1").mkString
  val df = ctx.sparkContext.parallelize(Seq("1", longString)).toDF()

  // truncate = false: strings are kept whole and cells are right-padded (left-aligned),
  // so the column is as wide as the longest value (21 characters).
  val expectedAnswerForFalse = """+---------------------+
                                 ||_1                   |
                                 |+---------------------+
                                 ||1                    |
                                 ||111111111111111111111|
                                 |+---------------------+
                                 |""".stripMargin
  assert(df.showString(10, truncate = false) === expectedAnswerForFalse)

  // truncate = true: the long value is cut to 17 characters plus "..." (20 in total)
  // and cells are left-padded (right-aligned).
  val expectedAnswerForTrue = """+--------------------+
                                ||                  _1|
                                |+--------------------+
                                ||                   1|
                                ||11111111111111111...|
                                |+--------------------+
                                |""".stripMargin
  assert(df.showString(10, truncate = true) === expectedAnswerForTrue)
}
test("showString(negative)") {
val expectedAnswer = """+---+-----+
||key|value|