[SPARK-6007][SQL] Add numRows param in DataFrame.show()

It is useful to let the user decide the number of rows to show in DataFrame.show().

Author: Jacky Li <jacky.likun@huawei.com>

Closes #4767 from jackylk/show and squashes the following commits:

a0e0f4b [Jacky Li] fix testcase
7cdbe91 [Jacky Li] modify according to comment
bb54537 [Jacky Li] for Java compatibility
d7acc18 [Jacky Li] modify according to comments
981be52 [Jacky Li] add numRows param in DataFrame.show()
This commit is contained in:
Jacky Li 2015-02-26 10:40:58 -08:00 committed by Reynold Xin
parent df3d559b32
commit 2358657547
4 changed files with 27 additions and 6 deletions

View file

@ -272,9 +272,9 @@ class DataFrame(object):
"""
return self._jdf.isLocal()
def show(self):
def show(self, n=20):
"""
Print the first 20 rows.
Print the first n rows.
>>> df
DataFrame[age: int, name: string]
@ -283,7 +283,7 @@ class DataFrame(object):
2 Alice
5 Bob
"""
print self._jdf.showString().encode('utf8', 'ignore')
print self._jdf.showString(n).encode('utf8', 'ignore')
def __repr__(self):
return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))

View file

@ -159,9 +159,10 @@ class DataFrame protected[sql](
/**
* Internal API for Python
* @param numRows Number of rows to show
*/
private[sql] def showString(): String = {
val data = take(20)
private[sql] def showString(numRows: Int): String = {
val data = take(numRows)
val numCols = schema.fieldNames.length
// For cells that are beyond 20 characters, replace it with the first 17 and "..."
@ -293,9 +294,15 @@ class DataFrame protected[sql](
* 1983 03 0.410516 0.442194
* 1984 04 0.450090 0.483521
* }}}
* @param numRows Number of rows to show
* @group basic
*/
def show(): Unit = println(showString())
def show(numRows: Int): Unit = println(showString(numRows))
/**
 * Displays the top 20 rows of [[DataFrame]] in a tabular form.
 * Equivalent to calling `show(20)`.
 */
def show(): Unit = show(20)
/**
* Cartesian join with another [[DataFrame]].

View file

@ -20,6 +20,7 @@ package test.org.apache.spark.sql;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.apache.spark.sql.*;
@ -81,4 +82,12 @@ public class JavaDataFrameSuite {
df.groupBy().agg(countDistinct(col("key"), col("value")));
df.select(coalesce(col("key")));
}
@Ignore
public void testShow() {
// Intentionally ignored: this method exists only to make sure that both show() overloads
// compile correctly when called from Java.
// NOTE(review): no @Test annotation is present, so JUnit 4 presumably never discovers this
// method and @Ignore acts only as a marker — confirm that this is intended.
DataFrame df = context.table("testData");
df.show();
df.show(1000);
}
}

View file

@ -441,4 +441,9 @@ class DataFrameSuite extends QueryTest {
checkAnswer(df.select(df("key")), testData.select('key).collect().toSeq)
}
ignore("show") {
// Intentionally ignored: this test exists only to make sure both show() overloads compile.
testData.select($"*").show()
testData.select($"*").show(1000)
}
}