[SPARK-6007][SQL] Add numRows param in DataFrame.show()
It is useful to let the user decide the number of rows to show in DataFrame.show. Author: Jacky Li <jacky.likun@huawei.com> Closes #4767 from jackylk/show and squashes the following commits: a0e0f4b [Jacky Li] fix testcase 7cdbe91 [Jacky Li] modify according to comment bb54537 [Jacky Li] for Java compatibility d7acc18 [Jacky Li] modify according to comments 981be52 [Jacky Li] add numRows param in DataFrame.show()
This commit is contained in:
parent
df3d559b32
commit
2358657547
|
@ -272,9 +272,9 @@ class DataFrame(object):
|
|||
"""
|
||||
return self._jdf.isLocal()
|
||||
|
||||
def show(self):
|
||||
def show(self, n=20):
|
||||
"""
|
||||
Print the first 20 rows.
|
||||
Print the first n rows.
|
||||
|
||||
>>> df
|
||||
DataFrame[age: int, name: string]
|
||||
|
@ -283,7 +283,7 @@ class DataFrame(object):
|
|||
2 Alice
|
||||
5 Bob
|
||||
"""
|
||||
print self._jdf.showString().encode('utf8', 'ignore')
|
||||
print self._jdf.showString(n).encode('utf8', 'ignore')
|
||||
|
||||
def __repr__(self):
|
||||
return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
|
||||
|
|
|
@ -159,9 +159,10 @@ class DataFrame protected[sql](
|
|||
|
||||
/**
|
||||
* Internal API for Python
|
||||
* @param numRows Number of rows to show
|
||||
*/
|
||||
private[sql] def showString(): String = {
|
||||
val data = take(20)
|
||||
private[sql] def showString(numRows: Int): String = {
|
||||
val data = take(numRows)
|
||||
val numCols = schema.fieldNames.length
|
||||
|
||||
// For cells that are beyond 20 characters, replace it with the first 17 and "..."
|
||||
|
@ -293,9 +294,15 @@ class DataFrame protected[sql](
|
|||
* 1983 03 0.410516 0.442194
|
||||
* 1984 04 0.450090 0.483521
|
||||
* }}}
|
||||
* @param numRows Number of rows to show
|
||||
* @group basic
|
||||
*/
|
||||
def show(): Unit = println(showString())
|
||||
def show(numRows: Int): Unit = println(showString(numRows))
|
||||
|
||||
/**
|
||||
* Displays the top 20 rows of [[DataFrame]] in a tabular form.
|
||||
*/
|
||||
def show(): Unit = show(20)
|
||||
|
||||
/**
|
||||
* Cartesian join with another [[DataFrame]].
|
||||
|
|
|
@ -20,6 +20,7 @@ package test.org.apache.spark.sql;
|
|||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.apache.spark.sql.*;
|
||||
|
@ -81,4 +82,12 @@ public class JavaDataFrameSuite {
|
|||
df.groupBy().agg(countDistinct(col("key"), col("value")));
|
||||
df.select(coalesce(col("key")));
|
||||
}
|
||||
|
||||
@Ignore
|
||||
public void testShow() {
|
||||
// This test case is intentionally ignored, but is kept to make sure it compiles correctly
|
||||
DataFrame df = context.table("testData");
|
||||
df.show();
|
||||
df.show(1000);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -441,4 +441,9 @@ class DataFrameSuite extends QueryTest {
|
|||
checkAnswer(df.select(df("key")), testData.select('key).collect().toSeq)
|
||||
}
|
||||
|
||||
ignore("show") {
|
||||
// This test case is intentionally ignored, but is kept to make sure it compiles correctly
|
||||
testData.select($"*").show()
|
||||
testData.select($"*").show(1000)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue