[SPARK-15850][SQL] Remove function grouping in SparkSession
## What changes were proposed in this pull request?

SparkSession does not have that many functions due to better namespacing, and as a result we probably don't need the function grouping. This patch removes the grouping and also adds missing scaladocs for createDataset functions in SQLContext.

Closes #13577.

## How was this patch tested?

N/A - this is a documentation change.

Author: Reynold Xin <rxin@databricks.com>

Closes #13582 from rxin/SPARK-15850.
This commit is contained in:
parent
4d9d9cc585
commit
16df133d7f
|
@ -51,7 +51,7 @@ import org.apache.spark.sql.util.ExecutionListenerManager
|
|||
* @groupname specificdata Specific Data Sources
|
||||
* @groupname config Configuration
|
||||
* @groupname dataframes Custom DataFrame Creation
|
||||
* @groupname dataset Custom DataFrame Creation
|
||||
* @groupname dataset Custom Dataset Creation
|
||||
* @groupname Ungrouped Support functions for language integrated queries
|
||||
* @since 1.0.0
|
||||
*/
|
||||
|
@ -346,15 +346,73 @@ class SQLContext private[sql](val sparkSession: SparkSession)
|
|||
sparkSession.createDataFrame(rowRDD, schema, needsConversion)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* :: Experimental ::
|
||||
* Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an
|
||||
* encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
|
||||
* that is generally created automatically through implicits from a `SparkSession`, or can be
|
||||
* created explicitly by calling static methods on [[Encoders]].
|
||||
*
|
||||
* == Example ==
|
||||
*
|
||||
* {{{
|
||||
*
|
||||
* import spark.implicits._
|
||||
* case class Person(name: String, age: Long)
|
||||
* val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))
|
||||
* val ds = spark.createDataset(data)
|
||||
*
|
||||
* ds.show()
|
||||
* // +-------+---+
|
||||
* // | name|age|
|
||||
* // +-------+---+
|
||||
* // |Michael| 29|
|
||||
* // | Andy| 30|
|
||||
* // | Justin| 19|
|
||||
* // +-------+---+
|
||||
* }}}
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
|
||||
sparkSession.createDataset(data)
|
||||
}
|
||||
|
||||
/**
|
||||
* :: Experimental ::
|
||||
* Creates a [[Dataset]] from an RDD of a given type. This method requires an
|
||||
* encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
|
||||
* that is generally created automatically through implicits from a `SparkSession`, or can be
|
||||
* created explicitly by calling static methods on [[Encoders]].
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
|
||||
sparkSession.createDataset(data)
|
||||
}
|
||||
|
||||
/**
|
||||
* :: Experimental ::
|
||||
* Creates a [[Dataset]] from a [[java.util.List]] of a given type. This method requires an
|
||||
* encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
|
||||
* that is generally created automatically through implicits from a `SparkSession`, or can be
|
||||
* created explicitly by calling static methods on [[Encoders]].
|
||||
*
|
||||
* == Java Example ==
|
||||
*
|
||||
* {{{
|
||||
* List<String> data = Arrays.asList("hello", "world");
|
||||
* Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
|
||||
* }}}
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
|
||||
sparkSession.createDataset(data)
|
||||
}
|
||||
|
|
|
@ -119,7 +119,6 @@ class SparkSession private(
|
|||
* configurations that are relevant to Spark SQL. When getting the value of a config,
|
||||
* this defaults to the value set in the underlying [[SparkContext]], if any.
|
||||
*
|
||||
* @group config
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@transient lazy val conf: RuntimeConfig = new RuntimeConfig(sessionState.conf)
|
||||
|
@ -129,7 +128,6 @@ class SparkSession private(
|
|||
* An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
|
||||
* that listen for execution metrics.
|
||||
*
|
||||
* @group basic
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@Experimental
|
||||
|
@ -140,7 +138,6 @@ class SparkSession private(
|
|||
* A collection of methods that are considered experimental, but can be used to hook into
|
||||
* the query planner for advanced functionality.
|
||||
*
|
||||
* @group basic
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@Experimental
|
||||
|
@ -175,7 +172,6 @@ class SparkSession private(
|
|||
* DataTypes.StringType);
|
||||
* }}}
|
||||
*
|
||||
* @group basic
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def udf: UDFRegistration = sessionState.udf
|
||||
|
@ -185,7 +181,6 @@ class SparkSession private(
|
|||
* Returns a [[ContinuousQueryManager]] that allows managing all the
|
||||
* [[ContinuousQuery ContinuousQueries]] active on `this`.
|
||||
*
|
||||
* @group basic
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@Experimental
|
||||
|
@ -200,7 +195,6 @@ class SparkSession private(
|
|||
* and child sessions are set up with the same shared state. If the underlying catalog
|
||||
* implementation is Hive, this will initialize the metastore, which may take some time.
|
||||
*
|
||||
* @group basic
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def newSession(): SparkSession = {
|
||||
|
@ -215,7 +209,6 @@ class SparkSession private(
|
|||
/**
|
||||
* Returns a [[DataFrame]] with no rows or columns.
|
||||
*
|
||||
* @group dataframes
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@transient
|
||||
|
@ -239,7 +232,6 @@ class SparkSession private(
|
|||
* :: Experimental ::
|
||||
* Creates a [[DataFrame]] from an RDD of Product (e.g. case classes, tuples).
|
||||
*
|
||||
* @group dataframes
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@Experimental
|
||||
|
@ -255,7 +247,6 @@ class SparkSession private(
|
|||
* :: Experimental ::
|
||||
* Creates a [[DataFrame]] from a local Seq of Product.
|
||||
*
|
||||
* @group dataframes
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@Experimental
|
||||
|
@ -295,7 +286,6 @@ class SparkSession private(
|
|||
* sparkSession.sql("select name from people").collect.foreach(println)
|
||||
* }}}
|
||||
*
|
||||
* @group dataframes
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@DeveloperApi
|
||||
|
@ -309,7 +299,6 @@ class SparkSession private(
|
|||
* It is important to make sure that the structure of every [[Row]] of the provided RDD matches
|
||||
* the provided schema. Otherwise, there will be a runtime exception.
|
||||
*
|
||||
* @group dataframes
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@DeveloperApi
|
||||
|
@ -323,7 +312,6 @@ class SparkSession private(
|
|||
* It is important to make sure that the structure of every [[Row]] of the provided List matches
|
||||
* the provided schema. Otherwise, there will be a runtime exception.
|
||||
*
|
||||
* @group dataframes
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@DeveloperApi
|
||||
|
@ -337,7 +325,6 @@ class SparkSession private(
|
|||
* WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
|
||||
* SELECT * queries will return the columns in an undefined order.
|
||||
*
|
||||
* @group dataframes
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
|
||||
|
@ -357,7 +344,6 @@ class SparkSession private(
|
|||
* WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
|
||||
* SELECT * queries will return the columns in an undefined order.
|
||||
*
|
||||
* @group dataframes
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
|
||||
|
@ -369,7 +355,6 @@ class SparkSession private(
|
|||
*
|
||||
* WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
|
||||
* SELECT * queries will return the columns in an undefined order.
|
||||
* @group dataframes
|
||||
* @since 1.6.0
|
||||
*/
|
||||
def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame = {
|
||||
|
@ -382,7 +367,6 @@ class SparkSession private(
|
|||
/**
|
||||
* Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]].
|
||||
*
|
||||
* @group dataframes
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = {
|
||||
|
@ -420,7 +404,6 @@ class SparkSession private(
|
|||
* }}}
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
|
||||
|
@ -439,7 +422,6 @@ class SparkSession private(
|
|||
* created explicitly by calling static methods on [[Encoders]].
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
|
||||
|
@ -465,7 +447,6 @@ class SparkSession private(
|
|||
* }}}
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
|
||||
|
@ -478,7 +459,6 @@ class SparkSession private(
|
|||
* in a range from 0 to `end` (exclusive) with step value 1.
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def range(end: Long): Dataset[java.lang.Long] = range(0, end)
|
||||
|
@ -489,7 +469,6 @@ class SparkSession private(
|
|||
* in a range from `start` to `end` (exclusive) with step value 1.
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def range(start: Long, end: Long): Dataset[java.lang.Long] = {
|
||||
|
@ -502,7 +481,6 @@ class SparkSession private(
|
|||
* in a range from `start` to `end` (exclusive) with a step value.
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = {
|
||||
|
@ -516,7 +494,6 @@ class SparkSession private(
|
|||
* specified.
|
||||
*
|
||||
* @since 2.0.0
|
||||
* @group dataset
|
||||
*/
|
||||
@Experimental
|
||||
def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[java.lang.Long] = {
|
||||
|
@ -565,7 +542,6 @@ class SparkSession private(
|
|||
* Interface through which the user may create, drop, alter or query underlying
|
||||
* databases, tables, functions etc.
|
||||
*
|
||||
* @group ddl_ops
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@transient lazy val catalog: Catalog = new CatalogImpl(self)
|
||||
|
@ -573,7 +549,6 @@ class SparkSession private(
|
|||
/**
|
||||
* Returns the specified table as a [[DataFrame]].
|
||||
*
|
||||
* @group ddl_ops
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def table(tableName: String): DataFrame = {
|
||||
|
@ -592,7 +567,6 @@ class SparkSession private(
|
|||
* Executes a SQL query using Spark, returning the result as a [[DataFrame]].
|
||||
* The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect'.
|
||||
*
|
||||
* @group basic
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def sql(sqlText: String): DataFrame = {
|
||||
|
@ -606,7 +580,6 @@ class SparkSession private(
|
|||
* sparkSession.read.schema(schema).json("/path/to/file.json")
|
||||
* }}}
|
||||
*
|
||||
* @group genericdata
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def read: DataFrameReader = new DataFrameReader(self)
|
||||
|
@ -624,7 +597,6 @@ class SparkSession private(
|
|||
* import sparkSession.implicits._
|
||||
* }}}
|
||||
*
|
||||
* @group basic
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@Experimental
|
||||
|
|
|
@ -2437,7 +2437,7 @@ object functions {
|
|||
*/
|
||||
def minute(e: Column): Column = withExpr { Minute(e.expr) }
|
||||
|
||||
/*
|
||||
/**
|
||||
* Returns number of months between dates `date1` and `date2`.
|
||||
* @group datetime_funcs
|
||||
* @since 1.5.0
|
||||
|
|
Loading…
Reference in a new issue