[SPARK-23619][DOCS] Add output description for some generator expressions / functions
## What changes were proposed in this pull request? This PR addresses SPARK-23619: https://issues.apache.org/jira/browse/SPARK-23619 It adds additional comments indicating the default column names for the `explode` and `posexplode` functions in Spark-SQL. Functions for which comments have been updated so far: * stack * inline * explode * posexplode * explode_outer * posexplode_outer ## How was this patch tested? This is just a change in the comments. The package builds and tests successfully after the change. Closes #23748 from jashgala/SPARK-23619. Authored-by: Jash Gala <jashgala@amazon.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
6328be78f9
commit
90085a1847
|
@ -3589,6 +3589,8 @@ setMethod("element_at",
|
|||
|
||||
#' @details
|
||||
#' \code{explode}: Creates a new row for each element in the given array or map column.
|
||||
#' Uses the default column name \code{col} for elements in the array and
|
||||
#' \code{key} and \code{value} for elements in the map unless specified otherwise.
|
||||
#'
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases explode explode,Column-method
|
||||
|
@ -3649,7 +3651,9 @@ setMethod("sort_array",
|
|||
|
||||
#' @details
|
||||
#' \code{posexplode}: Creates a new row for each element with position in the given array
|
||||
#' or map column.
|
||||
#' or map column. Uses the default column name \code{pos} for position, and \code{col}
|
||||
#' for elements in the array and \code{key} and \code{value} for elements in the map
|
||||
#' unless specified otherwise.
|
||||
#'
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases posexplode posexplode,Column-method
|
||||
|
@ -3790,7 +3794,8 @@ setMethod("repeat_string",
|
|||
#' \code{explode_outer}: Creates a new row for each element in the given array or map column.
|
||||
#' Unlike \code{explode}, if the array/map is \code{null} or empty
|
||||
#' then \code{null} is produced.
|
||||
#'
|
||||
#' Uses the default column name \code{col} for elements in the array and
|
||||
#' \code{key} and \code{value} for elements in the map unless specified otherwise.
|
||||
#'
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases explode_outer explode_outer,Column-method
|
||||
|
@ -3815,6 +3820,9 @@ setMethod("explode_outer",
|
|||
#' \code{posexplode_outer}: Creates a new row for each element with position in the given
|
||||
#' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty
|
||||
#' then the row (\code{null}, \code{null}) is produced.
|
||||
#' Uses the default column name \code{pos} for position, and \code{col}
|
||||
#' for elements in the array and \code{key} and \code{value} for elements in the map
|
||||
#' unless specified otherwise.
|
||||
#'
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases posexplode_outer posexplode_outer,Column-method
|
||||
|
|
|
@ -2142,7 +2142,10 @@ def array_except(col1, col2):
|
|||
|
||||
@since(1.4)
|
||||
def explode(col):
|
||||
"""Returns a new row for each element in the given array or map.
|
||||
"""
|
||||
Returns a new row for each element in the given array or map.
|
||||
Uses the default column name `col` for elements in the array and
|
||||
`key` and `value` for elements in the map unless specified otherwise.
|
||||
|
||||
>>> from pyspark.sql import Row
|
||||
>>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})])
|
||||
|
@ -2163,7 +2166,10 @@ def explode(col):
|
|||
|
||||
@since(2.1)
|
||||
def posexplode(col):
|
||||
"""Returns a new row for each element with position in the given array or map.
|
||||
"""
|
||||
Returns a new row for each element with position in the given array or map.
|
||||
Uses the default column name `pos` for position, and `col` for elements in the
|
||||
array and `key` and `value` for elements in the map unless specified otherwise.
|
||||
|
||||
>>> from pyspark.sql import Row
|
||||
>>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})])
|
||||
|
@ -2184,8 +2190,11 @@ def posexplode(col):
|
|||
|
||||
@since(2.3)
|
||||
def explode_outer(col):
|
||||
"""Returns a new row for each element in the given array or map.
|
||||
"""
|
||||
Returns a new row for each element in the given array or map.
|
||||
Unlike explode, if the array/map is null or empty then null is produced.
|
||||
Uses the default column name `col` for elements in the array and
|
||||
`key` and `value` for elements in the map unless specified otherwise.
|
||||
|
||||
>>> df = spark.createDataFrame(
|
||||
... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)],
|
||||
|
@ -2217,8 +2226,11 @@ def explode_outer(col):
|
|||
|
||||
@since(2.3)
|
||||
def posexplode_outer(col):
|
||||
"""Returns a new row for each element with position in the given array or map.
|
||||
"""
|
||||
Returns a new row for each element with position in the given array or map.
|
||||
Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced.
|
||||
Uses the default column name `pos` for position, and `col` for elements in the
|
||||
array and `key` and `value` for elements in the map unless specified otherwise.
|
||||
|
||||
>>> df = spark.createDataFrame(
|
||||
... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)],
|
||||
|
|
|
@ -127,14 +127,16 @@ case class UserDefinedGenerator(
|
|||
* 3 NULL
|
||||
* }}}
|
||||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows.",
|
||||
usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows. Uses column names col0, col1, etc. by default unless specified otherwise.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(2, 1, 2, 3);
|
||||
1 2
|
||||
3 NULL
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
case class Stack(children: Seq[Expression]) extends Generator {
|
||||
|
||||
private lazy val numRows = children.head.eval().asInstanceOf[Int]
|
||||
|
@ -352,7 +354,7 @@ abstract class ExplodeBase extends UnaryExpression with CollectionGenerator with
|
|||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows, or the elements of map `expr` into multiple rows and columns.",
|
||||
usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows, or the elements of map `expr` into multiple rows and columns. Unless specified otherwise, uses the default column name `col` for elements of the array or `key` and `value` for the elements of the map.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(array(10, 20));
|
||||
|
@ -375,7 +377,7 @@ case class Explode(child: Expression) extends ExplodeBase {
|
|||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows with positions, or the elements of map `expr` into multiple rows and columns with positions.",
|
||||
usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows with positions, or the elements of map `expr` into multiple rows and columns with positions. Unless specified otherwise, uses the column name `pos` for position, `col` for elements of the array or `key` and `value` for elements of the map.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(array(10,20));
|
||||
|
@ -390,14 +392,16 @@ case class PosExplode(child: Expression) extends ExplodeBase {
|
|||
/**
|
||||
* Explodes an array of structs into a table.
|
||||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(expr) - Explodes an array of structs into a table.",
|
||||
usage = "_FUNC_(expr) - Explodes an array of structs into a table. Uses column names col1, col2, etc. by default unless specified otherwise.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(array(struct(1, 'a'), struct(2, 'b')));
|
||||
1 a
|
||||
2 b
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
case class Inline(child: Expression) extends UnaryExpression with CollectionGenerator {
|
||||
override val inline: Boolean = true
|
||||
override val position: Boolean = false
|
||||
|
|
|
@ -3322,6 +3322,8 @@ object functions {
|
|||
|
||||
/**
|
||||
* Creates a new row for each element in the given array or map column.
|
||||
* Uses the default column name `col` for elements in the array and
|
||||
* `key` and `value` for elements in the map unless specified otherwise.
|
||||
*
|
||||
* @group collection_funcs
|
||||
* @since 1.3.0
|
||||
|
@ -3330,6 +3332,8 @@ object functions {
|
|||
|
||||
/**
|
||||
* Creates a new row for each element in the given array or map column.
|
||||
* Uses the default column name `col` for elements in the array and
|
||||
* `key` and `value` for elements in the map unless specified otherwise.
|
||||
* Unlike explode, if the array/map is null or empty then null is produced.
|
||||
*
|
||||
* @group collection_funcs
|
||||
|
@ -3339,6 +3343,8 @@ object functions {
|
|||
|
||||
/**
|
||||
* Creates a new row for each element with position in the given array or map column.
|
||||
* Uses the default column name `pos` for position, and `col` for elements in the array
|
||||
* and `key` and `value` for elements in the map unless specified otherwise.
|
||||
*
|
||||
* @group collection_funcs
|
||||
* @since 2.1.0
|
||||
|
@ -3347,6 +3353,8 @@ object functions {
|
|||
|
||||
/**
|
||||
* Creates a new row for each element with position in the given array or map column.
|
||||
* Uses the default column name `pos` for position, and `col` for elements in the array
|
||||
* and `key` and `value` for elements in the map unless specified otherwise.
|
||||
* Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced.
|
||||
*
|
||||
* @group collection_funcs
|
||||
|
|
Loading…
Reference in a new issue