From 90085a184797f8bddbff8ca6ec7a60f3899c1a86 Mon Sep 17 00:00:00 2001 From: Jash Gala Date: Sat, 27 Apr 2019 10:30:12 +0900 Subject: [PATCH] [SPARK-23619][DOCS] Add output description for some generator expressions / functions ## What changes were proposed in this pull request? This PR addresses SPARK-23619: https://issues.apache.org/jira/browse/SPARK-23619 It adds additional comments indicating the default column names for the `explode` and `posexplode` functions in Spark-SQL. Functions for which comments have been updated so far: * stack * inline * explode * posexplode * explode_outer * posexplode_outer ## How was this patch tested? This is just a change in the comments. The package builds and tests successfully after the change. Closes #23748 from jashgala/SPARK-23619. Authored-by: Jash Gala Signed-off-by: HyukjinKwon --- R/pkg/R/functions.R | 12 +++++++++-- python/pyspark/sql/functions.py | 20 +++++++++++++++---- .../sql/catalyst/expressions/generators.scala | 12 +++++++---- .../org/apache/spark/sql/functions.scala | 8 ++++++++ 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 0566a47cc8..3bd1f544d7 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -3589,6 +3589,8 @@ setMethod("element_at", #' @details #' \code{explode}: Creates a new row for each element in the given array or map column. +#' Uses the default column name \code{col} for elements in the array and +#' \code{key} and \code{value} for elements in the map unless specified otherwise. #' #' @rdname column_collection_functions #' @aliases explode explode,Column-method @@ -3649,7 +3651,9 @@ setMethod("sort_array", #' @details #' \code{posexplode}: Creates a new row for each element with position in the given array -#' or map column. 
Uses the default column name \code{pos} for position, and \code{col} +#' for elements in the array and \code{key} and \code{value} for elements in the map +#' unless specified otherwise. #' #' @rdname column_collection_functions #' @aliases posexplode posexplode,Column-method @@ -3790,7 +3794,8 @@ setMethod("repeat_string", #' \code{explode}: Creates a new row for each element in the given array or map column. #' Unlike \code{explode}, if the array/map is \code{null} or empty #' then \code{null} is produced. -#' +#' Uses the default column name \code{col} for elements in the array and +#' \code{key} and \code{value} for elements in the map unless specified otherwise. #' #' @rdname column_collection_functions #' @aliases explode_outer explode_outer,Column-method @@ -3815,6 +3820,9 @@ setMethod("explode_outer", #' \code{posexplode_outer}: Creates a new row for each element with position in the given #' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty #' then the row (\code{null}, \code{null}) is produced. +#' Uses the default column name \code{pos} for position, and \code{col} +#' for elements in the array and \code{key} and \code{value} for elements in the map +#' unless specified otherwise. #' #' @rdname column_collection_functions #' @aliases posexplode_outer posexplode_outer,Column-method diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 22163f52b4..613822b7ed 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2142,7 +2142,10 @@ def array_except(col1, col2): @since(1.4) def explode(col): - """Returns a new row for each element in the given array or map. + """ + Returns a new row for each element in the given array or map. + Uses the default column name `col` for elements in the array and + `key` and `value` for elements in the map unless specified otherwise. 
>>> from pyspark.sql import Row >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})]) @@ -2163,7 +2166,10 @@ def explode(col): @since(2.1) def posexplode(col): - """Returns a new row for each element with position in the given array or map. + """ + Returns a new row for each element with position in the given array or map. + Uses the default column name `pos` for position, and `col` for elements in the + array and `key` and `value` for elements in the map unless specified otherwise. >>> from pyspark.sql import Row >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})]) @@ -2184,8 +2190,11 @@ def posexplode(col): @since(2.3) def explode_outer(col): - """Returns a new row for each element in the given array or map. + """ + Returns a new row for each element in the given array or map. Unlike explode, if the array/map is null or empty then null is produced. + Uses the default column name `col` for elements in the array and + `key` and `value` for elements in the map unless specified otherwise. >>> df = spark.createDataFrame( ... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)], @@ -2217,8 +2226,11 @@ def explode_outer(col): @since(2.3) def posexplode_outer(col): - """Returns a new row for each element with position in the given array or map. + """ + Returns a new row for each element with position in the given array or map. Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced. + Uses the default column name `pos` for position, and `col` for elements in the + array and `key` and `value` for elements in the map unless specified otherwise. >>> df = spark.createDataFrame( ... 
[(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index 6b6da1c8b4..82a7d9825e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -127,14 +127,16 @@ case class UserDefinedGenerator( * 3 NULL * }}} */ +// scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows.", + usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows. Uses column names col0, col1, etc. by default unless specified otherwise.", examples = """ Examples: > SELECT _FUNC_(2, 1, 2, 3); 1 2 3 NULL """) +// scalastyle:on line.size.limit case class Stack(children: Seq[Expression]) extends Generator { private lazy val numRows = children.head.eval().asInstanceOf[Int] @@ -352,7 +354,7 @@ abstract class ExplodeBase extends UnaryExpression with CollectionGenerator with */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows, or the elements of map `expr` into multiple rows and columns.", + usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows, or the elements of map `expr` into multiple rows and columns. 
Unless specified otherwise, uses the default column name `col` for elements of the array or `key` and `value` for the elements of the map.", examples = """ Examples: > SELECT _FUNC_(array(10, 20)); @@ -375,7 +377,7 @@ case class Explode(child: Expression) extends ExplodeBase { */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows with positions, or the elements of map `expr` into multiple rows and columns with positions.", + usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows with positions, or the elements of map `expr` into multiple rows and columns with positions. Unless specified otherwise, uses the column name `pos` for position, `col` for elements of the array or `key` and `value` for elements of the map.", examples = """ Examples: > SELECT _FUNC_(array(10,20)); @@ -390,14 +392,16 @@ case class PosExplode(child: Expression) extends ExplodeBase { /** * Explodes an array of structs into a table. */ +// scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(expr) - Explodes an array of structs into a table.", + usage = "_FUNC_(expr) - Explodes an array of structs into a table. Uses column names col1, col2, etc. 
by default unless specified otherwise.", examples = """ Examples: > SELECT _FUNC_(array(struct(1, 'a'), struct(2, 'b'))); 1 a 2 b """) +// scalastyle:on line.size.limit case class Inline(child: Expression) extends UnaryExpression with CollectionGenerator { override val inline: Boolean = true override val position: Boolean = false diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 7ac3ed5a44..c1997b6fdf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3322,6 +3322,8 @@ object functions { /** * Creates a new row for each element in the given array or map column. + * Uses the default column name `col` for elements in the array and + * `key` and `value` for elements in the map unless specified otherwise. * * @group collection_funcs * @since 1.3.0 @@ -3330,6 +3332,8 @@ object functions { /** * Creates a new row for each element in the given array or map column. + * Uses the default column name `col` for elements in the array and + * `key` and `value` for elements in the map unless specified otherwise. * Unlike explode, if the array/map is null or empty then null is produced. * * @group collection_funcs @@ -3339,6 +3343,8 @@ object functions { /** * Creates a new row for each element with position in the given array or map column. + * Uses the default column name `pos` for position, and `col` for elements in the array + * and `key` and `value` for elements in the map unless specified otherwise. * * @group collection_funcs * @since 2.1.0 @@ -3347,6 +3353,8 @@ object functions { /** * Creates a new row for each element with position in the given array or map column. + * Uses the default column name `pos` for position, and `col` for elements in the array + * and `key` and `value` for elements in the map unless specified otherwise. 
* Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced. * * @group collection_funcs