[SPARK-20889][SPARKR] Grouped documentation for COLLECTION column methods

## What changes were proposed in this pull request? Grouped documentation for column collection methods. Author: actuaryzhang <actuaryzhang10@gmail.com> Author: Wayne Zhang <actuaryzhang10@gmail.com> Closes #18458 from actuaryzhang/sparkRDocCollection.
2017-06-29 23:00:50 -07:00 · 2017-06-29 23:00:50 -07:00 · 52981715bb
parent fddb63f463
commit 52981715bb
2 changed files with 108 additions and 123 deletions
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@ -171,6 +171,35 @@ NULL
 #' }
 NULL

+#' Collection functions for Column operations
+#'
+#' Collection functions defined for \code{Column}.
+#'
+#' @param x Column to compute on. Note the difference in the following methods:
+#'          \itemize{
+#'          \item \code{to_json}: it is the column containing the struct or array of the structs.
+#'          \item \code{from_json}: it is the column containing the JSON string.
+#'          }
+#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
+#'            additional named properties to control how it is converted, accepts the same
+#'            options as the JSON data source.
+#' @name column_collection_functions
+#' @rdname column_collection_functions
+#' @family collection functions
+#' @examples
+#' \dontrun{
+#' # Dataframe used throughout this doc
+#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
+#'
+#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
+#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
+#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
+#' head(tmp2)
+#' head(select(tmp, posexplode(tmp$v1)))
+#' head(select(tmp, sort_array(tmp$v1)))
+#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
+NULL
+
 #' @details
 #' \code{lit}: A new Column is created to represent the literal value.
 #' If the parameter is a Column, it is returned unchanged.
@ -1642,30 +1671,23 @@ setMethod("to_date",
            column(jc)
          })

-#' to_json
+#' @details
+#' \code{to_json}: Converts a column containing a \code{structType} or array of \code{structType}
+#' into a Column of JSON string. Resolving the Column can fail if an unsupported type is encountered.
 #'
-#' Converts a column containing a \code{structType} or array of \code{structType} into a Column
-#' of JSON string. Resolving the Column can fail if an unsupported type is encountered.
-#'
-#' @param x Column containing the struct or array of the structs
-#' @param ... additional named properties to control how it is converted, accepts the same options
-#'            as the JSON data source.
-#'
-#' @family non-aggregate functions
-#' @rdname to_json
-#' @name to_json
-#' @aliases to_json,Column-method
+#' @rdname column_collection_functions
+#' @aliases to_json to_json,Column-method
 #' @export
 #' @examples
+#'
 #' \dontrun{
 #' # Converts a struct into a JSON object
-#' df <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
-#' select(df, to_json(df$d, dateFormat = 'dd/MM/yyyy'))
+#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
+#' select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
 #'
 #' # Converts an array of structs into a JSON array
-#' df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
-#' select(df, to_json(df$people))
-#'}
+#' df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
+#' df2 <- mutate(df2, people_json = to_json(df2$people))}
 #' @note to_json since 2.2.0
 setMethod("to_json", signature(x = "Column"),
          function(x, ...) {
@ -2120,28 +2142,28 @@ setMethod("date_format", signature(y = "Column", x = "character"),
            column(jc)
          })

-#' from_json
+#' @details
+#' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType}
+#' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set
+#' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA.
 #'
-#' Parses a column containing a JSON string into a Column of \code{structType} with the specified
-#' \code{schema} or array of \code{structType} if \code{as.json.array} is set to \code{TRUE}.
-#' If the string is unparseable, the Column will contains the value NA.
-#'
-#' @param x Column containing the JSON string.
+#' @rdname column_collection_functions
 #' @param schema a structType object to use as the schema to use when parsing the JSON string.
 #' @param as.json.array indicating if input string is JSON array of objects or a single object.
-#' @param ... additional named properties to control how the json is parsed, accepts the same
-#'            options as the JSON data source.
-#'
-#' @family non-aggregate functions
-#' @rdname from_json
-#' @name from_json
-#' @aliases from_json,Column,structType-method
+#' @aliases from_json from_json,Column,structType-method
 #' @export
 #' @examples
+#'
 #' \dontrun{
-#' schema <- structType(structField("name", "string"),
-#' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy"))
-#'}
+#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
+#' df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
+#' schema <- structType(structField("date", "string"))
+#' head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
+#'
+#' df2 <- sql("SELECT named_struct('name', 'Bob') as people")
+#' df2 <- mutate(df2, people_json = to_json(df2$people))
+#' schema <- structType(structField("name", "string"))
+#' head(select(df2, from_json(df2$people_json, schema)))}
 #' @note from_json since 2.2.0
 setMethod("from_json", signature(x = "Column", schema = "structType"),
          function(x, schema, as.json.array = FALSE, ...) {
@ -3101,18 +3123,14 @@ setMethod("row_number",

 ###################### Collection functions######################

-#' array_contains
+#' @details
+#' \code{array_contains}: Returns null if the array is null, true if the array contains
+#' the value, and false otherwise.
 #'
-#' Returns null if the array is null, true if the array contains the value, and false otherwise.
-#'
-#' @param x A Column
 #' @param value A value to be checked if contained in the column
-#' @rdname array_contains
-#' @aliases array_contains,Column-method
-#' @name array_contains
-#' @family collection functions
+#' @rdname column_collection_functions
+#' @aliases array_contains array_contains,Column-method
 #' @export
-#' @examples \dontrun{array_contains(df$c, 1)}
 #' @note array_contains since 1.6.0
 setMethod("array_contains",
          signature(x = "Column", value = "ANY"),
@ -3121,18 +3139,12 @@ setMethod("array_contains",
            column(jc)
          })

-#' explode
+#' @details
+#' \code{explode}: Creates a new row for each element in the given array or map column.
 #'
-#' Creates a new row for each element in the given array or map column.
-#'
-#' @param x Column to compute on
-#'
-#' @rdname explode
-#' @name explode
-#' @family collection functions
-#' @aliases explode,Column-method
+#' @rdname column_collection_functions
+#' @aliases explode explode,Column-method
 #' @export
-#' @examples \dontrun{explode(df$c)}
 #' @note explode since 1.5.0
 setMethod("explode",
          signature(x = "Column"),
@ -3141,18 +3153,12 @@ setMethod("explode",
            column(jc)
          })

-#' size
+#' @details
+#' \code{size}: Returns length of array or map.
 #'
-#' Returns length of array or map.
-#'
-#' @param x Column to compute on
-#'
-#' @rdname size
-#' @name size
-#' @aliases size,Column-method
-#' @family collection functions
+#' @rdname column_collection_functions
+#' @aliases size size,Column-method
 #' @export
-#' @examples \dontrun{size(df$c)}
 #' @note size since 1.5.0
 setMethod("size",
          signature(x = "Column"),
@ -3161,25 +3167,16 @@ setMethod("size",
            column(jc)
          })

-#' sort_array
-#'
-#' Sorts the input array in ascending or descending order according
+#' @details
+#' \code{sort_array}: Sorts the input array in ascending or descending order according
 #' to the natural ordering of the array elements.
 #'
-#' @param x A Column to sort
+#' @rdname column_collection_functions
 #' @param asc A logical flag indicating the sorting order.
 #'            TRUE, sorting is in ascending order.
 #'            FALSE, sorting is in descending order.
-#' @rdname sort_array
-#' @name sort_array
-#' @aliases sort_array,Column-method
-#' @family collection functions
+#' @aliases sort_array sort_array,Column-method
 #' @export
-#' @examples
-#' \dontrun{
-#' sort_array(df$c)
-#' sort_array(df$c, FALSE)
-#' }
 #' @note sort_array since 1.6.0
 setMethod("sort_array",
          signature(x = "Column"),
@ -3188,18 +3185,13 @@ setMethod("sort_array",
            column(jc)
          })

-#' posexplode
+#' @details
+#' \code{posexplode}: Creates a new row for each element with position in the given array
+#' or map column.
 #'
-#' Creates a new row for each element with position in the given array or map column.
-#'
-#' @param x Column to compute on
-#'
-#' @rdname posexplode
-#' @name posexplode
-#' @family collection functions
-#' @aliases posexplode,Column-method
+#' @rdname column_collection_functions
+#' @aliases posexplode posexplode,Column-method
 #' @export
-#' @examples \dontrun{posexplode(df$c)}
 #' @note posexplode since 2.1.0
 setMethod("posexplode",
          signature(x = "Column"),
@ -3325,27 +3317,24 @@ setMethod("repeat_string",
            column(jc)
          })

-#' explode_outer
-#'
-#' Creates a new row for each element in the given array or map column.
+#' @details
+#' \code{explode_outer}: Creates a new row for each element in the given array or map column.
 #' Unlike \code{explode}, if the array/map is \code{null} or empty
 #' then \code{null} is produced.
 #'
-#' @param x Column to compute on
 #'
-#' @rdname explode_outer
-#' @name explode_outer
-#' @family collection functions
-#' @aliases explode_outer,Column-method
+#' @rdname column_collection_functions
+#' @aliases explode_outer explode_outer,Column-method
 #' @export
 #' @examples
+#'
 #' \dontrun{
-#' df <- createDataFrame(data.frame(
+#' df2 <- createDataFrame(data.frame(
 #'   id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
 #' ))
 #'
-#' head(select(df, df$id, explode_outer(split_string(df$text, ","))))
-#' }
+#' head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
+#' head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))}
 #' @note explode_outer since 2.3.0
 setMethod("explode_outer",
          signature(x = "Column"),
@ -3354,27 +3343,14 @@ setMethod("explode_outer",
            column(jc)
          })

-#' posexplode_outer
-#'
-#' Creates a new row for each element with position in the given array or map column.
-#' Unlike \code{posexplode}, if the array/map is \code{null} or empty
+#' @details
+#' \code{posexplode_outer}: Creates a new row for each element with position in the given
+#' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty
 #' then the row (\code{null}, \code{null}) is produced.
 #'
-#' @param x Column to compute on
-#'
-#' @rdname posexplode_outer
-#' @name posexplode_outer
-#' @family collection functions
-#' @aliases posexplode_outer,Column-method
+#' @rdname column_collection_functions
+#' @aliases posexplode_outer posexplode_outer,Column-method
 #' @export
-#' @examples
-#' \dontrun{
-#' df <- createDataFrame(data.frame(
-#'   id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
-#' ))
-#'
-#' head(select(df, df$id, posexplode_outer(split_string(df$text, ","))))
-#' }
 #' @note posexplode_outer since 2.3.0
 setMethod("posexplode_outer",
          signature(x = "Column"),
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@ -913,8 +913,9 @@ setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
 #' @name NULL
 setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })

-#' @rdname array_contains
+#' @rdname column_collection_functions
 #' @export
+#' @name NULL
 setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })

 #' @rdname column_string_functions
@ -1062,12 +1063,14 @@ setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank")
 #' @name NULL
 setGeneric("encode", function(x, charset) { standardGeneric("encode") })

-#' @rdname explode
+#' @rdname column_collection_functions
 #' @export
+#' @name NULL
 setGeneric("explode", function(x) { standardGeneric("explode") })

-#' @rdname explode_outer
+#' @rdname column_collection_functions
 #' @export
+#' @name NULL
 setGeneric("explode_outer", function(x) { standardGeneric("explode_outer") })

 #' @rdname column_nonaggregate_functions
@ -1090,8 +1093,9 @@ setGeneric("format_number", function(y, x) { standardGeneric("format_number") })
 #' @name NULL
 setGeneric("format_string", function(format, x, ...) { standardGeneric("format_string") })

-#' @rdname from_json
+#' @rdname column_collection_functions
 #' @export
+#' @name NULL
 setGeneric("from_json", function(x, schema, ...) { standardGeneric("from_json") })

 #' @rdname column_datetime_functions
@ -1275,12 +1279,14 @@ setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_ra
 #' @name NULL
 setGeneric("pmod", function(y, x) { standardGeneric("pmod") })

-#' @rdname posexplode
+#' @rdname column_collection_functions
 #' @export
+#' @name NULL
 setGeneric("posexplode", function(x) { standardGeneric("posexplode") })

-#' @rdname posexplode_outer
+#' @rdname column_collection_functions
 #' @export
+#' @name NULL
 setGeneric("posexplode_outer", function(x) { standardGeneric("posexplode_outer") })

 #' @rdname column_datetime_functions
@ -1383,8 +1389,9 @@ setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUns
 #' @name NULL
 setGeneric("signum", function(x) { standardGeneric("signum") })

-#' @rdname size
+#' @rdname column_collection_functions
 #' @export
+#' @name NULL
 setGeneric("size", function(x) { standardGeneric("size") })

 #' @rdname column_aggregate_functions
@ -1392,8 +1399,9 @@ setGeneric("size", function(x) { standardGeneric("size") })
 #' @name NULL
 setGeneric("skewness", function(x) { standardGeneric("skewness") })

-#' @rdname sort_array
+#' @rdname column_collection_functions
 #' @export
+#' @name NULL
 setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })

 #' @rdname column_string_functions
@ -1456,8 +1464,9 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
 #' @name NULL
 setGeneric("to_date", function(x, format) { standardGeneric("to_date") })

-#' @rdname to_json
+#' @rdname column_collection_functions
 #' @export
+#' @name NULL
 setGeneric("to_json", function(x, ...) { standardGeneric("to_json") })

 #' @rdname column_datetime_functions