[SPARK-20889][SPARKR] Grouped documentation for COLLECTION column methods

## What changes were proposed in this pull request?

Grouped documentation for column collection methods.

Author: actuaryzhang <actuaryzhang10@gmail.com>
Author: Wayne Zhang <actuaryzhang10@gmail.com>

Closes #18458 from actuaryzhang/sparkRDocCollection.
This commit is contained in:
actuaryzhang 2017-06-29 23:00:50 -07:00 committed by Felix Cheung
parent fddb63f463
commit 52981715bb
2 changed files with 108 additions and 123 deletions

View file

@ -171,6 +171,35 @@ NULL
#' }
NULL
#' Collection functions for Column operations
#'
#' Collection functions defined for \code{Column}.
#'
#' @param x Column to compute on. Note the difference in the following methods:
#' \itemize{
#' \item \code{to_json}: it is the column containing the struct or array of the structs.
#' \item \code{from_json}: it is the column containing the JSON string.
#' }
#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
#' additional named properties to control how it is converted, accepts the same
#' options as the JSON data source.
#' @name column_collection_functions
#' @rdname column_collection_functions
#' @family collection functions
#' @examples
#' \dontrun{
#' # Dataframe used throughout this doc
#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
#' head(tmp2)
#' head(select(tmp, posexplode(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
NULL
#' @details
#' \code{lit}: A new Column is created to represent the literal value.
#' If the parameter is a Column, it is returned unchanged.
@ -1642,30 +1671,23 @@ setMethod("to_date",
column(jc)
})
#' to_json
#' @details
#' \code{to_json}: Converts a column containing a \code{structType} or array of \code{structType}
#' into a Column of JSON string. Resolving the Column can fail if an unsupported type is encountered.
#'
#' Converts a column containing a \code{structType} or array of \code{structType} into a Column
#' of JSON string. Resolving the Column can fail if an unsupported type is encountered.
#'
#' @param x Column containing the struct or array of the structs
#' @param ... additional named properties to control how it is converted, accepts the same options
#' as the JSON data source.
#'
#' @family non-aggregate functions
#' @rdname to_json
#' @name to_json
#' @aliases to_json,Column-method
#' @rdname column_collection_functions
#' @aliases to_json to_json,Column-method
#' @export
#' @examples
#'
#' \dontrun{
#' # Converts a struct into a JSON object
#' df <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
#' select(df, to_json(df$d, dateFormat = 'dd/MM/yyyy'))
#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
#' select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
#'
#' # Converts an array of structs into a JSON array
#' df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
#' select(df, to_json(df$people))
#'}
#' df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
#' df2 <- mutate(df2, people_json = to_json(df2$people))}
#' @note to_json since 2.2.0
setMethod("to_json", signature(x = "Column"),
function(x, ...) {
@ -2120,28 +2142,28 @@ setMethod("date_format", signature(y = "Column", x = "character"),
column(jc)
})
#' from_json
#' @details
#' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType}
#' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set
#' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA.
#'
#' Parses a column containing a JSON string into a Column of \code{structType} with the specified
#' \code{schema} or array of \code{structType} if \code{as.json.array} is set to \code{TRUE}.
#' If the string is unparseable, the Column will contains the value NA.
#'
#' @param x Column containing the JSON string.
#' @rdname column_collection_functions
#' @param schema a structType object to use as the schema when parsing the JSON string.
#' @param as.json.array indicating if input string is JSON array of objects or a single object.
#' @param ... additional named properties to control how the json is parsed, accepts the same
#' options as the JSON data source.
#'
#' @family non-aggregate functions
#' @rdname from_json
#' @name from_json
#' @aliases from_json,Column,structType-method
#' @aliases from_json from_json,Column,structType-method
#' @export
#' @examples
#'
#' \dontrun{
#' schema <- structType(structField("name", "string"),
#' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy"))
#'}
#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
#' df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
#' schema <- structType(structField("date", "string"))
#' head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
#' df2 <- sql("SELECT named_struct('name', 'Bob') as people")
#' df2 <- mutate(df2, people_json = to_json(df2$people))
#' schema <- structType(structField("name", "string"))
#' head(select(df2, from_json(df2$people_json, schema)))}
#' @note from_json since 2.2.0
setMethod("from_json", signature(x = "Column", schema = "structType"),
function(x, schema, as.json.array = FALSE, ...) {
@ -3101,18 +3123,14 @@ setMethod("row_number",
###################### Collection functions######################
#' array_contains
#' @details
#' \code{array_contains}: Returns null if the array is null, true if the array contains
#' the value, and false otherwise.
#'
#' Returns null if the array is null, true if the array contains the value, and false otherwise.
#'
#' @param x A Column
#' @param value A value to be checked if contained in the column
#' @rdname array_contains
#' @aliases array_contains,Column-method
#' @name array_contains
#' @family collection functions
#' @rdname column_collection_functions
#' @aliases array_contains array_contains,Column-method
#' @export
#' @examples \dontrun{array_contains(df$c, 1)}
#' @note array_contains since 1.6.0
setMethod("array_contains",
signature(x = "Column", value = "ANY"),
@ -3121,18 +3139,12 @@ setMethod("array_contains",
column(jc)
})
#' explode
#' @details
#' \code{explode}: Creates a new row for each element in the given array or map column.
#'
#' Creates a new row for each element in the given array or map column.
#'
#' @param x Column to compute on
#'
#' @rdname explode
#' @name explode
#' @family collection functions
#' @aliases explode,Column-method
#' @rdname column_collection_functions
#' @aliases explode explode,Column-method
#' @export
#' @examples \dontrun{explode(df$c)}
#' @note explode since 1.5.0
setMethod("explode",
signature(x = "Column"),
@ -3141,18 +3153,12 @@ setMethod("explode",
column(jc)
})
#' size
#' @details
#' \code{size}: Returns length of array or map.
#'
#' Returns length of array or map.
#'
#' @param x Column to compute on
#'
#' @rdname size
#' @name size
#' @aliases size,Column-method
#' @family collection functions
#' @rdname column_collection_functions
#' @aliases size size,Column-method
#' @export
#' @examples \dontrun{size(df$c)}
#' @note size since 1.5.0
setMethod("size",
signature(x = "Column"),
@ -3161,25 +3167,16 @@ setMethod("size",
column(jc)
})
#' sort_array
#'
#' Sorts the input array in ascending or descending order according
#' @details
#' \code{sort_array}: Sorts the input array in ascending or descending order according
#' to the natural ordering of the array elements.
#'
#' @param x A Column to sort
#' @rdname column_collection_functions
#' @param asc A logical flag indicating the sorting order.
#' TRUE, sorting is in ascending order.
#' FALSE, sorting is in descending order.
#' @rdname sort_array
#' @name sort_array
#' @aliases sort_array,Column-method
#' @family collection functions
#' @aliases sort_array sort_array,Column-method
#' @export
#' @examples
#' \dontrun{
#' sort_array(df$c)
#' sort_array(df$c, FALSE)
#' }
#' @note sort_array since 1.6.0
setMethod("sort_array",
signature(x = "Column"),
@ -3188,18 +3185,13 @@ setMethod("sort_array",
column(jc)
})
#' posexplode
#' @details
#' \code{posexplode}: Creates a new row for each element with position in the given array
#' or map column.
#'
#' Creates a new row for each element with position in the given array or map column.
#'
#' @param x Column to compute on
#'
#' @rdname posexplode
#' @name posexplode
#' @family collection functions
#' @aliases posexplode,Column-method
#' @rdname column_collection_functions
#' @aliases posexplode posexplode,Column-method
#' @export
#' @examples \dontrun{posexplode(df$c)}
#' @note posexplode since 2.1.0
setMethod("posexplode",
signature(x = "Column"),
@ -3325,27 +3317,24 @@ setMethod("repeat_string",
column(jc)
})
#' explode_outer
#'
#' Creates a new row for each element in the given array or map column.
#' @details
#' \code{explode_outer}: Creates a new row for each element in the given array or map column.
#' Unlike \code{explode}, if the array/map is \code{null} or empty
#' then \code{null} is produced.
#'
#' @param x Column to compute on
#'
#' @rdname explode_outer
#' @name explode_outer
#' @family collection functions
#' @aliases explode_outer,Column-method
#' @rdname column_collection_functions
#' @aliases explode_outer explode_outer,Column-method
#' @export
#' @examples
#'
#' \dontrun{
#' df <- createDataFrame(data.frame(
#' df2 <- createDataFrame(data.frame(
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
#' ))
#'
#' head(select(df, df$id, explode_outer(split_string(df$text, ","))))
#' }
#' head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
#' head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))}
#' @note explode_outer since 2.3.0
setMethod("explode_outer",
signature(x = "Column"),
@ -3354,27 +3343,14 @@ setMethod("explode_outer",
column(jc)
})
#' posexplode_outer
#'
#' Creates a new row for each element with position in the given array or map column.
#' Unlike \code{posexplode}, if the array/map is \code{null} or empty
#' @details
#' \code{posexplode_outer}: Creates a new row for each element with position in the given
#' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty
#' then the row (\code{null}, \code{null}) is produced.
#'
#' @param x Column to compute on
#'
#' @rdname posexplode_outer
#' @name posexplode_outer
#' @family collection functions
#' @aliases posexplode_outer,Column-method
#' @rdname column_collection_functions
#' @aliases posexplode_outer posexplode_outer,Column-method
#' @export
#' @examples
#' \dontrun{
#' df <- createDataFrame(data.frame(
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
#' ))
#'
#' head(select(df, df$id, posexplode_outer(split_string(df$text, ","))))
#' }
#' @note posexplode_outer since 2.3.0
setMethod("posexplode_outer",
signature(x = "Column"),

View file

@ -913,8 +913,9 @@ setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
#' @name NULL
setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
#' @rdname array_contains
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })
#' @rdname column_string_functions
@ -1062,12 +1063,14 @@ setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank")
#' @name NULL
setGeneric("encode", function(x, charset) { standardGeneric("encode") })
#' @rdname explode
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("explode", function(x) { standardGeneric("explode") })
#' @rdname explode_outer
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("explode_outer", function(x) { standardGeneric("explode_outer") })
#' @rdname column_nonaggregate_functions
@ -1090,8 +1093,9 @@ setGeneric("format_number", function(y, x) { standardGeneric("format_number") })
#' @name NULL
setGeneric("format_string", function(format, x, ...) { standardGeneric("format_string") })
#' @rdname from_json
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("from_json", function(x, schema, ...) { standardGeneric("from_json") })
#' @rdname column_datetime_functions
@ -1275,12 +1279,14 @@ setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_ra
#' @name NULL
setGeneric("pmod", function(y, x) { standardGeneric("pmod") })
#' @rdname posexplode
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("posexplode", function(x) { standardGeneric("posexplode") })
#' @rdname posexplode_outer
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("posexplode_outer", function(x) { standardGeneric("posexplode_outer") })
#' @rdname column_datetime_functions
@ -1383,8 +1389,9 @@ setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUns
#' @name NULL
setGeneric("signum", function(x) { standardGeneric("signum") })
#' @rdname size
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("size", function(x) { standardGeneric("size") })
#' @rdname column_aggregate_functions
@ -1392,8 +1399,9 @@ setGeneric("size", function(x) { standardGeneric("size") })
#' @name NULL
setGeneric("skewness", function(x) { standardGeneric("skewness") })
#' @rdname sort_array
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
#' @rdname column_string_functions
@ -1456,8 +1464,9 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
#' @name NULL
setGeneric("to_date", function(x, format) { standardGeneric("to_date") })
#' @rdname to_json
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric("to_json", function(x, ...) { standardGeneric("to_json") })
#' @rdname column_datetime_functions