[SPARK-20889][SPARKR] Grouped documentation for COLLECTION column methods
## What changes were proposed in this pull request? Grouped documentation for column collection methods. Author: actuaryzhang <actuaryzhang10@gmail.com> Author: Wayne Zhang <actuaryzhang10@gmail.com> Closes #18458 from actuaryzhang/sparkRDocCollection.
This commit is contained in:
parent
fddb63f463
commit
52981715bb
|
@ -171,6 +171,35 @@ NULL
|
|||
#' }
|
||||
NULL
|
||||
|
||||
#' Collection functions for Column operations
|
||||
#'
|
||||
#' Collection functions defined for \code{Column}.
|
||||
#'
|
||||
#' @param x Column to compute on. Note the difference in the following methods:
|
||||
#' \itemize{
|
||||
#' \item \code{to_json}: it is the column containing the struct or array of the structs.
|
||||
#' \item \code{from_json}: it is the column containing the JSON string.
|
||||
#' }
|
||||
#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
|
||||
#' additional named properties to control how it is converted and accepts the same
|
||||
#' options as the JSON data source.
|
||||
#' @name column_collection_functions
|
||||
#' @rdname column_collection_functions
|
||||
#' @family collection functions
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' # Dataframe used throughout this doc
|
||||
#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
|
||||
#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
|
||||
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
|
||||
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
|
||||
#' head(tmp2)
|
||||
#' head(select(tmp, posexplode(tmp$v1)))
|
||||
#' head(select(tmp, sort_array(tmp$v1)))
|
||||
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
|
||||
NULL
|
||||
|
||||
#' @details
|
||||
#' \code{lit}: A new Column is created to represent the literal value.
|
||||
#' If the parameter is a Column, it is returned unchanged.
|
||||
|
@ -1642,30 +1671,23 @@ setMethod("to_date",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' to_json
|
||||
#' @details
|
||||
#' \code{to_json}: Converts a column containing a \code{structType} or array of \code{structType}
|
||||
#' into a Column of JSON string. Resolving the Column can fail if an unsupported type is encountered.
|
||||
#'
|
||||
#' Converts a column containing a \code{structType} or array of \code{structType} into a Column
|
||||
#' of JSON string. Resolving the Column can fail if an unsupported type is encountered.
|
||||
#'
|
||||
#' @param x Column containing the struct or array of the structs
|
||||
#' @param ... additional named properties to control how it is converted, accepts the same options
|
||||
#' as the JSON data source.
|
||||
#'
|
||||
#' @family non-aggregate functions
|
||||
#' @rdname to_json
|
||||
#' @name to_json
|
||||
#' @aliases to_json,Column-method
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases to_json to_json,Column-method
|
||||
#' @export
|
||||
#' @examples
|
||||
#'
|
||||
#' \dontrun{
|
||||
#' # Converts a struct into a JSON object
|
||||
#' df <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
|
||||
#' select(df, to_json(df$d, dateFormat = 'dd/MM/yyyy'))
|
||||
#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
|
||||
#' select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
|
||||
#'
|
||||
#' # Converts an array of structs into a JSON array
|
||||
#' df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
|
||||
#' select(df, to_json(df$people))
|
||||
#'}
|
||||
#' df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
|
||||
#' df2 <- mutate(df2, people_json = to_json(df2$people))}
|
||||
#' @note to_json since 2.2.0
|
||||
setMethod("to_json", signature(x = "Column"),
|
||||
function(x, ...) {
|
||||
|
@ -2120,28 +2142,28 @@ setMethod("date_format", signature(y = "Column", x = "character"),
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' from_json
|
||||
#' @details
|
||||
#' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType}
|
||||
#' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set
|
||||
#' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA.
|
||||
#'
|
||||
#' Parses a column containing a JSON string into a Column of \code{structType} with the specified
|
||||
#' \code{schema} or array of \code{structType} if \code{as.json.array} is set to \code{TRUE}.
|
||||
#' If the string is unparseable, the Column will contain the value NA.
|
||||
#'
|
||||
#' @param x Column containing the JSON string.
|
||||
#' @rdname column_collection_functions
|
||||
#' @param schema a structType object to use as the schema to use when parsing the JSON string.
|
||||
#' @param as.json.array indicating if input string is JSON array of objects or a single object.
|
||||
#' @param ... additional named properties to control how the json is parsed, accepts the same
|
||||
#' options as the JSON data source.
|
||||
#'
|
||||
#' @family non-aggregate functions
|
||||
#' @rdname from_json
|
||||
#' @name from_json
|
||||
#' @aliases from_json,Column,structType-method
|
||||
#' @aliases from_json from_json,Column,structType-method
|
||||
#' @export
|
||||
#' @examples
|
||||
#'
|
||||
#' \dontrun{
|
||||
#' schema <- structType(structField("name", "string"),
|
||||
#' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy"))
|
||||
#'}
|
||||
#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
|
||||
#' df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
|
||||
#' schema <- structType(structField("date", "string"))
|
||||
#' head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
|
||||
#'
|
||||
#' df2 <- sql("SELECT named_struct('name', 'Bob') as people")
|
||||
#' df2 <- mutate(df2, people_json = to_json(df2$people))
|
||||
#' schema <- structType(structField("name", "string"))
|
||||
#' head(select(df2, from_json(df2$people_json, schema)))}
|
||||
#' @note from_json since 2.2.0
|
||||
setMethod("from_json", signature(x = "Column", schema = "structType"),
|
||||
function(x, schema, as.json.array = FALSE, ...) {
|
||||
|
@ -3101,18 +3123,14 @@ setMethod("row_number",
|
|||
|
||||
###################### Collection functions######################
|
||||
|
||||
#' array_contains
|
||||
#' @details
|
||||
#' \code{array_contains}: Returns null if the array is null, true if the array contains
|
||||
#' the value, and false otherwise.
|
||||
#'
|
||||
#' Returns null if the array is null, true if the array contains the value, and false otherwise.
|
||||
#'
|
||||
#' @param x A Column
|
||||
#' @param value A value to be checked if contained in the column
|
||||
#' @rdname array_contains
|
||||
#' @aliases array_contains,Column-method
|
||||
#' @name array_contains
|
||||
#' @family collection functions
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases array_contains array_contains,Column-method
|
||||
#' @export
|
||||
#' @examples \dontrun{array_contains(df$c, 1)}
|
||||
#' @note array_contains since 1.6.0
|
||||
setMethod("array_contains",
|
||||
signature(x = "Column", value = "ANY"),
|
||||
|
@ -3121,18 +3139,12 @@ setMethod("array_contains",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' explode
|
||||
#' @details
|
||||
#' \code{explode}: Creates a new row for each element in the given array or map column.
|
||||
#'
|
||||
#' Creates a new row for each element in the given array or map column.
|
||||
#'
|
||||
#' @param x Column to compute on
|
||||
#'
|
||||
#' @rdname explode
|
||||
#' @name explode
|
||||
#' @family collection functions
|
||||
#' @aliases explode,Column-method
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases explode explode,Column-method
|
||||
#' @export
|
||||
#' @examples \dontrun{explode(df$c)}
|
||||
#' @note explode since 1.5.0
|
||||
setMethod("explode",
|
||||
signature(x = "Column"),
|
||||
|
@ -3141,18 +3153,12 @@ setMethod("explode",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' size
|
||||
#' @details
|
||||
#' \code{size}: Returns length of array or map.
|
||||
#'
|
||||
#' Returns length of array or map.
|
||||
#'
|
||||
#' @param x Column to compute on
|
||||
#'
|
||||
#' @rdname size
|
||||
#' @name size
|
||||
#' @aliases size,Column-method
|
||||
#' @family collection functions
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases size size,Column-method
|
||||
#' @export
|
||||
#' @examples \dontrun{size(df$c)}
|
||||
#' @note size since 1.5.0
|
||||
setMethod("size",
|
||||
signature(x = "Column"),
|
||||
|
@ -3161,25 +3167,16 @@ setMethod("size",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' sort_array
|
||||
#'
|
||||
#' Sorts the input array in ascending or descending order according
|
||||
#' @details
|
||||
#' \code{sort_array}: Sorts the input array in ascending or descending order according
|
||||
#' to the natural ordering of the array elements.
|
||||
#'
|
||||
#' @param x A Column to sort
|
||||
#' @rdname column_collection_functions
|
||||
#' @param asc A logical flag indicating the sorting order.
|
||||
#' TRUE, sorting is in ascending order.
|
||||
#' FALSE, sorting is in descending order.
|
||||
#' @rdname sort_array
|
||||
#' @name sort_array
|
||||
#' @aliases sort_array,Column-method
|
||||
#' @family collection functions
|
||||
#' @aliases sort_array sort_array,Column-method
|
||||
#' @export
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' sort_array(df$c)
|
||||
#' sort_array(df$c, FALSE)
|
||||
#' }
|
||||
#' @note sort_array since 1.6.0
|
||||
setMethod("sort_array",
|
||||
signature(x = "Column"),
|
||||
|
@ -3188,18 +3185,13 @@ setMethod("sort_array",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' posexplode
|
||||
#' @details
|
||||
#' \code{posexplode}: Creates a new row for each element with position in the given array
|
||||
#' or map column.
|
||||
#'
|
||||
#' Creates a new row for each element with position in the given array or map column.
|
||||
#'
|
||||
#' @param x Column to compute on
|
||||
#'
|
||||
#' @rdname posexplode
|
||||
#' @name posexplode
|
||||
#' @family collection functions
|
||||
#' @aliases posexplode,Column-method
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases posexplode posexplode,Column-method
|
||||
#' @export
|
||||
#' @examples \dontrun{posexplode(df$c)}
|
||||
#' @note posexplode since 2.1.0
|
||||
setMethod("posexplode",
|
||||
signature(x = "Column"),
|
||||
|
@ -3325,27 +3317,24 @@ setMethod("repeat_string",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' explode_outer
|
||||
#'
|
||||
#' Creates a new row for each element in the given array or map column.
|
||||
#' @details
|
||||
#' \code{explode_outer}: Creates a new row for each element in the given array or map column.
|
||||
#' Unlike \code{explode}, if the array/map is \code{null} or empty
|
||||
#' then \code{null} is produced.
|
||||
#'
|
||||
#' @param x Column to compute on
|
||||
#'
|
||||
#' @rdname explode_outer
|
||||
#' @name explode_outer
|
||||
#' @family collection functions
|
||||
#' @aliases explode_outer,Column-method
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases explode_outer explode_outer,Column-method
|
||||
#' @export
|
||||
#' @examples
|
||||
#'
|
||||
#' \dontrun{
|
||||
#' df <- createDataFrame(data.frame(
|
||||
#' df2 <- createDataFrame(data.frame(
|
||||
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
|
||||
#' ))
|
||||
#'
|
||||
#' head(select(df, df$id, explode_outer(split_string(df$text, ","))))
|
||||
#' }
|
||||
#' head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
|
||||
#' head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))}
|
||||
#' @note explode_outer since 2.3.0
|
||||
setMethod("explode_outer",
|
||||
signature(x = "Column"),
|
||||
|
@ -3354,27 +3343,14 @@ setMethod("explode_outer",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' posexplode_outer
|
||||
#'
|
||||
#' Creates a new row for each element with position in the given array or map column.
|
||||
#' Unlike \code{posexplode}, if the array/map is \code{null} or empty
|
||||
#' @details
|
||||
#' \code{posexplode_outer}: Creates a new row for each element with position in the given
|
||||
#' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty
|
||||
#' then the row (\code{null}, \code{null}) is produced.
|
||||
#'
|
||||
#' @param x Column to compute on
|
||||
#'
|
||||
#' @rdname posexplode_outer
|
||||
#' @name posexplode_outer
|
||||
#' @family collection functions
|
||||
#' @aliases posexplode_outer,Column-method
|
||||
#' @rdname column_collection_functions
|
||||
#' @aliases posexplode_outer posexplode_outer,Column-method
|
||||
#' @export
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' df <- createDataFrame(data.frame(
|
||||
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
|
||||
#' ))
|
||||
#'
|
||||
#' head(select(df, df$id, posexplode_outer(split_string(df$text, ","))))
|
||||
#' }
|
||||
#' @note posexplode_outer since 2.3.0
|
||||
setMethod("posexplode_outer",
|
||||
signature(x = "Column"),
|
||||
|
|
|
@ -913,8 +913,9 @@ setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
|
|||
#' @name NULL
|
||||
setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
|
||||
|
||||
#' @rdname array_contains
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })
|
||||
|
||||
#' @rdname column_string_functions
|
||||
|
@ -1062,12 +1063,14 @@ setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank")
|
|||
#' @name NULL
|
||||
setGeneric("encode", function(x, charset) { standardGeneric("encode") })
|
||||
|
||||
#' @rdname explode
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("explode", function(x) { standardGeneric("explode") })
|
||||
|
||||
#' @rdname explode_outer
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("explode_outer", function(x) { standardGeneric("explode_outer") })
|
||||
|
||||
#' @rdname column_nonaggregate_functions
|
||||
|
@ -1090,8 +1093,9 @@ setGeneric("format_number", function(y, x) { standardGeneric("format_number") })
|
|||
#' @name NULL
|
||||
setGeneric("format_string", function(format, x, ...) { standardGeneric("format_string") })
|
||||
|
||||
#' @rdname from_json
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("from_json", function(x, schema, ...) { standardGeneric("from_json") })
|
||||
|
||||
#' @rdname column_datetime_functions
|
||||
|
@ -1275,12 +1279,14 @@ setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_ra
|
|||
#' @name NULL
|
||||
setGeneric("pmod", function(y, x) { standardGeneric("pmod") })
|
||||
|
||||
#' @rdname posexplode
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("posexplode", function(x) { standardGeneric("posexplode") })
|
||||
|
||||
#' @rdname posexplode_outer
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("posexplode_outer", function(x) { standardGeneric("posexplode_outer") })
|
||||
|
||||
#' @rdname column_datetime_functions
|
||||
|
@ -1383,8 +1389,9 @@ setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUns
|
|||
#' @name NULL
|
||||
setGeneric("signum", function(x) { standardGeneric("signum") })
|
||||
|
||||
#' @rdname size
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("size", function(x) { standardGeneric("size") })
|
||||
|
||||
#' @rdname column_aggregate_functions
|
||||
|
@ -1392,8 +1399,9 @@ setGeneric("size", function(x) { standardGeneric("size") })
|
|||
#' @name NULL
|
||||
setGeneric("skewness", function(x) { standardGeneric("skewness") })
|
||||
|
||||
#' @rdname sort_array
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
|
||||
|
||||
#' @rdname column_string_functions
|
||||
|
@ -1456,8 +1464,9 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
|
|||
#' @name NULL
|
||||
setGeneric("to_date", function(x, format) { standardGeneric("to_date") })
|
||||
|
||||
#' @rdname to_json
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("to_json", function(x, ...) { standardGeneric("to_json") })
|
||||
|
||||
#' @rdname column_datetime_functions
|
||||
|
|
Loading…
Reference in a new issue