[SPARK-21602][R] Add map_keys and map_values functions to R
## What changes were proposed in this pull request?

This PR adds `map_values` and `map_keys` to the R API.

```r
> df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
> tmp <- mutate(df, v = create_map(df$model, df$cyl))
> head(select(tmp, map_keys(tmp$v)))
```

```
        map_keys(v)
1         Mazda RX4
2     Mazda RX4 Wag
3        Datsun 710
4    Hornet 4 Drive
5 Hornet Sportabout
6           Valiant
```

```r
> head(select(tmp, map_values(tmp$v)))
```

```
  map_values(v)
1             6
2             6
3             4
4             6
5             8
6             6
```

## How was this patch tested?

Manual tests and unit tests in `R/pkg/tests/fulltests/test_sparkSQL.R`

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #18809 from HyukjinKwon/map-keys-values-r.
This commit is contained in:
parent
e7c59b4177
commit
97ba491836
|
@ -286,6 +286,8 @@ exportMethods("%<=>%",
|
|||
"lower",
|
||||
"lpad",
|
||||
"ltrim",
|
||||
"map_keys",
|
||||
"map_values",
|
||||
"max",
|
||||
"md5",
|
||||
"mean",
|
||||
|
|
|
@ -195,7 +195,10 @@ NULL
|
|||
#' head(tmp2)
|
||||
#' head(select(tmp, posexplode(tmp$v1)))
|
||||
#' head(select(tmp, sort_array(tmp$v1)))
|
||||
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
|
||||
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
|
||||
#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
|
||||
#' head(select(tmp3, map_keys(tmp3$v3)))
|
||||
#' head(select(tmp3, map_values(tmp3$v3)))}
|
||||
NULL
|
||||
|
||||
#' Window functions for Column operations
|
||||
|
@ -3055,6 +3058,34 @@ setMethod("array_contains",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' @details
#' \code{map_keys}: Returns an unordered array containing the keys of the map.
#'
#' @rdname column_collection_functions
#' @aliases map_keys map_keys,Column-method
#' @export
#' @note map_keys since 2.3.0
setMethod("map_keys",
          signature(x = "Column"),
          function(x) {
            # Call the static map_keys of org.apache.spark.sql.functions on the
            # column's jc slot, then wrap the returned reference with column().
            column(callJStatic("org.apache.spark.sql.functions", "map_keys", x@jc))
          })
|
||||
|
||||
#' @details
#' \code{map_values}: Returns an unordered array containing the values of the map.
#'
#' @rdname column_collection_functions
#' @aliases map_values map_values,Column-method
#' @export
#' @note map_values since 2.3.0
setMethod("map_values",
          signature(x = "Column"),
          function(x) {
            # Call the static map_values of org.apache.spark.sql.functions on the
            # column's jc slot, then wrap the returned reference with column().
            column(callJStatic("org.apache.spark.sql.functions", "map_values", x@jc))
          })
|
||||
|
||||
#' @details
|
||||
#' \code{explode}: Creates a new row for each element in the given array or map column.
|
||||
#'
|
||||
|
|
|
@ -1213,6 +1213,16 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
|
|||
#' @name NULL
|
||||
setGeneric("ltrim", function(x) { standardGeneric("ltrim") })
|
||||
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("map_keys",
           function(x) {
             standardGeneric("map_keys")
           })
|
||||
|
||||
#' @rdname column_collection_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
setGeneric("map_values",
           function(x) {
             standardGeneric("map_values")
           })
|
||||
|
||||
#' @rdname column_misc_functions
|
||||
#' @export
|
||||
#' @name NULL
|
||||
|
|
|
@ -1436,6 +1436,14 @@ test_that("column functions", {
|
|||
result <- collect(select(df, sort_array(df[[1]])))[[1]]
|
||||
expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
|
||||
|
||||
# Test map_keys() and map_values()
|
||||
df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
|
||||
result <- collect(select(df, map_keys(df$map)))[[1]]
|
||||
expect_equal(result, list(list("x", "y")))
|
||||
|
||||
result <- collect(select(df, map_values(df$map)))[[1]]
|
||||
expect_equal(result, list(list(1, 2)))
|
||||
|
||||
# Test that stats::lag is working
|
||||
expect_equal(length(lag(ldeaths, 12)), 72)
|
||||
|
||||
|
|
Loading…
Reference in a new issue