[SPARK-21602][R] Add map_keys and map_values functions to R

## What changes were proposed in this pull request?

This PR adds `map_values` and `map_keys` to the R API.

```r
> df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
> tmp <- mutate(df, v = create_map(df$model, df$cyl))
> head(select(tmp, map_keys(tmp$v)))
```
```
        map_keys(v)
1         Mazda RX4
2     Mazda RX4 Wag
3        Datsun 710
4    Hornet 4 Drive
5 Hornet Sportabout
6           Valiant
```
```r
> head(select(tmp, map_values(tmp$v)))
```
```
  map_values(v)
1             6
2             6
3             4
4             6
5             8
6             6
```

## How was this patch tested?

Manual tests and unit tests in `R/pkg/tests/fulltests/test_sparkSQL.R`.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #18809 from HyukjinKwon/map-keys-values-r.
This commit is contained in:
hyukjinkwon 2017-08-03 23:00:00 +09:00
parent e7c59b4177
commit 97ba491836
4 changed files with 52 additions and 1 deletions

View file

@ -286,6 +286,8 @@ exportMethods("%<=>%",
"lower",
"lpad",
"ltrim",
"map_keys",
"map_values",
"max",
"md5",
"mean",

View file

@ -195,7 +195,10 @@ NULL
#' head(tmp2)
#' head(select(tmp, posexplode(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
#' head(select(tmp3, map_keys(tmp3$v3)))
#' head(select(tmp3, map_values(tmp3$v3)))}
NULL
#' Window functions for Column operations
@ -3055,6 +3058,34 @@ setMethod("array_contains",
column(jc)
})
#' @details
#' \code{map_keys}: Returns the keys of the map as an unordered array.
#'
#' @rdname column_collection_functions
#' @aliases map_keys map_keys,Column-method
#' @export
#' @note map_keys since 2.3.0
setMethod("map_keys",
          signature(x = "Column"),
          function(x) {
            # Call the static "map_keys" of org.apache.spark.sql.functions on
            # the jc slot of x, then wrap the returned handle with column().
            column(
              callJStatic("org.apache.spark.sql.functions", "map_keys", x@jc)
            )
          })
#' @details
#' \code{map_values}: Returns the values of the map as an unordered array.
#'
#' @rdname column_collection_functions
#' @aliases map_values map_values,Column-method
#' @export
#' @note map_values since 2.3.0
setMethod("map_values",
          signature(x = "Column"),
          function(x) {
            # Call the static "map_values" of org.apache.spark.sql.functions on
            # the jc slot of x, then wrap the returned handle with column().
            column(
              callJStatic("org.apache.spark.sql.functions", "map_values", x@jc)
            )
          })
#' @details
#' \code{explode}: Creates a new row for each element in the given array or map column.
#'

View file

@ -1213,6 +1213,16 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
#' @name NULL
setGeneric("ltrim", function(x) { standardGeneric("ltrim") })
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric(
  "map_keys",
  def = function(x) {
    # Dispatch to the method registered for the class of x.
    standardGeneric("map_keys")
  }
)
#' @rdname column_collection_functions
#' @export
#' @name NULL
setGeneric(
  "map_values",
  def = function(x) {
    # Dispatch to the method registered for the class of x.
    standardGeneric("map_values")
  }
)
#' @rdname column_misc_functions
#' @export
#' @name NULL

View file

@ -1436,6 +1436,14 @@ test_that("column functions", {
result <- collect(select(df, sort_array(df[[1]])))[[1]]
expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
# Test map_keys() and map_values()
df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
result <- collect(select(df, map_keys(df$map)))[[1]]
expect_equal(result, list(list("x", "y")))
result <- collect(select(df, map_values(df$map)))[[1]]
expect_equal(result, list(list(1, 2)))
# Test that stats::lag is working
expect_equal(length(lag(ldeaths, 12)), 72)