[SPARK-9871] [SPARKR] Add expression functions into SparkR which have a variable parameter

### Summary

- Add `lit` function
- Add `concat`, `greatest`, `least` functions

I think we need to improve the `collect` function before we can implement the `struct` function, because `collect` doesn't work with arguments that include a nested `list` variable. It seems that a list produced by `struct` still contains elements of class `jobj`. It would be better to solve this problem in a separate issue.

### JIRA
[[SPARK-9871] Add expression functions into SparkR which have a variable parameter - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9871)

Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>

Closes #8194 from yu-iskw/SPARK-9856.
This commit is contained in:
Yu ISHIKAWA 2015-08-16 23:33:20 -07:00 committed by Shivaram Venkataraman
parent ae2370e72f
commit 26e760581f
4 changed files with 75 additions and 0 deletions

View file

@ -98,6 +98,7 @@ exportMethods("abs",
"contains",
"cos",
"cosh",
"concat",
"countDistinct",
"desc",
"endsWith",
@ -106,10 +107,13 @@ exportMethods("abs",
"floor",
"getField",
"getItem",
"greatest",
"hypot",
"isNotNull",
"isNull",
"lit",
"last",
"least",
"like",
"log",
"log10",

View file

@ -67,6 +67,14 @@ createFunctions <- function() {
createFunctions()
#' @rdname functions
#' @return Creates a Column class of literal value.
setMethod("lit", signature("ANY"),
          function(x) {
            # Pick the argument with a scalar if/else rather than ifelse():
            # ifelse() is vectorised over its test, so it returns only as many
            # elements as the test has and drops attributes (e.g. the class of
            # a Date), and it cannot carry a non-vector `jc` slot value through.
            # A Column contributes its `jc` slot; anything else is passed as-is.
            jval <- if (is(x, "Column")) x@jc else x
            jc <- callJStatic("org.apache.spark.sql.functions", "lit", jval)
            column(jc)
          })
#' Approx Count Distinct
#'
#' @rdname functions
@ -93,6 +101,40 @@ setMethod("countDistinct",
column(jc)
})
#' @rdname functions
#' @return A single string column made by concatenating the given input string columns.
setMethod("concat",
          signature(x = "Column"),
          function(x, ...) {
            # Gather the first column and any extra ones, then pull out the
            # `jc` slot of each so they can be handed over as one sequence.
            columns <- list(x, ...)
            jrefs <- lapply(columns, function(col) col@jc)
            jseq <- listToSeq(jrefs)
            column(callJStatic("org.apache.spark.sql.functions", "concat", jseq))
          })
#' @rdname functions
#' @return The greatest value among the given columns, skipping null values.
#' At least 2 columns must be supplied. The result is null if all parameters are null.
setMethod("greatest",
          signature(x = "Column"),
          function(x, ...) {
            # Require at least one column besides `x`.
            stopifnot(length(list(...)) > 0)
            columns <- list(x, ...)
            jrefs <- lapply(columns, function(col) col@jc)
            jseq <- listToSeq(jrefs)
            column(callJStatic("org.apache.spark.sql.functions", "greatest", jseq))
          })
#' @rdname functions
#' @return The least value among the given columns, skipping null values.
#' At least 2 columns must be supplied. The result is null iff all parameters are null.
setMethod("least",
          signature(x = "Column"),
          function(x, ...) {
            # Require at least one column besides `x`.
            stopifnot(length(list(...)) > 0)
            columns <- list(x, ...)
            jrefs <- lapply(columns, function(col) col@jc)
            jseq <- listToSeq(jrefs)
            column(callJStatic("org.apache.spark.sql.functions", "least", jseq))
          })
#' @rdname functions
#' @aliases ceil
setMethod("ceiling",

View file

@ -682,6 +682,10 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") })
#' @export
setGeneric("ceil", function(x) {
  standardGeneric("ceil")
})

#' @rdname functions
#' @export
setGeneric("concat", function(x, ...) {
  standardGeneric("concat")
})

#' @rdname functions
#' @export
setGeneric("crc32", function(x) {
  standardGeneric("crc32")
})
@ -702,6 +706,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
#' @export
setGeneric("explode", function(x) {
  standardGeneric("explode")
})

#' @rdname functions
#' @export
setGeneric("greatest", function(x, ...) {
  standardGeneric("greatest")
})

#' @rdname functions
#' @export
setGeneric("hex", function(x) {
  standardGeneric("hex")
})
@ -722,10 +730,18 @@ setGeneric("isNaN", function(x) { standardGeneric("isNaN") })
#' @export
setGeneric("last_day", function(x) {
  standardGeneric("last_day")
})

#' @rdname functions
#' @export
setGeneric("least", function(x, ...) {
  standardGeneric("least")
})

#' @rdname functions
#' @export
setGeneric("levenshtein", function(y, x) {
  standardGeneric("levenshtein")
})

#' @rdname functions
#' @export
setGeneric("lit", function(x) {
  standardGeneric("lit")
})

#' @rdname functions
#' @export
setGeneric("lower", function(x) {
  standardGeneric("lower")
})

View file

@ -580,6 +580,11 @@ test_that("select with column", {
df2 <- select(df, df$age)
expect_equal(columns(df2), c("age"))
expect_equal(count(df2), 3)
df3 <- select(df, lit("x"))
expect_equal(columns(df3), c("x"))
expect_equal(count(df3), 3)
expect_equal(collect(select(df3, "x"))[[1, 1]], "x")
})
test_that("selectExpr() on a DataFrame", {
@ -712,6 +717,14 @@ test_that("string operators", {
expect_equal(count(where(df, startsWith(df$name, "A"))), 1)
expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi")
expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30")
expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30")
})
test_that("greatest() and least() on a DataFrame", {
  # Two rows; in each row column b holds the larger value.
  rows <- list(list(a = 1, b = 2), list(a = 3, b = 4))
  df <- createDataFrame(sqlContext, rows)

  # Row-wise maximum across the two columns.
  maxima <- collect(select(df, greatest(df$a, df$b)))
  expect_equal(maxima[, 1], c(2, 4))

  # Row-wise minimum across the two columns.
  minima <- collect(select(df, least(df$a, df$b)))
  expect_equal(minima[, 1], c(1, 3))
})
test_that("group by", {