[SPARK-9871] [SPARKR] Add expression functions into SparkR which have a variable parameter
### Summary - Add the `lit` function - Add the `concat`, `greatest`, and `least` functions I think we need to improve the `collect` function in order to implement the `struct` function, since `collect` doesn't work with arguments that include a nested `list` variable. It seems that a list corresponding to a `struct` still contains elements of class `jobj`, so it would be better to solve this problem in a separate issue. ### JIRA [[SPARK-9871] Add expression functions into SparkR which have a variable parameter - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9871) Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> Closes #8194 from yu-iskw/SPARK-9856.
This commit is contained in:
parent
ae2370e72f
commit
26e760581f
|
@ -98,6 +98,7 @@ exportMethods("abs",
|
|||
"contains",
|
||||
"cos",
|
||||
"cosh",
|
||||
"concat",
|
||||
"countDistinct",
|
||||
"desc",
|
||||
"endsWith",
|
||||
|
@ -106,10 +107,13 @@ exportMethods("abs",
|
|||
"floor",
|
||||
"getField",
|
||||
"getItem",
|
||||
"greatest",
|
||||
"hypot",
|
||||
"isNotNull",
|
||||
"isNull",
|
||||
"lit",
|
||||
"last",
|
||||
"least",
|
||||
"like",
|
||||
"log",
|
||||
"log10",
|
||||
|
|
|
@ -67,6 +67,14 @@ createFunctions <- function() {
|
|||
|
||||
createFunctions()
|
||||
|
||||
#' @rdname functions
#' @return Creates a Column class of literal value.
setMethod("lit", signature("ANY"),
          function(x) {
            # A Column contributes its `jc` slot; any other value is forwarded as-is.
            # Plain if/else replaces ifelse(): ifelse() is vectorised and shapes its
            # result after the length-1 test, which would silently truncate a
            # non-scalar `x`. inherits() is used instead of `class(x) == "Column"`
            # so objects carrying more than one class are tested safely.
            jx <- if (inherits(x, "Column")) {
              x@jc
            } else {
              x
            }
            # Invokes the static "lit" method named on org.apache.spark.sql.functions
            # and wraps the returned reference with column().
            jc <- callJStatic("org.apache.spark.sql.functions", "lit", jx)
            column(jc)
          })
|
||||
|
||||
#' Approx Count Distinct
|
||||
#'
|
||||
#' @rdname functions
|
||||
|
@ -93,6 +101,40 @@ setMethod("countDistinct",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' @rdname functions
#' @return Concatenates multiple input string columns together into a single string column.
setMethod("concat",
          signature(x = "Column"),
          function(x, ...) {
            # Collect the `jc` slot of the first column and of every extra
            # argument, preserving argument order.
            cols <- list(x, ...)
            jrefs <- lapply(cols, function(col) col@jc)
            # listToSeq() converts the R list before it is handed to the static
            # "concat" method; the result is wrapped with column().
            jseq <- listToSeq(jrefs)
            column(callJStatic("org.apache.spark.sql.functions", "concat", jseq))
          })
|
||||
|
||||
#' @rdname functions
#' @return Returns the greatest value of the list of column names, skipping null values.
#' This function takes at least 2 parameters. It will return null if all parameters are null.
setMethod("greatest",
          signature(x = "Column"),
          function(x, ...) {
            # At least one column besides `x` is required (two parameters in total).
            stopifnot(length(list(...)) > 0)
            # Collect the `jc` slot of every argument, preserving argument order.
            cols <- list(x, ...)
            jrefs <- lapply(cols, function(col) col@jc)
            jseq <- listToSeq(jrefs)
            column(callJStatic("org.apache.spark.sql.functions", "greatest", jseq))
          })
|
||||
|
||||
#' @rdname functions
#' @return Returns the least value of the list of column names, skipping null values.
#' This function takes at least 2 parameters. It will return null if and only if all
#' parameters are null.
setMethod("least",
          signature(x = "Column"),
          function(x, ...) {
            # At least one column besides `x` is required (two parameters in total).
            stopifnot(length(list(...)) > 0)
            # Collect the `jc` slot of every argument, preserving argument order.
            cols <- list(x, ...)
            jrefs <- lapply(cols, function(col) col@jc)
            jseq <- listToSeq(jrefs)
            column(callJStatic("org.apache.spark.sql.functions", "least", jseq))
          })
|
||||
|
||||
#' @rdname functions
|
||||
#' @aliases ceil
|
||||
setMethod("ceiling",
|
||||
|
|
|
@ -682,6 +682,10 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") })
|
|||
#' @export
|
||||
setGeneric("ceil", function(x) {
  # Hand off to S4 method dispatch.
  standardGeneric("ceil")
})
|
||||
|
||||
#' @rdname functions
|
||||
#' @export
|
||||
setGeneric("concat", function(x, ...) {
  # Hand off to S4 method dispatch; extra arguments travel through `...`.
  standardGeneric("concat")
})
|
||||
|
||||
#' @rdname functions
|
||||
#' @export
|
||||
setGeneric("crc32", function(x) {
  # Hand off to S4 method dispatch.
  standardGeneric("crc32")
})
|
||||
|
@ -702,6 +706,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
|
|||
#' @export
|
||||
setGeneric("explode", function(x) {
  # Hand off to S4 method dispatch.
  standardGeneric("explode")
})
|
||||
|
||||
#' @rdname functions
|
||||
#' @export
|
||||
setGeneric("greatest", function(x, ...) {
  # Hand off to S4 method dispatch; extra arguments travel through `...`.
  standardGeneric("greatest")
})
|
||||
|
||||
#' @rdname functions
|
||||
#' @export
|
||||
setGeneric("hex", function(x) {
  # Hand off to S4 method dispatch.
  standardGeneric("hex")
})
|
||||
|
@ -722,10 +730,18 @@ setGeneric("isNaN", function(x) { standardGeneric("isNaN") })
|
|||
#' @export
|
||||
setGeneric("last_day", function(x) {
  # Hand off to S4 method dispatch.
  standardGeneric("last_day")
})
|
||||
|
||||
#' @rdname functions
|
||||
#' @export
|
||||
setGeneric("least", function(x, ...) {
  # Hand off to S4 method dispatch; extra arguments travel through `...`.
  standardGeneric("least")
})
|
||||
|
||||
#' @rdname functions
|
||||
#' @export
|
||||
setGeneric("levenshtein", function(y, x) {
  # Hand off to S4 method dispatch. Note the argument order: `y` comes first.
  standardGeneric("levenshtein")
})
|
||||
|
||||
#' @rdname functions
|
||||
#' @export
|
||||
setGeneric("lit", function(x) {
  # Hand off to S4 method dispatch.
  standardGeneric("lit")
})
|
||||
|
||||
#' @rdname functions
|
||||
#' @export
|
||||
setGeneric("lower", function(x) {
  # Hand off to S4 method dispatch.
  standardGeneric("lower")
})
|
||||
|
|
|
@ -580,6 +580,11 @@ test_that("select with column", {
|
|||
df2 <- select(df, df$age)
|
||||
expect_equal(columns(df2), c("age"))
|
||||
expect_equal(count(df2), 3)
|
||||
|
||||
df3 <- select(df, lit("x"))
|
||||
expect_equal(columns(df3), c("x"))
|
||||
expect_equal(count(df3), 3)
|
||||
expect_equal(collect(select(df3, "x"))[[1, 1]], "x")
|
||||
})
|
||||
|
||||
test_that("selectExpr() on a DataFrame", {
|
||||
|
@ -712,6 +717,14 @@ test_that("string operators", {
|
|||
expect_equal(count(where(df, startsWith(df$name, "A"))), 1)
|
||||
expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi")
|
||||
expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30")
|
||||
expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30")
|
||||
})
|
||||
|
||||
test_that("greatest() and least() on a DataFrame", {
  # Two rows with columns a and b, where b is the larger value in each row.
  rows <- list(list(a = 1, b = 2), list(a = 3, b = 4))
  df <- createDataFrame(sqlContext, rows)

  # The row-wise maximum of (a, b) should pick b.
  maxima <- collect(select(df, greatest(df$a, df$b)))
  expect_equal(maxima[, 1], c(2, 4))

  # The row-wise minimum of (a, b) should pick a.
  minima <- collect(select(df, least(df$a, df$b)))
  expect_equal(minima[, 1], c(1, 3))
})
|
||||
|
||||
test_that("group by", {
|
||||
|
|
Loading…
Reference in a new issue