[SPARK-16096][SPARKR] add union and deprecate unionAll
## What changes were proposed in this pull request? Add `union` and deprecate `unionAll`; separate the roxygen2 doc for `rbind` from `union` (since their usage and parameter lists are quite different). `explode` is also deprecated, but its replacement seems to be a combination of calls, so it is not yet clear whether we should deprecate it in SparkR. ## How was this patch tested? Unit tests and manual checks of the generated R documentation. Author: Felix Cheung <felixcheung_m@hotmail.com> Closes #13805 from felixcheung/runion.
This commit is contained in:
parent
918c91954f
commit
dbfdae4e41
|
@ -107,6 +107,7 @@ exportMethods("arrange",
|
|||
"summary",
|
||||
"take",
|
||||
"transform",
|
||||
"union",
|
||||
"unionAll",
|
||||
"unique",
|
||||
"unpersist",
|
||||
|
|
|
@ -2251,7 +2251,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
|
|||
cols
|
||||
}
|
||||
|
||||
#' rbind
|
||||
#' Return a new SparkDataFrame containing the union of rows
|
||||
#'
|
||||
#' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame
|
||||
#' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL.
|
||||
|
@ -2261,39 +2261,64 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
|
|||
#' @param y A SparkDataFrame
|
||||
#' @return A SparkDataFrame containing the result of the union.
|
||||
#' @family SparkDataFrame functions
|
||||
#' @rdname rbind
|
||||
#' @name unionAll
|
||||
#' @rdname union
|
||||
#' @name union
|
||||
#' @seealso \link{rbind}
|
||||
#' @export
|
||||
#' @examples
|
||||
#'\dontrun{
|
||||
#' sparkR.session()
|
||||
#' df1 <- read.json(path)
|
||||
#' df2 <- read.json(path2)
|
||||
#' unioned <- unionAll(df, df2)
|
||||
#' unioned <- union(df1, df2)
|
||||
#' unions <- rbind(df, df2, df3, df4)
|
||||
#' }
|
||||
#' @note union since 2.0.0
|
||||
setMethod("union",
|
||||
signature(x = "SparkDataFrame", y = "SparkDataFrame"),
|
||||
function(x, y) {
|
||||
unioned <- callJMethod(x@sdf, "union", y@sdf)
|
||||
dataFrame(unioned)
|
||||
})
|
||||
|
||||
#' unionAll is deprecated - use union instead
|
||||
#' @rdname union
|
||||
#' @name unionAll
|
||||
#' @export
|
||||
#' @note unionAll since 1.4.0
|
||||
setMethod("unionAll",
|
||||
signature(x = "SparkDataFrame", y = "SparkDataFrame"),
|
||||
function(x, y) {
|
||||
unioned <- callJMethod(x@sdf, "unionAll", y@sdf)
|
||||
dataFrame(unioned)
|
||||
.Deprecated("union")
|
||||
union(x, y)
|
||||
})
|
||||
|
||||
#' Union two or more SparkDataFrames
|
||||
#'
|
||||
#' Returns a new SparkDataFrame containing rows of all parameters.
|
||||
#' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL.
|
||||
#' Note that this does not remove duplicate rows across the two SparkDataFrames.
|
||||
#'
|
||||
#' @param x A SparkDataFrame
|
||||
#' @param ... Additional SparkDataFrame
|
||||
#' @return A SparkDataFrame containing the result of the union.
|
||||
#' @family SparkDataFrame functions
|
||||
#' @rdname rbind
|
||||
#' @name rbind
|
||||
#' @seealso \link{union}
|
||||
#' @export
|
||||
#' @examples
|
||||
#'\dontrun{
|
||||
#' sparkR.session()
|
||||
#' unions <- rbind(df, df2, df3, df4)
|
||||
#' }
|
||||
#' @note rbind since 1.5.0
|
||||
setMethod("rbind",
|
||||
signature(... = "SparkDataFrame"),
|
||||
function(x, ..., deparse.level = 1) {
|
||||
if (nargs() == 3) {
|
||||
unionAll(x, ...)
|
||||
union(x, ...)
|
||||
} else {
|
||||
unionAll(x, Recall(..., deparse.level = 1))
|
||||
union(x, Recall(..., deparse.level = 1))
|
||||
}
|
||||
})
|
||||
|
||||
|
|
|
@ -662,7 +662,11 @@ setGeneric("toJSON", function(x) { standardGeneric("toJSON") })
|
|||
|
||||
setGeneric("toRDD", function(x) { standardGeneric("toRDD") })
|
||||
|
||||
#' @rdname rbind
|
||||
#' @rdname union
|
||||
#' @export
|
||||
setGeneric("union", function(x, y) { standardGeneric("union") })
|
||||
|
||||
#' @rdname union
|
||||
#' @export
|
||||
setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") })
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ test_that("Check masked functions", {
|
|||
namesOfMaskedCompletely <- c("cov", "filter", "sample")
|
||||
namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var",
|
||||
"colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset",
|
||||
"summary", "transform", "drop", "window", "as.data.frame")
|
||||
"summary", "transform", "drop", "window", "as.data.frame", "union")
|
||||
if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
|
||||
namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
|
||||
}
|
||||
|
|
|
@ -1590,7 +1590,7 @@ test_that("isLocal()", {
|
|||
expect_false(isLocal(df))
|
||||
})
|
||||
|
||||
test_that("unionAll(), rbind(), except(), and intersect() on a DataFrame", {
|
||||
test_that("union(), rbind(), except(), and intersect() on a DataFrame", {
|
||||
df <- read.json(jsonPath)
|
||||
|
||||
lines <- c("{\"name\":\"Bob\", \"age\":24}",
|
||||
|
@ -1600,10 +1600,11 @@ test_that("unionAll(), rbind(), except(), and intersect() on a DataFrame", {
|
|||
writeLines(lines, jsonPath2)
|
||||
df2 <- read.df(jsonPath2, "json")
|
||||
|
||||
unioned <- arrange(unionAll(df, df2), df$age)
|
||||
unioned <- arrange(union(df, df2), df$age)
|
||||
expect_is(unioned, "SparkDataFrame")
|
||||
expect_equal(count(unioned), 6)
|
||||
expect_equal(first(unioned)$name, "Michael")
|
||||
expect_equal(count(arrange(suppressWarnings(unionAll(df, df2)), df$age)), 6)
|
||||
|
||||
unioned2 <- arrange(rbind(unioned, df, df2), df$age)
|
||||
expect_is(unioned2, "SparkDataFrame")
|
||||
|
@ -1620,6 +1621,9 @@ test_that("unionAll(), rbind(), except(), and intersect() on a DataFrame", {
|
|||
expect_equal(count(intersected), 1)
|
||||
expect_equal(first(intersected)$name, "Andy")
|
||||
|
||||
# Test base::union is working
|
||||
expect_equal(union(c(1:3), c(3:5)), c(1:5))
|
||||
|
||||
# Test base::rbind is working
|
||||
expect_equal(length(rbind(1:4, c = 2, a = 10, 10, deparse.level = 0)), 16)
|
||||
|
||||
|
|
Loading…
Reference in a new issue