[SPARK-19130][SPARKR] Support setting literal value as column implicitly

## What changes were proposed in this pull request?

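Allow a literal value to be assigned to a SparkDataFrame column directly:

```
df$foo <- 1
```

instead of requiring the literal to be wrapped explicitly:

```
df$foo <- lit(1)
```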

## How was this patch tested?

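Unit tests covering `$<-` and `withColumn` with length-one literal values, and the errors raised for values that are not atomic vectors of length 1.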

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #16510 from felixcheung/rlitcol.
This commit is contained in:
Felix Cheung 2017-01-11 08:29:09 -08:00 committed by Shivaram Venkataraman
parent 4239a1081a
commit d749c06677
3 changed files with 39 additions and 5 deletions

View file

@ -1727,14 +1727,21 @@ setMethod("$", signature(x = "SparkDataFrame"),
getColumn(x, name)
})
#' @param value a Column or \code{NULL}. If \code{NULL}, the specified Column is dropped.
#' @param value a Column, an atomic vector of length 1 to be used as a literal value, or
#' \code{NULL}. If \code{NULL}, the specified Column is dropped.
#' @rdname select
#' @name $<-
#' @aliases $<-,SparkDataFrame-method
#' @note $<- since 1.4.0
setMethod("$<-", signature(x = "SparkDataFrame"),
function(x, name, value) {
stopifnot(class(value) == "Column" || is.null(value))
if (class(value) != "Column" && !is.null(value)) {
if (isAtomicLengthOne(value)) {
value <- lit(value)
} else {
stop("value must be a Column, literal value as atomic in length of 1, or NULL")
}
}
if (is.null(value)) {
nx <- drop(x, name)
@ -1947,10 +1954,10 @@ setMethod("selectExpr",
#'
#' @param x a SparkDataFrame.
#' @param colName a column name.
#' @param col a Column expression.
#' @param col a Column expression, or an atomic vector of length 1 to be used as a literal value.
#' @return A SparkDataFrame with the new column added or the existing column replaced.
#' @family SparkDataFrame functions
#' @aliases withColumn,SparkDataFrame,character,Column-method
#' @aliases withColumn,SparkDataFrame,character-method
#' @rdname withColumn
#' @name withColumn
#' @seealso \link{rename} \link{mutate}
@ -1963,11 +1970,16 @@ setMethod("selectExpr",
#' newDF <- withColumn(df, "newCol", df$col1 * 5)
#' # Replace an existing column
#' newDF2 <- withColumn(newDF, "newCol", newDF$col1)
#' newDF3 <- withColumn(newDF, "newCol", 42)
#' }
#' @note withColumn since 1.4.0
setMethod("withColumn",
# NOTE(review): two signature(...) lines follow -- one constraining col to "Column",
# one leaving col unconstrained. Presumably only the second is meant to remain, since
# the body below handles non-Column input itself -- confirm.
signature(x = "SparkDataFrame", colName = "character", col = "Column"),
signature(x = "SparkDataFrame", colName = "character"),
function(x, colName, col) {
# A non-Column `col` is accepted only when it is an atomic vector of length 1;
# it is then wrapped with lit() so that col@jc can be read below. Anything else
# stops with an error.
if (class(col) != "Column") {
if (!isAtomicLengthOne(col)) stop("Literal value must be atomic in length of 1")
col <- lit(col)
}
# Call "withColumn" on the underlying x@sdf object, passing the column name and
# the column's col@jc reference, and wrap the result with dataFrame().
sdf <- callJMethod(x@sdf, "withColumn", colName, col@jc)
dataFrame(sdf)
})

View file

@ -863,3 +863,7 @@ basenameSansExtFromUrl <- function(url) {
# then, strip extension by the last '.'
sub("([^.]+)\\.[[:alnum:]]+$", "\\1", filename)
}
# Check whether a value is an atomic vector holding exactly one element.
# The atomic check runs first, so length() is never consulted for non-atomic input.
isAtomicLengthOne <- function(x) {
  if (!is.atomic(x)) {
    return(FALSE)
  }
  length(x) == 1
}

View file

@ -1001,6 +1001,17 @@ test_that("select operators", {
expect_equal(columns(df), c("name", "age", "age2"))
expect_equal(count(where(df, df$age2 == df$age * 2)), 2)
df$age2 <- 21
expect_equal(columns(df), c("name", "age", "age2"))
expect_equal(count(where(df, df$age2 == 21)), 3)
df$age2 <- c(22)
expect_equal(columns(df), c("name", "age", "age2"))
expect_equal(count(where(df, df$age2 == 22)), 3)
expect_error(df$age3 <- c(22, NA),
"value must be a Column, literal value as atomic in length of 1, or NULL")
# Test parameter drop
expect_equal(class(df[, 1]) == "SparkDataFrame", T)
expect_equal(class(df[, 1, drop = T]) == "Column", T)
@ -1778,6 +1789,13 @@ test_that("withColumn() and withColumnRenamed()", {
expect_equal(length(columns(newDF)), 2)
expect_equal(first(filter(newDF, df$name != "Michael"))$age, 32)
newDF <- withColumn(df, "age", 18)
expect_equal(length(columns(newDF)), 2)
expect_equal(first(newDF)$age, 18)
expect_error(withColumn(df, "age", list("a")),
"Literal value must be atomic in length of 1")
newDF2 <- withColumnRenamed(df, "age", "newerAge")
expect_equal(length(columns(newDF2)), 2)
expect_equal(columns(newDF2)[1], "newerAge")