[SPARK-26199][SPARK-31517][R] Fix strategy for handling ... names in mutate

### What changes were proposed in this pull request?

Change the strategy for how the varargs are handled in the default `mutate` method

### Why are the changes needed?

Bugfix -- `deparse` + `sapply` not working as intended due to `width.cutoff`

### Does this PR introduce any user-facing change?

Yes, bugfix. Shouldn't change any working code.

### How was this patch tested?

None! yet.

Closes #28386 from MichaelChirico/r-mutate-deparse.

Lead-authored-by: Michael Chirico <michael.chirico@grabtaxi.com>
Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Michael Chirico 2020-12-17 17:20:45 +09:00 committed by HyukjinKwon
parent ed09673fb9
commit 12b69cc27c
2 changed files with 25 additions and 8 deletions

View file

@ -2277,16 +2277,17 @@ setMethod("mutate",
# For named arguments, use the names for arguments as the column names
# For unnamed arguments, use the argument symbols as the column names
args <- sapply(substitute(list(...))[-1], deparse)
ns <- names(cols)
if (!is.null(ns)) {
lapply(seq_along(args), function(i) {
if (ns[[i]] != "") {
args[[i]] <<- ns[[i]]
}
if (is.null(ns)) ns <- rep("", length(cols))
named_idx <- nzchar(ns)
if (!all(named_idx)) {
# SPARK-31517: deparse uses width.cutoff on wide input and the
# output is length>1, so need to collapse it to scalar
colsub <- substitute(list(...))[-1L]
ns[!named_idx] <- sapply(which(!named_idx), function(ii) {
paste(gsub("^\\s*|\\s*$", "", deparse(colsub[[ii]])), collapse = " ")
})
}
ns <- args
# The last column of the same name in the specific columns takes effect
deDupCols <- list()
@ -3444,7 +3445,8 @@ setMethod("as.data.frame",
#' @note attach since 1.6.0
setMethod("attach",
signature(what = "SparkDataFrame"),
function(what, pos = 2L, name = deparse(substitute(what), backtick = FALSE),
function(what, pos = 2L,
name = paste(deparse(substitute(what), backtick = FALSE), collapse = " "),
warn.conflicts = TRUE) {
args <- as.list(environment()) # capture all parameters - this must be the first line
newEnv <- assignNewEnv(args$what)

View file

@ -2884,6 +2884,15 @@ test_that("mutate(), transform(), rename() and names()", {
expect_equal(nrow(result), 153)
expect_equal(ncol(result), 2)
detach(airquality)
# ensure long inferred names are handled without error (SPARK-26199)
# test implicitly assumes eval(formals(deparse)$width.cutoff) = 60
# (which has always been true as of 2020-11-15)
newDF <- mutate(
df,
df$age + 12345678901234567890 + 12345678901234567890 + 12345678901234
)
expect_match(tail(columns(newDF), 1L), "234567890", fixed = TRUE)
})
test_that("read/write ORC files", {
@ -3273,6 +3282,12 @@ test_that("attach() on a DataFrame", {
stat3 <- summary(df[, "age", drop = F])
expect_equal(collect(stat3)[8, "age"], "30")
expect_error(age)
# attach method uses deparse(); ensure no errors from a very long input
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop <- df # nolint
attach(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop)
expect_true(any(grepl("abcdefghijklmnopqrstuvwxyz", search())))
detach("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop")
})
test_that("with() on a DataFrame", {