[SPARK-26199][SPARK-31517][R] Fix strategy for handling ... names in mutate
### What changes were proposed in this pull request? Change the strategy for how the varargs are handled in the default `mutate` method ### Why are the changes needed? Bugfix -- `deparse` + `sapply` not working as intended due to `width.cutoff` ### Does this PR introduce any user-facing change? Yes, bugfix. Shouldn't change any working code. ### How was this patch tested? None! yet. Closes #28386 from MichaelChirico/r-mutate-deparse. Lead-authored-by: Michael Chirico <michael.chirico@grabtaxi.com> Co-authored-by: Michael Chirico <michaelchirico4@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
ed09673fb9
commit
12b69cc27c
|
@ -2277,16 +2277,17 @@ setMethod("mutate",
|
|||
|
||||
# For named arguments, use the names for arguments as the column names
|
||||
# For unnamed arguments, use the argument symbols as the column names
|
||||
args <- sapply(substitute(list(...))[-1], deparse)
|
||||
ns <- names(cols)
|
||||
if (!is.null(ns)) {
|
||||
lapply(seq_along(args), function(i) {
|
||||
if (ns[[i]] != "") {
|
||||
args[[i]] <<- ns[[i]]
|
||||
}
|
||||
if (is.null(ns)) ns <- rep("", length(cols))
|
||||
named_idx <- nzchar(ns)
|
||||
if (!all(named_idx)) {
|
||||
# SPARK-31517: deparse uses width.cutoff on wide input and the
|
||||
# output is length>1, so need to collapse it to scalar
|
||||
colsub <- substitute(list(...))[-1L]
|
||||
ns[!named_idx] <- sapply(which(!named_idx), function(ii) {
|
||||
paste(gsub("^\\s*|\\s*$", "", deparse(colsub[[ii]])), collapse = " ")
|
||||
})
|
||||
}
|
||||
ns <- args
|
||||
|
||||
# The last column of the same name in the specific columns takes effect
|
||||
deDupCols <- list()
|
||||
|
@ -3444,7 +3445,8 @@ setMethod("as.data.frame",
|
|||
#' @note attach since 1.6.0
|
||||
setMethod("attach",
|
||||
signature(what = "SparkDataFrame"),
|
||||
function(what, pos = 2L, name = deparse(substitute(what), backtick = FALSE),
|
||||
function(what, pos = 2L,
|
||||
name = paste(deparse(substitute(what), backtick = FALSE), collapse = " "),
|
||||
warn.conflicts = TRUE) {
|
||||
args <- as.list(environment()) # capture all parameters - this must be the first line
|
||||
newEnv <- assignNewEnv(args$what)
|
||||
|
|
|
@ -2884,6 +2884,15 @@ test_that("mutate(), transform(), rename() and names()", {
|
|||
expect_equal(nrow(result), 153)
|
||||
expect_equal(ncol(result), 2)
|
||||
detach(airquality)
|
||||
|
||||
# ensure long inferred names are handled without error (SPARK-26199)
|
||||
# test implicitly assumes eval(formals(deparse)$width.cutoff) = 60
|
||||
# (which has always been true as of 2020-11-15)
|
||||
newDF <- mutate(
|
||||
df,
|
||||
df$age + 12345678901234567890 + 12345678901234567890 + 12345678901234
|
||||
)
|
||||
expect_match(tail(columns(newDF), 1L), "234567890", fixed = TRUE)
|
||||
})
|
||||
|
||||
test_that("read/write ORC files", {
|
||||
|
@ -3273,6 +3282,12 @@ test_that("attach() on a DataFrame", {
|
|||
stat3 <- summary(df[, "age", drop = F])
|
||||
expect_equal(collect(stat3)[8, "age"], "30")
|
||||
expect_error(age)
|
||||
|
||||
# attach method uses deparse(); ensure no errors from a very long input
|
||||
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop <- df # nolint
|
||||
attach(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop)
|
||||
expect_true(any(grepl("abcdefghijklmnopqrstuvwxyz", search())))
|
||||
detach("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop")
|
||||
})
|
||||
|
||||
test_that("with() on a DataFrame", {
|
||||
|
|
Loading…
Reference in a new issue