From 42219af9062d4ea524a13c2a6ea40d0d99f96c66 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 5 Aug 2020 10:35:03 -0700 Subject: [PATCH] [SPARK-32543][R] Remove arrow::as_tibble usage in SparkR ### What changes were proposed in this pull request? SparkR increased the minimum required version of the Arrow R package to 1.0.0 in SPARK-32452, and Arrow R 0.14 dropped `as_tibble`. We can therefore remove its usage in SparkR. ### Why are the changes needed? To remove code that is no longer used. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? GitHub Actions will test the changes. Closes #29361 from HyukjinKwon/SPARK-32543. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- R/pkg/R/DataFrame.R | 7 +------ R/pkg/R/deserialize.R | 13 +------------ 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 4d38f979c4..4eca5bd23c 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1234,12 +1234,7 @@ setMethod("collect", output <- tryCatch({ doServerAuth(conn, authSecret) arrowTable <- arrow::read_ipc_stream(readRaw(conn)) - # Arrow drops `as_tibble` since 0.14.0, see ARROW-5190. - if (exists("as_tibble", envir = asNamespace("arrow"))) { - as.data.frame(arrow::as_tibble(arrowTable), stringsAsFactors = stringsAsFactors) - } else { - as.data.frame(arrowTable, stringsAsFactors = stringsAsFactors) - } + as.data.frame(arrowTable, stringsAsFactors = stringsAsFactors) }, finally = { close(conn) }) diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 3e7c456bd5..5d22340fb6 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -233,24 +233,13 @@ readMultipleObjectsWithKeys <- function(inputCon) { readDeserializeInArrow <- function(inputCon) { if (requireNamespace("arrow", quietly = TRUE)) { - # Arrow drops `as_tibble` since 0.14.0, see ARROW-5190. 
- useAsTibble <- exists("as_tibble", envir = asNamespace("arrow")) - - # Currently, there looks no way to read batch by batch by socket connection in R side, # See ARROW-4512. Therefore, it reads the whole Arrow streaming-formatted binary at once # for now. dataLen <- readInt(inputCon) arrowData <- readBin(inputCon, raw(), as.integer(dataLen), endian = "big") batches <- arrow::RecordBatchStreamReader$create(arrowData)$batches() - - if (useAsTibble) { - as_tibble <- get("as_tibble", envir = asNamespace("arrow")) - # Read all groupped batches. Tibble -> data.frame is cheap. - lapply(batches, function(batch) as.data.frame(as_tibble(batch))) - } else { - lapply(batches, function(batch) as.data.frame(batch)) - } + lapply(batches, function(batch) as.data.frame(batch)) } else { stop("'arrow' package should be installed.") }