[SPARK-32543][R] Remove arrow::as_tibble usage in SparkR
### What changes were proposed in this pull request? SparkR raised the minimum supported version of the Arrow R package to 1.0.0 in SPARK-32452, and Arrow R 0.14 dropped `as_tibble`. We can therefore remove its usage in SparkR. ### Why are the changes needed? To remove code that is no longer used. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? GitHub Actions will test the change. Closes #29361 from HyukjinKwon/SPARK-32543. Authored-by: HyukjinKwon <gurwls223@apache.org> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
4a0427cbc1
commit
42219af906
|
@ -1234,12 +1234,7 @@ setMethod("collect",
|
|||
output <- tryCatch({
|
||||
doServerAuth(conn, authSecret)
|
||||
arrowTable <- arrow::read_ipc_stream(readRaw(conn))
|
||||
# Arrow drops `as_tibble` since 0.14.0, see ARROW-5190.
|
||||
if (exists("as_tibble", envir = asNamespace("arrow"))) {
|
||||
as.data.frame(arrow::as_tibble(arrowTable), stringsAsFactors = stringsAsFactors)
|
||||
} else {
|
||||
as.data.frame(arrowTable, stringsAsFactors = stringsAsFactors)
|
||||
}
|
||||
}, finally = {
|
||||
close(conn)
|
||||
})
|
||||
|
|
|
@ -233,24 +233,13 @@ readMultipleObjectsWithKeys <- function(inputCon) {
|
|||
|
||||
readDeserializeInArrow <- function(inputCon) {
|
||||
if (requireNamespace("arrow", quietly = TRUE)) {
|
||||
# Arrow drops `as_tibble` since 0.14.0, see ARROW-5190.
|
||||
useAsTibble <- exists("as_tibble", envir = asNamespace("arrow"))
|
||||
|
||||
|
||||
# Currently, there seems to be no way to read batch by batch over a socket connection on the R side,
|
||||
# See ARROW-4512. Therefore, it reads the whole Arrow streaming-formatted binary at once
|
||||
# for now.
|
||||
dataLen <- readInt(inputCon)
|
||||
arrowData <- readBin(inputCon, raw(), as.integer(dataLen), endian = "big")
|
||||
batches <- arrow::RecordBatchStreamReader$create(arrowData)$batches()
|
||||
|
||||
if (useAsTibble) {
|
||||
as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
|
||||
# Read all grouped batches. Tibble -> data.frame is cheap.
|
||||
lapply(batches, function(batch) as.data.frame(as_tibble(batch)))
|
||||
} else {
|
||||
lapply(batches, function(batch) as.data.frame(batch))
|
||||
}
|
||||
} else {
|
||||
stop("'arrow' package should be installed.")
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue