[SPARK-32543][R] Remove arrow::as_tibble usage in SparkR
### What changes were proposed in this pull request? SparkR raised its minimum required version of the Arrow R package to 1.0.0 in SPARK-32452, and Arrow R dropped `as_tibble` in 0.14. We can therefore remove the `as_tibble` usage in SparkR. ### Why are the changes needed? To remove code that is no longer used. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? GitHub Actions will test the changes. Closes #29361 from HyukjinKwon/SPARK-32543. Authored-by: HyukjinKwon <gurwls223@apache.org> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
4a0427cbc1
commit
42219af906
|
@ -1234,12 +1234,7 @@ setMethod("collect",
|
||||||
output <- tryCatch({
|
output <- tryCatch({
|
||||||
doServerAuth(conn, authSecret)
|
doServerAuth(conn, authSecret)
|
||||||
arrowTable <- arrow::read_ipc_stream(readRaw(conn))
|
arrowTable <- arrow::read_ipc_stream(readRaw(conn))
|
||||||
# Arrow drops `as_tibble` since 0.14.0, see ARROW-5190.
|
|
||||||
if (exists("as_tibble", envir = asNamespace("arrow"))) {
|
|
||||||
as.data.frame(arrow::as_tibble(arrowTable), stringsAsFactors = stringsAsFactors)
|
|
||||||
} else {
|
|
||||||
as.data.frame(arrowTable, stringsAsFactors = stringsAsFactors)
|
as.data.frame(arrowTable, stringsAsFactors = stringsAsFactors)
|
||||||
}
|
|
||||||
}, finally = {
|
}, finally = {
|
||||||
close(conn)
|
close(conn)
|
||||||
})
|
})
|
||||||
|
|
|
@ -233,24 +233,13 @@ readMultipleObjectsWithKeys <- function(inputCon) {
|
||||||
|
|
||||||
readDeserializeInArrow <- function(inputCon) {
|
readDeserializeInArrow <- function(inputCon) {
|
||||||
if (requireNamespace("arrow", quietly = TRUE)) {
|
if (requireNamespace("arrow", quietly = TRUE)) {
|
||||||
# Arrow drops `as_tibble` since 0.14.0, see ARROW-5190.
|
|
||||||
useAsTibble <- exists("as_tibble", envir = asNamespace("arrow"))
|
|
||||||
|
|
||||||
|
|
||||||
# Currently, there looks no way to read batch by batch by socket connection in R side,
|
# Currently, there looks no way to read batch by batch by socket connection in R side,
|
||||||
# See ARROW-4512. Therefore, it reads the whole Arrow streaming-formatted binary at once
|
# See ARROW-4512. Therefore, it reads the whole Arrow streaming-formatted binary at once
|
||||||
# for now.
|
# for now.
|
||||||
dataLen <- readInt(inputCon)
|
dataLen <- readInt(inputCon)
|
||||||
arrowData <- readBin(inputCon, raw(), as.integer(dataLen), endian = "big")
|
arrowData <- readBin(inputCon, raw(), as.integer(dataLen), endian = "big")
|
||||||
batches <- arrow::RecordBatchStreamReader$create(arrowData)$batches()
|
batches <- arrow::RecordBatchStreamReader$create(arrowData)$batches()
|
||||||
|
|
||||||
if (useAsTibble) {
|
|
||||||
as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
|
|
||||||
# Read all groupped batches. Tibble -> data.frame is cheap.
|
|
||||||
lapply(batches, function(batch) as.data.frame(as_tibble(batch)))
|
|
||||||
} else {
|
|
||||||
lapply(batches, function(batch) as.data.frame(batch))
|
lapply(batches, function(batch) as.data.frame(batch))
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
stop("'arrow' package should be installed.")
|
stop("'arrow' package should be installed.")
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue