[SPARK-17577][FOLLOW-UP][SPARKR] SparkR spark.addFile supports adding directory recursively
## What changes were proposed in this pull request?

#15140 exposed `JavaSparkContext.addFile(path: String, recursive: Boolean)` to Python/R, so we can update SparkR `spark.addFile` to support adding a directory recursively.

## How was this patch tested?

Added unit test.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #15216 from yanboliang/spark-17577-2.
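With this change, a SparkR user can ship an entire directory to every executor in one call. A minimal usage sketch, assuming a local scratch directory and file names chosen only for illustration (they are not part of the patch):

```r
library(SparkR)
sparkR.session()

# Build a small directory tree to distribute (illustrative layout).
dir <- file.path(tempdir(), "conf_dir")
dir.create(dir)
writeLines("a = 1", file.path(dir, "app.conf"))

# New behavior: recursive = TRUE lets spark.addFile accept a directory.
spark.addFile(dir, recursive = TRUE)

# Resolve the downloaded copy by its path relative to the added directory.
spark.getSparkFiles(file.path(basename(dir), "app.conf"))

sparkR.session.stop()
```

As before, `recursive` defaults to `FALSE`, so existing single-file callers are unaffected.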
parent 00be16df64
commit 93c743f1ac
@@ -231,17 +231,22 @@ setCheckpointDir <- function(sc, dirName) {
 #' filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs,
 #' use spark.getSparkFiles(fileName) to find its download location.
 #'
+#' A directory can be given if the recursive option is set to true.
+#' Currently directories are only supported for Hadoop-supported filesystems.
+#' Refer Hadoop-supported filesystems at \url{https://wiki.apache.org/hadoop/HCFS}.
+#'
 #' @rdname spark.addFile
 #' @param path The path of the file to be added
+#' @param recursive Whether to add files recursively from the path. Default is FALSE.
 #' @export
 #' @examples
 #'\dontrun{
 #' spark.addFile("~/myfile")
 #'}
 #' @note spark.addFile since 2.1.0
-spark.addFile <- function(path) {
+spark.addFile <- function(path, recursive = FALSE) {
   sc <- getSparkContext()
-  invisible(callJMethod(sc, "addFile", suppressWarnings(normalizePath(path))))
+  invisible(callJMethod(sc, "addFile", suppressWarnings(normalizePath(path)), recursive))
 }
 
 #' Get the root directory that contains files added through spark.addFile.
@@ -169,6 +169,7 @@ test_that("spark.lapply should perform simple transforms", {
 
 test_that("add and get file to be downloaded with Spark job on every node", {
   sparkR.sparkContext()
+  # Test add file.
   path <- tempfile(pattern = "hello", fileext = ".txt")
   filename <- basename(path)
   words <- "Hello World!"
@@ -177,5 +178,26 @@
   download_path <- spark.getSparkFiles(filename)
   expect_equal(readLines(download_path), words)
   unlink(path)
+
+  # Test add directory recursively.
+  path <- paste0(tempdir(), "/", "recursive_dir")
+  dir.create(path)
+  dir_name <- basename(path)
+  path1 <- paste0(path, "/", "hello.txt")
+  file.create(path1)
+  sub_path <- paste0(path, "/", "sub_hello")
+  dir.create(sub_path)
+  path2 <- paste0(sub_path, "/", "sub_hello.txt")
+  file.create(path2)
+  words <- "Hello World!"
+  sub_words <- "Sub Hello World!"
+  writeLines(words, path1)
+  writeLines(sub_words, path2)
+  spark.addFile(path, recursive = TRUE)
+  download_path1 <- spark.getSparkFiles(paste0(dir_name, "/", "hello.txt"))
+  expect_equal(readLines(download_path1), words)
+  download_path2 <- spark.getSparkFiles(paste0(dir_name, "/", "sub_hello/sub_hello.txt"))
+  expect_equal(readLines(download_path2), sub_words)
+  unlink(path, recursive = TRUE)
   sparkR.session.stop()
 })