[SPARK-31290][R] Add back the deprecated R APIs
### What changes were proposed in this pull request? Add back the deprecated R APIs removed by https://github.com/apache/spark/pull/22843/ and https://github.com/apache/spark/pull/22815. These APIs are - `sparkR.init` - `sparkRSQL.init` - `sparkRHive.init` - `registerTempTable` - `createExternalTable` - `dropTempTable` There is no need to port dispatch wrappers such as ```r createExternalTable <- function(x, ...) { dispatchFunc("createExternalTable(tableName, path = NULL, source = NULL, ...)", x, ...) } ``` because, judging from https://github.com/apache/spark/pull/9192, they existed only for backward compatibility with the time when SQLContext still existed, and they no longer seem to be needed since SparkR replaced SQLContext with SparkSession in https://github.com/apache/spark/pull/13635. ### Why are the changes needed? To follow the amended Spark Semantic Versioning Policy. ### Does this PR introduce any user-facing change? Yes. The removed R APIs are put back. ### How was this patch tested? Added back the removed tests. Closes #28058 from huaxingao/r. Authored-by: Huaxin Gao <huaxing@us.ibm.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
20fc6fa839
commit
fd0b228127
|
@ -28,6 +28,7 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u
|
|||
|
||||
# S3 methods exported
|
||||
export("sparkR.session")
|
||||
export("sparkR.init")
|
||||
export("sparkR.session.stop")
|
||||
export("sparkR.stop")
|
||||
export("sparkR.conf")
|
||||
|
@ -41,6 +42,9 @@ export("sparkR.callJStatic")
|
|||
|
||||
export("install.spark")
|
||||
|
||||
export("sparkRSQL.init",
|
||||
"sparkRHive.init")
|
||||
|
||||
# MLlib integration
|
||||
exportMethods("glm",
|
||||
"spark.glm",
|
||||
|
@ -148,6 +152,7 @@ exportMethods("arrange",
|
|||
"printSchema",
|
||||
"randomSplit",
|
||||
"rbind",
|
||||
"registerTempTable",
|
||||
"rename",
|
||||
"repartition",
|
||||
"repartitionByRange",
|
||||
|
@ -431,8 +436,10 @@ export("as.DataFrame",
|
|||
"cacheTable",
|
||||
"clearCache",
|
||||
"createDataFrame",
|
||||
"createExternalTable",
|
||||
"createTable",
|
||||
"currentDatabase",
|
||||
"dropTempTable",
|
||||
"dropTempView",
|
||||
"listColumns",
|
||||
"listDatabases",
|
||||
|
|
|
@ -521,6 +521,32 @@ setMethod("createOrReplaceTempView",
|
|||
invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
|
||||
})
|
||||
|
||||
#' (Deprecated) Register Temporary Table
|
||||
#'
|
||||
#' Registers a SparkDataFrame as a Temporary Table in the SparkSession
|
||||
#' @param x A SparkDataFrame
|
||||
#' @param tableName A character vector containing the name of the table
|
||||
#'
|
||||
#' @seealso \link{createOrReplaceTempView}
|
||||
#' @rdname registerTempTable-deprecated
|
||||
#' @name registerTempTable
|
||||
#' @aliases registerTempTable,SparkDataFrame,character-method
|
||||
#' @examples
|
||||
#'\dontrun{
|
||||
#' sparkR.session()
|
||||
#' path <- "path/to/file.json"
|
||||
#' df <- read.json(path)
|
||||
#' registerTempTable(df, "json_df")
|
||||
#' new_df <- sql("SELECT * FROM json_df")
|
||||
#'}
|
||||
#' @note registerTempTable since 1.4.0
|
||||
setMethod("registerTempTable",
          signature(x = "SparkDataFrame", tableName = "character"),
          function(x, tableName) {
            # Deprecated alias: warn, then perform the same JVM call that
            # createOrReplaceTempView makes on the underlying Java DataFrame.
            .Deprecated("createOrReplaceTempView")
            jvmResult <- callJMethod(x@sdf, "createOrReplaceTempView", tableName)
            # Called for its side effect only, so the result is not printed.
            invisible(jvmResult)
          })
|
||||
|
||||
#' insertInto
|
||||
#'
|
||||
#' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
|
||||
|
|
|
@ -17,6 +17,35 @@
|
|||
|
||||
# catalog.R: SparkSession catalog functions
|
||||
|
||||
#' (Deprecated) Create an external table
|
||||
#'
|
||||
#' Creates an external table based on the dataset in a data source.
|
||||
#' Returns a SparkDataFrame associated with the external table.
|
||||
#'
|
||||
#' The data source is specified by the \code{source} and a set of options(...).
|
||||
#' If \code{source} is not specified, the default data source configured by
|
||||
#' "spark.sql.sources.default" will be used.
|
||||
#'
|
||||
#' @param tableName a name of the table.
|
||||
#' @param path the path of files to load.
|
||||
#' @param source the name of external data source.
|
||||
#' @param schema the schema of the data required for some data sources.
|
||||
#' @param ... additional argument(s) passed to the method.
|
||||
#' @return A SparkDataFrame.
|
||||
#' @rdname createExternalTable-deprecated
|
||||
#' @seealso \link{createTable}
|
||||
#' @examples
|
||||
#'\dontrun{
|
||||
#' sparkR.session()
|
||||
#' df <- createExternalTable("myjson", path="path/to/json", source="json", schema)
|
||||
#' }
|
||||
#' @name createExternalTable
|
||||
#' @note createExternalTable since 1.4.0
|
||||
createExternalTable <- function(tableName,
                                path = NULL,
                                source = NULL,
                                schema = NULL,
                                ...) {
  # Deprecated alias: emit the deprecation warning under the old name, then
  # forward every argument, in the same positional order, to createTable().
  .Deprecated("createTable", old = "createExternalTable")
  createTable(tableName, path, source, schema, ...)
}
|
||||
|
||||
#' Creates a table based on the dataset in a data source
|
||||
#'
|
||||
#' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with
|
||||
|
@ -130,6 +159,31 @@ clearCache <- function() {
|
|||
invisible(callJMethod(catalog, "clearCache"))
|
||||
}
|
||||
|
||||
#' (Deprecated) Drop Temporary Table
|
||||
#'
|
||||
#' Drops the temporary table with the given table name in the catalog.
|
||||
#' If the table has been cached/persisted before, it's also unpersisted.
|
||||
#'
|
||||
#' @param tableName The name of the SparkSQL table to be dropped.
|
||||
#' @seealso \link{dropTempView}
|
||||
#' @rdname dropTempTable-deprecated
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' sparkR.session()
|
||||
#' df <- read.df(path, "parquet")
|
||||
#' createOrReplaceTempView(df, "table")
|
||||
#' dropTempTable("table")
|
||||
#' }
|
||||
#' @name dropTempTable
|
||||
#' @note dropTempTable since 1.4.0
|
||||
dropTempTable <- function(tableName) {
  # Deprecated alias of dropTempView(): warn under the old name first.
  .Deprecated("dropTempView", old = "dropTempTable")

  # Validate with is.character() rather than comparing class() to a string.
  # class() can return several class names for one object, which would make
  # the `if` condition longer than one element (an error in recent R versions).
  if (!is.character(tableName)) {
    stop("tableName must be a string.")
  }

  # Delegate the actual drop to the replacement API and return its result.
  dropTempView(tableName)
}
|
||||
|
||||
#' Drops the temporary view with the given view name in the catalog.
|
||||
#'
|
||||
#' Drops the temporary view with the given view name in the catalog.
|
||||
|
|
|
@ -528,6 +528,9 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
|
|||
#' @rdname printSchema
|
||||
setGeneric("printSchema", function(x) { standardGeneric("printSchema") })
|
||||
|
||||
#' @rdname registerTempTable-deprecated
|
||||
# S4 generic for the deprecated registerTempTable(); dispatches on `x` and `tableName`.
setGeneric("registerTempTable",
           function(x, tableName) {
             standardGeneric("registerTempTable")
           })
|
||||
|
||||
#' @rdname rename
|
||||
setGeneric("rename", function(x, ...) { standardGeneric("rename") })
|
||||
|
||||
|
|
|
@ -88,6 +88,49 @@ sparkR.stop <- function() {
|
|||
sparkR.session.stop()
|
||||
}
|
||||
|
||||
#' (Deprecated) Initialize a new Spark Context
|
||||
#'
|
||||
#' This function initializes a new SparkContext.
|
||||
#'
|
||||
#' @param master The Spark master URL
|
||||
#' @param appName Application name to register with cluster manager
|
||||
#' @param sparkHome Spark Home directory
|
||||
#' @param sparkEnvir Named list of environment variables to set on worker nodes
|
||||
#' @param sparkExecutorEnv Named list of environment variables to be used when launching executors
|
||||
#' @param sparkJars Character vector of jar files to pass to the worker nodes
|
||||
#' @param sparkPackages Character vector of package coordinates
|
||||
#' @seealso \link{sparkR.session}
|
||||
#' @rdname sparkR.init-deprecated
|
||||
#' @examples
|
||||
#'\dontrun{
|
||||
#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark")
|
||||
#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark",
|
||||
#' list(spark.executor.memory="1g"))
|
||||
#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
|
||||
#' list(spark.executor.memory="4g"),
|
||||
#' list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"),
|
||||
#' c("one.jar", "two.jar", "three.jar"),
|
||||
#' c("com.databricks:spark-avro_2.11:2.0.1"))
|
||||
#'}
|
||||
#' @note sparkR.init since 1.4.0
|
||||
sparkR.init <- function(master = "",
                        appName = "SparkR",
                        sparkHome = Sys.getenv("SPARK_HOME"),
                        sparkEnvir = list(),
                        sparkExecutorEnv = list(),
                        sparkJars = "",
                        sparkPackages = "") {
  # Deprecated entry point: warn and point callers at sparkR.session.
  .Deprecated("sparkR.session")

  # Both named lists are passed through convertNamedListToEnv() before being
  # handed to the internal constructor. The conversions are kept inline so
  # they are evaluated only when sparkR.sparkContext() actually uses them.
  sparkR.sparkContext(
    master,
    appName,
    sparkHome,
    convertNamedListToEnv(sparkEnvir),
    convertNamedListToEnv(sparkExecutorEnv),
    sparkJars,
    sparkPackages
  )
}
|
||||
|
||||
# Internal function to handle creating the SparkContext.
|
||||
sparkR.sparkContext <- function(
|
||||
master = "",
|
||||
|
@ -229,6 +272,61 @@ sparkR.sparkContext <- function(
|
|||
sc
|
||||
}
|
||||
|
||||
#' (Deprecated) Initialize a new SQLContext
|
||||
#'
|
||||
#' This function creates a SparkContext from an existing JavaSparkContext and
|
||||
#' then uses it to initialize a new SQLContext
|
||||
#'
|
||||
#' Starting SparkR 2.0, a SparkSession is initialized and returned instead.
|
||||
#' This API is deprecated and kept for backward compatibility only.
|
||||
#'
|
||||
#' @param jsc The existing JavaSparkContext created with sparkR.init()
|
||||
#' @seealso \link{sparkR.session}
|
||||
#' @rdname sparkRSQL.init-deprecated
|
||||
#' @examples
|
||||
#'\dontrun{
|
||||
#' sc <- sparkR.init()
|
||||
#' sqlContext <- sparkRSQL.init(sc)
|
||||
#'}
|
||||
#' @note sparkRSQL.init since 1.4.0
|
||||
sparkRSQL.init <- function(jsc = NULL) {
  # Deprecated entry point: warn and point callers at sparkR.session.
  # `jsc` is accepted but not used by this function.
  .Deprecated("sparkR.session")

  if (exists(".sparkRsession", envir = .sparkREnv)) {
    # A session is already stored in the package environment: reuse it.
    get(".sparkRsession", envir = .sparkREnv)
  } else {
    # Default to without Hive support for backward compatibility.
    sparkR.session(enableHiveSupport = FALSE)
  }
}
|
||||
|
||||
#' (Deprecated) Initialize a new HiveContext
|
||||
#'
|
||||
#' This function creates a HiveContext from an existing JavaSparkContext
|
||||
#'
|
||||
#' Starting SparkR 2.0, a SparkSession is initialized and returned instead.
|
||||
#' This API is deprecated and kept for backward compatibility only.
|
||||
#'
|
||||
#' @param jsc The existing JavaSparkContext created with sparkR.init()
|
||||
#' @seealso \link{sparkR.session}
|
||||
#' @rdname sparkRHive.init-deprecated
|
||||
#' @examples
|
||||
#'\dontrun{
|
||||
#' sc <- sparkR.init()
|
||||
#' sqlContext <- sparkRHive.init(sc)
|
||||
#'}
|
||||
#' @note sparkRHive.init since 1.4.0
|
||||
sparkRHive.init <- function(jsc = NULL) {
  # Deprecated entry point: warn and point callers at sparkR.session.
  # `jsc` is accepted but not used by this function.
  .Deprecated("sparkR.session")

  if (exists(".sparkRsession", envir = .sparkREnv)) {
    # A session is already stored in the package environment: reuse it.
    get(".sparkRsession", envir = .sparkREnv)
  } else {
    # No stored session yet: create one with Hive support enabled.
    sparkR.session(enableHiveSupport = TRUE)
  }
}
|
||||
|
||||
#' Get the existing SparkSession or initialize a new SparkSession.
|
||||
#'
|
||||
#' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing
|
||||
|
|
|
@ -106,6 +106,15 @@ if (is_windows()) {
|
|||
Sys.setenv(TZ = "GMT")
|
||||
}
|
||||
|
||||
test_that("calling sparkRSQL.init returns existing SQL context", {
  # Two consecutive calls must hand back equal objects; the deprecation
  # warning raised by each call is muted.
  firstCall <- suppressWarnings(sparkRSQL.init(sc))
  secondCall <- suppressWarnings(sparkRSQL.init(sc))
  expect_equal(secondCall, firstCall)
})
|
||||
|
||||
test_that("calling sparkRSQL.init returns existing SparkSession", {
  # The deprecated initializer should return the session the test file holds.
  returned <- suppressWarnings(sparkRSQL.init(sc))
  expect_equal(returned, sparkSession)
})
|
||||
|
||||
test_that("calling sparkR.session returns existing SparkSession", {
  # Calling sparkR.session() again should return the session the test file holds.
  returned <- sparkR.session()
  expect_equal(returned, sparkSession)
})
|
||||
|
@ -656,10 +665,10 @@ test_that("test tableNames and tables", {
|
|||
expect_true("tableName" %in% colnames(tables()))
|
||||
expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables())))
|
||||
|
||||
createOrReplaceTempView(df, "table2")
|
||||
suppressWarnings(registerTempTable(df, "table2"))
|
||||
tables <- listTables()
|
||||
expect_equal(count(tables), count + 2)
|
||||
dropTempView("table1")
|
||||
suppressWarnings(dropTempTable("table1"))
|
||||
expect_true(dropTempView("table2"))
|
||||
|
||||
tables <- listTables()
|
||||
|
|
|
@ -28,8 +28,7 @@ Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.
|
|||
|
||||
## Upgrading from SparkR 2.4 to 3.0
|
||||
|
||||
- The deprecated methods `sparkR.init`, `sparkRSQL.init`, `sparkRHive.init` have been removed. Use `sparkR.session` instead.
|
||||
- The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `registerTempTable`, `createExternalTable`, and `dropTempTable` have been removed. Use `read.parquet`, `write.parquet`, `read.json`, `createOrReplaceTempView`, `createTable`, `dropTempView`, `union` instead.
|
||||
- The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `jsonRDD` have been removed. Use `read.parquet`, `write.parquet`, `read.json` instead.
|
||||
|
||||
## Upgrading from SparkR 2.3 to 2.4
|
||||
|
||||
|
|
Loading…
Reference in a new issue