diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 56eceb8343..fb879e4885 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -28,6 +28,7 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u
 
 # S3 methods exported
 export("sparkR.session")
+export("sparkR.init")
 export("sparkR.session.stop")
 export("sparkR.stop")
 export("sparkR.conf")
@@ -41,6 +42,9 @@ export("sparkR.callJStatic")
 
 export("install.spark")
 
+export("sparkRSQL.init",
+       "sparkRHive.init")
+
 # MLlib integration
 exportMethods("glm",
               "spark.glm",
@@ -148,6 +152,7 @@ exportMethods("arrange",
               "printSchema",
               "randomSplit",
               "rbind",
+              "registerTempTable",
               "rename",
               "repartition",
               "repartitionByRange",
@@ -431,8 +436,10 @@ export("as.DataFrame",
        "cacheTable",
        "clearCache",
        "createDataFrame",
+       "createExternalTable",
        "createTable",
        "currentDatabase",
+       "dropTempTable",
        "dropTempView",
        "listColumns",
        "listDatabases",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 593d3ca162..14d2076e88 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -521,6 +521,32 @@ setMethod("createOrReplaceTempView",
             invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
           })
 
+#' (Deprecated) Register Temporary Table
+#'
+#' Registers a SparkDataFrame as a temporary table in the SparkSession.
+#' @param x A SparkDataFrame
+#' @param tableName A character string containing the name of the table
+#'
+#' @seealso \link{createOrReplaceTempView}
+#' @rdname registerTempTable-deprecated
+#' @name registerTempTable
+#' @aliases registerTempTable,SparkDataFrame,character-method
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' registerTempTable(df, "json_df")
+#' new_df <- sql("SELECT * FROM json_df")
+#'}
+#' @note registerTempTable since 1.4.0
+setMethod("registerTempTable",
+          signature(x = "SparkDataFrame", tableName = "character"),
+          function(x, tableName) {
+            .Deprecated("createOrReplaceTempView")
+            invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName))
+          })
+
 #' insertInto
 #'
 #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R
index 7641f8a7a0..275737f804 100644
--- a/R/pkg/R/catalog.R
+++ b/R/pkg/R/catalog.R
@@ -17,6 +17,35 @@
 
 # catalog.R: SparkSession catalog functions
 
+#' (Deprecated) Create an external table
+#'
+#' Creates an external table based on the dataset in a data source and
+#' returns a SparkDataFrame associated with the external table.
+#'
+#' The data source is specified by the \code{source} and a set of options (...).
+#' If \code{source} is not specified, the default data source configured by
+#' "spark.sql.sources.default" will be used.
+#'
+#' @param tableName the name of the table.
+#' @param path the path of files to load.
+#' @param source the name of the external data source.
+#' @param schema the schema of the data, required by some data sources.
+#' @param ... additional argument(s) passed to the method.
+#' @return A SparkDataFrame.
+#' @rdname createExternalTable-deprecated
+#' @seealso \link{createTable}
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df <- createExternalTable("myjson", path = "path/to/json", source = "json", schema)
+#' }
+#' @name createExternalTable
+#' @note createExternalTable since 1.4.0
+createExternalTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ...) {
+  .Deprecated("createTable", old = "createExternalTable")
+  createTable(tableName, path, source, schema, ...)
+}
+
 #' Creates a table based on the dataset in a data source
 #'
 #' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with
@@ -130,6 +159,31 @@ clearCache <- function() {
   invisible(callJMethod(catalog, "clearCache"))
 }
 
+#' (Deprecated) Drop Temporary Table
+#'
+#' Drops the temporary table with the given table name in the catalog.
+#' If the table has been cached/persisted before, it is also unpersisted.
+#'
+#' @param tableName The name of the SparkSQL table to be dropped.
+#' @seealso \link{dropTempView}
+#' @rdname dropTempTable-deprecated
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' df <- read.df(path, "parquet")
+#' createOrReplaceTempView(df, "table")
+#' dropTempTable("table")
+#' }
+#' @name dropTempTable
+#' @note dropTempTable since 1.4.0
+dropTempTable <- function(tableName) {
+  .Deprecated("dropTempView", old = "dropTempTable")
+  if (!is.character(tableName)) {
+    stop("tableName must be a string.")
+  }
+  dropTempView(tableName)
+}
+
 #' Drops the temporary view with the given view name in the catalog.
 #'
 #' Drops the temporary view with the given view name in the catalog.
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 6f6ef6f363..d924b2af4c 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -528,6 +528,9 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
 #' @rdname printSchema
 setGeneric("printSchema", function(x) { standardGeneric("printSchema") })
 
+#' @rdname registerTempTable-deprecated
+setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") })
+
 #' @rdname rename
 setGeneric("rename", function(x, ...) { standardGeneric("rename") })
 
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index cdb5909378..cc8c92b8ab 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -88,6 +88,49 @@ sparkR.stop <- function() {
   sparkR.session.stop()
 }
 
+#' (Deprecated) Initialize a new Spark Context
+#'
+#' This function initializes a new SparkContext.
+#'
+#' @param master The Spark master URL
+#' @param appName Application name to register with the cluster manager
+#' @param sparkHome Spark Home directory
+#' @param sparkEnvir Named list of environment variables to set on worker nodes
+#' @param sparkExecutorEnv Named list of environment variables to be used when launching executors
+#' @param sparkJars Character vector of jar files to pass to the worker nodes
+#' @param sparkPackages Character vector of package coordinates
+#' @seealso \link{sparkR.session}
+#' @rdname sparkR.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark")
+#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark",
+#'                   list(spark.executor.memory = "1g"))
+#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
+#'                   list(spark.executor.memory = "4g"),
+#'                   list(LD_LIBRARY_PATH = "/directory of JVM libraries (libjvm.so) on workers/"),
+#'                   c("one.jar", "two.jar", "three.jar"),
+#'                   c("com.databricks:spark-avro_2.11:2.0.1"))
+#'}
+#' @note sparkR.init since 1.4.0
+sparkR.init <- function(
+  master = "",
+  appName = "SparkR",
+  sparkHome = Sys.getenv("SPARK_HOME"),
+  sparkEnvir = list(),
+  sparkExecutorEnv = list(),
+  sparkJars = "",
+  sparkPackages = "") {
+  .Deprecated("sparkR.session")
+  sparkR.sparkContext(master,
+                      appName,
+                      sparkHome,
+                      convertNamedListToEnv(sparkEnvir),
+                      convertNamedListToEnv(sparkExecutorEnv),
+                      sparkJars,
+                      sparkPackages)
+}
+
 # Internal function to handle creating the SparkContext.
 sparkR.sparkContext <- function(
   master = "",
@@ -229,6 +272,61 @@ sparkR.sparkContext <- function(
   sc
 }
 
+#' (Deprecated) Initialize a new SQLContext
+#'
+#' This function creates a SparkContext from an existing JavaSparkContext and
+#' then uses it to initialize a new SQLContext.
+#'
+#' Starting with SparkR 2.0, a SparkSession is initialized and returned instead.
+#' This API is deprecated and kept for backward compatibility only.
+#'
+#' @param jsc The existing JavaSparkContext created with sparkR.init()
+#' @seealso \link{sparkR.session}
+#' @rdname sparkRSQL.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#'}
+#' @note sparkRSQL.init since 1.4.0
+sparkRSQL.init <- function(jsc = NULL) {
+  .Deprecated("sparkR.session")
+
+  if (exists(".sparkRsession", envir = .sparkREnv)) {
+    return(get(".sparkRsession", envir = .sparkREnv))
+  }
+
+  # Default to no Hive support for backward compatibility.
+  sparkR.session(enableHiveSupport = FALSE)
+}
+
+#' (Deprecated) Initialize a new HiveContext
+#'
+#' This function creates a HiveContext from an existing JavaSparkContext.
+#'
+#' Starting with SparkR 2.0, a SparkSession is initialized and returned instead.
+#' This API is deprecated and kept for backward compatibility only.
+#'
+#' @param jsc The existing JavaSparkContext created with sparkR.init()
+#' @seealso \link{sparkR.session}
+#' @rdname sparkRHive.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRHive.init(sc)
+#'}
+#' @note sparkRHive.init since 1.4.0
+sparkRHive.init <- function(jsc = NULL) {
+  .Deprecated("sparkR.session")
+
+  if (exists(".sparkRsession", envir = .sparkREnv)) {
+    return(get(".sparkRsession", envir = .sparkREnv))
+  }
+
+  # Enable Hive support for backward compatibility.
+  sparkR.session(enableHiveSupport = TRUE)
+}
+
 #' Get the existing SparkSession or initialize a new SparkSession.
 #'
 #' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index a5527298b4..c892feb61d 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -106,6 +106,15 @@ if (is_windows()) {
   Sys.setenv(TZ = "GMT")
 }
 
+test_that("calling sparkRSQL.init returns existing SQL context", {
+  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
+  expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext)
+})
+
+test_that("calling sparkRSQL.init returns existing SparkSession", {
+  expect_equal(suppressWarnings(sparkRSQL.init(sc)), sparkSession)
+})
+
 test_that("calling sparkR.session returns existing SparkSession", {
   expect_equal(sparkR.session(), sparkSession)
 })
@@ -656,10 +665,10 @@ test_that("test tableNames and tables", {
   expect_true("tableName" %in% colnames(tables()))
   expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables())))
 
-  createOrReplaceTempView(df, "table2")
+  suppressWarnings(registerTempTable(df, "table2"))
   tables <- listTables()
   expect_equal(count(tables), count + 2)
-  dropTempView("table1")
+  suppressWarnings(dropTempTable("table1"))
   expect_true(dropTempView("table2"))
 
   tables <- listTables()
diff --git a/docs/sparkr-migration-guide.md b/docs/sparkr-migration-guide.md
index 6fbc4c03ae..32836cdac5 100644
--- a/docs/sparkr-migration-guide.md
+++ b/docs/sparkr-migration-guide.md
@@ -28,8 +28,7 @@ Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.
 
 ## Upgrading from SparkR 2.4 to 3.0
 
- - The deprecated methods `sparkR.init`, `sparkRSQL.init`, `sparkRHive.init` have been removed. Use `sparkR.session` instead.
- - The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `registerTempTable`, `createExternalTable`, and `dropTempTable` have been removed. Use `read.parquet`, `write.parquet`, `read.json`, `createOrReplaceTempView`, `createTable`, `dropTempView`, `union` instead.
+ - The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, and `jsonRDD` have been removed. Use `read.parquet`, `write.parquet`, and `read.json` instead.
 
 ## Upgrading from SparkR 2.3 to 2.4
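
For reviewers, a minimal sketch of the API surface this patch restores, assuming the patch is applied and SparkR is run against a local build; the master URL, app name, and the "cars" view name are illustrative. Each restored entry point raises a deprecation warning via `.Deprecated` and then delegates to its `sparkR.session`-era replacement, shown alongside:

```r
library(SparkR)

# Deprecated context initializers restored by this patch. sparkR.init()
# creates only a SparkContext; sparkRSQL.init() then creates (or returns
# the already existing) SparkSession. suppressWarnings() quiets the
# .Deprecated() warnings so the example output stays clean.
sc <- suppressWarnings(sparkR.init(master = "local[2]", appName = "SparkR"))
sqlContext <- suppressWarnings(sparkRSQL.init(sc))

# Restored DataFrame/catalog helpers next to their replacements.
df <- createDataFrame(mtcars)
suppressWarnings(registerTempTable(df, "cars"))   # deprecated
head(sql("SELECT mpg, cyl FROM cars LIMIT 3"))
suppressWarnings(dropTempTable("cars"))           # deprecated

createOrReplaceTempView(df, "cars")               # replacement
dropTempView("cars")                              # replacement

sparkR.session.stop()
```

Note the design choice in `sparkRSQL.init` and `sparkRHive.init`: both now return a SparkSession rather than a SQLContext or HiveContext, and an existing session is returned unchanged, so the two functions differ only in the `enableHiveSupport` value used when no session exists yet.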