[SPARK-25821][SQL] Remove SQLContext methods deprecated in 1.4
## What changes were proposed in this pull request?

Remove SQLContext methods deprecated in 1.4.

## How was this patch tested?

Existing tests.

Closes #22815 from srowen/SPARK-25821.

Authored-by: Sean Owen <sean.owen@databricks.com>
Signed-off-by: Sean Owen <sean.owen@databricks.com>
Parent: d325ffbf3a
Commit: ca545f7941
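Beyond the 1.4-era reader methods, the diff below also sweeps out the `applySchema` overloads deprecated since 1.3.0. A minimal sketch of the `createDataFrame` replacement, assuming a local session; the schema and sample rows here are illustrative, not part of the change:

```scala
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

object ApplySchemaMigration {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()

    // Before (removed): sqlContext.applySchema(rowRDD, schema)
    // After: createDataFrame takes the same RDD[Row] + StructType arguments.
    val schema = StructType(Seq(
      StructField("name", StringType, nullable = true),
      StructField("age", IntegerType, nullable = true)))
    val rowRDD = spark.sparkContext.parallelize(Seq(Row("alice", 30), Row("bob", 25)))
    val df = spark.createDataFrame(rowRDD, schema)
    df.show()

    spark.stop()
  }
}
```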
@@ -420,13 +420,11 @@ export("as.DataFrame",
        "currentDatabase",
        "dropTempTable",
        "dropTempView",
-       "jsonFile",
        "listColumns",
        "listDatabases",
        "listFunctions",
        "listTables",
        "loadDF",
-       "parquetFile",
        "read.df",
        "read.jdbc",
        "read.json",
@@ -343,7 +343,6 @@ setMethod("toDF", signature(x = "RDD"),
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' df <- read.json(path, multiLine = TRUE)
-#' df <- jsonFile(path)
 #' }
 #' @name read.json
 #' @method read.json default
@@ -363,51 +362,6 @@ read.json <- function(x, ...) {
   dispatchFunc("read.json(path)", x, ...)
 }
 
-#' @rdname read.json
-#' @name jsonFile
-#' @method jsonFile default
-#' @note jsonFile since 1.4.0
-jsonFile.default <- function(path) {
-  .Deprecated("read.json")
-  read.json(path)
-}
-
-jsonFile <- function(x, ...) {
-  dispatchFunc("jsonFile(path)", x, ...)
-}
-
-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a SparkDataFrame.
-#'
-#' @param sqlContext SQLContext to use
-#' @param rdd An RDD of JSON string
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The ratio of simpling used to infer the schema
-#' @return A SparkDataFrame
-#' @noRd
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' rdd <- texFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlContext, rdd)
-#'}
-
-# TODO: remove - this method is no longer exported
-# TODO: support schema
-jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
-  .Deprecated("read.json")
-  rdd <- serializeToString(rdd)
-  if (is.null(schema)) {
-    read <- callJMethod(sqlContext, "read")
-    # samplingRatio is deprecated
-    sdf <- callJMethod(read, "json", callJMethod(getJRDD(rdd), "rdd"))
-    dataFrame(sdf)
-  } else {
-    stop("not implemented")
-  }
-}
-
 #' Create a SparkDataFrame from an ORC file.
 #'
 #' Loads an ORC file, returning the result as a SparkDataFrame.
@@ -434,6 +388,7 @@ read.orc <- function(path, ...) {
 #' Loads a Parquet file, returning the result as a SparkDataFrame.
 #'
 #' @param path path of file to read. A vector of multiple paths is allowed.
+#' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.parquet
 #' @name read.parquet
@@ -454,20 +409,6 @@ read.parquet <- function(x, ...) {
   dispatchFunc("read.parquet(...)", x, ...)
 }
 
-#' @param ... argument(s) passed to the method.
-#' @rdname read.parquet
-#' @name parquetFile
-#' @method parquetFile default
-#' @note parquetFile since 1.4.0
-parquetFile.default <- function(...) {
-  .Deprecated("read.parquet")
-  read.parquet(unlist(list(...)))
-}
-
-parquetFile <- function(x, ...) {
-  dispatchFunc("parquetFile(...)", x, ...)
-}
-
 #' Create a SparkDataFrame from a text file.
 #'
 #' Loads text files and returns a SparkDataFrame whose schema starts with
@@ -628,14 +628,10 @@ test_that("read/write json files", {
   jsonPath3 <- tempfile(pattern = "jsonPath3", fileext = ".json")
   write.json(df, jsonPath3)
 
-  # Test read.json()/jsonFile() works with multiple input paths
+  # Test read.json() works with multiple input paths
   jsonDF1 <- read.json(c(jsonPath2, jsonPath3))
   expect_is(jsonDF1, "SparkDataFrame")
   expect_equal(count(jsonDF1), 6)
-  # Suppress warnings because jsonFile is deprecated
-  jsonDF2 <- suppressWarnings(jsonFile(c(jsonPath2, jsonPath3)))
-  expect_is(jsonDF2, "SparkDataFrame")
-  expect_equal(count(jsonDF2), 6)
 
   unlink(jsonPath2)
   unlink(jsonPath3)
@@ -655,20 +651,6 @@ test_that("read/write json files - compression option", {
   unlink(jsonPath)
 })
 
-test_that("jsonRDD() on a RDD with json string", {
-  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
-  rdd <- parallelize(sc, mockLines)
-  expect_equal(countRDD(rdd), 3)
-  df <- suppressWarnings(jsonRDD(sqlContext, rdd))
-  expect_is(df, "SparkDataFrame")
-  expect_equal(count(df), 3)
-
-  rdd2 <- flatMap(rdd, function(x) c(x, x))
-  df <- suppressWarnings(jsonRDD(sqlContext, rdd2))
-  expect_is(df, "SparkDataFrame")
-  expect_equal(count(df), 6)
-})
-
 test_that("test tableNames and tables", {
   count <- count(listTables())
@@ -2658,7 +2640,7 @@ test_that("read/write Parquet files", {
   expect_is(df2, "SparkDataFrame")
   expect_equal(count(df2), 3)
 
-  # Test write.parquet/saveAsParquetFile and read.parquet/parquetFile
+  # Test write.parquet/saveAsParquetFile and read.parquet
   parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet")
   write.parquet(df, parquetPath2)
   parquetPath3 <- tempfile(pattern = "parquetPath3", fileext = ".parquet")
@@ -2666,9 +2648,6 @@ test_that("read/write Parquet files", {
   parquetDF <- read.parquet(c(parquetPath2, parquetPath3))
   expect_is(parquetDF, "SparkDataFrame")
   expect_equal(count(parquetDF), count(df) * 2)
-  parquetDF2 <- suppressWarnings(parquetFile(parquetPath2, parquetPath3))
-  expect_is(parquetDF2, "SparkDataFrame")
-  expect_equal(count(parquetDF2), count(df) * 2)
 
   # Test if varargs works with variables
   saveMode <- "overwrite"
@@ -709,8 +709,12 @@ You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-ma
 
 ## Upgrading to SparkR 2.3.1 and above
 
-- In SparkR 2.3.0 and earlier, the `start` parameter of `substr` method was wrongly subtracted by one and considered as 0-based. This can lead to inconsistent substring results and also does not match with the behaviour with `substr` in R. In version 2.3.1 and later, it has been fixed so the `start` parameter of `substr` method is now 1-base. As an example, `substr(lit('abcdef'), 2, 4))` would result to `abc` in SparkR 2.3.0, and the result would be `bcd` in SparkR 2.3.1.
+- In SparkR 2.3.0 and earlier, the `start` parameter of `substr` method was wrongly subtracted by one and considered as 0-based. This can lead to inconsistent substring results and also does not match with the behaviour with `substr` in R. In version 2.3.1 and later, it has been fixed so the `start` parameter of `substr` method is now 1-based. As an example, `substr(lit('abcdef'), 2, 4))` would result to `abc` in SparkR 2.3.0, and the result would be `bcd` in SparkR 2.3.1.
 
 ## Upgrading to SparkR 2.4.0
 
 - Previously, we don't check the validity of the size of the last layer in `spark.mlp`. For example, if the training data only has two labels, a `layers` param like `c(1, 3)` doesn't cause an error previously, now it does.
+
+## Upgrading to SparkR 3.0.0
+
+- The deprecated methods `parquetFile`, `jsonRDD` and `jsonFile` in `SQLContext` have been removed. Use `read.parquet` and `read.json`.
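To make the 3.0.0 note concrete, a minimal sketch of the replacements on the Scala side; the paths and sample records are hypothetical:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()
import spark.implicits._

// parquetFile(path) -> read.parquet(path)
val parquetDF = spark.read.parquet("events.parquet")

// jsonFile(path) -> read.json(path)
val jsonDF = spark.read.json("events.json")

// jsonRDD(rdd) -> read.json over a Dataset[String], one JSON object per element
val lines = Seq("""{"name": "alice"}""", """{"name": "bob"}""").toDS()
val fromLines = spark.read.json(lines)
```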
@@ -755,289 +755,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
     sessionState.catalog.listTables(databaseName).map(_.table).toArray
   }
 
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-  // Deprecated methods
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
-    createDataFrame(rowRDD, schema)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
-    createDataFrame(rowRDD, schema)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
-    createDataFrame(rdd, beanClass)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
-    createDataFrame(rdd, beanClass)
-  }
-
-  /**
-   * Loads a Parquet file, returning the result as a `DataFrame`. This function returns an empty
-   * `DataFrame` if no paths are passed in.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().parquet()`.
-   */
-  @deprecated("Use read.parquet() instead.", "1.4.0")
-  @scala.annotation.varargs
-  def parquetFile(paths: String*): DataFrame = {
-    if (paths.isEmpty) {
-      emptyDataFrame
-    } else {
-      read.parquet(paths : _*)
-    }
-  }
-
-  /**
-   * Loads a JSON file (one object per line), returning the result as a `DataFrame`.
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonFile(path: String): DataFrame = {
-    read.json(path)
-  }
-
-  /**
-   * Loads a JSON file (one object per line) and applies the given schema,
-   * returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonFile(path: String, schema: StructType): DataFrame = {
-    read.schema(schema).json(path)
-  }
-
-  /**
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonFile(path: String, samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(path)
-  }
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
-   * `DataFrame`.
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
-   * `DataFrame`.
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema,
-   * returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
-    read.schema(schema).json(json)
-  }
-
-  /**
-   * Loads an JavaRDD[String] storing JSON objects (one object per record) and applies the given
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
-    read.schema(schema).json(json)
-  }
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record) inferring the
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(json)
-  }
-
-  /**
-   * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(json)
-  }
-
-  /**
-   * Returns the dataset stored at path as a DataFrame,
-   * using the default data source configured by spark.sql.sources.default.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().load(path)`.
-   */
-  @deprecated("Use read.load(path) instead.", "1.4.0")
-  def load(path: String): DataFrame = {
-    read.load(path)
-  }
-
-  /**
-   * Returns the dataset stored at path as a DataFrame, using the given data source.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
-   */
-  @deprecated("Use read.format(source).load(path) instead.", "1.4.0")
-  def load(path: String, source: String): DataFrame = {
-    read.format(source).load(path)
-  }
-
-  /**
-   * (Java-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0")
-  def load(source: String, options: java.util.Map[String, String]): DataFrame = {
-    read.options(options).format(source).load()
-  }
-
-  /**
-   * (Scala-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0")
-  def load(source: String, options: Map[String, String]): DataFrame = {
-    read.options(options).format(source).load()
-  }
-
-  /**
-   * (Java-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by
-   *             `read().format(source).schema(schema).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0")
-  def load(
-      source: String,
-      schema: StructType,
-      options: java.util.Map[String, String]): DataFrame = {
-    read.format(source).schema(schema).options(options).load()
-  }
-
-  /**
-   * (Scala-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by
-   *             `read().format(source).schema(schema).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0")
-  def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = {
-    read.format(source).schema(schema).options(options).load()
-  }
-
-  /**
-   * Construct a `DataFrame` representing the database table accessible via JDBC URL
-   * url named table.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
-   */
-  @deprecated("Use read.jdbc() instead.", "1.4.0")
-  def jdbc(url: String, table: String): DataFrame = {
-    read.jdbc(url, table, new Properties)
-  }
-
-  /**
-   * Construct a `DataFrame` representing the database table accessible via JDBC URL
-   * url named table. Partitions of the table will be retrieved in parallel based on the parameters
-   * passed to this function.
-   *
-   * @param columnName the name of a column of integral type that will be used for partitioning.
-   * @param lowerBound the minimum value of `columnName` used to decide partition stride
-   * @param upperBound the maximum value of `columnName` used to decide partition stride
-   * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split
-   *                      evenly into this many partitions
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
-   */
-  @deprecated("Use read.jdbc() instead.", "1.4.0")
-  def jdbc(
-      url: String,
-      table: String,
-      columnName: String,
-      lowerBound: Long,
-      upperBound: Long,
-      numPartitions: Int): DataFrame = {
-    read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties)
-  }
-
-  /**
-   * Construct a `DataFrame` representing the database table accessible via JDBC URL
-   * url named table. The theParts parameter gives a list expressions
-   * suitable for inclusion in WHERE clauses; each one defines one partition
-   * of the `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
-   */
-  @deprecated("Use read.jdbc() instead.", "1.4.0")
-  def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {
-    read.jdbc(url, table, theParts, new Properties)
-  }
 }
 
 /**
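The removed `load` and `jdbc` overloads map onto `DataFrameReader` exactly as their deprecation messages say. A sketch of the replacements, assuming hypothetical source options and a hypothetical JDBC connection:

```scala
import java.util.Properties
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()

// load(path, source) -> read.format(source).load(path)
val csvDF = spark.read.format("csv").load("data.csv")

// load(source, options) -> read.format(source).options(options).load()
val optioned = spark.read
  .format("csv")
  .options(Map("header" -> "true", "inferSchema" -> "true"))
  .load("data.csv")

// jdbc(url, table, columnName, lowerBound, upperBound, numPartitions)
//   -> read.jdbc with the same partitioning arguments
val url = "jdbc:postgresql://localhost:5432/testdb" // hypothetical
val partitioned = spark.read.jdbc(url, "events", "id", 0L, 10000L, 4, new Properties())
```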