[SPARK-25821][SQL] Remove SQLContext methods deprecated in 1.4

## What changes were proposed in this pull request?

Remove the `SQLContext` methods deprecated in 1.4 (`parquetFile`, `jsonFile`, `jsonRDD`, `load`, and `jdbc`), the `applySchema` variants deprecated in 1.3, and the corresponding deprecated SparkR functions (`jsonFile`, `parquetFile`, `jsonRDD`).
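
For users migrating off the removed methods, here is a minimal, illustrative sketch of the replacement `DataFrameReader` calls (the `spark` session, paths, and JDBC settings below are placeholders, not part of this patch):

```scala
import java.util.Properties

import org.apache.spark.sql.SparkSession

object SQLContextMigrationSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("migration-sketch").getOrCreate()

    // Removed: sqlContext.parquetFile("events.parquet")
    val parquetDF = spark.read.parquet("events.parquet")

    // Removed: sqlContext.jsonFile("events.json")
    val jsonDF = spark.read.json("events.json")

    // Removed: sqlContext.load("events.parquet")  -- uses the default data source
    val loadedDF = spark.read.load("events.parquet")

    // Removed: sqlContext.jdbc("jdbc:postgresql:db", "events")
    val jdbcDF = spark.read.jdbc("jdbc:postgresql:db", "events", new Properties)

    println(parquetDF.schema)
    println(jsonDF.count() + loadedDF.count() + jdbcDF.count())

    spark.stop()
  }
}
```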

## How was this patch tested?

Existing tests.

Closes #22815 from srowen/SPARK-25821.

Authored-by: Sean Owen <sean.owen@databricks.com>
Signed-off-by: Sean Owen <sean.owen@databricks.com>
Sean Owen 2018-10-26 16:49:48 -05:00
parent d325ffbf3a
commit ca545f7941
5 changed files with 8 additions and 369 deletions

R/pkg/NAMESPACE

@@ -420,13 +420,11 @@ export("as.DataFrame",
        "currentDatabase",
        "dropTempTable",
        "dropTempView",
-       "jsonFile",
        "listColumns",
        "listDatabases",
        "listFunctions",
        "listTables",
        "loadDF",
-       "parquetFile",
        "read.df",
        "read.jdbc",
        "read.json",

R/pkg/R/SQLContext.R

@@ -343,7 +343,6 @@ setMethod("toDF", signature(x = "RDD"),
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' df <- read.json(path, multiLine = TRUE)
-#' df <- jsonFile(path)
 #' }
 #' @name read.json
 #' @method read.json default
@@ -363,51 +362,6 @@ read.json <- function(x, ...) {
   dispatchFunc("read.json(path)", x, ...)
 }
 
-#' @rdname read.json
-#' @name jsonFile
-#' @method jsonFile default
-#' @note jsonFile since 1.4.0
-jsonFile.default <- function(path) {
-  .Deprecated("read.json")
-  read.json(path)
-}
-
-jsonFile <- function(x, ...) {
-  dispatchFunc("jsonFile(path)", x, ...)
-}
-
-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a SparkDataFrame.
-#'
-#' @param sqlContext SQLContext to use
-#' @param rdd An RDD of JSON string
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The ratio of sampling used to infer the schema
-#' @return A SparkDataFrame
-#' @noRd
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' rdd <- textFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlContext, rdd)
-#'}
-
-# TODO: remove - this method is no longer exported
-# TODO: support schema
-jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
-  .Deprecated("read.json")
-  rdd <- serializeToString(rdd)
-  if (is.null(schema)) {
-    read <- callJMethod(sqlContext, "read")
-    # samplingRatio is deprecated
-    sdf <- callJMethod(read, "json", callJMethod(getJRDD(rdd), "rdd"))
-    dataFrame(sdf)
-  } else {
-    stop("not implemented")
-  }
-}
-
 #' Create a SparkDataFrame from an ORC file.
 #'
 #' Loads an ORC file, returning the result as a SparkDataFrame.
@@ -434,6 +388,7 @@ read.orc <- function(path, ...) {
 #' Loads a Parquet file, returning the result as a SparkDataFrame.
 #'
 #' @param path path of file to read. A vector of multiple paths is allowed.
+#' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.parquet
 #' @name read.parquet
@@ -454,20 +409,6 @@ read.parquet <- function(x, ...) {
   dispatchFunc("read.parquet(...)", x, ...)
 }
 
-#' @param ... argument(s) passed to the method.
-#' @rdname read.parquet
-#' @name parquetFile
-#' @method parquetFile default
-#' @note parquetFile since 1.4.0
-parquetFile.default <- function(...) {
-  .Deprecated("read.parquet")
-  read.parquet(unlist(list(...)))
-}
-
-parquetFile <- function(x, ...) {
-  dispatchFunc("parquetFile(...)", x, ...)
-}
-
 #' Create a SparkDataFrame from a text file.
 #'
 #' Loads text files and returns a SparkDataFrame whose schema starts with
R/pkg/tests/fulltests/test_sparkSQL.R

@@ -628,14 +628,10 @@ test_that("read/write json files", {
   jsonPath3 <- tempfile(pattern = "jsonPath3", fileext = ".json")
   write.json(df, jsonPath3)
 
-  # Test read.json()/jsonFile() works with multiple input paths
+  # Test read.json() works with multiple input paths
   jsonDF1 <- read.json(c(jsonPath2, jsonPath3))
   expect_is(jsonDF1, "SparkDataFrame")
   expect_equal(count(jsonDF1), 6)
-  # Suppress warnings because jsonFile is deprecated
-  jsonDF2 <- suppressWarnings(jsonFile(c(jsonPath2, jsonPath3)))
-  expect_is(jsonDF2, "SparkDataFrame")
-  expect_equal(count(jsonDF2), 6)
 
   unlink(jsonPath2)
   unlink(jsonPath3)
@@ -655,20 +651,6 @@ test_that("read/write json files - compression option", {
   unlink(jsonPath)
 })
 
-test_that("jsonRDD() on a RDD with json string", {
-  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
-  rdd <- parallelize(sc, mockLines)
-  expect_equal(countRDD(rdd), 3)
-  df <- suppressWarnings(jsonRDD(sqlContext, rdd))
-  expect_is(df, "SparkDataFrame")
-  expect_equal(count(df), 3)
-
-  rdd2 <- flatMap(rdd, function(x) c(x, x))
-  df <- suppressWarnings(jsonRDD(sqlContext, rdd2))
-  expect_is(df, "SparkDataFrame")
-  expect_equal(count(df), 6)
-})
-
 test_that("test tableNames and tables", {
   count <- count(listTables())
@@ -2658,7 +2640,7 @@ test_that("read/write Parquet files", {
   expect_is(df2, "SparkDataFrame")
   expect_equal(count(df2), 3)
 
-  # Test write.parquet/saveAsParquetFile and read.parquet/parquetFile
+  # Test write.parquet/saveAsParquetFile and read.parquet
   parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet")
   write.parquet(df, parquetPath2)
   parquetPath3 <- tempfile(pattern = "parquetPath3", fileext = ".parquet")
@@ -2666,9 +2648,6 @@ test_that("read/write Parquet files", {
   parquetDF <- read.parquet(c(parquetPath2, parquetPath3))
   expect_is(parquetDF, "SparkDataFrame")
   expect_equal(count(parquetDF), count(df) * 2)
-  parquetDF2 <- suppressWarnings(parquetFile(parquetPath2, parquetPath3))
-  expect_is(parquetDF2, "SparkDataFrame")
-  expect_equal(count(parquetDF2), count(df) * 2)
 
   # Test if varargs works with variables
   saveMode <- "overwrite"

docs/sparkr.md

@@ -709,8 +709,12 @@ You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-ma
 
 ## Upgrading to SparkR 2.3.1 and above
 
-- In SparkR 2.3.0 and earlier, the `start` parameter of the `substr` method was wrongly subtracted by one and considered as 0-based. This can lead to inconsistent substring results and also does not match the behaviour of `substr` in R. In version 2.3.1 and later, it has been fixed so the `start` parameter of the `substr` method is now 1-base. As an example, `substr(lit('abcdef'), 2, 4)` would result in `abc` in SparkR 2.3.0, and the result would be `bcd` in SparkR 2.3.1.
+- In SparkR 2.3.0 and earlier, the `start` parameter of the `substr` method was wrongly subtracted by one and considered as 0-based. This can lead to inconsistent substring results and also does not match the behaviour of `substr` in R. In version 2.3.1 and later, it has been fixed so the `start` parameter of the `substr` method is now 1-based. As an example, `substr(lit('abcdef'), 2, 4)` would result in `abc` in SparkR 2.3.0, and the result would be `bcd` in SparkR 2.3.1.
 
 ## Upgrading to SparkR 2.4.0
 
 - Previously, we did not check the validity of the size of the last layer in `spark.mlp`. For example, if the training data only had two labels, a `layers` param like `c(1, 3)` did not cause an error; now it does.
 
+## Upgrading to SparkR 3.0.0
+
+- The deprecated methods `parquetFile`, `jsonRDD` and `jsonFile` in `SQLContext` have been removed. Use `read.parquet` and `read.json`.

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

@@ -755,289 +755,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
     sessionState.catalog.listTables(databaseName).map(_.table).toArray
   }
 
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-  // Deprecated methods
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
-    createDataFrame(rowRDD, schema)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
-    createDataFrame(rowRDD, schema)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
-    createDataFrame(rdd, beanClass)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
-    createDataFrame(rdd, beanClass)
-  }
-
-  /**
-   * Loads a Parquet file, returning the result as a `DataFrame`. This function returns an empty
-   * `DataFrame` if no paths are passed in.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().parquet()`.
-   */
-  @deprecated("Use read.parquet() instead.", "1.4.0")
-  @scala.annotation.varargs
-  def parquetFile(paths: String*): DataFrame = {
-    if (paths.isEmpty) {
-      emptyDataFrame
-    } else {
-      read.parquet(paths : _*)
-    }
-  }
-
-  /**
-   * Loads a JSON file (one object per line), returning the result as a `DataFrame`.
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonFile(path: String): DataFrame = {
-    read.json(path)
-  }
-
-  /**
-   * Loads a JSON file (one object per line) and applies the given schema,
-   * returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonFile(path: String, schema: StructType): DataFrame = {
-    read.schema(schema).json(path)
-  }
-
-  /**
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonFile(path: String, samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(path)
-  }
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
-   * `DataFrame`.
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
-   * `DataFrame`.
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record) and applies the given
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
-    read.schema(schema).json(json)
-  }
-
-  /**
-   * Loads a JavaRDD[String] storing JSON objects (one object per record) and applies the given
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
-    read.schema(schema).json(json)
-  }
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record) inferring the
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(json)
-  }
-
-  /**
-   * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(json)
-  }
-
-  /**
-   * Returns the dataset stored at path as a DataFrame,
-   * using the default data source configured by spark.sql.sources.default.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().load(path)`.
-   */
-  @deprecated("Use read.load(path) instead.", "1.4.0")
-  def load(path: String): DataFrame = {
-    read.load(path)
-  }
-
-  /**
-   * Returns the dataset stored at path as a DataFrame, using the given data source.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
-   */
-  @deprecated("Use read.format(source).load(path) instead.", "1.4.0")
-  def load(path: String, source: String): DataFrame = {
-    read.format(source).load(path)
-  }
-
-  /**
-   * (Java-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0")
-  def load(source: String, options: java.util.Map[String, String]): DataFrame = {
-    read.options(options).format(source).load()
-  }
-
-  /**
-   * (Scala-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0")
-  def load(source: String, options: Map[String, String]): DataFrame = {
-    read.options(options).format(source).load()
-  }
-
-  /**
-   * (Java-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by
-   *             `read().format(source).schema(schema).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0")
-  def load(
-      source: String,
-      schema: StructType,
-      options: java.util.Map[String, String]): DataFrame = {
-    read.format(source).schema(schema).options(options).load()
-  }
-
-  /**
-   * (Scala-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by
-   *             `read().format(source).schema(schema).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0")
-  def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = {
-    read.format(source).schema(schema).options(options).load()
-  }
-
-  /**
-   * Construct a `DataFrame` representing the database table accessible via the JDBC URL
-   * `url`, named `table`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
-   */
-  @deprecated("Use read.jdbc() instead.", "1.4.0")
-  def jdbc(url: String, table: String): DataFrame = {
-    read.jdbc(url, table, new Properties)
-  }
-
-  /**
-   * Construct a `DataFrame` representing the database table accessible via the JDBC URL
-   * `url`, named `table`. Partitions of the table will be retrieved in parallel based on the
-   * parameters passed to this function.
-   *
-   * @param columnName the name of a column of integral type that will be used for partitioning.
-   * @param lowerBound the minimum value of `columnName` used to decide partition stride
-   * @param upperBound the maximum value of `columnName` used to decide partition stride
-   * @param numPartitions the number of partitions. The range `minValue`-`maxValue` will be split
-   *                      evenly into this many partitions
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
-   */
-  @deprecated("Use read.jdbc() instead.", "1.4.0")
-  def jdbc(
-      url: String,
-      table: String,
-      columnName: String,
-      lowerBound: Long,
-      upperBound: Long,
-      numPartitions: Int): DataFrame = {
-    read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties)
-  }
-
-  /**
-   * Construct a `DataFrame` representing the database table accessible via the JDBC URL
-   * `url`, named `table`. The `theParts` parameter gives a list of expressions
-   * suitable for inclusion in WHERE clauses; each one defines one partition
-   * of the `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
-   */
-  @deprecated("Use read.jdbc() instead.", "1.4.0")
-  def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {
-    read.jdbc(url, table, theParts, new Properties)
-  }
 }
 
 /**
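
Each removed method above simply delegates to the `DataFrameReader` API, so its body doubles as a migration recipe. A hedged sketch of the two less obvious translations, the partitioned `jdbc` overload and the schema/options `load` overload (the session, URL, table, bounds, and file names below are invented for illustration):

```scala
// spark-shell style sketch; assumes a live Spark session and reachable database.
import java.util.Properties

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

val spark = SparkSession.builder().appName("reader-sketch").getOrCreate()

// Removed: sqlContext.jdbc(url, "orders", "order_id", 0L, 1000000L, 10)
// `order_id` must be an integral column; the [0, 1000000] range is split
// evenly into 10 partition strides that are fetched in parallel.
val orders = spark.read.jdbc(
  "jdbc:postgresql://db-host/shop", "orders",
  "order_id", 0L, 1000000L, 10, new Properties)

// Removed: sqlContext.load("csv", schema, options)
val schema = StructType(Seq(
  StructField("id", LongType),
  StructField("name", StringType)))
val options = Map("header" -> "true")
val people = spark.read.format("csv").schema(schema).options(options).load("people.csv")

orders.show()
people.show()
```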