[SPARK-11294][SPARKR] Improve R doc for read.df, write.df, saveAsTable
Add examples for read.df, write.df; fix grouping for read.df, loadDF; fix formatting and text truncation for write.df, saveAsTable. Several text issues: ![image](https://cloud.githubusercontent.com/assets/8969467/10708590/1303a44e-79c3-11e5-854f-3a2e16854cd7.png) - text collapsed into a single paragraph - text truncated at 2 places, e.g. "overwrite: Existing data is expected to be overwritten by the contents of error:" shivaram Author: felixcheung <felixcheung_m@hotmail.com> Closes #9261 from felixcheung/rdocreadwritedf.
This commit is contained in:
parent
2462dbcce8
commit
5e45812501
|
@ -1572,18 +1572,17 @@ setMethod("except",
|
||||||
#' spark.sql.sources.default will be used.
|
#' spark.sql.sources.default will be used.
|
||||||
#'
|
#'
|
||||||
#' Additionally, mode is used to specify the behavior of the save operation when
|
#' Additionally, mode is used to specify the behavior of the save operation when
|
||||||
#' data already exists in the data source. There are four modes:
|
#' data already exists in the data source. There are four modes: \cr
|
||||||
#' append: Contents of this DataFrame are expected to be appended to existing data.
|
#' append: Contents of this DataFrame are expected to be appended to existing data. \cr
|
||||||
#' overwrite: Existing data is expected to be overwritten by the contents of
|
#' overwrite: Existing data is expected to be overwritten by the contents of this DataFrame. \cr
|
||||||
# this DataFrame.
|
#' error: An exception is expected to be thrown. \cr
|
||||||
#' error: An exception is expected to be thrown.
|
|
||||||
#' ignore: The save operation is expected to not save the contents of the DataFrame
|
#' ignore: The save operation is expected to not save the contents of the DataFrame
|
||||||
# and to not change the existing data.
|
#' and to not change the existing data. \cr
|
||||||
#'
|
#'
|
||||||
#' @param df A SparkSQL DataFrame
|
#' @param df A SparkSQL DataFrame
|
||||||
#' @param path A name for the table
|
#' @param path The path where the contents of the DataFrame are saved
|
||||||
#' @param source A name for external data source
|
#' @param source A name for external data source
|
||||||
#' @param mode One of 'append', 'overwrite', 'error', 'ignore'
|
#' @param mode The save mode; one of 'append', 'overwrite', 'error', 'ignore'
|
||||||
#'
|
#'
|
||||||
#' @rdname write.df
|
#' @rdname write.df
|
||||||
#' @name write.df
|
#' @name write.df
|
||||||
|
@ -1596,6 +1595,7 @@ setMethod("except",
|
||||||
#' path <- "path/to/file.json"
|
#' path <- "path/to/file.json"
|
||||||
#' df <- jsonFile(sqlContext, path)
|
#' df <- jsonFile(sqlContext, path)
|
||||||
#' write.df(df, "myfile", "parquet", "overwrite")
|
#' write.df(df, "myfile", "parquet", "overwrite")
|
||||||
|
#' saveDF(df, "path/to/file2.parquet", "parquet", mode = "overwrite", mergeSchema = "true")
|
||||||
#' }
|
#' }
|
||||||
setMethod("write.df",
|
setMethod("write.df",
|
||||||
signature(df = "DataFrame", path = "character"),
|
signature(df = "DataFrame", path = "character"),
|
||||||
|
@ -1637,18 +1637,17 @@ setMethod("saveDF",
|
||||||
#' spark.sql.sources.default will be used.
|
#' spark.sql.sources.default will be used.
|
||||||
#'
|
#'
|
||||||
#' Additionally, mode is used to specify the behavior of the save operation when
|
#' Additionally, mode is used to specify the behavior of the save operation when
|
||||||
#' data already exists in the data source. There are four modes:
|
#' data already exists in the data source. There are four modes: \cr
|
||||||
#' append: Contents of this DataFrame are expected to be appended to existing data.
|
#' append: Contents of this DataFrame are expected to be appended to existing data. \cr
|
||||||
#' overwrite: Existing data is expected to be overwritten by the contents of
|
#' overwrite: Existing data is expected to be overwritten by the contents of this DataFrame. \cr
|
||||||
# this DataFrame.
|
#' error: An exception is expected to be thrown. \cr
|
||||||
#' error: An exception is expected to be thrown.
|
|
||||||
#' ignore: The save operation is expected to not save the contents of the DataFrame
|
#' ignore: The save operation is expected to not save the contents of the DataFrame
|
||||||
# and to not change the existing data.
|
#' and to not change the existing data. \cr
|
||||||
#'
|
#'
|
||||||
#' @param df A SparkSQL DataFrame
|
#' @param df A SparkSQL DataFrame
|
||||||
#' @param tableName A name for the table
|
#' @param tableName A name for the table
|
||||||
#' @param source A name for external data source
|
#' @param source A name for external data source
|
||||||
#' @param mode One of 'append', 'overwrite', 'error', 'ignore'
|
#' @param mode The save mode; one of 'append', 'overwrite', 'error', 'ignore'
|
||||||
#'
|
#'
|
||||||
#' @rdname saveAsTable
|
#' @rdname saveAsTable
|
||||||
#' @name saveAsTable
|
#' @name saveAsTable
|
||||||
|
|
|
@ -452,14 +452,21 @@ dropTempTable <- function(sqlContext, tableName) {
|
||||||
#'
|
#'
|
||||||
#' @param sqlContext SQLContext to use
|
#' @param sqlContext SQLContext to use
|
||||||
#' @param path The path of files to load
|
#' @param path The path of files to load
|
||||||
#' @param source the name of external data source
|
#' @param source The name of external data source
|
||||||
|
#' @param schema The data schema defined in structType
|
||||||
#' @return DataFrame
|
#' @return DataFrame
|
||||||
|
#' @rdname read.df
|
||||||
|
#' @name read.df
|
||||||
#' @export
|
#' @export
|
||||||
#' @examples
|
#' @examples
|
||||||
#'\dontrun{
|
#'\dontrun{
|
||||||
#' sc <- sparkR.init()
|
#' sc <- sparkR.init()
|
||||||
#' sqlContext <- sparkRSQL.init(sc)
|
#' sqlContext <- sparkRSQL.init(sc)
|
||||||
#' df <- read.df(sqlContext, "path/to/file.json", source = "json")
|
#' df1 <- read.df(sqlContext, "path/to/file.json", source = "json")
|
||||||
|
#' schema <- structType(structField("name", "string"),
|
||||||
|
#' structField("info", "map<string,double>"))
|
||||||
|
#' df2 <- read.df(sqlContext, "path/to/file.json", "json", schema)
|
||||||
|
#' df3 <- loadDF(sqlContext, "data/test_table", "parquet", mergeSchema = "true")
|
||||||
#' }
|
#' }
|
||||||
|
|
||||||
read.df <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
|
read.df <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
|
||||||
|
@ -482,9 +489,8 @@ read.df <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...)
|
||||||
dataFrame(sdf)
|
dataFrame(sdf)
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @aliases loadDF
|
#' @rdname read.df
|
||||||
#' @export
|
#' @name loadDF
|
||||||
|
|
||||||
loadDF <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
|
loadDF <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
|
||||||
read.df(sqlContext, path, source, schema, ...)
|
read.df(sqlContext, path, source, schema, ...)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue