[SPARKR][DOCS] R code doc cleanup
## What changes were proposed in this pull request? I ran a full pass from A to Z and fixed the obvious duplications, improper grouping, etc. There are still more doc issues to be cleaned up. ## How was this patch tested? Manual tests. Author: Felix Cheung <felixcheung_m@hotmail.com> Closes #13798 from felixcheung/rdocseealso.
This commit is contained in:
parent
41e0ffb19f
commit
09f4ceaeb0
|
@ -463,6 +463,7 @@ setMethod("createOrReplaceTempView",
|
|||
})
|
||||
|
||||
#' (Deprecated) Register Temporary Table
|
||||
#'
|
||||
#' Registers a SparkDataFrame as a Temporary Table in the SQLContext
|
||||
#' @param x A SparkDataFrame
|
||||
#' @param tableName A character vector containing the name of the table
|
||||
|
@ -606,10 +607,10 @@ setMethod("unpersist",
|
|||
#'
|
||||
#' The following options for repartition are possible:
|
||||
#' \itemize{
|
||||
#' \item{"Option 1"} {Return a new SparkDataFrame partitioned by
|
||||
#' \item{1.} {Return a new SparkDataFrame partitioned by
|
||||
#' the given columns into `numPartitions`.}
|
||||
#' \item{"Option 2"} {Return a new SparkDataFrame that has exactly `numPartitions`.}
|
||||
#' \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given column(s),
|
||||
#' \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.}
|
||||
#' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
|
||||
#' using `spark.sql.shuffle.partitions` as number of partitions.}
|
||||
#'}
|
||||
#' @param x A SparkDataFrame
|
||||
|
@ -1053,7 +1054,7 @@ setMethod("limit",
|
|||
dataFrame(res)
|
||||
})
|
||||
|
||||
#' Take the first NUM rows of a SparkDataFrame and return a the results as a data.frame
|
||||
#' Take the first NUM rows of a SparkDataFrame and return the results as an R data.frame
|
||||
#'
|
||||
#' @family SparkDataFrame functions
|
||||
#' @rdname take
|
||||
|
@ -1076,7 +1077,7 @@ setMethod("take",
|
|||
|
||||
#' Head
|
||||
#'
|
||||
#' Return the first NUM rows of a SparkDataFrame as a data.frame. If NUM is NULL,
|
||||
#' Return the first NUM rows of a SparkDataFrame as an R data.frame. If NUM is NULL,
|
||||
#' then head() returns the first 6 rows in keeping with the current data.frame
|
||||
#' convention in R.
|
||||
#'
|
||||
|
@ -1157,7 +1158,6 @@ setMethod("toRDD",
|
|||
#'
|
||||
#' @param x a SparkDataFrame
|
||||
#' @return a GroupedData
|
||||
#' @seealso GroupedData
|
||||
#' @family SparkDataFrame functions
|
||||
#' @rdname groupBy
|
||||
#' @name groupBy
|
||||
|
@ -1242,9 +1242,9 @@ dapplyInternal <- function(x, func, schema) {
|
|||
#'
|
||||
#' @param x A SparkDataFrame
|
||||
#' @param func A function to be applied to each partition of the SparkDataFrame.
|
||||
#' func should have only one parameter, to which a data.frame corresponds
|
||||
#' func should have only one parameter, to which an R data.frame
|
||||
#' corresponding to each partition will be passed.
|
||||
#' The output of func should be a data.frame.
|
||||
#' The output of func should be an R data.frame.
|
||||
#' @param schema The schema of the resulting SparkDataFrame after the function is applied.
|
||||
#' It must match the output of func.
|
||||
#' @family SparkDataFrame functions
|
||||
|
@ -1291,9 +1291,9 @@ setMethod("dapply",
|
|||
#'
|
||||
#' @param x A SparkDataFrame
|
||||
#' @param func A function to be applied to each partition of the SparkDataFrame.
|
||||
#' func should have only one parameter, to which a data.frame corresponds
|
||||
#' func should have only one parameter, to which an R data.frame
|
||||
#' corresponding to each partition will be passed.
|
||||
#' The output of func should be a data.frame.
|
||||
#' The output of func should be an R data.frame.
|
||||
#' @family SparkDataFrame functions
|
||||
#' @rdname dapplyCollect
|
||||
#' @name dapplyCollect
|
||||
|
@ -1641,7 +1641,6 @@ setMethod("select", signature(x = "SparkDataFrame", col = "character"),
|
|||
}
|
||||
})
|
||||
|
||||
#' @family SparkDataFrame functions
|
||||
#' @rdname select
|
||||
#' @export
|
||||
#' @note select(SparkDataFrame, Column) since 1.4.0
|
||||
|
@ -1654,7 +1653,6 @@ setMethod("select", signature(x = "SparkDataFrame", col = "Column"),
|
|||
dataFrame(sdf)
|
||||
})
|
||||
|
||||
#' @family SparkDataFrame functions
|
||||
#' @rdname select
|
||||
#' @export
|
||||
#' @note select(SparkDataFrame, list) since 1.4.0
|
||||
|
@ -2001,7 +1999,6 @@ setMethod("filter",
|
|||
dataFrame(sdf)
|
||||
})
|
||||
|
||||
#' @family SparkDataFrame functions
|
||||
#' @rdname filter
|
||||
#' @name where
|
||||
#' @note where since 1.4.0
|
||||
|
@ -2222,11 +2219,13 @@ setMethod("merge",
|
|||
joinRes
|
||||
})
|
||||
|
||||
#' Creates a list of columns by replacing the intersected ones with aliases
|
||||
#'
|
||||
#' Creates a list of columns by replacing the intersected ones with aliases.
|
||||
#' The name of the alias column is formed by concatenating the original column name and a suffix.
|
||||
#'
|
||||
#' @param x a SparkDataFrame on which the
|
||||
#' @param intersectedColNames a list of intersected column names
|
||||
#' @param x a SparkDataFrame
|
||||
#' @param intersectedColNames a list of intersected column names of the SparkDataFrame
|
||||
#' @param suffix a suffix for the column name
|
||||
#' @return list of columns
|
||||
#'
|
||||
|
@ -2513,9 +2512,9 @@ setMethod("summary",
|
|||
})
|
||||
|
||||
|
||||
#' dropna
|
||||
#' A set of SparkDataFrame functions working with NA values
|
||||
#'
|
||||
#' Returns a new SparkDataFrame omitting rows with null values.
|
||||
#' dropna, na.omit - Returns a new SparkDataFrame omitting rows with null values.
|
||||
#'
|
||||
#' @param x A SparkDataFrame.
|
||||
#' @param how "any" or "all".
|
||||
|
@ -2567,9 +2566,7 @@ setMethod("na.omit",
|
|||
dropna(object, how, minNonNulls, cols)
|
||||
})
|
||||
|
||||
#' fillna
|
||||
#'
|
||||
#' Replace null values.
|
||||
#' fillna - Replace null values.
|
||||
#'
|
||||
#' @param x A SparkDataFrame.
|
||||
#' @param value Value to replace null values with.
|
||||
|
@ -2640,7 +2637,7 @@ setMethod("fillna",
|
|||
dataFrame(sdf)
|
||||
})
|
||||
|
||||
#' Download data from a SparkDataFrame into a data.frame
|
||||
#' Download data from a SparkDataFrame into an R data.frame
|
||||
#'
|
||||
#' This function downloads the contents of a SparkDataFrame into an R data.frame.
|
||||
#' Since data.frames are held in memory, ensure that you have enough memory
|
||||
|
|
|
@ -67,7 +67,7 @@ dispatchFunc <- function(newFuncSig, x, ...) {
|
|||
}
|
||||
|
||||
#' return the SparkSession
|
||||
#' @note getSparkSession since 2.0.0
|
||||
#' @noRd
|
||||
getSparkSession <- function() {
|
||||
if (exists(".sparkRsession", envir = .sparkREnv)) {
|
||||
get(".sparkRsession", envir = .sparkREnv)
|
||||
|
@ -77,7 +77,7 @@ getSparkSession <- function() {
|
|||
}
|
||||
|
||||
#' infer the SQL type
|
||||
#' @note infer_type since 1.4.0
|
||||
#' @noRd
|
||||
infer_type <- function(x) {
|
||||
if (is.null(x)) {
|
||||
stop("can not infer type from NULL")
|
||||
|
@ -451,7 +451,7 @@ sql <- function(x, ...) {
|
|||
#' Create a SparkDataFrame from a SparkSQL Table
|
||||
#'
|
||||
#' Returns the specified Table as a SparkDataFrame. The Table must have already been registered
|
||||
#' in the SQLContext.
|
||||
#' in the SparkSession.
|
||||
#'
|
||||
#' @param tableName The SparkSQL Table to convert to a SparkDataFrame.
|
||||
#' @return SparkDataFrame
|
||||
|
|
|
@ -34,6 +34,11 @@ setOldClass("jobj")
|
|||
setClass("Column",
|
||||
slots = list(jc = "jobj"))
|
||||
|
||||
#' A set of operations working with SparkDataFrame columns
|
||||
#' @rdname columnfunctions
|
||||
#' @name columnfunctions
|
||||
NULL
|
||||
|
||||
setMethod("initialize", "Column", function(.Object, jc) {
|
||||
.Object@jc <- jc
|
||||
.Object
|
||||
|
@ -47,6 +52,7 @@ setMethod("column",
|
|||
|
||||
#' @rdname show
|
||||
#' @name show
|
||||
#' @export
|
||||
#' @note show(Column) since 1.4.0
|
||||
setMethod("show", "Column",
|
||||
function(object) {
|
||||
|
|
|
@ -225,9 +225,10 @@ setCheckpointDir <- function(sc, dirName) {
|
|||
invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName))))
|
||||
}
|
||||
|
||||
#' Run a function over a list of elements, distributing the computations with Spark.
|
||||
#' Run a function over a list of elements, distributing the computations with Spark
|
||||
#'
|
||||
#' Applies a function in a manner that is similar to doParallel or lapply to elements of a list.
|
||||
#' Run a function over a list of elements, distributing the computations with Spark. Applies a
|
||||
#' function in a manner that is similar to doParallel or lapply to elements of a list.
|
||||
#' The computations are distributed using Spark. It is conceptually the same as the following code:
|
||||
#' lapply(list, func)
|
||||
#'
|
||||
|
|
|
@ -77,13 +77,14 @@ setMethod("acos",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' approxCountDistinct
|
||||
#' Returns the approximate number of distinct items in a group
|
||||
#'
|
||||
#' Aggregate function: returns the approximate number of distinct items in a group.
|
||||
#' Returns the approximate number of distinct items in a group. This is a column
|
||||
#' aggregate function.
|
||||
#'
|
||||
#' @rdname approxCountDistinct
|
||||
#' @name approxCountDistinct
|
||||
#' @family agg_funcs
|
||||
#' @return the approximate number of distinct items in a group.
|
||||
#' @export
|
||||
#' @examples \dontrun{approxCountDistinct(df$c)}
|
||||
#' @note approxCountDistinct(Column) since 1.4.0
|
||||
|
@ -234,7 +235,7 @@ setMethod("cbrt",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' ceil
|
||||
#' Computes the ceiling of the given value
|
||||
#'
|
||||
#' Computes the ceiling of the given value.
|
||||
#'
|
||||
|
@ -254,15 +255,16 @@ setMethod("ceil",
|
|||
#' Though scala functions has "col" function, we don't expose it in SparkR
|
||||
#' because we don't want to conflict with the "col" function in the R base
|
||||
#' package and we also have "column" function exported which is an alias of "col".
|
||||
#' @noRd
|
||||
col <- function(x) {
|
||||
column(callJStatic("org.apache.spark.sql.functions", "col", x))
|
||||
}
|
||||
|
||||
#' column
|
||||
#' Returns a Column based on the given column name
|
||||
#'
|
||||
#' Returns a Column based on the given column name.
|
||||
#'
|
||||
#' @rdname col
|
||||
#' @rdname column
|
||||
#' @name column
|
||||
#' @family normal_funcs
|
||||
#' @export
|
||||
|
@ -385,9 +387,9 @@ setMethod("cosh",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' count
|
||||
#' Returns the number of items in a group
|
||||
#'
|
||||
#' Aggregate function: returns the number of items in a group.
|
||||
#' Returns the number of items in a group. This is a column aggregate function.
|
||||
#'
|
||||
#' @rdname count
|
||||
#' @name count
|
||||
|
@ -1193,7 +1195,7 @@ setMethod("sha1",
|
|||
#'
|
||||
#' Computes the signum of the given value.
|
||||
#'
|
||||
#' @rdname signum
|
||||
#' @rdname sign
|
||||
#' @name signum
|
||||
#' @family math_funcs
|
||||
#' @export
|
||||
|
@ -1717,7 +1719,7 @@ setMethod("datediff", signature(y = "Column"),
|
|||
|
||||
#' hypot
|
||||
#'
|
||||
#' Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
|
||||
#' Computes "sqrt(a^2 + b^2)" without intermediate overflow or underflow.
|
||||
#'
|
||||
#' @rdname hypot
|
||||
#' @name hypot
|
||||
|
@ -1813,12 +1815,8 @@ setMethod("pmod", signature(y = "Column"),
|
|||
})
|
||||
|
||||
|
||||
#' Approx Count Distinct
|
||||
#'
|
||||
#' @family agg_funcs
|
||||
#' @rdname approxCountDistinct
|
||||
#' @name approxCountDistinct
|
||||
#' @return the approximate number of distinct items in a group.
|
||||
#' @export
|
||||
#' @examples \dontrun{approxCountDistinct(df$c, 0.02)}
|
||||
#' @note approxCountDistinct(Column, numeric) since 1.4.0
|
||||
|
@ -1918,10 +1916,6 @@ setMethod("least",
|
|||
column(jc)
|
||||
})
|
||||
|
||||
#' ceiling
|
||||
#'
|
||||
#' Computes the ceiling of the given value.
|
||||
#'
|
||||
#' @rdname ceil
|
||||
#' @name ceiling
|
||||
#' @export
|
||||
|
@ -1933,11 +1927,7 @@ setMethod("ceiling",
|
|||
ceil(x)
|
||||
})
|
||||
|
||||
#' sign
|
||||
#'
|
||||
#' Computes the signum of the given value.
|
||||
#'
|
||||
#' @rdname signum
|
||||
#' @rdname sign
|
||||
#' @name sign
|
||||
#' @export
|
||||
#' @examples \dontrun{sign(df$c)}
|
||||
|
@ -1961,10 +1951,6 @@ setMethod("n_distinct", signature(x = "Column"),
|
|||
countDistinct(x, ...)
|
||||
})
|
||||
|
||||
#' n
|
||||
#'
|
||||
#' Aggregate function: returns the number of items in a group.
|
||||
#'
|
||||
#' @rdname count
|
||||
#' @name n
|
||||
#' @export
|
||||
|
|
|
@ -430,7 +430,7 @@ setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
|
|||
#' @export
|
||||
setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
|
||||
|
||||
#' @rdname schema
|
||||
#' @rdname columns
|
||||
#' @export
|
||||
setGeneric("columns", function(x) {standardGeneric("columns") })
|
||||
|
||||
|
@ -495,7 +495,7 @@ setGeneric("na.omit",
|
|||
standardGeneric("na.omit")
|
||||
})
|
||||
|
||||
#' @rdname schema
|
||||
#' @rdname dtypes
|
||||
#' @export
|
||||
setGeneric("dtypes", function(x) { standardGeneric("dtypes") })
|
||||
|
||||
|
@ -551,7 +551,7 @@ setGeneric("mutate", function(.data, ...) {standardGeneric("mutate") })
|
|||
#' @export
|
||||
setGeneric("orderBy", function(x, col, ...) { standardGeneric("orderBy") })
|
||||
|
||||
#' @rdname schema
|
||||
#' @rdname printSchema
|
||||
#' @export
|
||||
setGeneric("printSchema", function(x) { standardGeneric("printSchema") })
|
||||
|
||||
|
@ -638,7 +638,7 @@ setGeneric("schema", function(x) { standardGeneric("schema") })
|
|||
#' @export
|
||||
setGeneric("select", function(x, col, ...) { standardGeneric("select") } )
|
||||
|
||||
#' @rdname select
|
||||
#' @rdname selectExpr
|
||||
#' @export
|
||||
setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr") })
|
||||
|
||||
|
@ -693,67 +693,67 @@ setGeneric("randomSplit", function(x, weights, seed) { standardGeneric("randomSp
|
|||
|
||||
###################### Column Methods ##########################
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("asc", function(x) { standardGeneric("asc") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname between
|
||||
#' @export
|
||||
setGeneric("between", function(x, bounds) { standardGeneric("between") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname cast
|
||||
#' @export
|
||||
setGeneric("cast", function(x, dataType) { standardGeneric("cast") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("contains", function(x, ...) { standardGeneric("contains") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("desc", function(x) { standardGeneric("desc") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname endsWith
|
||||
#' @export
|
||||
setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("getField", function(x, ...) { standardGeneric("getField") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("getItem", function(x, ...) { standardGeneric("getItem") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("isNaN", function(x) { standardGeneric("isNaN") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("isNull", function(x) { standardGeneric("isNull") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("isNotNull", function(x) { standardGeneric("isNotNull") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("like", function(x, ...) { standardGeneric("like") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname columnfunctions
|
||||
#' @export
|
||||
setGeneric("rlike", function(x, ...) { standardGeneric("rlike") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname startsWith
|
||||
#' @export
|
||||
setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname when
|
||||
#' @export
|
||||
setGeneric("when", function(condition, value) { standardGeneric("when") })
|
||||
|
||||
#' @rdname column
|
||||
#' @rdname otherwise
|
||||
#' @export
|
||||
setGeneric("otherwise", function(x, value) { standardGeneric("otherwise") })
|
||||
|
||||
|
@ -825,7 +825,7 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") })
|
|||
#' @export
|
||||
setGeneric("ceil", function(x) { standardGeneric("ceil") })
|
||||
|
||||
#' @rdname col
|
||||
#' @rdname column
|
||||
#' @export
|
||||
setGeneric("column", function(x) { standardGeneric("column") })
|
||||
|
||||
|
@ -1119,7 +1119,7 @@ setGeneric("shiftRight", function(y, x) { standardGeneric("shiftRight") })
|
|||
#' @export
|
||||
setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUnsigned") })
|
||||
|
||||
#' @rdname signum
|
||||
#' @rdname sign
|
||||
#' @export
|
||||
setGeneric("signum", function(x) { standardGeneric("signum") })
|
||||
|
||||
|
|
|
@ -235,8 +235,6 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
|
|||
#' similarly to R package e1071's predict.
|
||||
#'
|
||||
#' @param object A fitted naive Bayes model
|
||||
#' @param newData SparkDataFrame for testing
|
||||
#' @return SparkDataFrame containing predicted labels in a column named "prediction"
|
||||
#' @rdname predict
|
||||
#' @export
|
||||
#' @examples
|
||||
|
@ -378,8 +376,6 @@ setMethod("summary", signature(object = "KMeansModel"),
|
|||
#' Makes predictions from a k-means model or a model produced by spark.kmeans().
|
||||
#'
|
||||
#' @param object A fitted k-means model
|
||||
#' @param newData SparkDataFrame for testing
|
||||
#' @return SparkDataFrame containing predicted labels in a column named "prediction"
|
||||
#' @rdname predict
|
||||
#' @export
|
||||
#' @examples
|
||||
|
@ -621,8 +617,6 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
|
|||
#' similarly to R package survival's predict.
|
||||
#'
|
||||
#' @param object A fitted AFT survival regression model
|
||||
#' @param newData SparkDataFrame for testing
|
||||
#' @return SparkDataFrame containing predicted labels in a column named "prediction"
|
||||
#' @rdname predict
|
||||
#' @export
|
||||
#' @examples
|
||||
|
|
|
@ -36,6 +36,8 @@ sparkR.stop <- function() {
|
|||
sparkR.session.stop()
|
||||
}
|
||||
|
||||
#' Stop the Spark Session and Spark Context
|
||||
#'
|
||||
#' Stop the Spark Session and Spark Context.
|
||||
#'
|
||||
#' Also terminates the backend this R session is connected to.
|
||||
|
@ -88,7 +90,7 @@ sparkR.session.stop <- function() {
|
|||
clearJobjs()
|
||||
}
|
||||
|
||||
#' (Deprecated) Initialize a new Spark Context.
|
||||
#' (Deprecated) Initialize a new Spark Context
|
||||
#'
|
||||
#' This function initializes a new SparkContext.
|
||||
#'
|
||||
|
@ -249,7 +251,7 @@ sparkR.sparkContext <- function(
|
|||
sc
|
||||
}
|
||||
|
||||
#' (Deprecated) Initialize a new SQLContext.
|
||||
#' (Deprecated) Initialize a new SQLContext
|
||||
#'
|
||||
#' This function creates a SparkContext from an existing JavaSparkContext and
|
||||
#' then uses it to initialize a new SQLContext
|
||||
|
@ -278,7 +280,7 @@ sparkRSQL.init <- function(jsc = NULL) {
|
|||
sparkR.session(enableHiveSupport = FALSE)
|
||||
}
|
||||
|
||||
#' (Deprecated) Initialize a new HiveContext.
|
||||
#' (Deprecated) Initialize a new HiveContext
|
||||
#'
|
||||
#' This function creates a HiveContext from an existing JavaSparkContext
|
||||
#'
|
||||
|
|
Loading…
Reference in a new issue