[SPARK-6824] Fill the docs for DataFrame API in SparkR
This patch also removes the RDD docs from the roxygen build, simply by deleting the "'" from the "#'" doc markers so those blocks become plain "#" comments that roxygen ignores.

Author: hqzizania <qian.huang@intel.com>
Author: qhuang <qian.huang@intel.com>

Closes #5969 from hqzizania/R1 and squashes the following commits:

6d27696 [qhuang] fixes in NAMESPACE
eb4b095 [qhuang] remove more docs
6394579 [qhuang] remove RDD docs in generics.R
6813860 [hqzizania] Fill the docs for DataFrame API in SparkR
857220f [hqzizania] remove the pairRDD docs from being built as a part of roxygen
c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen
This commit is contained in:
parent 65afd3ce8b
commit 008a60dd37
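For context, a minimal sketch of the mechanism the commit message describes (the function names below are illustrative, not taken from the patch): roxygen2 only collects comment lines that begin with "#'", and plain "#" comments are ignored, so deleting the apostrophe keeps the notes readable in the source while dropping the corresponding page from the generated Rd documentation.

#' Sum a numeric vector. roxygen2 WILL build an .Rd page from this block.
#' @param x a numeric vector
#' @export
documented <- function(x) { sum(x) }

# Sum a numeric vector. Same text with the "'" deleted: roxygen2 ignores
# plain "#" comments, so no .Rd page is generated for this function.
# @param x a numeric vector
# @export
hidden <- function(x) { sum(x) }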
R/pkg/DESCRIPTION

@@ -15,11 +15,11 @@ Suggests:
 Description: R frontend for Spark
 License: Apache License (== 2.0)
 Collate:
+    'schema.R'
     'generics.R'
     'jobj.R'
     'RDD.R'
     'pairRDD.R'
-    'schema.R'
     'column.R'
     'group.R'
     'DataFrame.R'
R/pkg/NAMESPACE

@@ -26,7 +26,6 @@ exportMethods("cache",
               "intersect",
               "isLocal",
               "join",
               "length",
               "limit",
               "orderBy",
               "names",
@@ -101,9 +100,6 @@ export("cacheTable",
        "tables",
        "uncacheTable")

 export("sparkRSQL.init",
        "sparkRHive.init")

 export("structField",
        "structField.jobj",
        "structField.character",
R/pkg/R/DataFrame.R

@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {

+#' @rdname DataFrame
+#' @export
+#'
 #' @param sdf A Java object reference to the backing Scala DataFrame
 #' @param isCached TRUE if the dataFrame is cached
 dataFrame <- function(sdf, isCached = FALSE) {
   new("DataFrame", sdf, isCached)
 }
@@ -244,7 +247,7 @@ setMethod("columns",
           })

 #' @rdname columns
 #' @export
 #' @aliases names,DataFrame,function-method
 setMethod("names",
           signature(x = "DataFrame"),
           function(x) {
@@ -399,23 +402,23 @@ setMethod("repartition",
             dataFrame(sdf)
           })

-#' toJSON
-#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
-#'
-#' @param x A SparkSQL DataFrame
-#' @return A StringRRDD of JSON objects
-#' @rdname tojson
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' newRDD <- toJSON(df)
-#'}
+# toJSON
+#
+# Convert the rows of a DataFrame into JSON objects and return an RDD where
+# each element contains a JSON string.
+#
+# @param x A SparkSQL DataFrame
+# @return A StringRRDD of JSON objects
+# @rdname tojson
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# newRDD <- toJSON(df)
+#}
 setMethod("toJSON",
           signature(x = "DataFrame"),
           function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
             dataFrame(res)
           })

-# Take the first NUM rows of a DataFrame and return a the results as a data.frame
+#' Take the first NUM rows of a DataFrame and return a the results as a data.frame
 #'
 #' @rdname take
 #' @export
 #' @examples
@@ -644,22 +647,22 @@ setMethod("first",
             take(x, 1)
           })

-#' toRDD()
-#'
-#' Converts a Spark DataFrame to an RDD while preserving column names.
-#'
-#' @param x A Spark DataFrame
-#'
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' rdd <- toRDD(df)
-#' }
+# toRDD()
+#
+# Converts a Spark DataFrame to an RDD while preserving column names.
+#
+# @param x A Spark DataFrame
+#
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# rdd <- toRDD(df)
+# }
 setMethod("toRDD",
           signature(x = "DataFrame"),
           function(x) {
@@ -706,6 +709,7 @@ setMethod("groupBy",
 #'
 #' Compute aggregates by specifying a list of columns
 #'
 #' @param x a DataFrame
 #' @rdname DataFrame
 #' @export
 setMethod("agg",
@@ -721,7 +725,7 @@ setMethod("agg",
 # the requested map function.                                                     #
 ###################################################################################

-#' @rdname lapply
+# @rdname lapply
 setMethod("lapply",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
@@ -729,14 +733,14 @@ setMethod("lapply",
             lapply(rdd, FUN)
           })

-#' @rdname lapply
+# @rdname lapply
 setMethod("map",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapply(X, FUN)
           })

-#' @rdname flatMap
+# @rdname flatMap
 setMethod("flatMap",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
@@ -744,7 +748,7 @@ setMethod("flatMap",
             flatMap(rdd, FUN)
           })

-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("lapplyPartition",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
@@ -752,14 +756,14 @@ setMethod("lapplyPartition",
             lapplyPartition(rdd, FUN)
           })

-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("mapPartitions",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapplyPartition(X, FUN)
           })

-#' @rdname foreach
+# @rdname foreach
 setMethod("foreach",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
@@ -767,7 +771,7 @@ setMethod("foreach",
             foreach(rdd, func)
           })

-#' @rdname foreach
+# @rdname foreach
 setMethod("foreachPartition",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
@@ -788,6 +792,7 @@ setMethod("$", signature(x = "DataFrame"),
             getColumn(x, name)
           })

+#' @rdname select
 setMethod("$<-", signature(x = "DataFrame"),
           function(x, name, value) {
             stopifnot(class(value) == "Column" || is.null(value))
@@ -1009,7 +1014,7 @@ setMethod("sortDF",
           })

 #' @rdname sortDF
 #' @export
 #' @aliases orderBy,DataFrame,function-method
 setMethod("orderBy",
           signature(x = "DataFrame", col = "characterOrColumn"),
           function(x, col) {
@@ -1046,7 +1051,7 @@ setMethod("filter",
           })

 #' @rdname filter
 #' @export
 #' @aliases where,DataFrame,function-method
 setMethod("where",
           signature(x = "DataFrame", condition = "characterOrColumn"),
           function(x, condition) {
R/pkg/R/RDD.R (1546 lines changed): file diff suppressed because it is too large.
R/pkg/R/SQLContext.R

@@ -150,21 +150,21 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
   dataFrame(sdf)
 }

-#' toDF
-#'
-#' Converts an RDD to a DataFrame by infer the types.
-#'
-#' @param x An RDD
-#'
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
-#' df <- toDF(rdd)
-#' }
+# toDF
+#
+# Converts an RDD to a DataFrame by infer the types.
+#
+# @param x An RDD
+#
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
+# df <- toDF(rdd)
+# }

 setGeneric("toDF", function(x, ...) { standardGeneric("toDF") })

@@ -207,23 +207,23 @@ jsonFile <- function(sqlCtx, path) {
 }


-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a DataFrame.
-#'
-#' @param sqlCtx SQLContext to use
-#' @param rdd An RDD of JSON string
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The ratio of simpling used to infer the schema
-#' @return A DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- texFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlCtx, rdd)
-#' }
+# JSON RDD
+#
+# Loads an RDD storing one JSON object per string as a DataFrame.
+#
+# @param sqlCtx SQLContext to use
+# @param rdd An RDD of JSON string
+# @param schema A StructType object to use as schema
+# @param samplingRatio The ratio of simpling used to infer the schema
+# @return A DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- texFile(sc, "path/to/json")
+# df <- jsonRDD(sqlCtx, rdd)
+# }

 # TODO: support schema
 jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) {
R/pkg/R/broadcast.R

@@ -23,21 +23,21 @@
 .broadcastValues <- new.env()
 .broadcastIdToName <- new.env()

-#' @title S4 class that represents a Broadcast variable
-#' @description Broadcast variables can be created using the broadcast
-#'              function from a \code{SparkContext}.
-#' @rdname broadcast-class
-#' @seealso broadcast
-#'
-#' @param id Id of the backing Spark broadcast variable
-#' @export
+# @title S4 class that represents a Broadcast variable
+# @description Broadcast variables can be created using the broadcast
+#              function from a \code{SparkContext}.
+# @rdname broadcast-class
+# @seealso broadcast
+#
+# @param id Id of the backing Spark broadcast variable
+# @export
 setClass("Broadcast", slots = list(id = "character"))

-#' @rdname broadcast-class
-#' @param value Value of the broadcast variable
-#' @param jBroadcastRef reference to the backing Java broadcast object
-#' @param objName name of broadcasted object
-#' @export
+# @rdname broadcast-class
+# @param value Value of the broadcast variable
+# @param jBroadcastRef reference to the backing Java broadcast object
+# @param objName name of broadcasted object
+# @export
 Broadcast <- function(id, value, jBroadcastRef, objName) {
   .broadcastValues[[id]] <- value
   .broadcastNames[[as.character(objName)]] <- jBroadcastRef
@@ -45,13 +45,13 @@ Broadcast <- function(id, value, jBroadcastRef, objName) {
   new("Broadcast", id = id)
 }

-#' @description
-#' \code{value} can be used to get the value of a broadcast variable inside
-#' a distributed function.
-#'
-#' @param bcast The broadcast variable to get
-#' @rdname broadcast
-#' @aliases value,Broadcast-method
+# @description
+# \code{value} can be used to get the value of a broadcast variable inside
+# a distributed function.
+#
+# @param bcast The broadcast variable to get
+# @rdname broadcast
+# @aliases value,Broadcast-method
 setMethod("value",
           signature(bcast = "Broadcast"),
           function(bcast) {
@@ -62,24 +62,24 @@ setMethod("value",
             }
           })

-#' Internal function to set values of a broadcast variable.
-#'
-#' This function is used internally by Spark to set the value of a broadcast
-#' variable on workers. Not intended for use outside the package.
-#'
-#' @rdname broadcast-internal
-#' @seealso broadcast, value
+# Internal function to set values of a broadcast variable.
+#
+# This function is used internally by Spark to set the value of a broadcast
+# variable on workers. Not intended for use outside the package.
+#
+# @rdname broadcast-internal
+# @seealso broadcast, value

-#' @param bcastId The id of broadcast variable to set
-#' @param value The value to be set
-#' @export
+# @param bcastId The id of broadcast variable to set
+# @param value The value to be set
+# @export
 setBroadcastValue <- function(bcastId, value) {
   bcastIdStr <- as.character(bcastId)
   .broadcastValues[[bcastIdStr]] <- value
 }

-#' Helper function to clear the list of broadcast variables we know about
-#' Should be called when the SparkR JVM backend is shutdown
+# Helper function to clear the list of broadcast variables we know about
+# Should be called when the SparkR JVM backend is shutdown
 clearBroadcastVariables <- function() {
   bcasts <- ls(.broadcastNames)
   rm(list = bcasts, envir = .broadcastNames)
R/pkg/R/context.R

@@ -25,27 +25,27 @@ getMinPartitions <- function(sc, minPartitions) {
   as.integer(minPartitions)
 }

-#' Create an RDD from a text file.
-#'
-#' This function reads a text file from HDFS, a local file system (available on all
-#' nodes), or any Hadoop-supported file system URI, and creates an
-#' RDD of strings from it.
-#'
-#' @param sc SparkContext to use
-#' @param path Path of file to read. A vector of multiple paths is allowed.
-#' @param minPartitions Minimum number of partitions to be created. If NULL, the default
-#'        value is chosen based on available parallelism.
-#' @return RDD where each item is of type \code{character}
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' lines <- textFile(sc, "myfile.txt")
-#'}
+# Create an RDD from a text file.
+#
+# This function reads a text file from HDFS, a local file system (available on all
+# nodes), or any Hadoop-supported file system URI, and creates an
+# RDD of strings from it.
+#
+# @param sc SparkContext to use
+# @param path Path of file to read. A vector of multiple paths is allowed.
+# @param minPartitions Minimum number of partitions to be created. If NULL, the default
+#        value is chosen based on available parallelism.
+# @return RDD where each item is of type \code{character}
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# lines <- textFile(sc, "myfile.txt")
+#}
 textFile <- function(sc, path, minPartitions = NULL) {
   # Allow the user to have a more flexible definiton of the text file path
   path <- suppressWarnings(normalizePath(path))
-  #' Convert a string vector of paths to a string containing comma separated paths
+  # Convert a string vector of paths to a string containing comma separated paths
   path <- paste(path, collapse = ",")

   jrdd <- callJMethod(sc, "textFile", path, getMinPartitions(sc, minPartitions))
@@ -53,27 +53,27 @@ textFile <- function(sc, path, minPartitions = NULL) {
   RDD(jrdd, "string")
 }

-#' Load an RDD saved as a SequenceFile containing serialized objects.
-#'
-#' The file to be loaded should be one that was previously generated by calling
-#' saveAsObjectFile() of the RDD class.
-#'
-#' @param sc SparkContext to use
-#' @param path Path of file to read. A vector of multiple paths is allowed.
-#' @param minPartitions Minimum number of partitions to be created. If NULL, the default
-#'        value is chosen based on available parallelism.
-#' @return RDD containing serialized R objects.
-#' @seealso saveAsObjectFile
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- objectFile(sc, "myfile")
-#'}
+# Load an RDD saved as a SequenceFile containing serialized objects.
+#
+# The file to be loaded should be one that was previously generated by calling
+# saveAsObjectFile() of the RDD class.
+#
+# @param sc SparkContext to use
+# @param path Path of file to read. A vector of multiple paths is allowed.
+# @param minPartitions Minimum number of partitions to be created. If NULL, the default
+#        value is chosen based on available parallelism.
+# @return RDD containing serialized R objects.
+# @seealso saveAsObjectFile
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- objectFile(sc, "myfile")
+#}
 objectFile <- function(sc, path, minPartitions = NULL) {
   # Allow the user to have a more flexible definiton of the text file path
   path <- suppressWarnings(normalizePath(path))
-  #' Convert a string vector of paths to a string containing comma separated paths
+  # Convert a string vector of paths to a string containing comma separated paths
   path <- paste(path, collapse = ",")

   jrdd <- callJMethod(sc, "objectFile", path, getMinPartitions(sc, minPartitions))
@@ -81,24 +81,24 @@ objectFile <- function(sc, path, minPartitions = NULL) {
   RDD(jrdd, "byte")
 }

-#' Create an RDD from a homogeneous list or vector.
-#'
-#' This function creates an RDD from a local homogeneous list in R. The elements
-#' in the list are split into \code{numSlices} slices and distributed to nodes
-#' in the cluster.
-#'
-#' @param sc SparkContext to use
-#' @param coll collection to parallelize
-#' @param numSlices number of partitions to create in the RDD
-#' @return an RDD created from this collection
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10, 2)
-#' # The RDD should contain 10 elements
-#' length(rdd)
-#'}
+# Create an RDD from a homogeneous list or vector.
+#
+# This function creates an RDD from a local homogeneous list in R. The elements
+# in the list are split into \code{numSlices} slices and distributed to nodes
+# in the cluster.
+#
+# @param sc SparkContext to use
+# @param coll collection to parallelize
+# @param numSlices number of partitions to create in the RDD
+# @return an RDD created from this collection
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10, 2)
+# # The RDD should contain 10 elements
+# length(rdd)
+#}
 parallelize <- function(sc, coll, numSlices = 1) {
   # TODO: bound/safeguard numSlices
   # TODO: unit tests for if the split works for all primitives
@@ -133,33 +133,33 @@ parallelize <- function(sc, coll, numSlices = 1) {
   RDD(jrdd, "byte")
 }

-#' Include this specified package on all workers
-#'
-#' This function can be used to include a package on all workers before the
-#' user's code is executed. This is useful in scenarios where other R package
-#' functions are used in a function passed to functions like \code{lapply}.
-#' NOTE: The package is assumed to be installed on every node in the Spark
-#' cluster.
-#'
-#' @param sc SparkContext to use
-#' @param pkg Package name
-#'
-#' @export
-#' @examples
-#'\dontrun{
-#' library(Matrix)
-#'
-#' sc <- sparkR.init()
-#' # Include the matrix library we will be using
-#' includePackage(sc, Matrix)
-#'
-#' generateSparse <- function(x) {
-#'   sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
-#' }
-#'
-#' rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
-#' collect(rdd)
-#'}
+# Include this specified package on all workers
+#
+# This function can be used to include a package on all workers before the
+# user's code is executed. This is useful in scenarios where other R package
+# functions are used in a function passed to functions like \code{lapply}.
+# NOTE: The package is assumed to be installed on every node in the Spark
+# cluster.
+#
+# @param sc SparkContext to use
+# @param pkg Package name
+#
+# @export
+# @examples
+#\dontrun{
+# library(Matrix)
+#
+# sc <- sparkR.init()
+# # Include the matrix library we will be using
+# includePackage(sc, Matrix)
+#
+# generateSparse <- function(x) {
+#   sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
+# }
+#
+# rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
+# collect(rdd)
+#}
 includePackage <- function(sc, pkg) {
   pkg <- as.character(substitute(pkg))
   if (exists(".packages", .sparkREnv)) {
@@ -171,30 +171,30 @@ includePackage <- function(sc, pkg) {
   .sparkREnv$.packages <- packages
 }

-#' @title Broadcast a variable to all workers
-#'
-#' @description
-#' Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
-#' object for reading it in distributed functions.
-#'
-#' @param sc Spark Context to use
-#' @param object Object to be broadcast
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:2, 2L)
-#'
-#' # Large Matrix object that we want to broadcast
-#' randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
-#' randomMatBr <- broadcast(sc, randomMat)
-#'
-#' # Use the broadcast variable inside the function
-#' useBroadcast <- function(x) {
-#'   sum(value(randomMatBr) * x)
-#' }
-#' sumRDD <- lapply(rdd, useBroadcast)
-#'}
+# @title Broadcast a variable to all workers
+#
+# @description
+# Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
+# object for reading it in distributed functions.
+#
+# @param sc Spark Context to use
+# @param object Object to be broadcast
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:2, 2L)
+#
+# # Large Matrix object that we want to broadcast
+# randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
+# randomMatBr <- broadcast(sc, randomMat)
+#
+# # Use the broadcast variable inside the function
+# useBroadcast <- function(x) {
+#   sum(value(randomMatBr) * x)
+# }
+# sumRDD <- lapply(rdd, useBroadcast)
+#}
 broadcast <- function(sc, object) {
   objName <- as.character(substitute(object))
   serializedObj <- serialize(object, connection = NULL)
@@ -205,21 +205,21 @@ broadcast <- function(sc, object) {
   Broadcast(id, object, jBroadcast, objName)
 }

-#' @title Set the checkpoint directory
-#'
-#' Set the directory under which RDDs are going to be checkpointed. The
-#' directory must be a HDFS path if running on a cluster.
-#'
-#' @param sc Spark Context to use
-#' @param dirName Directory path
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' setCheckpointDir(sc, "~/checkpoint")
-#' rdd <- parallelize(sc, 1:2, 2L)
-#' checkpoint(rdd)
-#'}
+# @title Set the checkpoint directory
+#
+# Set the directory under which RDDs are going to be checkpointed. The
+# directory must be a HDFS path if running on a cluster.
+#
+# @param sc Spark Context to use
+# @param dirName Directory path
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# setCheckpointDir(sc, "~/checkpoint")
+# rdd <- parallelize(sc, 1:2, 2L)
+# checkpoint(rdd)
+#}
 setCheckpointDir <- function(sc, dirName) {
   invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName))))
 }
R/pkg/R/generics.R

@@ -17,353 +17,353 @@

 ############ RDD Actions and Transformations ############

-#' @rdname aggregateRDD
-#' @seealso reduce
-#' @export
+# @rdname aggregateRDD
+# @seealso reduce
+# @export
 setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })

-#' @rdname cache-methods
-#' @export
+# @rdname cache-methods
+# @export
 setGeneric("cache", function(x) { standardGeneric("cache") })

-#' @rdname coalesce
-#' @seealso repartition
-#' @export
+# @rdname coalesce
+# @seealso repartition
+# @export
 setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") })

-#' @rdname checkpoint-methods
-#' @export
+# @rdname checkpoint-methods
+# @export
 setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collect", function(x, ...) { standardGeneric("collect") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectPartition",
            function(x, partitionId) {
              standardGeneric("collectPartition")
            })

-#' @rdname count
-#' @export
+# @rdname count
+# @export
 setGeneric("count", function(x) { standardGeneric("count") })

-#' @rdname countByValue
-#' @export
+# @rdname countByValue
+# @export
 setGeneric("countByValue", function(x) { standardGeneric("countByValue") })

-#' @rdname distinct
-#' @export
+# @rdname distinct
+# @export
 setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })

-#' @rdname filterRDD
-#' @export
+# @rdname filterRDD
+# @export
 setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })

-#' @rdname first
-#' @export
+# @rdname first
+# @export
 setGeneric("first", function(x) { standardGeneric("first") })

-#' @rdname flatMap
-#' @export
+# @rdname flatMap
+# @export
 setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") })

-#' @rdname fold
-#' @seealso reduce
-#' @export
+# @rdname fold
+# @seealso reduce
+# @export
 setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreach", function(x, func) { standardGeneric("foreach") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") })

 # The jrdd accessor function.
 setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") })

-#' @rdname glom
-#' @export
+# @rdname glom
+# @export
 setGeneric("glom", function(x) { standardGeneric("glom") })

-#' @rdname keyBy
-#' @export
+# @rdname keyBy
+# @export
 setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("lapplyPartitionsWithIndex",
            function(X, FUN) {
              standardGeneric("lapplyPartitionsWithIndex")
            })

-#' @rdname lapply
-#' @export
+# @rdname lapply
+# @export
 setGeneric("map", function(X, FUN) { standardGeneric("map") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("mapPartitionsWithIndex",
            function(X, FUN) { standardGeneric("mapPartitionsWithIndex") })

-#' @rdname maximum
-#' @export
+# @rdname maximum
+# @export
 setGeneric("maximum", function(x) { standardGeneric("maximum") })

-#' @rdname minimum
-#' @export
+# @rdname minimum
+# @export
 setGeneric("minimum", function(x) { standardGeneric("minimum") })

-#' @rdname sumRDD
-#' @export
+# @rdname sumRDD
+# @export
 setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })

-#' @rdname name
-#' @export
+# @rdname name
+# @export
 setGeneric("name", function(x) { standardGeneric("name") })

-#' @rdname numPartitions
-#' @export
+# @rdname numPartitions
+# @export
 setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })

-#' @rdname persist
-#' @export
+# @rdname persist
+# @export
 setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })

-#' @rdname pipeRDD
-#' @export
+# @rdname pipeRDD
+# @export
 setGeneric("pipeRDD", function(x, command, env = list()) { standardGeneric("pipeRDD")})

-#' @rdname reduce
-#' @export
+# @rdname reduce
+# @export
 setGeneric("reduce", function(x, func) { standardGeneric("reduce") })

-#' @rdname repartition
-#' @seealso coalesce
-#' @export
+# @rdname repartition
+# @seealso coalesce
+# @export
 setGeneric("repartition", function(x, numPartitions) { standardGeneric("repartition") })

-#' @rdname sampleRDD
-#' @export
+# @rdname sampleRDD
+# @export
 setGeneric("sampleRDD",
            function(x, withReplacement, fraction, seed) {
              standardGeneric("sampleRDD")
            })

-#' @rdname saveAsObjectFile
-#' @seealso objectFile
-#' @export
+# @rdname saveAsObjectFile
+# @seealso objectFile
+# @export
 setGeneric("saveAsObjectFile", function(x, path) { standardGeneric("saveAsObjectFile") })

-#' @rdname saveAsTextFile
-#' @export
+# @rdname saveAsTextFile
+# @export
 setGeneric("saveAsTextFile", function(x, path) { standardGeneric("saveAsTextFile") })

-#' @rdname setName
-#' @export
+# @rdname setName
+# @export
 setGeneric("setName", function(x, name) { standardGeneric("setName") })

-#' @rdname sortBy
-#' @export
+# @rdname sortBy
+# @export
 setGeneric("sortBy",
            function(x, func, ascending = TRUE, numPartitions = 1) {
              standardGeneric("sortBy")
            })

-#' @rdname take
-#' @export
+# @rdname take
+# @export
 setGeneric("take", function(x, num) { standardGeneric("take") })

-#' @rdname takeOrdered
-#' @export
+# @rdname takeOrdered
+# @export
 setGeneric("takeOrdered", function(x, num) { standardGeneric("takeOrdered") })

-#' @rdname takeSample
-#' @export
+# @rdname takeSample
+# @export
 setGeneric("takeSample",
            function(x, withReplacement, num, seed) {
              standardGeneric("takeSample")
            })

-#' @rdname top
-#' @export
+# @rdname top
+# @export
 setGeneric("top", function(x, num) { standardGeneric("top") })

-#' @rdname unionRDD
-#' @export
+# @rdname unionRDD
+# @export
 setGeneric("unionRDD", function(x, y) { standardGeneric("unionRDD") })

-#' @rdname unpersist-methods
-#' @export
+# @rdname unpersist-methods
+# @export
 setGeneric("unpersist", function(x, ...) { standardGeneric("unpersist") })

-#' @rdname zipRDD
-#' @export
+# @rdname zipRDD
+# @export
 setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") })

-#' @rdname zipRDD
-#' @export
+# @rdname zipRDD
+# @export
 setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") },
            signature = "...")

-#' @rdname zipWithIndex
-#' @seealso zipWithUniqueId
-#' @export
+# @rdname zipWithIndex
+# @seealso zipWithUniqueId
+# @export
 setGeneric("zipWithIndex", function(x) { standardGeneric("zipWithIndex") })

-#' @rdname zipWithUniqueId
-#' @seealso zipWithIndex
-#' @export
+# @rdname zipWithUniqueId
+# @seealso zipWithIndex
+# @export
 setGeneric("zipWithUniqueId", function(x) { standardGeneric("zipWithUniqueId") })


 ############ Binary Functions #############

-#' @rdname cartesian
-#' @export
+# @rdname cartesian
+# @export
 setGeneric("cartesian", function(x, other) { standardGeneric("cartesian") })

-#' @rdname countByKey
-#' @export
+# @rdname countByKey
+# @export
 setGeneric("countByKey", function(x) { standardGeneric("countByKey") })

-#' @rdname flatMapValues
-#' @export
+# @rdname flatMapValues
+# @export
 setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") })

-#' @rdname intersection
-#' @export
+# @rdname intersection
+# @export
 setGeneric("intersection", function(x, other, numPartitions = 1) {
   standardGeneric("intersection") })

-#' @rdname keys
-#' @export
+# @rdname keys
+# @export
 setGeneric("keys", function(x) { standardGeneric("keys") })

-#' @rdname lookup
-#' @export
+# @rdname lookup
+# @export
 setGeneric("lookup", function(x, key) { standardGeneric("lookup") })

-#' @rdname mapValues
-#' @export
+# @rdname mapValues
+# @export
 setGeneric("mapValues", function(X, FUN) { standardGeneric("mapValues") })

-#' @rdname sampleByKey
-#' @export
+# @rdname sampleByKey
+# @export
 setGeneric("sampleByKey",
            function(x, withReplacement, fractions, seed) {
              standardGeneric("sampleByKey")
            })

-#' @rdname values
-#' @export
+# @rdname values
+# @export
 setGeneric("values", function(x) { standardGeneric("values") })


 ############ Shuffle Functions ############

-#' @rdname aggregateByKey
-#' @seealso foldByKey, combineByKey
-#' @export
+# @rdname aggregateByKey
+# @seealso foldByKey, combineByKey
+# @export
 setGeneric("aggregateByKey",
            function(x, zeroValue, seqOp, combOp, numPartitions) {
              standardGeneric("aggregateByKey")
            })

-#' @rdname cogroup
-#' @export
+# @rdname cogroup
+# @export
 setGeneric("cogroup",
            function(..., numPartitions) {
              standardGeneric("cogroup")
            },
            signature = "...")

-#' @rdname combineByKey
-#' @seealso groupByKey, reduceByKey
-#' @export
+# @rdname combineByKey
+# @seealso groupByKey, reduceByKey
+# @export
 setGeneric("combineByKey",
            function(x, createCombiner, mergeValue, mergeCombiners, numPartitions) {
              standardGeneric("combineByKey")
            })

-#' @rdname foldByKey
-#' @seealso aggregateByKey, combineByKey
-#' @export
+# @rdname foldByKey
+# @seealso aggregateByKey, combineByKey
+# @export
 setGeneric("foldByKey",
            function(x, zeroValue, func, numPartitions) {
              standardGeneric("foldByKey")
            })

-#' @rdname join-methods
-#' @export
+# @rdname join-methods
+# @export
 setGeneric("fullOuterJoin", function(x, y, numPartitions) { standardGeneric("fullOuterJoin") })

-#' @rdname groupByKey
-#' @seealso reduceByKey
-#' @export
+# @rdname groupByKey
+# @seealso reduceByKey
+# @export
 setGeneric("groupByKey", function(x, numPartitions) { standardGeneric("groupByKey") })

-#' @rdname join-methods
-#' @export
+# @rdname join-methods
+# @export
 setGeneric("join", function(x, y, ...) { standardGeneric("join") })

-#' @rdname join-methods
-#' @export
+# @rdname join-methods
+# @export
 setGeneric("leftOuterJoin", function(x, y, numPartitions) { standardGeneric("leftOuterJoin") })

-#' @rdname partitionBy
-#' @export
+# @rdname partitionBy
+# @export
 setGeneric("partitionBy", function(x, numPartitions, ...) { standardGeneric("partitionBy") })

-#' @rdname reduceByKey
-#' @seealso groupByKey
-#' @export
+# @rdname reduceByKey
+# @seealso groupByKey
+# @export
 setGeneric("reduceByKey", function(x, combineFunc, numPartitions) { standardGeneric("reduceByKey")})

-#' @rdname reduceByKeyLocally
-#' @seealso reduceByKey
-#' @export
+# @rdname reduceByKeyLocally
+# @seealso reduceByKey
+# @export
 setGeneric("reduceByKeyLocally",
            function(x, combineFunc) {
              standardGeneric("reduceByKeyLocally")
            })

-#' @rdname join-methods
-#' @export
+# @rdname join-methods
+# @export
 setGeneric("rightOuterJoin", function(x, y, numPartitions) { standardGeneric("rightOuterJoin") })

-#' @rdname sortByKey
-#' @export
+# @rdname sortByKey
+# @export
 setGeneric("sortByKey",
            function(x, ascending = TRUE, numPartitions = 1) {
              standardGeneric("sortByKey")
            })

-#' @rdname subtract
-#' @export
+# @rdname subtract
+# @export
 setGeneric("subtract",
            function(x, other, numPartitions = 1) {
              standardGeneric("subtract")
            })

-#' @rdname subtractByKey
-#' @export
+# @rdname subtractByKey
+# @export
 setGeneric("subtractByKey",
            function(x, other, numPartitions = 1) {
              standardGeneric("subtractByKey")
@@ -372,8 +372,8 @@ setGeneric("subtractByKey",

 ################### Broadcast Variable Methods #################

-#' @rdname broadcast
-#' @export
+# @rdname broadcast
+# @export
 setGeneric("value", function(bcast) { standardGeneric("value") })

@@ -477,8 +477,8 @@ setGeneric("showDF", function(x,...) { standardGeneric("showDF") })
 #' @export
 setGeneric("sortDF", function(x, col, ...) { standardGeneric("sortDF") })

-#' @rdname tojson
-#' @export
+# @rdname tojson
+# @export
 setGeneric("toJSON", function(x) { standardGeneric("toJSON") })

 #' @rdname DataFrame
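The marker change above is purely a documentation-build concern: R evaluates the same setGeneric calls whether the comment block preceding them starts with "#'" or "#". A small runnable sketch (the generic name is illustrative, not taken from the patch):

library(methods)

# @rdname doubleIt
# @export
# The plain "#" block above produces no .Rd page, but the generic below is
# still defined and dispatches exactly as before.
setGeneric("doubleIt", function(x) { standardGeneric("doubleIt") })
setMethod("doubleIt", signature(x = "numeric"), function(x) { x * 2 })
doubleIt(21)  # returns 42 regardless of the comment style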
File diff suppressed because it is too large.