[SPARK-6824] Fill the docs for DataFrame API in SparkR

This patch also removes the RDD docs from the roxygen build, simply by deleting the "'" from "#'" so that the roxygen comments become plain comments.

Author: hqzizania <qian.huang@intel.com>
Author: qhuang <qian.huang@intel.com>

Closes #5969 from hqzizania/R1 and squashes the following commits:

6d27696 [qhuang] fixes in NAMESPACE
eb4b095 [qhuang] remove more docs
6394579 [qhuang] remove RDD docs in generics.R
6813860 [hqzizania] Fill the docs for DataFrame API in SparkR
857220f [hqzizania] remove the pairRDD docs from being built as a part of roxygen
c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen
Authored by hqzizania on 2015-05-08 11:25:04 -07:00, committed by Shivaram Venkataraman
parent 65afd3ce8b
commit 008a60dd37
9 changed files with 1609 additions and 1608 deletions


@@ -15,11 +15,11 @@ Suggests:
Description: R frontend for Spark
License: Apache License (== 2.0)
Collate:
'schema.R'
'generics.R'
'jobj.R'
'RDD.R'
'pairRDD.R'
'schema.R'
'column.R'
'group.R'
'DataFrame.R'


@@ -26,7 +26,6 @@ exportMethods("cache",
"intersect",
"isLocal",
"join",
"length",
"limit",
"orderBy",
"names",
@@ -101,9 +100,6 @@ export("cacheTable",
"tables",
"uncacheTable")
export("sparkRSQL.init",
"sparkRHive.init")
export("structField",
"structField.jobj",
"structField.character",


@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
#' @rdname DataFrame
#' @export
#'
#' @param sdf A Java object reference to the backing Scala DataFrame
#' @param isCached TRUE if the dataFrame is cached
dataFrame <- function(sdf, isCached = FALSE) {
new("DataFrame", sdf, isCached)
}
@@ -244,7 +247,7 @@ setMethod("columns",
})
#' @rdname columns
#' @export
#' @aliases names,DataFrame,function-method
setMethod("names",
signature(x = "DataFrame"),
function(x) {
@@ -399,23 +402,23 @@ setMethod("repartition",
dataFrame(sdf)
})
#' toJSON
#'
#' Convert the rows of a DataFrame into JSON objects and return an RDD where
#' each element contains a JSON string.
#'
#' @param x A SparkSQL DataFrame
#' @return A StringRRDD of JSON objects
#' @rdname tojson
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' newRDD <- toJSON(df)
#'}
# toJSON
#
# Convert the rows of a DataFrame into JSON objects and return an RDD where
# each element contains a JSON string.
#
# @param x A SparkSQL DataFrame
# @return A StringRRDD of JSON objects
# @rdname tojson
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# sqlCtx <- sparkRSQL.init(sc)
# path <- "path/to/file.json"
# df <- jsonFile(sqlCtx, path)
# newRDD <- toJSON(df)
#}
setMethod("toJSON",
signature(x = "DataFrame"),
function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
dataFrame(res)
})
# Take the first NUM rows of a DataFrame and return the results as a data.frame
#' Take the first NUM rows of a DataFrame and return the results as a data.frame
#'
#' @rdname take
#' @export
#' @examples
@@ -644,22 +647,22 @@ setMethod("first",
take(x, 1)
})
#' toRDD()
#'
#' Converts a Spark DataFrame to an RDD while preserving column names.
#'
#' @param x A Spark DataFrame
#'
#' @rdname DataFrame
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' rdd <- toRDD(df)
#' }
# toRDD()
#
# Converts a Spark DataFrame to an RDD while preserving column names.
#
# @param x A Spark DataFrame
#
# @rdname DataFrame
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# sqlCtx <- sparkRSQL.init(sc)
# path <- "path/to/file.json"
# df <- jsonFile(sqlCtx, path)
# rdd <- toRDD(df)
# }
setMethod("toRDD",
signature(x = "DataFrame"),
function(x) {
@@ -706,6 +709,7 @@ setMethod("groupBy",
#'
#' Compute aggregates by specifying a list of columns
#'
#' @param x a DataFrame
#' @rdname DataFrame
#' @export
setMethod("agg",
@@ -721,7 +725,7 @@ setMethod("agg",
# the requested map function. #
###################################################################################
#' @rdname lapply
# @rdname lapply
setMethod("lapply",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
@@ -729,14 +733,14 @@ setMethod("lapply",
lapply(rdd, FUN)
})
#' @rdname lapply
# @rdname lapply
setMethod("map",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
lapply(X, FUN)
})
#' @rdname flatMap
# @rdname flatMap
setMethod("flatMap",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
@@ -744,7 +748,7 @@ setMethod("flatMap",
flatMap(rdd, FUN)
})
#' @rdname lapplyPartition
# @rdname lapplyPartition
setMethod("lapplyPartition",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
@@ -752,14 +756,14 @@ setMethod("lapplyPartition",
lapplyPartition(rdd, FUN)
})
#' @rdname lapplyPartition
# @rdname lapplyPartition
setMethod("mapPartitions",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
lapplyPartition(X, FUN)
})
#' @rdname foreach
# @rdname foreach
setMethod("foreach",
signature(x = "DataFrame", func = "function"),
function(x, func) {
@@ -767,7 +771,7 @@ setMethod("foreach",
foreach(rdd, func)
})
#' @rdname foreach
# @rdname foreach
setMethod("foreachPartition",
signature(x = "DataFrame", func = "function"),
function(x, func) {
@@ -788,6 +792,7 @@ setMethod("$", signature(x = "DataFrame"),
getColumn(x, name)
})
#' @rdname select
setMethod("$<-", signature(x = "DataFrame"),
function(x, name, value) {
stopifnot(class(value) == "Column" || is.null(value))
@@ -1009,7 +1014,7 @@ setMethod("sortDF",
})
#' @rdname sortDF
#' @export
#' @aliases orderBy,DataFrame,function-method
setMethod("orderBy",
signature(x = "DataFrame", col = "characterOrColumn"),
function(x, col) {
@@ -1046,7 +1051,7 @@ setMethod("filter",
})
#' @rdname filter
#' @export
#' @aliases where,DataFrame,function-method
setMethod("where",
signature(x = "DataFrame", condition = "characterOrColumn"),
function(x, condition) {

File diff suppressed because it is too large


@@ -150,21 +150,21 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
dataFrame(sdf)
}
#' toDF
#'
#' Converts an RDD to a DataFrame by inferring the types.
#'
#' @param x An RDD
#'
#' @rdname DataFrame
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
#' df <- toDF(rdd)
#' }
# toDF
#
# Converts an RDD to a DataFrame by inferring the types.
#
# @param x An RDD
#
# @rdname DataFrame
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# sqlCtx <- sparkRSQL.init(sc)
# rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
# df <- toDF(rdd)
# }
setGeneric("toDF", function(x, ...) { standardGeneric("toDF") })
@@ -207,23 +207,23 @@ jsonFile <- function(sqlCtx, path) {
}
#' JSON RDD
#'
#' Loads an RDD storing one JSON object per string as a DataFrame.
#'
#' @param sqlCtx SQLContext to use
#' @param rdd An RDD of JSON string
#' @param schema A StructType object to use as schema
#' @param samplingRatio The ratio of sampling used to infer the schema
#' @return A DataFrame
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' rdd <- textFile(sc, "path/to/json")
#' df <- jsonRDD(sqlCtx, rdd)
#' }
# JSON RDD
#
# Loads an RDD storing one JSON object per string as a DataFrame.
#
# @param sqlCtx SQLContext to use
# @param rdd An RDD of JSON string
# @param schema A StructType object to use as schema
# @param samplingRatio The ratio of sampling used to infer the schema
# @return A DataFrame
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# sqlCtx <- sparkRSQL.init(sc)
# rdd <- textFile(sc, "path/to/json")
# df <- jsonRDD(sqlCtx, rdd)
# }
# TODO: support schema
jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) {


@@ -23,21 +23,21 @@
.broadcastValues <- new.env()
.broadcastIdToName <- new.env()
#' @title S4 class that represents a Broadcast variable
#' @description Broadcast variables can be created using the broadcast
#' function from a \code{SparkContext}.
#' @rdname broadcast-class
#' @seealso broadcast
#'
#' @param id Id of the backing Spark broadcast variable
#' @export
# @title S4 class that represents a Broadcast variable
# @description Broadcast variables can be created using the broadcast
# function from a \code{SparkContext}.
# @rdname broadcast-class
# @seealso broadcast
#
# @param id Id of the backing Spark broadcast variable
# @export
setClass("Broadcast", slots = list(id = "character"))
#' @rdname broadcast-class
#' @param value Value of the broadcast variable
#' @param jBroadcastRef reference to the backing Java broadcast object
#' @param objName name of broadcasted object
#' @export
# @rdname broadcast-class
# @param value Value of the broadcast variable
# @param jBroadcastRef reference to the backing Java broadcast object
# @param objName name of broadcasted object
# @export
Broadcast <- function(id, value, jBroadcastRef, objName) {
.broadcastValues[[id]] <- value
.broadcastNames[[as.character(objName)]] <- jBroadcastRef
@@ -45,13 +45,13 @@ Broadcast <- function(id, value, jBroadcastRef, objName) {
new("Broadcast", id = id)
}
#' @description
#' \code{value} can be used to get the value of a broadcast variable inside
#' a distributed function.
#'
#' @param bcast The broadcast variable to get
#' @rdname broadcast
#' @aliases value,Broadcast-method
# @description
# \code{value} can be used to get the value of a broadcast variable inside
# a distributed function.
#
# @param bcast The broadcast variable to get
# @rdname broadcast
# @aliases value,Broadcast-method
setMethod("value",
signature(bcast = "Broadcast"),
function(bcast) {
@@ -62,24 +62,24 @@ setMethod("value",
}
})
#' Internal function to set values of a broadcast variable.
#'
#' This function is used internally by Spark to set the value of a broadcast
#' variable on workers. Not intended for use outside the package.
#'
#' @rdname broadcast-internal
#' @seealso broadcast, value
# Internal function to set values of a broadcast variable.
#
# This function is used internally by Spark to set the value of a broadcast
# variable on workers. Not intended for use outside the package.
#
# @rdname broadcast-internal
# @seealso broadcast, value
#' @param bcastId The id of broadcast variable to set
#' @param value The value to be set
#' @export
# @param bcastId The id of broadcast variable to set
# @param value The value to be set
# @export
setBroadcastValue <- function(bcastId, value) {
bcastIdStr <- as.character(bcastId)
.broadcastValues[[bcastIdStr]] <- value
}
#' Helper function to clear the list of broadcast variables we know about
#' Should be called when the SparkR JVM backend is shutdown
# Helper function to clear the list of broadcast variables we know about
# Should be called when the SparkR JVM backend is shutdown
clearBroadcastVariables <- function() {
bcasts <- ls(.broadcastNames)
rm(list = bcasts, envir = .broadcastNames)


@@ -25,27 +25,27 @@ getMinPartitions <- function(sc, minPartitions) {
as.integer(minPartitions)
}
#' Create an RDD from a text file.
#'
#' This function reads a text file from HDFS, a local file system (available on all
#' nodes), or any Hadoop-supported file system URI, and creates an
#' RDD of strings from it.
#'
#' @param sc SparkContext to use
#' @param path Path of file to read. A vector of multiple paths is allowed.
#' @param minPartitions Minimum number of partitions to be created. If NULL, the default
#' value is chosen based on available parallelism.
#' @return RDD where each item is of type \code{character}
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' lines <- textFile(sc, "myfile.txt")
#'}
# Create an RDD from a text file.
#
# This function reads a text file from HDFS, a local file system (available on all
# nodes), or any Hadoop-supported file system URI, and creates an
# RDD of strings from it.
#
# @param sc SparkContext to use
# @param path Path of file to read. A vector of multiple paths is allowed.
# @param minPartitions Minimum number of partitions to be created. If NULL, the default
# value is chosen based on available parallelism.
# @return RDD where each item is of type \code{character}
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# lines <- textFile(sc, "myfile.txt")
#}
textFile <- function(sc, path, minPartitions = NULL) {
# Allow the user to have a more flexible definition of the text file path
path <- suppressWarnings(normalizePath(path))
#' Convert a string vector of paths to a string containing comma separated paths
# Convert a string vector of paths to a string containing comma separated paths
path <- paste(path, collapse = ",")
jrdd <- callJMethod(sc, "textFile", path, getMinPartitions(sc, minPartitions))
@@ -53,27 +53,27 @@ textFile <- function(sc, path, minPartitions = NULL) {
RDD(jrdd, "string")
}
#' Load an RDD saved as a SequenceFile containing serialized objects.
#'
#' The file to be loaded should be one that was previously generated by calling
#' saveAsObjectFile() of the RDD class.
#'
#' @param sc SparkContext to use
#' @param path Path of file to read. A vector of multiple paths is allowed.
#' @param minPartitions Minimum number of partitions to be created. If NULL, the default
#' value is chosen based on available parallelism.
#' @return RDD containing serialized R objects.
#' @seealso saveAsObjectFile
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- objectFile(sc, "myfile")
#'}
# Load an RDD saved as a SequenceFile containing serialized objects.
#
# The file to be loaded should be one that was previously generated by calling
# saveAsObjectFile() of the RDD class.
#
# @param sc SparkContext to use
# @param path Path of file to read. A vector of multiple paths is allowed.
# @param minPartitions Minimum number of partitions to be created. If NULL, the default
# value is chosen based on available parallelism.
# @return RDD containing serialized R objects.
# @seealso saveAsObjectFile
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# rdd <- objectFile(sc, "myfile")
#}
objectFile <- function(sc, path, minPartitions = NULL) {
# Allow the user to have a more flexible definition of the text file path
path <- suppressWarnings(normalizePath(path))
#' Convert a string vector of paths to a string containing comma separated paths
# Convert a string vector of paths to a string containing comma separated paths
path <- paste(path, collapse = ",")
jrdd <- callJMethod(sc, "objectFile", path, getMinPartitions(sc, minPartitions))
@@ -81,24 +81,24 @@ objectFile <- function(sc, path, minPartitions = NULL) {
RDD(jrdd, "byte")
}
#' Create an RDD from a homogeneous list or vector.
#'
#' This function creates an RDD from a local homogeneous list in R. The elements
#' in the list are split into \code{numSlices} slices and distributed to nodes
#' in the cluster.
#'
#' @param sc SparkContext to use
#' @param coll collection to parallelize
#' @param numSlices number of partitions to create in the RDD
#' @return an RDD created from this collection
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- parallelize(sc, 1:10, 2)
#' # The RDD should contain 10 elements
#' length(rdd)
#'}
# Create an RDD from a homogeneous list or vector.
#
# This function creates an RDD from a local homogeneous list in R. The elements
# in the list are split into \code{numSlices} slices and distributed to nodes
# in the cluster.
#
# @param sc SparkContext to use
# @param coll collection to parallelize
# @param numSlices number of partitions to create in the RDD
# @return an RDD created from this collection
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# rdd <- parallelize(sc, 1:10, 2)
# # The RDD should contain 10 elements
# length(rdd)
#}
parallelize <- function(sc, coll, numSlices = 1) {
# TODO: bound/safeguard numSlices
# TODO: unit tests for if the split works for all primitives
@@ -133,33 +133,33 @@ parallelize <- function(sc, coll, numSlices = 1) {
RDD(jrdd, "byte")
}
#' Include this specified package on all workers
#'
#' This function can be used to include a package on all workers before the
#' user's code is executed. This is useful in scenarios where other R package
#' functions are used in a function passed to functions like \code{lapply}.
#' NOTE: The package is assumed to be installed on every node in the Spark
#' cluster.
#'
#' @param sc SparkContext to use
#' @param pkg Package name
#'
#' @export
#' @examples
#'\dontrun{
#' library(Matrix)
#'
#' sc <- sparkR.init()
#' # Include the matrix library we will be using
#' includePackage(sc, Matrix)
#'
#' generateSparse <- function(x) {
#' sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
#' }
#'
#' rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
#' collect(rdd)
#'}
# Include this specified package on all workers
#
# This function can be used to include a package on all workers before the
# user's code is executed. This is useful in scenarios where other R package
# functions are used in a function passed to functions like \code{lapply}.
# NOTE: The package is assumed to be installed on every node in the Spark
# cluster.
#
# @param sc SparkContext to use
# @param pkg Package name
#
# @export
# @examples
#\dontrun{
# library(Matrix)
#
# sc <- sparkR.init()
# # Include the matrix library we will be using
# includePackage(sc, Matrix)
#
# generateSparse <- function(x) {
# sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
# }
#
# rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
# collect(rdd)
#}
includePackage <- function(sc, pkg) {
pkg <- as.character(substitute(pkg))
if (exists(".packages", .sparkREnv)) {
@@ -171,30 +171,30 @@ includePackage <- function(sc, pkg) {
.sparkREnv$.packages <- packages
}
#' @title Broadcast a variable to all workers
#'
#' @description
#' Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
#' object for reading it in distributed functions.
#'
#' @param sc Spark Context to use
#' @param object Object to be broadcast
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- parallelize(sc, 1:2, 2L)
#'
#' # Large Matrix object that we want to broadcast
#' randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
#' randomMatBr <- broadcast(sc, randomMat)
#'
#' # Use the broadcast variable inside the function
#' useBroadcast <- function(x) {
#' sum(value(randomMatBr) * x)
#' }
#' sumRDD <- lapply(rdd, useBroadcast)
#'}
# @title Broadcast a variable to all workers
#
# @description
# Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
# object for reading it in distributed functions.
#
# @param sc Spark Context to use
# @param object Object to be broadcast
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# rdd <- parallelize(sc, 1:2, 2L)
#
# # Large Matrix object that we want to broadcast
# randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
# randomMatBr <- broadcast(sc, randomMat)
#
# # Use the broadcast variable inside the function
# useBroadcast <- function(x) {
# sum(value(randomMatBr) * x)
# }
# sumRDD <- lapply(rdd, useBroadcast)
#}
broadcast <- function(sc, object) {
objName <- as.character(substitute(object))
serializedObj <- serialize(object, connection = NULL)
@@ -205,21 +205,21 @@ broadcast <- function(sc, object) {
Broadcast(id, object, jBroadcast, objName)
}
#' @title Set the checkpoint directory
#'
#' Set the directory under which RDDs are going to be checkpointed. The
#' directory must be an HDFS path if running on a cluster.
#'
#' @param sc Spark Context to use
#' @param dirName Directory path
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' setCheckpointDir(sc, "~/checkpoint")
#' rdd <- parallelize(sc, 1:2, 2L)
#' checkpoint(rdd)
#'}
# @title Set the checkpoint directory
#
# Set the directory under which RDDs are going to be checkpointed. The
# directory must be an HDFS path if running on a cluster.
#
# @param sc Spark Context to use
# @param dirName Directory path
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# setCheckpointDir(sc, "~/checkpoint")
# rdd <- parallelize(sc, 1:2, 2L)
# checkpoint(rdd)
#}
setCheckpointDir <- function(sc, dirName) {
invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName))))
}


@@ -17,353 +17,353 @@
############ RDD Actions and Transformations ############
#' @rdname aggregateRDD
#' @seealso reduce
#' @export
# @rdname aggregateRDD
# @seealso reduce
# @export
setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
#' @rdname cache-methods
#' @export
# @rdname cache-methods
# @export
setGeneric("cache", function(x) { standardGeneric("cache") })
#' @rdname coalesce
#' @seealso repartition
#' @export
# @rdname coalesce
# @seealso repartition
# @export
setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") })
#' @rdname checkpoint-methods
#' @export
# @rdname checkpoint-methods
# @export
setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") })
#' @rdname collect-methods
#' @export
# @rdname collect-methods
# @export
setGeneric("collect", function(x, ...) { standardGeneric("collect") })
#' @rdname collect-methods
#' @export
# @rdname collect-methods
# @export
setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") })
#' @rdname collect-methods
#' @export
# @rdname collect-methods
# @export
setGeneric("collectPartition",
function(x, partitionId) {
standardGeneric("collectPartition")
})
#' @rdname count
#' @export
# @rdname count
# @export
setGeneric("count", function(x) { standardGeneric("count") })
#' @rdname countByValue
#' @export
# @rdname countByValue
# @export
setGeneric("countByValue", function(x) { standardGeneric("countByValue") })
#' @rdname distinct
#' @export
# @rdname distinct
# @export
setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })
#' @rdname filterRDD
#' @export
# @rdname filterRDD
# @export
setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })
#' @rdname first
#' @export
# @rdname first
# @export
setGeneric("first", function(x) { standardGeneric("first") })
#' @rdname flatMap
#' @export
# @rdname flatMap
# @export
setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") })
#' @rdname fold
#' @seealso reduce
#' @export
# @rdname fold
# @seealso reduce
# @export
setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") })
#' @rdname foreach
#' @export
# @rdname foreach
# @export
setGeneric("foreach", function(x, func) { standardGeneric("foreach") })
#' @rdname foreach
#' @export
# @rdname foreach
# @export
setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") })
# The jrdd accessor function.
setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") })
#' @rdname glom
#' @export
# @rdname glom
# @export
setGeneric("glom", function(x) { standardGeneric("glom") })
#' @rdname keyBy
#' @export
# @rdname keyBy
# @export
setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") })
#' @rdname lapplyPartition
#' @export
# @rdname lapplyPartition
# @export
setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") })
#' @rdname lapplyPartitionsWithIndex
#' @export
# @rdname lapplyPartitionsWithIndex
# @export
setGeneric("lapplyPartitionsWithIndex",
function(X, FUN) {
standardGeneric("lapplyPartitionsWithIndex")
})
#' @rdname lapply
#' @export
# @rdname lapply
# @export
setGeneric("map", function(X, FUN) { standardGeneric("map") })
#' @rdname lapplyPartition
#' @export
# @rdname lapplyPartition
# @export
setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") })
#' @rdname lapplyPartitionsWithIndex
#' @export
# @rdname lapplyPartitionsWithIndex
# @export
setGeneric("mapPartitionsWithIndex",
function(X, FUN) { standardGeneric("mapPartitionsWithIndex") })
#' @rdname maximum
#' @export
# @rdname maximum
# @export
setGeneric("maximum", function(x) { standardGeneric("maximum") })
#' @rdname minimum
#' @export
# @rdname minimum
# @export
setGeneric("minimum", function(x) { standardGeneric("minimum") })
#' @rdname sumRDD
#' @export
# @rdname sumRDD
# @export
setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })
#' @rdname name
#' @export
# @rdname name
# @export
setGeneric("name", function(x) { standardGeneric("name") })
#' @rdname numPartitions
#' @export
# @rdname numPartitions
# @export
setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })
#' @rdname persist
#' @export
# @rdname persist
# @export
setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
#' @rdname pipeRDD
#' @export
# @rdname pipeRDD
# @export
setGeneric("pipeRDD", function(x, command, env = list()) { standardGeneric("pipeRDD")})
#' @rdname reduce
#' @export
# @rdname reduce
# @export
setGeneric("reduce", function(x, func) { standardGeneric("reduce") })
#' @rdname repartition
#' @seealso coalesce
#' @export
# @rdname repartition
# @seealso coalesce
# @export
setGeneric("repartition", function(x, numPartitions) { standardGeneric("repartition") })
#' @rdname sampleRDD
#' @export
# @rdname sampleRDD
# @export
setGeneric("sampleRDD",
function(x, withReplacement, fraction, seed) {
standardGeneric("sampleRDD")
})
#' @rdname saveAsObjectFile
#' @seealso objectFile
#' @export
# @rdname saveAsObjectFile
# @seealso objectFile
# @export
setGeneric("saveAsObjectFile", function(x, path) { standardGeneric("saveAsObjectFile") })
#' @rdname saveAsTextFile
#' @export
# @rdname saveAsTextFile
# @export
setGeneric("saveAsTextFile", function(x, path) { standardGeneric("saveAsTextFile") })
#' @rdname setName
#' @export
# @rdname setName
# @export
setGeneric("setName", function(x, name) { standardGeneric("setName") })
#' @rdname sortBy
#' @export
# @rdname sortBy
# @export
setGeneric("sortBy",
function(x, func, ascending = TRUE, numPartitions = 1) {
standardGeneric("sortBy")
})
#' @rdname take
#' @export
# @rdname take
# @export
setGeneric("take", function(x, num) { standardGeneric("take") })
#' @rdname takeOrdered
#' @export
# @rdname takeOrdered
# @export
setGeneric("takeOrdered", function(x, num) { standardGeneric("takeOrdered") })
#' @rdname takeSample
#' @export
# @rdname takeSample
# @export
setGeneric("takeSample",
function(x, withReplacement, num, seed) {
standardGeneric("takeSample")
})
#' @rdname top
#' @export
# @rdname top
# @export
setGeneric("top", function(x, num) { standardGeneric("top") })
#' @rdname unionRDD
#' @export
# @rdname unionRDD
# @export
setGeneric("unionRDD", function(x, y) { standardGeneric("unionRDD") })
#' @rdname unpersist-methods
#' @export
# @rdname unpersist-methods
# @export
setGeneric("unpersist", function(x, ...) { standardGeneric("unpersist") })
#' @rdname zipRDD
#' @export
# @rdname zipRDD
# @export
setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") })
#' @rdname zipRDD
#' @export
# @rdname zipRDD
# @export
setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") },
signature = "...")
#' @rdname zipWithIndex
#' @seealso zipWithUniqueId
#' @export
# @rdname zipWithIndex
# @seealso zipWithUniqueId
# @export
setGeneric("zipWithIndex", function(x) { standardGeneric("zipWithIndex") })
#' @rdname zipWithUniqueId
#' @seealso zipWithIndex
#' @export
# @rdname zipWithUniqueId
# @seealso zipWithIndex
# @export
setGeneric("zipWithUniqueId", function(x) { standardGeneric("zipWithUniqueId") })
############ Binary Functions #############
#' @rdname cartesian
#' @export
# @rdname cartesian
# @export
setGeneric("cartesian", function(x, other) { standardGeneric("cartesian") })
#' @rdname countByKey
#' @export
# @rdname countByKey
# @export
setGeneric("countByKey", function(x) { standardGeneric("countByKey") })
#' @rdname flatMapValues
#' @export
# @rdname flatMapValues
# @export
setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") })
#' @rdname intersection
#' @export
# @rdname intersection
# @export
setGeneric("intersection", function(x, other, numPartitions = 1) {
standardGeneric("intersection") })
#' @rdname keys
#' @export
# @rdname keys
# @export
setGeneric("keys", function(x) { standardGeneric("keys") })
#' @rdname lookup
#' @export
# @rdname lookup
# @export
setGeneric("lookup", function(x, key) { standardGeneric("lookup") })
#' @rdname mapValues
#' @export
# @rdname mapValues
# @export
setGeneric("mapValues", function(X, FUN) { standardGeneric("mapValues") })
#' @rdname sampleByKey
#' @export
# @rdname sampleByKey
# @export
setGeneric("sampleByKey",
function(x, withReplacement, fractions, seed) {
standardGeneric("sampleByKey")
})
#' @rdname values
#' @export
# @rdname values
# @export
setGeneric("values", function(x) { standardGeneric("values") })
############ Shuffle Functions ############
#' @rdname aggregateByKey
#' @seealso foldByKey, combineByKey
#' @export
# @rdname aggregateByKey
# @seealso foldByKey, combineByKey
# @export
setGeneric("aggregateByKey",
function(x, zeroValue, seqOp, combOp, numPartitions) {
standardGeneric("aggregateByKey")
})
#' @rdname cogroup
#' @export
# @rdname cogroup
# @export
setGeneric("cogroup",
function(..., numPartitions) {
standardGeneric("cogroup")
},
signature = "...")
#' @rdname combineByKey
#' @seealso groupByKey, reduceByKey
#' @export
# @rdname combineByKey
# @seealso groupByKey, reduceByKey
# @export
setGeneric("combineByKey",
function(x, createCombiner, mergeValue, mergeCombiners, numPartitions) {
standardGeneric("combineByKey")
})
#' @rdname foldByKey
#' @seealso aggregateByKey, combineByKey
#' @export
# @rdname foldByKey
# @seealso aggregateByKey, combineByKey
# @export
setGeneric("foldByKey",
function(x, zeroValue, func, numPartitions) {
standardGeneric("foldByKey")
})
#' @rdname join-methods
#' @export
# @rdname join-methods
# @export
setGeneric("fullOuterJoin", function(x, y, numPartitions) { standardGeneric("fullOuterJoin") })
#' @rdname groupByKey
#' @seealso reduceByKey
#' @export
# @rdname groupByKey
# @seealso reduceByKey
# @export
setGeneric("groupByKey", function(x, numPartitions) { standardGeneric("groupByKey") })
#' @rdname join-methods
#' @export
# @rdname join-methods
# @export
setGeneric("join", function(x, y, ...) { standardGeneric("join") })
#' @rdname join-methods
#' @export
# @rdname join-methods
# @export
setGeneric("leftOuterJoin", function(x, y, numPartitions) { standardGeneric("leftOuterJoin") })
#' @rdname partitionBy
#' @export
# @rdname partitionBy
# @export
setGeneric("partitionBy", function(x, numPartitions, ...) { standardGeneric("partitionBy") })
#' @rdname reduceByKey
#' @seealso groupByKey
#' @export
# @rdname reduceByKey
# @seealso groupByKey
# @export
setGeneric("reduceByKey", function(x, combineFunc, numPartitions) { standardGeneric("reduceByKey")})
#' @rdname reduceByKeyLocally
#' @seealso reduceByKey
#' @export
# @rdname reduceByKeyLocally
# @seealso reduceByKey
# @export
setGeneric("reduceByKeyLocally",
function(x, combineFunc) {
standardGeneric("reduceByKeyLocally")
})
#' @rdname join-methods
#' @export
# @rdname join-methods
# @export
setGeneric("rightOuterJoin", function(x, y, numPartitions) { standardGeneric("rightOuterJoin") })
#' @rdname sortByKey
#' @export
# @rdname sortByKey
# @export
setGeneric("sortByKey",
function(x, ascending = TRUE, numPartitions = 1) {
standardGeneric("sortByKey")
})
#' @rdname subtract
#' @export
# @rdname subtract
# @export
setGeneric("subtract",
function(x, other, numPartitions = 1) {
standardGeneric("subtract")
})
#' @rdname subtractByKey
#' @export
# @rdname subtractByKey
# @export
setGeneric("subtractByKey",
function(x, other, numPartitions = 1) {
standardGeneric("subtractByKey")
@@ -372,8 +372,8 @@ setGeneric("subtractByKey",
################### Broadcast Variable Methods #################
#' @rdname broadcast
#' @export
# @rdname broadcast
# @export
setGeneric("value", function(bcast) { standardGeneric("value") })
@@ -477,8 +477,8 @@ setGeneric("showDF", function(x,...) { standardGeneric("showDF") })
#' @export
setGeneric("sortDF", function(x, col, ...) { standardGeneric("sortDF") })
#' @rdname tojson
#' @export
# @rdname tojson
# @export
setGeneric("toJSON", function(x) { standardGeneric("toJSON") })
#' @rdname DataFrame

File diff suppressed because it is too large
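
With the RDD and pairRDD blocks demoted, rebuilding the package docs now emits Rd pages only for the DataFrame API. A minimal sketch of regenerating them with roxygen2 (the package path below is illustrative, not part of this patch):

# Re-run roxygen over the SparkR package; only the remaining "#'" blocks
# (the DataFrame API) are turned into Rd files under man/.
# "path/to/SparkR/pkg" is a placeholder for the actual package root.
roxygen2::roxygenise(package.dir = "path/to/SparkR/pkg")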