[SPARK-6824] Fill the docs for DataFrame API in SparkR

This patch also removes the RDD docs from the roxygen build, simply by deleting the "'" from "#'" so that the roxygen comments become plain comments.

Author: hqzizania <qian.huang@intel.com>
Author: qhuang <qian.huang@intel.com>

Closes #5969 from hqzizania/R1 and squashes the following commits:

6d27696 [qhuang] fixes in NAMESPACE
eb4b095 [qhuang] remove more docs
6394579 [qhuang] remove RDD docs in generics.R
6813860 [hqzizania] Fill the docs for DataFrame API in SparkR
857220f [hqzizania] remove the pairRDD docs from being built as a part of roxygen
c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen
Authored by hqzizania on 2015-05-08 11:25:04 -07:00, committed by Shivaram Venkataraman
parent 65afd3ce8b
commit 008a60dd37
9 changed files with 1609 additions and 1608 deletions


@@ -15,11 +15,11 @@ Suggests:
Description: R frontend for Spark
License: Apache License (== 2.0)
Collate:
'schema.R'
'generics.R'
'jobj.R'
'RDD.R'
'pairRDD.R'
'schema.R'
'column.R'
'group.R'
'DataFrame.R'


@@ -26,7 +26,6 @@ exportMethods("cache",
"intersect",
"isLocal",
"join",
"length",
"limit",
"orderBy",
"names",
@@ -101,9 +100,6 @@ export("cacheTable",
"tables",
"uncacheTable")
export("sparkRSQL.init",
"sparkRHive.init")
export("structField",
"structField.jobj",
"structField.character",


@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
#' @rdname DataFrame
#' @export
#'
#' @param sdf A Java object reference to the backing Scala DataFrame
#' @param isCached TRUE if the dataFrame is cached
dataFrame <- function(sdf, isCached = FALSE) {
new("DataFrame", sdf, isCached)
}
@@ -244,7 +247,7 @@ setMethod("columns",
})
#' @rdname columns
#' @export
#' @aliases names,DataFrame,function-method
setMethod("names",
signature(x = "DataFrame"),
function(x) {
@@ -399,23 +402,23 @@ setMethod("repartition",
dataFrame(sdf)
})
#' toJSON
#'
#' Convert the rows of a DataFrame into JSON objects and return an RDD where
#' each element contains a JSON string.
#'
#' @param x A SparkSQL DataFrame
#' @return A StringRRDD of JSON objects
#' @rdname tojson
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' newRDD <- toJSON(df)
#'}
# toJSON
#
# Convert the rows of a DataFrame into JSON objects and return an RDD where
# each element contains a JSON string.
#
# @param x A SparkSQL DataFrame
# @return A StringRRDD of JSON objects
# @rdname tojson
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# sqlCtx <- sparkRSQL.init(sc)
# path <- "path/to/file.json"
# df <- jsonFile(sqlCtx, path)
# newRDD <- toJSON(df)
#}
setMethod("toJSON",
signature(x = "DataFrame"),
function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
dataFrame(res)
})
# Take the first NUM rows of a DataFrame and return the results as a data.frame
#' Take the first NUM rows of a DataFrame and return the results as a data.frame
#'
#' @rdname take
#' @export
#' @examples
@@ -644,22 +647,22 @@ setMethod("first",
take(x, 1)
})
#' toRDD()
#'
#' Converts a Spark DataFrame to an RDD while preserving column names.
#'
#' @param x A Spark DataFrame
#'
#' @rdname DataFrame
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' rdd <- toRDD(df)
#' }
# toRDD()
#
# Converts a Spark DataFrame to an RDD while preserving column names.
#
# @param x A Spark DataFrame
#
# @rdname DataFrame
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# sqlCtx <- sparkRSQL.init(sc)
# path <- "path/to/file.json"
# df <- jsonFile(sqlCtx, path)
# rdd <- toRDD(df)
# }
setMethod("toRDD",
signature(x = "DataFrame"),
function(x) {
@@ -706,6 +709,7 @@ setMethod("groupBy",
#'
#' Compute aggregates by specifying a list of columns
#'
#' @param x a DataFrame
#' @rdname DataFrame
#' @export
setMethod("agg",
@@ -721,7 +725,7 @@ setMethod("agg",
# the requested map function. #
###################################################################################
#' @rdname lapply
# @rdname lapply
setMethod("lapply",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
@@ -729,14 +733,14 @@ setMethod("lapply",
lapply(rdd, FUN)
})
#' @rdname lapply
# @rdname lapply
setMethod("map",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
lapply(X, FUN)
})
#' @rdname flatMap
# @rdname flatMap
setMethod("flatMap",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
@@ -744,7 +748,7 @@ setMethod("flatMap",
flatMap(rdd, FUN)
})
#' @rdname lapplyPartition
# @rdname lapplyPartition
setMethod("lapplyPartition",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
@@ -752,14 +756,14 @@ setMethod("lapplyPartition",
lapplyPartition(rdd, FUN)
})
#' @rdname lapplyPartition
# @rdname lapplyPartition
setMethod("mapPartitions",
signature(X = "DataFrame", FUN = "function"),
function(X, FUN) {
lapplyPartition(X, FUN)
})
#' @rdname foreach
# @rdname foreach
setMethod("foreach",
signature(x = "DataFrame", func = "function"),
function(x, func) {
@@ -767,7 +771,7 @@ setMethod("foreach",
foreach(rdd, func)
})
#' @rdname foreach
# @rdname foreach
setMethod("foreachPartition",
signature(x = "DataFrame", func = "function"),
function(x, func) {
@@ -788,6 +792,7 @@ setMethod("$", signature(x = "DataFrame"),
getColumn(x, name)
})
#' @rdname select
setMethod("$<-", signature(x = "DataFrame"),
function(x, name, value) {
stopifnot(class(value) == "Column" || is.null(value))
@@ -1009,7 +1014,7 @@ setMethod("sortDF",
})
#' @rdname sortDF
#' @export
#' @aliases orderBy,DataFrame,function-method
setMethod("orderBy",
signature(x = "DataFrame", col = "characterOrColumn"),
function(x, col) {
@@ -1046,7 +1051,7 @@ setMethod("filter",
})
#' @rdname filter
#' @export
#' @aliases where,DataFrame,function-method
setMethod("where",
signature(x = "DataFrame", condition = "characterOrColumn"),
function(x, condition) {

File diff suppressed because it is too large


@@ -150,21 +150,21 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
dataFrame(sdf)
}
#' toDF
#'
#' Converts an RDD to a DataFrame by inferring the types.
#'
#' @param x An RDD
#'
#' @rdname DataFrame
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
#' df <- toDF(rdd)
#' }
# toDF
#
# Converts an RDD to a DataFrame by inferring the types.
#
# @param x An RDD
#
# @rdname DataFrame
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# sqlCtx <- sparkRSQL.init(sc)
# rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
# df <- toDF(rdd)
# }
setGeneric("toDF", function(x, ...) { standardGeneric("toDF") })
@@ -207,23 +207,23 @@ jsonFile <- function(sqlCtx, path) {
}
#' JSON RDD
#'
#' Loads an RDD storing one JSON object per string as a DataFrame.
#'
#' @param sqlCtx SQLContext to use
#' @param rdd An RDD of JSON string
#' @param schema A StructType object to use as schema
#' @param samplingRatio The ratio of sampling used to infer the schema
#' @return A DataFrame
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' rdd <- textFile(sc, "path/to/json")
#' df <- jsonRDD(sqlCtx, rdd)
#' }
# JSON RDD
#
# Loads an RDD storing one JSON object per string as a DataFrame.
#
# @param sqlCtx SQLContext to use
# @param rdd An RDD of JSON string
# @param schema A StructType object to use as schema
# @param samplingRatio The ratio of sampling used to infer the schema
# @return A DataFrame
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# sqlCtx <- sparkRSQL.init(sc)
# rdd <- textFile(sc, "path/to/json")
# df <- jsonRDD(sqlCtx, rdd)
# }
# TODO: support schema
jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) {


@@ -23,21 +23,21 @@
.broadcastValues <- new.env()
.broadcastIdToName <- new.env()
#' @title S4 class that represents a Broadcast variable
#' @description Broadcast variables can be created using the broadcast
#' function from a \code{SparkContext}.
#' @rdname broadcast-class
#' @seealso broadcast
#'
#' @param id Id of the backing Spark broadcast variable
#' @export
# @title S4 class that represents a Broadcast variable
# @description Broadcast variables can be created using the broadcast
# function from a \code{SparkContext}.
# @rdname broadcast-class
# @seealso broadcast
#
# @param id Id of the backing Spark broadcast variable
# @export
setClass("Broadcast", slots = list(id = "character"))
#' @rdname broadcast-class
#' @param value Value of the broadcast variable
#' @param jBroadcastRef reference to the backing Java broadcast object
#' @param objName name of broadcasted object
#' @export
# @rdname broadcast-class
# @param value Value of the broadcast variable
# @param jBroadcastRef reference to the backing Java broadcast object
# @param objName name of broadcasted object
# @export
Broadcast <- function(id, value, jBroadcastRef, objName) {
.broadcastValues[[id]] <- value
.broadcastNames[[as.character(objName)]] <- jBroadcastRef
@@ -45,13 +45,13 @@ Broadcast <- function(id, value, jBroadcastRef, objName) {
new("Broadcast", id = id)
}
#' @description
#' \code{value} can be used to get the value of a broadcast variable inside
#' a distributed function.
#'
#' @param bcast The broadcast variable to get
#' @rdname broadcast
#' @aliases value,Broadcast-method
# @description
# \code{value} can be used to get the value of a broadcast variable inside
# a distributed function.
#
# @param bcast The broadcast variable to get
# @rdname broadcast
# @aliases value,Broadcast-method
setMethod("value",
signature(bcast = "Broadcast"),
function(bcast) {
@@ -62,24 +62,24 @@ setMethod("value",
}
})
#' Internal function to set values of a broadcast variable.
#'
#' This function is used internally by Spark to set the value of a broadcast
#' variable on workers. Not intended for use outside the package.
#'
#' @rdname broadcast-internal
#' @seealso broadcast, value
# Internal function to set values of a broadcast variable.
#
# This function is used internally by Spark to set the value of a broadcast
# variable on workers. Not intended for use outside the package.
#
# @rdname broadcast-internal
# @seealso broadcast, value
#' @param bcastId The id of broadcast variable to set
#' @param value The value to be set
#' @export
# @param bcastId The id of broadcast variable to set
# @param value The value to be set
# @export
setBroadcastValue <- function(bcastId, value) {
bcastIdStr <- as.character(bcastId)
.broadcastValues[[bcastIdStr]] <- value
}
#' Helper function to clear the list of broadcast variables we know about
#' Should be called when the SparkR JVM backend is shutdown
# Helper function to clear the list of broadcast variables we know about
# Should be called when the SparkR JVM backend is shutdown
clearBroadcastVariables <- function() {
bcasts <- ls(.broadcastNames)
rm(list = bcasts, envir = .broadcastNames)


@@ -25,27 +25,27 @@ getMinPartitions <- function(sc, minPartitions) {
as.integer(minPartitions)
}
#' Create an RDD from a text file.
#'
#' This function reads a text file from HDFS, a local file system (available on all
#' nodes), or any Hadoop-supported file system URI, and creates an
#' RDD of strings from it.
#'
#' @param sc SparkContext to use
#' @param path Path of file to read. A vector of multiple paths is allowed.
#' @param minPartitions Minimum number of partitions to be created. If NULL, the default
#' value is chosen based on available parallelism.
#' @return RDD where each item is of type \code{character}
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' lines <- textFile(sc, "myfile.txt")
#'}
# Create an RDD from a text file.
#
# This function reads a text file from HDFS, a local file system (available on all
# nodes), or any Hadoop-supported file system URI, and creates an
# RDD of strings from it.
#
# @param sc SparkContext to use
# @param path Path of file to read. A vector of multiple paths is allowed.
# @param minPartitions Minimum number of partitions to be created. If NULL, the default
# value is chosen based on available parallelism.
# @return RDD where each item is of type \code{character}
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# lines <- textFile(sc, "myfile.txt")
#}
textFile <- function(sc, path, minPartitions = NULL) {
# Allow the user to have a more flexible definition of the text file path
path <- suppressWarnings(normalizePath(path))
#' Convert a string vector of paths to a string containing comma separated paths
# Convert a string vector of paths to a string containing comma separated paths
path <- paste(path, collapse = ",")
jrdd <- callJMethod(sc, "textFile", path, getMinPartitions(sc, minPartitions))
@@ -53,27 +53,27 @@ textFile <- function(sc, path, minPartitions = NULL) {
RDD(jrdd, "string")
}
#' Load an RDD saved as a SequenceFile containing serialized objects.
#'
#' The file to be loaded should be one that was previously generated by calling
#' saveAsObjectFile() of the RDD class.
#'
#' @param sc SparkContext to use
#' @param path Path of file to read. A vector of multiple paths is allowed.
#' @param minPartitions Minimum number of partitions to be created. If NULL, the default
#' value is chosen based on available parallelism.
#' @return RDD containing serialized R objects.
#' @seealso saveAsObjectFile
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- objectFile(sc, "myfile")
#'}
# Load an RDD saved as a SequenceFile containing serialized objects.
#
# The file to be loaded should be one that was previously generated by calling
# saveAsObjectFile() of the RDD class.
#
# @param sc SparkContext to use
# @param path Path of file to read. A vector of multiple paths is allowed.
# @param minPartitions Minimum number of partitions to be created. If NULL, the default
# value is chosen based on available parallelism.
# @return RDD containing serialized R objects.
# @seealso saveAsObjectFile
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# rdd <- objectFile(sc, "myfile")
#}
objectFile <- function(sc, path, minPartitions = NULL) {
# Allow the user to have a more flexible definition of the text file path
path <- suppressWarnings(normalizePath(path))
#' Convert a string vector of paths to a string containing comma separated paths
# Convert a string vector of paths to a string containing comma separated paths
path <- paste(path, collapse = ",")
jrdd <- callJMethod(sc, "objectFile", path, getMinPartitions(sc, minPartitions))
@@ -81,24 +81,24 @@ objectFile <- function(sc, path, minPartitions = NULL) {
RDD(jrdd, "byte")
}
#' Create an RDD from a homogeneous list or vector.
#'
#' This function creates an RDD from a local homogeneous list in R. The elements
#' in the list are split into \code{numSlices} slices and distributed to nodes
#' in the cluster.
#'
#' @param sc SparkContext to use
#' @param coll collection to parallelize
#' @param numSlices number of partitions to create in the RDD
#' @return an RDD created from this collection
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- parallelize(sc, 1:10, 2)
#' # The RDD should contain 10 elements
#' length(rdd)
#'}
# Create an RDD from a homogeneous list or vector.
#
# This function creates an RDD from a local homogeneous list in R. The elements
# in the list are split into \code{numSlices} slices and distributed to nodes
# in the cluster.
#
# @param sc SparkContext to use
# @param coll collection to parallelize
# @param numSlices number of partitions to create in the RDD
# @return an RDD created from this collection
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# rdd <- parallelize(sc, 1:10, 2)
# # The RDD should contain 10 elements
# length(rdd)
#}
parallelize <- function(sc, coll, numSlices = 1) {
# TODO: bound/safeguard numSlices
# TODO: unit tests for if the split works for all primitives
@@ -133,33 +133,33 @@ parallelize <- function(sc, coll, numSlices = 1) {
RDD(jrdd, "byte")
}
#' Include this specified package on all workers
#'
#' This function can be used to include a package on all workers before the
#' user's code is executed. This is useful in scenarios where other R package
#' functions are used in a function passed to functions like \code{lapply}.
#' NOTE: The package is assumed to be installed on every node in the Spark
#' cluster.
#'
#' @param sc SparkContext to use
#' @param pkg Package name
#'
#' @export
#' @examples
#'\dontrun{
#' library(Matrix)
#'
#' sc <- sparkR.init()
#' # Include the matrix library we will be using
#' includePackage(sc, Matrix)
#'
#' generateSparse <- function(x) {
#' sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
#' }
#'
#' rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
#' collect(rdd)
#'}
# Include this specified package on all workers
#
# This function can be used to include a package on all workers before the
# user's code is executed. This is useful in scenarios where other R package
# functions are used in a function passed to functions like \code{lapply}.
# NOTE: The package is assumed to be installed on every node in the Spark
# cluster.
#
# @param sc SparkContext to use
# @param pkg Package name
#
# @export
# @examples
#\dontrun{
# library(Matrix)
#
# sc <- sparkR.init()
# # Include the matrix library we will be using
# includePackage(sc, Matrix)
#
# generateSparse <- function(x) {
# sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
# }
#
# rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
# collect(rdd)
#}
includePackage <- function(sc, pkg) {
pkg <- as.character(substitute(pkg))
if (exists(".packages", .sparkREnv)) {
@@ -171,30 +171,30 @@ includePackage <- function(sc, pkg) {
.sparkREnv$.packages <- packages
}
#' @title Broadcast a variable to all workers
#'
#' @description
#' Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
#' object for reading it in distributed functions.
#'
#' @param sc Spark Context to use
#' @param object Object to be broadcast
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- parallelize(sc, 1:2, 2L)
#'
#' # Large Matrix object that we want to broadcast
#' randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
#' randomMatBr <- broadcast(sc, randomMat)
#'
#' # Use the broadcast variable inside the function
#' useBroadcast <- function(x) {
#' sum(value(randomMatBr) * x)
#' }
#' sumRDD <- lapply(rdd, useBroadcast)
#'}
# @title Broadcast a variable to all workers
#
# @description
# Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
# object for reading it in distributed functions.
#
# @param sc Spark Context to use
# @param object Object to be broadcast
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# rdd <- parallelize(sc, 1:2, 2L)
#
# # Large Matrix object that we want to broadcast
# randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
# randomMatBr <- broadcast(sc, randomMat)
#
# # Use the broadcast variable inside the function
# useBroadcast <- function(x) {
# sum(value(randomMatBr) * x)
# }
# sumRDD <- lapply(rdd, useBroadcast)
#}
broadcast <- function(sc, object) {
objName <- as.character(substitute(object))
serializedObj <- serialize(object, connection = NULL)
@@ -205,21 +205,21 @@ broadcast <- function(sc, object) {
Broadcast(id, object, jBroadcast, objName)
}
#' @title Set the checkpoint directory
#'
#' Set the directory under which RDDs are going to be checkpointed. The
#' directory must be an HDFS path if running on a cluster.
#'
#' @param sc Spark Context to use
#' @param dirName Directory path
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' setCheckpointDir(sc, "~/checkpoint")
#' rdd <- parallelize(sc, 1:2, 2L)
#' checkpoint(rdd)
#'}
# @title Set the checkpoint directory
#
# Set the directory under which RDDs are going to be checkpointed. The
# directory must be an HDFS path if running on a cluster.
#
# @param sc Spark Context to use
# @param dirName Directory path
# @export
# @examples
#\dontrun{
# sc <- sparkR.init()
# setCheckpointDir(sc, "~/checkpoint")
# rdd <- parallelize(sc, 1:2, 2L)
# checkpoint(rdd)
#}
setCheckpointDir <- function(sc, dirName) {
invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName))))
}


@@ -17,353 +17,353 @@
############ RDD Actions and Transformations ############
#' @rdname aggregateRDD
#' @seealso reduce
#' @export
# @rdname aggregateRDD
# @seealso reduce
# @export
setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
#' @rdname cache-methods
#' @export
# @rdname cache-methods
# @export
setGeneric("cache", function(x) { standardGeneric("cache") })
#' @rdname coalesce
#' @seealso repartition
#' @export
# @rdname coalesce
# @seealso repartition
# @export
setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") })
#' @rdname checkpoint-methods
#' @export
# @rdname checkpoint-methods
# @export
setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") })
#' @rdname collect-methods
#' @export
# @rdname collect-methods
# @export
setGeneric("collect", function(x, ...) { standardGeneric("collect") })
#' @rdname collect-methods
#' @export
# @rdname collect-methods
# @export
setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") })
#' @rdname collect-methods
#' @export
# @rdname collect-methods
# @export
setGeneric("collectPartition",
function(x, partitionId) {
standardGeneric("collectPartition")
})
#' @rdname count
#' @export
# @rdname count
# @export
setGeneric("count", function(x) { standardGeneric("count") })
#' @rdname countByValue
#' @export
# @rdname countByValue
# @export
setGeneric("countByValue", function(x) { standardGeneric("countByValue") })
#' @rdname distinct
#' @export
# @rdname distinct
# @export
setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })
#' @rdname filterRDD
#' @export
# @rdname filterRDD
# @export
setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })
#' @rdname first
#' @export
# @rdname first
# @export
setGeneric("first", function(x) { standardGeneric("first") })
#' @rdname flatMap
#' @export
# @rdname flatMap
# @export
setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") })
#' @rdname fold
#' @seealso reduce
#' @export
# @rdname fold
# @seealso reduce
# @export
setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") })
#' @rdname foreach
#' @export
# @rdname foreach
# @export
setGeneric("foreach", function(x, func) { standardGeneric("foreach") })
#' @rdname foreach
#' @export
# @rdname foreach
# @export
setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") })
# The jrdd accessor function.
setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") })
#' @rdname glom
#' @export
# @rdname glom
# @export
setGeneric("glom", function(x) { standardGeneric("glom") })
#' @rdname keyBy
#' @export
# @rdname keyBy
# @export
setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") })
#' @rdname lapplyPartition
#' @export
# @rdname lapplyPartition
# @export
setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") })
#' @rdname lapplyPartitionsWithIndex
#' @export
# @rdname lapplyPartitionsWithIndex
# @export
setGeneric("lapplyPartitionsWithIndex",
function(X, FUN) {
standardGeneric("lapplyPartitionsWithIndex")
})
#' @rdname lapply
#' @export
# @rdname lapply
# @export
setGeneric("map", function(X, FUN) { standardGeneric("map") })
#' @rdname lapplyPartition
#' @export
# @rdname lapplyPartition
# @export
setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") })
#' @rdname lapplyPartitionsWithIndex
#' @export
# @rdname lapplyPartitionsWithIndex
# @export
setGeneric("mapPartitionsWithIndex",
function(X, FUN) { standardGeneric("mapPartitionsWithIndex") })
#' @rdname maximum
#' @export
# @rdname maximum
# @export
setGeneric("maximum", function(x) { standardGeneric("maximum") })
#' @rdname minimum
#' @export
# @rdname minimum
# @export
setGeneric("minimum", function(x) { standardGeneric("minimum") })
#' @rdname sumRDD
#' @export
# @rdname sumRDD
# @export
setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })
#' @rdname name
#' @export
# @rdname name
# @export
setGeneric("name", function(x) { standardGeneric("name") })
#' @rdname numPartitions
#' @export
# @rdname numPartitions
# @export
setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })
#' @rdname persist
#' @export
# @rdname persist
# @export
setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
#' @rdname pipeRDD
#' @export
# @rdname pipeRDD
# @export
setGeneric("pipeRDD", function(x, command, env = list()) { standardGeneric("pipeRDD")})
#' @rdname reduce
#' @export
# @rdname reduce
# @export
setGeneric("reduce", function(x, func) { standardGeneric("reduce") })
#' @rdname repartition
#' @seealso coalesce
#' @export
# @rdname repartition
# @seealso coalesce
# @export
setGeneric("repartition", function(x, numPartitions) { standardGeneric("repartition") })
#' @rdname sampleRDD
#' @export
# @rdname sampleRDD
# @export
setGeneric("sampleRDD",
function(x, withReplacement, fraction, seed) {
standardGeneric("sampleRDD")
})
#' @rdname saveAsObjectFile
#' @seealso objectFile
#' @export
# @rdname saveAsObjectFile
# @seealso objectFile
# @export
setGeneric("saveAsObjectFile", function(x, path) { standardGeneric("saveAsObjectFile") })
#' @rdname saveAsTextFile
#' @export
# @rdname saveAsTextFile
# @export
setGeneric("saveAsTextFile", function(x, path) { standardGeneric("saveAsTextFile") })
#' @rdname setName
#' @export
# @rdname setName
# @export
setGeneric("setName", function(x, name) { standardGeneric("setName") })
#' @rdname sortBy
#' @export
# @rdname sortBy
# @export
setGeneric("sortBy",
function(x, func, ascending = TRUE, numPartitions = 1) {
standardGeneric("sortBy")
})
#' @rdname take
#' @export
# @rdname take
# @export
setGeneric("take", function(x, num) { standardGeneric("take") })
#' @rdname takeOrdered
#' @export
# @rdname takeOrdered
# @export
setGeneric("takeOrdered", function(x, num) { standardGeneric("takeOrdered") })
#' @rdname takeSample
#' @export
# @rdname takeSample
# @export
setGeneric("takeSample",
function(x, withReplacement, num, seed) {
standardGeneric("takeSample")
})
#' @rdname top
#' @export
# @rdname top
# @export
setGeneric("top", function(x, num) { standardGeneric("top") })
#' @rdname unionRDD
#' @export
# @rdname unionRDD
# @export
setGeneric("unionRDD", function(x, y) { standardGeneric("unionRDD") })
#' @rdname unpersist-methods
#' @export
# @rdname unpersist-methods
# @export
setGeneric("unpersist", function(x, ...) { standardGeneric("unpersist") })
#' @rdname zipRDD
#' @export
# @rdname zipRDD
# @export
setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") })
#' @rdname zipRDD
#' @export
# @rdname zipRDD
# @export
setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") },
signature = "...")
#' @rdname zipWithIndex
#' @seealso zipWithUniqueId
#' @export
# @rdname zipWithIndex
# @seealso zipWithUniqueId
# @export
setGeneric("zipWithIndex", function(x) { standardGeneric("zipWithIndex") })
#' @rdname zipWithUniqueId
#' @seealso zipWithIndex
#' @export
# @rdname zipWithUniqueId
# @seealso zipWithIndex
# @export
setGeneric("zipWithUniqueId", function(x) { standardGeneric("zipWithUniqueId") })
############ Binary Functions #############
#' @rdname cartesian
#' @export
# @rdname cartesian
# @export
setGeneric("cartesian", function(x, other) { standardGeneric("cartesian") })
#' @rdname countByKey
#' @export
# @rdname countByKey
# @export
setGeneric("countByKey", function(x) { standardGeneric("countByKey") })
#' @rdname flatMapValues
#' @export
# @rdname flatMapValues
# @export
setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") })
#' @rdname intersection
#' @export
# @rdname intersection
# @export
setGeneric("intersection", function(x, other, numPartitions = 1) {
standardGeneric("intersection") })
#' @rdname keys
#' @export
# @rdname keys
# @export
setGeneric("keys", function(x) { standardGeneric("keys") })
#' @rdname lookup
#' @export
# @rdname lookup
# @export
setGeneric("lookup", function(x, key) { standardGeneric("lookup") })
#' @rdname mapValues
#' @export
# @rdname mapValues
# @export
setGeneric("mapValues", function(X, FUN) { standardGeneric("mapValues") })
#' @rdname sampleByKey
#' @export
# @rdname sampleByKey
# @export
setGeneric("sampleByKey",
function(x, withReplacement, fractions, seed) {
standardGeneric("sampleByKey")
})
#' @rdname values
#' @export
# @rdname values
# @export
setGeneric("values", function(x) { standardGeneric("values") })
############ Shuffle Functions ############
#' @rdname aggregateByKey
#' @seealso foldByKey, combineByKey
#' @export
# @rdname aggregateByKey
# @seealso foldByKey, combineByKey
# @export
setGeneric("aggregateByKey",
function(x, zeroValue, seqOp, combOp, numPartitions) {
standardGeneric("aggregateByKey")
})
#' @rdname cogroup
#' @export
# @rdname cogroup
# @export
setGeneric("cogroup",
function(..., numPartitions) {
standardGeneric("cogroup")
},
signature = "...")
#' @rdname combineByKey
#' @seealso groupByKey, reduceByKey
#' @export
# @rdname combineByKey
# @seealso groupByKey, reduceByKey
# @export
setGeneric("combineByKey",
function(x, createCombiner, mergeValue, mergeCombiners, numPartitions) {
standardGeneric("combineByKey")
})
#' @rdname foldByKey
#' @seealso aggregateByKey, combineByKey
#' @export
# @rdname foldByKey
# @seealso aggregateByKey, combineByKey
# @export
setGeneric("foldByKey",
function(x, zeroValue, func, numPartitions) {
standardGeneric("foldByKey")
})
#' @rdname join-methods
#' @export
# @rdname join-methods
# @export
setGeneric("fullOuterJoin", function(x, y, numPartitions) { standardGeneric("fullOuterJoin") })
#' @rdname groupByKey
#' @seealso reduceByKey
#' @export
# @rdname groupByKey
# @seealso reduceByKey
# @export
setGeneric("groupByKey", function(x, numPartitions) { standardGeneric("groupByKey") })
#' @rdname join-methods
#' @export
# @rdname join-methods
# @export
setGeneric("join", function(x, y, ...) { standardGeneric("join") })
#' @rdname join-methods
#' @export
# @rdname join-methods
# @export
setGeneric("leftOuterJoin", function(x, y, numPartitions) { standardGeneric("leftOuterJoin") })
#' @rdname partitionBy
#' @export
# @rdname partitionBy
# @export
setGeneric("partitionBy", function(x, numPartitions, ...) { standardGeneric("partitionBy") })
#' @rdname reduceByKey
#' @seealso groupByKey
#' @export
# @rdname reduceByKey
# @seealso groupByKey
# @export
setGeneric("reduceByKey", function(x, combineFunc, numPartitions) { standardGeneric("reduceByKey")})
#' @rdname reduceByKeyLocally
#' @seealso reduceByKey
#' @export
# @rdname reduceByKeyLocally
# @seealso reduceByKey
# @export
setGeneric("reduceByKeyLocally",
function(x, combineFunc) {
standardGeneric("reduceByKeyLocally")
})
#' @rdname join-methods
#' @export
# @rdname join-methods
# @export
setGeneric("rightOuterJoin", function(x, y, numPartitions) { standardGeneric("rightOuterJoin") })
#' @rdname sortByKey
#' @export
# @rdname sortByKey
# @export
setGeneric("sortByKey",
function(x, ascending = TRUE, numPartitions = 1) {
standardGeneric("sortByKey")
})
#' @rdname subtract
#' @export
# @rdname subtract
# @export
setGeneric("subtract",
function(x, other, numPartitions = 1) {
standardGeneric("subtract")
})
#' @rdname subtractByKey
#' @export
# @rdname subtractByKey
# @export
setGeneric("subtractByKey",
function(x, other, numPartitions = 1) {
standardGeneric("subtractByKey")
@@ -372,8 +372,8 @@ setGeneric("subtractByKey",
################### Broadcast Variable Methods #################
#' @rdname broadcast
#' @export
# @rdname broadcast
# @export
setGeneric("value", function(bcast) { standardGeneric("value") })
@@ -477,8 +477,8 @@ setGeneric("showDF", function(x,...) { standardGeneric("showDF") })
#' @export
setGeneric("sortDF", function(x, col, ...) { standardGeneric("sortDF") })
#' @rdname tojson
#' @export
# @rdname tojson
# @export
setGeneric("toJSON", function(x) { standardGeneric("toJSON") })
#' @rdname DataFrame

File diff suppressed because it is too large
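
With the RDD and pairRDD blocks demoted, rebuilding the package docs now emits Rd pages only for the DataFrame API. A minimal sketch of regenerating them with roxygen2 (the package path below is illustrative, not part of this patch):

# Re-run roxygen over the SparkR package; only the remaining "#'" blocks
# (the DataFrame API) are turned into Rd files under man/.
# "path/to/SparkR/pkg" is a placeholder for the actual package root.
roxygen2::roxygenise(package.dir = "path/to/SparkR/pkg")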