[SPARK-16107][R] group glm methods in documentation

## What changes were proposed in this pull request?

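This groups the GLM methods (spark.glm, summary, print, predict and write.ml) under a single documentation topic, spark.glm. The example code was consolidated into one example that also demonstrates predict, write.ml and read.ml.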

## How was this patch tested?

N/A

Screenshots attached:

![screen shot 2016-06-21 at 2 31 37 pm](https://cloud.githubusercontent.com/assets/15318264/16247077/f6eafc04-37bc-11e6-89a8-7898ff3e4078.png)
![screen shot 2016-06-21 at 2 31 45 pm](https://cloud.githubusercontent.com/assets/15318264/16247078/f6eb1c16-37bc-11e6-940a-2b595b10617c.png)

Author: Junyang Qian <junyangq@databricks.com>
Author: Junyang Qian <junyangq@Junyangs-MacBook-Pro.local>

Closes #13820 from junyangq/SPARK-16107.
This commit is contained in:
Junyang Qian 2016-06-22 09:13:08 -07:00 committed by Xiangrui Meng
parent cf1995a976
commit ea3a12b014

View file

@ -53,9 +53,10 @@ setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj"))
#' @note KMeansModel since 2.0.0 #' @note KMeansModel since 2.0.0
setClass("KMeansModel", representation(jobj = "jobj")) setClass("KMeansModel", representation(jobj = "jobj"))
#' Fits a generalized linear model #' Generalized Linear Models
#' #'
#' Fits a generalized linear model against a Spark DataFrame. #' Fits a generalized linear model against a Spark DataFrame. Users can print a summary of the
#' produced model, make predictions with it, and save it to the input path.
#' #'
#' @param data SparkDataFrame for training. #' @param data SparkDataFrame for training.
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula #' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@ -66,8 +67,9 @@ setClass("KMeansModel", representation(jobj = "jobj"))
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
#' @param tol Positive convergence tolerance of iterations. #' @param tol Positive convergence tolerance of iterations.
#' @param maxIter Integer giving the maximal number of IRLS iterations. #' @param maxIter Integer giving the maximal number of IRLS iterations.
#' @return a fitted generalized linear model #' @return \code{spark.glm} returns a fitted generalized linear model
#' @rdname spark.glm #' @rdname spark.glm
#' @name spark.glm
#' @export #' @export
#' @examples #' @examples
#' \dontrun{ #' \dontrun{
@ -76,8 +78,21 @@ setClass("KMeansModel", representation(jobj = "jobj"))
#' df <- createDataFrame(iris) #' df <- createDataFrame(iris)
#' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian") #' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
#' summary(model) #' summary(model)
#'
#' # fitted values on training data
#' fitted <- predict(model, df)
#' head(select(fitted, "Sepal_Length", "prediction"))
#'
#' # save fitted model to input path
#' path <- "path/to/model"
#' write.ml(model, path)
#'
#' # can also read back the saved model and print
#' savedModel <- read.ml(path)
#' summary(savedModel)
#' } #' }
#' @note spark.glm since 2.0.0 #' @note spark.glm since 2.0.0
#' @seealso \link{glm}, \link{read.ml}
setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) { function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) {
if (is.character(family)) { if (is.character(family)) {
@ -99,10 +114,9 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
return(new("GeneralizedLinearRegressionModel", jobj = jobj)) return(new("GeneralizedLinearRegressionModel", jobj = jobj))
}) })
#' Fits a generalized linear model (R-compliant). #' Generalized Linear Models (R-compliant)
#' #'
#' Fits a generalized linear model, similarly to R's glm(). #' Fits a generalized linear model, similarly to R's glm().
#'
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula #' @param formula A symbolic description of the model to be fitted. Currently only a few formula
#' operators are supported, including '~', '.', ':', '+', and '-'. #' operators are supported, including '~', '.', ':', '+', and '-'.
#' @param data SparkDataFrame for training. #' @param data SparkDataFrame for training.
@ -112,7 +126,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
#' @param epsilon Positive convergence tolerance of iterations. #' @param epsilon Positive convergence tolerance of iterations.
#' @param maxit Integer giving the maximal number of IRLS iterations. #' @param maxit Integer giving the maximal number of IRLS iterations.
#' @return a fitted generalized linear model #' @return \code{glm} returns a fitted generalized linear model.
#' @rdname glm #' @rdname glm
#' @export #' @export
#' @examples #' @examples
@ -124,24 +138,21 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
#' summary(model) #' summary(model)
#' } #' }
#' @note glm since 1.5.0 #' @note glm since 1.5.0
#' @seealso \link{spark.glm}
setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"), setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"),
function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25) { function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25) {
spark.glm(data, formula, family, tol = epsilon, maxIter = maxit) spark.glm(data, formula, family, tol = epsilon, maxIter = maxit)
}) })
#' Get the summary of a generalized linear model # Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
#'
#' Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
#' #'
#' @param object A fitted generalized linear model #' @param object A fitted generalized linear model
#' @return coefficients the model's coefficients, intercept #' @return \code{summary} returns a summary object of the fitted model, a list of components
#' @rdname summary #' including at least the coefficients, null/residual deviance, null/residual degrees
#' of freedom, AIC and number of iterations IRLS takes.
#'
#' @rdname spark.glm
#' @export #' @export
#' @examples
#' \dontrun{
#' model <- glm(y ~ x, trainingData)
#' summary(model)
#' }
#' @note summary(GeneralizedLinearRegressionModel) since 2.0.0 #' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
function(object, ...) { function(object, ...) {
@ -173,10 +184,10 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
return(ans) return(ans)
}) })
#' Print the summary of GeneralizedLinearRegressionModel # Prints the summary of GeneralizedLinearRegressionModel
#' #'
#' @rdname print #' @rdname spark.glm
#' @name print.summary.GeneralizedLinearRegressionModel #' @param x Summary object of a fitted generalized linear model returned by the \code{summary} function
#' @export #' @export
#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0 #' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
@ -205,22 +216,13 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
invisible(x) invisible(x)
} }
#' Predicted values based on model # Makes predictions from a generalized linear model produced by glm() or spark.glm(),
# similarly to R's predict().
#' #'
#' Makes predictions from a generalized linear model produced by glm() or spark.glm(),
#' similarly to R's predict().
#'
#' @param object A fitted generalized linear model
#' @param newData SparkDataFrame for testing #' @param newData SparkDataFrame for testing
#' @return SparkDataFrame containing predicted labels in a column named "prediction" #' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named "prediction"
#' @rdname predict #' @rdname spark.glm
#' @export #' @export
#' @examples
#' \dontrun{
#' model <- glm(y ~ x, trainingData)
#' predicted <- predict(model, testData)
#' showDF(predicted)
#' }
#' @note predict(GeneralizedLinearRegressionModel) since 1.5.0 #' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
function(object, newData) { function(object, newData) {
@ -471,24 +473,14 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
invisible(callJMethod(writer, "save", path)) invisible(callJMethod(writer, "save", path))
}) })
#' Save fitted MLlib model to the input path # Saves the generalized linear model to the input path.
#' #'
#' Save the generalized linear model to the input path.
#'
#' @param object A fitted generalized linear model
#' @param path The directory where the model is saved #' @param path The directory where the model is saved
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE #' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists. #' which means throw exception if the output path exists.
#' #'
#' @rdname write.ml #' @rdname spark.glm
#' @name write.ml
#' @export #' @export
#' @examples
#' \dontrun{
#' model <- glm(y ~ x, trainingData)
#' path <- "path/to/model"
#' write.ml(model, path)
#' }
#' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0 #' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0
setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"), setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"),
function(object, path, overwrite = FALSE) { function(object, path, overwrite = FALSE) {