1cbdd89918
This exposes the SparkR:::glm() and SparkR:::predict() APIs. It was necessary to change RFormula to silently drop the label column if it was missing from the input dataset, which is kind of a hack but necessary to integrate with the Pipeline API. The umbrella design doc for MLlib + SparkR integration can be viewed here: https://docs.google.com/document/d/10NZNSEurN2EdWM31uFYsgayIPfCFHiuIu3pCWrUmP_c/edit mengxr Author: Eric Liang <ekl@databricks.com> Closes #7483 from ericl/spark-8774 and squashes the following commits: 3dfac0c [Eric Liang] update 17ef516 [Eric Liang] more comments 1753a0f [Eric Liang] make glm generic b0f50f8 [Eric Liang] equivalence test 550d56d [Eric Liang] export methods c015697 [Eric Liang] second pass 117949a [Eric Liang] comments 5afbc67 [Eric Liang] test label columns 6b7f15f [Eric Liang] Fri Jul 17 14:20:22 PDT 2015 3a63ae5 [Eric Liang] Fri Jul 17 13:41:52 PDT 2015 ce61367 [Eric Liang] Fri Jul 17 13:41:17 PDT 2015 0299c59 [Eric Liang] Fri Jul 17 13:40:32 PDT 2015 e37603f [Eric Liang] Fri Jul 17 12:15:03 PDT 2015 d417d0c [Eric Liang] Merge remote-tracking branch 'upstream/master' into spark-8774 29a2ce7 [Eric Liang] Merge branch 'spark-8774-1' into spark-8774 d1959d2 [Eric Liang] clarify comment 2db68aa [Eric Liang] second round of comments dc3c943 [Eric Liang] address comments 5765ec6 [Eric Liang] fix style checks 1f361b0 [Eric Liang] doc d33211b [Eric Liang] r support fb0826b [Eric Liang] [SPARK-8774] Add R model formula with basic support as a transformer
158 lines
3.5 KiB
Plaintext
158 lines
3.5 KiB
Plaintext
# Imports from base R
|
|
importFrom(methods, setGeneric, setMethod, setOldClass)
|
|
|
|
# Disable native libraries until we figure out how to package them
|
|
# See SPARKR-7839
|
|
#useDynLib(SparkR, stringHashCode)
|
|
|
|
# Session entry points. Despite the dots in their names these are exported as
# ordinary functions: no `sparkR` generic is exported from this file.
export("sparkR.init")
export("sparkR.stop")

# print.jobj follows the S3 <generic>.<class> naming for the `print` generic.
# The export is kept so existing callers that reference the full name still
# work; the S3method() registration additionally lets print() dispatch on
# "jobj" objects when the namespace is loaded but not attached.
export("print.jobj")
S3method(print, jobj)
|
|
|
|
# MLlib integration: S4 methods exported for model fitting and prediction.
exportMethods("glm", "predict")
|
|
|
|
# Functions for managing the job group lifecycle (set / clear / cancel).
export("setJobGroup", "clearJobGroup", "cancelJobGroup")
|
|
|
|
# DataFrame: the exported S4 class, followed by the S4 methods exported
# directly after it. Names are packed several per line, grouped by initial
# letter and kept alphabetical; the set of exported methods is unchanged.
exportClasses("DataFrame")

exportMethods(
  "arrange",
  "cache", "collect", "columns", "count",
  "describe", "distinct", "dropna", "dtypes",
  "except", "explain",
  "fillna", "filter", "first",
  "group_by", "groupBy",
  "head",
  "insertInto", "intersect", "isLocal",
  "join",
  "limit",
  "mutate",
  "names",
  "orderBy",
  "persist", "printSchema",
  "registerTempTable", "rename", "repartition",
  "sample", "sample_frac", "saveAsParquetFile", "saveAsTable", "saveDF",
  "schema", "select", "selectExpr", "show", "showDF", "summarize",
  "take",
  "unionAll", "unpersist",
  "where", "withColumn", "withColumnRenamed", "write.df"
)
|
|
|
|
# Column: the exported S4 class, followed by the S4 methods exported directly
# after it. Names are packed several per line, grouped by initial letter; the
# set of exported methods is unchanged.
exportClasses("Column")

exportMethods(
  "abs", "acos", "alias", "approxCountDistinct", "asc", "asin", "atan",
  "atan2", "avg",
  "between",
  "cast", "cbrt", "ceiling", "contains", "cos", "cosh", "countDistinct",
  "desc",
  "endsWith", "exp", "expm1",
  "floor",
  "getField", "getItem",
  "hypot",
  "isNotNull", "isNull",
  "last", "like", "log", "log10", "log1p", "lower",
  "max", "mean", "min",
  "n", "n_distinct",
  "rint", "rlike",
  "sign", "sin", "sinh", "sqrt", "startsWith", "substr", "sum", "sumDistinct",
  "tan", "tanh", "toDegrees", "toRadians",
  "upper"
)
|
|
|
|
# GroupedData: the exported S4 class and the S4 method exported right after it.
exportClasses(
  "GroupedData"
)
exportMethods(
  "agg"
)
|
|
|
|
# Initialisers exported for the SQL and Hive entry points.
export("sparkRSQL.init", "sparkRHive.init")
|
|
|
|
# Plain (non-S4) functions exported for working with tables, caches and
# data sources. Same names as before, packed several per line.
export(
  "cacheTable", "clearCache", "createDataFrame", "createExternalTable",
  "dropTempTable",
  "jsonFile",
  "loadDF",
  "parquetFile",
  "read.df",
  "sql",
  "table", "tableNames", "tables",
  "uncacheTable"
)
|
|
|
|
# Schema helpers: structField / structType, their class-specific variants and
# the matching print methods.
#
# The dotted names follow the S3 <generic>.<class> convention. They remain
# exported so callers that reference the full names keep working, and each one
# is additionally registered with S3method() so that dispatch also works when
# the namespace is loaded but not attached.
# NOTE(review): assumes structField and structType are S3 generics defined in
# this package (they are exported here) - confirm against their definitions.
export("structField",
       "structField.jobj",
       "structField.character",
       "print.structField",
       "structType",
       "structType.jobj",
       "structType.structField",
       "print.structType")

S3method(structField, jobj)
S3method(structField, character)
S3method(print, structField)
S3method(structType, jobj)
S3method(structType, structField)
S3method(print, structType)
|