2f5cbd860e
Add `crosstab` to SparkR DataFrames, which takes two column names and returns a local R data.frame. This is similar to `table` in R. However, `table` in SparkR is already used for loading SQL tables as DataFrames. The return type is data.frame instead of table so that `crosstab` stays compatible with the Scala/Python APIs.

I couldn't run the R tests successfully on my local machine; many unit tests failed, so let's try Jenkins.

Author: Xiangrui Meng <meng@databricks.com>

Closes #7318 from mengxr/SPARK-8364 and squashes the following commits:

d75e894 [Xiangrui Meng] fix tests
53f6ddd [Xiangrui Meng] fix tests
f1348d6 [Xiangrui Meng] update test
47cb088 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-8364
5621262 [Xiangrui Meng] first version without test
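A minimal usage sketch of the new method, assuming a SparkR session with an initialized `sqlContext`; the column names `key` and `value` and the sample data are hypothetical:

```r
# Hypothetical example data: build a small DataFrame with two columns.
df <- createDataFrame(sqlContext, data.frame(key = c("a", "a", "b", "b", "b"),
                                             value = c("x", "y", "x", "x", "y")))

# crosstab computes a contingency table of the two columns and collects it
# as a local R data.frame, with levels of `key` as rows and levels of
# `value` as columns.
ct <- crosstab(df, "key", "value")
print(ct)
```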
159 lines | 3.5 KiB | Plaintext
# Imports from base R
importFrom(methods, setGeneric, setMethod, setOldClass)

# Disable native libraries till we figure out how to package it
# See SPARKR-7839
#useDynLib(SparkR, stringHashCode)

# S3 methods exported
export("sparkR.init")
export("sparkR.stop")
export("print.jobj")

# MLlib integration
exportMethods("glm",
              "predict")

# Job group lifecycle management methods
export("setJobGroup",
       "clearJobGroup",
       "cancelJobGroup")

exportClasses("DataFrame")

exportMethods("arrange",
              "cache",
              "collect",
              "columns",
              "count",
              "crosstab",
              "describe",
              "distinct",
              "dropna",
              "dtypes",
              "except",
              "explain",
              "fillna",
              "filter",
              "first",
              "group_by",
              "groupBy",
              "head",
              "insertInto",
              "intersect",
              "isLocal",
              "join",
              "limit",
              "orderBy",
              "mutate",
              "names",
              "persist",
              "printSchema",
              "registerTempTable",
              "rename",
              "repartition",
              "sample",
              "sample_frac",
              "saveAsParquetFile",
              "saveAsTable",
              "saveDF",
              "schema",
              "select",
              "selectExpr",
              "show",
              "showDF",
              "summarize",
              "take",
              "unionAll",
              "unpersist",
              "where",
              "withColumn",
              "withColumnRenamed",
              "write.df")

exportClasses("Column")

exportMethods("abs",
              "acos",
              "alias",
              "approxCountDistinct",
              "asc",
              "asin",
              "atan",
              "atan2",
              "avg",
              "between",
              "cast",
              "cbrt",
              "ceiling",
              "contains",
              "cos",
              "cosh",
              "countDistinct",
              "desc",
              "endsWith",
              "exp",
              "expm1",
              "floor",
              "getField",
              "getItem",
              "hypot",
              "isNotNull",
              "isNull",
              "last",
              "like",
              "log",
              "log10",
              "log1p",
              "lower",
              "max",
              "mean",
              "min",
              "n",
              "n_distinct",
              "rint",
              "rlike",
              "sign",
              "sin",
              "sinh",
              "sqrt",
              "startsWith",
              "substr",
              "sum",
              "sumDistinct",
              "tan",
              "tanh",
              "toDegrees",
              "toRadians",
              "upper")

exportClasses("GroupedData")
exportMethods("agg")

export("sparkRSQL.init",
       "sparkRHive.init")

export("cacheTable",
       "clearCache",
       "createDataFrame",
       "createExternalTable",
       "dropTempTable",
       "jsonFile",
       "loadDF",
       "parquetFile",
       "read.df",
       "sql",
       "table",
       "tableNames",
       "tables",
       "uncacheTable")

export("structField",
       "structField.jobj",
       "structField.character",
       "print.structField",
       "structType",
       "structType.jobj",
       "structType.structField",
       "print.structType")