spark-instrumented-optimizer/R/pkg/NAMESPACE
zero323 e83d03ca48 [SPARK-33040][R][ML] Add SparkR wrapper for vector_to_array
### What changes were proposed in this pull request?

Add SparkR wrapper for `o.a.s.ml.functions.vector_to_array`

### Why are the changes needed?

- Currently, ML vectors, including predictions, are almost inaccessible to R users. That is a serious loss of functionality.
- Feature parity.

### Does this PR introduce _any_ user-facing change?

Yes, a new R function, `vector_to_array`, is added.
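
A minimal usage sketch of the new function (the dataset path is the standard Spark example data and is assumed to be available; the `features` column name comes from the `libsvm` data source, and the `features_arr` alias is just for illustration):

```r
library(SparkR)
sparkR.session()

# Sketch only: the example LIBSVM dataset yields an ML vector column named "features".
df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm")

# vector_to_array exposes the ML vector as a plain array column that R can collect.
head(select(df, alias(vector_to_array(df$features, dtype = "float32"), "features_arr")))
```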

### How was this patch tested?

- New unit tests.
- Manual verification.
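
For illustration, a test in the spirit of the new ones might look like the following testthat sketch (the data path, column alias, and expected vector length are assumptions, not the exact test added by this PR):

```r
# Assumes library(SparkR), library(testthat), and an active sparkR.session().
test_that("SPARK-33040: vector_to_array returns a collectable array column", {
  df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm")
  res <- collect(select(limit(df, 1),
                        alias(vector_to_array(column("features")), "features_arr")))
  # sample_libsvm_data.txt has 692 features per observation.
  expect_equal(length(res$features_arr[[1]]), 692)
})
```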

Closes #29917 from zero323/SPARK-33040.

Authored-by: zero323 <mszymkiewicz@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
2020-10-05 13:18:12 +09:00

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Imports from base R
# Do not include stats:: "rpois", "runif" - causes error at runtime
importFrom("methods", "setGeneric", "setMethod", "setOldClass")
importFrom("methods", "is", "new", "signature", "show")
importFrom("stats", "gaussian", "setNames")
importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "untar")
# Disable native libraries till we figure out how to package it
# See SPARKR-7839
#useDynLib(SparkR, stringHashCode)
# S3 methods exported
export("sparkR.session")
export("sparkR.init")
export("sparkR.session.stop")
export("sparkR.stop")
export("sparkR.conf")
export("sparkR.version")
export("sparkR.uiWebUrl")
export("print.jobj")
export("sparkR.newJObject")
export("sparkR.callJMethod")
export("sparkR.callJStatic")
export("install.spark")
export("sparkRSQL.init",
"sparkRHive.init")
# MLlib integration
exportMethods("glm",
"spark.glm",
"predict",
"summary",
"spark.kmeans",
"fitted",
"spark.mlp",
"spark.naiveBayes",
"spark.survreg",
"spark.lda",
"spark.posterior",
"spark.perplexity",
"spark.isoreg",
"spark.gaussianMixture",
"spark.als",
"spark.kstest",
"spark.logit",
"spark.decisionTree",
"spark.randomForest",
"spark.gbt",
"spark.bisectingKmeans",
"spark.svmLinear",
"spark.fpGrowth",
"spark.freqItemsets",
"spark.associationRules",
"spark.findFrequentSequentialPatterns",
"spark.assignClusters",
"spark.fmClassifier",
"spark.lm",
"spark.fmRegressor")
# Job group lifecycle management methods
export("setJobGroup",
"clearJobGroup",
"cancelJobGroup",
"setJobDescription",
"setLocalProperty",
"getLocalProperty")
# Export Utility methods
export("setLogLevel")
exportClasses("SparkDataFrame")
exportMethods("arrange",
"as.data.frame",
"attach",
"broadcast",
"cache",
"checkpoint",
"coalesce",
"collect",
"colnames",
"colnames<-",
"coltypes",
"coltypes<-",
"columns",
"count",
"cov",
"corr",
"covar_samp",
"covar_pop",
"createOrReplaceTempView",
"crossJoin",
"crosstab",
"cube",
"dapply",
"dapplyCollect",
"describe",
"dim",
"distinct",
"drop",
"dropDuplicates",
"dropna",
"dtypes",
"except",
"exceptAll",
"explain",
"fillna",
"filter",
"first",
"freqItems",
"gapply",
"gapplyCollect",
"getNumPartitions",
"group_by",
"groupBy",
"head",
"hint",
"insertInto",
"intersect",
"intersectAll",
"isLocal",
"isStreaming",
"join",
"limit",
"localCheckpoint",
"merge",
"mutate",
"na.omit",
"names",
"names<-",
"ncol",
"nrow",
"orderBy",
"persist",
"printSchema",
"randomSplit",
"rbind",
"registerTempTable",
"rename",
"repartition",
"repartitionByRange",
"rollup",
"sample",
"sample_frac",
"sampleBy",
"saveAsTable",
"saveDF",
"schema",
"select",
"selectExpr",
"show",
"showDF",
"storageLevel",
"subset",
"summarize",
"summary",
"take",
"toJSON",
"transform",
"union",
"unionAll",
"unionByName",
"unique",
"unpersist",
"where",
"with",
"withColumn",
"withColumnRenamed",
"withWatermark",
"write.df",
"write.jdbc",
"write.json",
"write.orc",
"write.parquet",
"write.stream",
"write.text",
"write.ml")
exportClasses("Column")
exportMethods("%<=>%",
"%in%",
"abs",
"acos",
"add_months",
"alias",
"approx_count_distinct",
"approxCountDistinct",
"approxQuantile",
"array_aggregate",
"array_contains",
"array_distinct",
"array_except",
"array_exists",
"array_filter",
"array_forall",
"array_intersect",
"array_join",
"array_max",
"array_min",
"array_position",
"array_remove",
"array_repeat",
"array_sort",
"array_transform",
"arrays_overlap",
"array_union",
"arrays_zip",
"arrays_zip_with",
"asc",
"ascii",
"asin",
"atan",
"atan2",
"avg",
"base64",
"between",
"bin",
"bitwiseNOT",
"bround",
"cast",
"cbrt",
"ceil",
"ceiling",
"collect_list",
"collect_set",
"column",
"concat",
"concat_ws",
"contains",
"conv",
"cos",
"cosh",
"count",
"countDistinct",
"crc32",
"create_array",
"create_map",
"current_date",
"current_timestamp",
"hash",
"cume_dist",
"date_add",
"date_format",
"date_sub",
"date_trunc",
"datediff",
"dayofmonth",
"dayofweek",
"dayofyear",
"decode",
"degrees",
"dense_rank",
"desc",
"element_at",
"encode",
"endsWith",
"exp",
"explode",
"explode_outer",
"expm1",
"expr",
"factorial",
"first",
"flatten",
"floor",
"format_number",
"format_string",
"from_csv",
"from_json",
"from_unixtime",
"from_utc_timestamp",
"getField",
"getItem",
"greatest",
"grouping_bit",
"grouping_id",
"hex",
"histogram",
"hour",
"hypot",
"ifelse",
"initcap",
"input_file_name",
"instr",
"isNaN",
"isNotNull",
"isNull",
"is.nan",
"isnan",
"kurtosis",
"lag",
"last",
"last_day",
"lead",
"least",
"length",
"levenshtein",
"like",
"lit",
"locate",
"log",
"log10",
"log1p",
"log2",
"lower",
"lpad",
"ltrim",
"map_concat",
"map_entries",
"map_filter",
"map_from_arrays",
"map_from_entries",
"map_keys",
"map_values",
"map_zip_with",
"max",
"md5",
"mean",
"min",
"minute",
"monotonically_increasing_id",
"month",
"months_between",
"n",
"n_distinct",
"nanvl",
"negate",
"next_day",
"not",
"nth_value",
"ntile",
"otherwise",
"over",
"overlay",
"percent_rank",
"percentile_approx",
"pmod",
"posexplode",
"posexplode_outer",
"quarter",
"radians",
"rand",
"randn",
"rank",
"regexp_extract",
"regexp_replace",
"repeat_string",
"reverse",
"rint",
"rlike",
"round",
"row_number",
"rpad",
"rtrim",
"schema_of_csv",
"schema_of_json",
"second",
"sha1",
"sha2",
"shiftLeft",
"shiftRight",
"shiftRightUnsigned",
"shuffle",
"sd",
"sign",
"signum",
"sin",
"sinh",
"size",
"skewness",
"slice",
"sort_array",
"soundex",
"spark_partition_id",
"split_string",
"stddev",
"stddev_pop",
"stddev_samp",
"struct",
"sqrt",
"startsWith",
"substr",
"substring_index",
"sum",
"sumDistinct",
"tan",
"tanh",
"timestamp_seconds",
"toDegrees",
"toRadians",
"to_csv",
"to_date",
"to_json",
"to_timestamp",
"to_utc_timestamp",
"translate",
"transform_keys",
"transform_values",
"trim",
"trunc",
"unbase64",
"unhex",
"unix_timestamp",
"upper",
"var",
"variance",
"var_pop",
"var_samp",
"vector_to_array",
"weekofyear",
"when",
"window",
"withField",
"xxhash64",
"year")
exportClasses("GroupedData")
exportMethods("agg")
exportMethods("pivot")
export("as.DataFrame",
"cacheTable",
"clearCache",
"createDataFrame",
"createExternalTable",
"createTable",
"currentDatabase",
"dropTempTable",
"dropTempView",
"listColumns",
"listDatabases",
"listFunctions",
"listTables",
"loadDF",
"read.df",
"read.jdbc",
"read.json",
"read.orc",
"read.parquet",
"read.stream",
"read.text",
"recoverPartitions",
"refreshByPath",
"refreshTable",
"setCheckpointDir",
"setCurrentDatabase",
"spark.lapply",
"spark.addFile",
"spark.getSparkFilesRootDirectory",
"spark.getSparkFiles",
"sql",
"str",
"tableToDF",
"tableNames",
"tables",
"uncacheTable",
"print.summary.GeneralizedLinearRegressionModel",
"read.ml",
"print.summary.KSTest",
"print.summary.DecisionTreeRegressionModel",
"print.summary.DecisionTreeClassificationModel",
"print.summary.RandomForestRegressionModel",
"print.summary.RandomForestClassificationModel",
"print.summary.GBTRegressionModel",
"print.summary.GBTClassificationModel")
export("structField",
"structField.jobj",
"structField.character",
"print.structField",
"structType",
"structType.character",
"structType.jobj",
"structType.structField",
"print.structType")
exportClasses("WindowSpec")
export("partitionBy",
"rowsBetween",
"rangeBetween")
export("windowPartitionBy",
"windowOrderBy")
exportClasses("StreamingQuery")
export("awaitTermination",
"isActive",
"lastProgress",
"queryName",
"status",
"stopQuery")
S3method(print, jobj)
S3method(print, structField)
S3method(print, structType)
S3method(print, summary.GeneralizedLinearRegressionModel)
S3method(print, summary.KSTest)
S3method(print, summary.DecisionTreeRegressionModel)
S3method(print, summary.DecisionTreeClassificationModel)
S3method(print, summary.RandomForestRegressionModel)
S3method(print, summary.RandomForestClassificationModel)
S3method(print, summary.GBTRegressionModel)
S3method(print, summary.GBTClassificationModel)
S3method(structField, character)
S3method(structField, jobj)
S3method(structType, character)
S3method(structType, jobj)
S3method(structType, structField)