From 07ee73234f1d1ecd1e5edcce3bc510c59a59cb00 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 19 Mar 2021 10:19:26 +0900 Subject: [PATCH] [SPARK-34747][SQL][DOCS] Add virtual operators to the built-in function document ### What changes were proposed in this pull request? This PR fixes an issue where the virtual operators (`||`, `!=`, `<>`, `between` and `case`) are absent from the Spark SQL Built-in functions document. ### Why are the changes needed? The document should describe all the supported built-in operators. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Built the document with `SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_PYTHONDOC=1 bundler exec jekyll build` and then confirmed the generated document. ![neq1](https://user-images.githubusercontent.com/4736016/111192859-e2e76380-85fc-11eb-89c9-75916a5e856a.png) ![neq2](https://user-images.githubusercontent.com/4736016/111192874-e7ac1780-85fc-11eb-9a9b-c504265b373f.png) ![between](https://user-images.githubusercontent.com/4736016/111192898-eda1f880-85fc-11eb-992d-cf80c544ec27.png) ![case](https://user-images.githubusercontent.com/4736016/111192918-f266ac80-85fc-11eb-9306-5dbc413a0cdb.png) ![double_pipe](https://user-images.githubusercontent.com/4736016/111192952-fb577e00-85fc-11eb-932e-385e5c2a5205.png) Closes #31841 from sarutak/builtin-op-doc.
Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .../sql/execution/command/functions.scala | 6 +- sql/gen-sql-api-docs.py | 102 +++++++++++++++++- 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index 25c88d6e63..af5ba4839e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -229,8 +229,10 @@ case class ShowFunctionsCommand( case (f, "USER") if showUserFunctions => f.unquotedString case (f, "SYSTEM") if showSystemFunctions => f.unquotedString } - // Hard code "<>", "!=", "between", and "case" for now as there is no corresponding functions. - // "<>", "!=", "between", and "case" is SystemFunctions, only show when showSystemFunctions=true + // Hard code "<>", "!=", "between", "case", and "||" + // for now as there are no corresponding functions.
+ // "<>", "!=", "between", "case", and "||" is SystemFunctions, + // only show when showSystemFunctions=true if (showSystemFunctions) { (functionNames ++ StringUtils.filterPattern(FunctionsCommand.virtualOperators, pattern.getOrElse("*"))) diff --git a/sql/gen-sql-api-docs.py b/sql/gen-sql-api-docs.py index 2f734093b1..17631a7352 100644 --- a/sql/gen-sql-api-docs.py +++ b/sql/gen-sql-api-docs.py @@ -24,6 +24,106 @@ from pyspark.java_gateway import launch_gateway ExpressionInfo = namedtuple( "ExpressionInfo", "className name usage arguments examples note since deprecated") +_virtual_operator_infos = [ + ExpressionInfo( + className="", + name="!=", + usage="expr1 != expr2 - Returns true if `expr1` is not equal to `expr2`, " + + "or false otherwise.", + arguments="\n Arguments:\n " + + """* expr1, expr2 - the two expressions must be same type or can be casted to + a common type, and must be a type that can be used in equality comparison. + Map type is not supported. For complex types such array/struct, + the data types of fields must be orderable.""", + examples="\n Examples:\n " + + "> SELECT 1 != 2;\n " + + " true\n " + + "> SELECT 1 != '2';\n " + + " true\n " + + "> SELECT true != NULL;\n " + + " NULL\n " + + "> SELECT NULL != NULL;\n " + + " NULL", + note="", + since="1.0.0", + deprecated=""), + ExpressionInfo( + className="", + name="<>", + usage="expr1 != expr2 - Returns true if `expr1` is not equal to `expr2`, " + + "or false otherwise.", + arguments="\n Arguments:\n " + + """* expr1, expr2 - the two expressions must be same type or can be casted to + a common type, and must be a type that can be used in equality comparison. + Map type is not supported. 
For complex types such array/struct, + the data types of fields must be orderable.""", + examples="\n Examples:\n " + + "> SELECT 1 != 2;\n " + + " true\n " + + "> SELECT 1 != '2';\n " + + " true\n " + + "> SELECT true != NULL;\n " + + " NULL\n " + + "> SELECT NULL != NULL;\n " + + " NULL", + note="", + since="1.0.0", + deprecated=""), + ExpressionInfo( + className="", + name="between", + usage="expr1 [NOT] BETWEEN expr2 AND expr3 - " + + "evaluate if `expr1` is [not] in between `expr2` and `expr3`.", + arguments="", + examples="\n Examples:\n " + + "> SELECT col1 FROM VALUES 1, 3, 5, 7 WHERE col1 BETWEEN 2 AND 5;\n " + + " 3\n " + + " 5", + note="", + since="1.0.0", + deprecated=""), + ExpressionInfo( + className="", + name="case", + usage="CASE expr1 WHEN expr2 THEN expr3 " + + "[WHEN expr4 THEN expr5]* [ELSE expr6] END - " + + "When `expr1` = `expr2`, returns `expr3`; " + + "when `expr1` = `expr4`, return `expr5`; else return `expr6`.", + arguments="\n Arguments:\n " + + "* expr1 - the expression which is one operand of comparison.\n " + + "* expr2, expr4 - the expressions each of which is the other " + + " operand of comparison.\n " + + "* expr3, expr5, expr6 - the branch value expressions and else value expression" + + " should all be same type or coercible to a common type.", + examples="\n Examples:\n " + + "> SELECT CASE col1 WHEN 1 THEN 'one' " + + "WHEN 2 THEN 'two' ELSE '?' 
END FROM VALUES 1, 2, 3;\n " + + " one\n " + + " two\n " + + " ?\n " + + "> SELECT CASE col1 WHEN 1 THEN 'one' " + + "WHEN 2 THEN 'two' END FROM VALUES 1, 2, 3;\n " + + " one\n " + + " two\n " + + " NULL", + note="", + since="1.0.1", + deprecated=""), + ExpressionInfo( + className="", + name="||", + usage="expr1 || expr2 - Returns the concatenation of `expr1` and `expr2`.", + arguments="", + examples="\n Examples:\n " + + "> SELECT 'Spark' || 'SQL';\n " + + " SparkSQL\n " + + "> SELECT array(1, 2, 3) || array(4, 5) || array(6);\n " + + " [1,2,3,4,5,6]", + note="\n || for arrays is available since 2.4.0.\n", + since="2.3.0", + deprecated="") +] + def _list_function_infos(jvm): """ @@ -32,7 +132,7 @@ def _list_function_infos(jvm): """ jinfos = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listBuiltinFunctionInfos() - infos = [] + infos = _virtual_operator_infos for jinfo in jinfos: name = jinfo.getName() usage = jinfo.getUsage()