[SPARK-34747][SQL][DOCS] Add virtual operators to the built-in function document

### What changes were proposed in this pull request?

This PR fixes an issue where the virtual operators (`||`, `!=`, `<>`, `between` and `case`) are absent from the Spark SQL built-in functions document.

### Why are the changes needed?

The document should describe all of the supported built-in operators.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Built the document with `SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_PYTHONDOC=1 bundler exec jekyll build` and then confirmed the generated document.

![neq1](https://user-images.githubusercontent.com/4736016/111192859-e2e76380-85fc-11eb-89c9-75916a5e856a.png)
![neq2](https://user-images.githubusercontent.com/4736016/111192874-e7ac1780-85fc-11eb-9a9b-c504265b373f.png)
![between](https://user-images.githubusercontent.com/4736016/111192898-eda1f880-85fc-11eb-992d-cf80c544ec27.png)
![case](https://user-images.githubusercontent.com/4736016/111192918-f266ac80-85fc-11eb-9306-5dbc413a0cdb.png)
![double_pipe](https://user-images.githubusercontent.com/4736016/111192952-fb577e00-85fc-11eb-932e-385e5c2a5205.png)

Closes #31841 from sarutak/builtin-op-doc.

Authored-by: Kousuke Saruta <sarutak@oss.nttdata.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Kousuke Saruta 2021-03-19 10:19:26 +09:00 committed by HyukjinKwon
parent 8207e2f65c
commit 07ee73234f
2 changed files with 105 additions and 3 deletions

View file

@ -229,8 +229,10 @@ case class ShowFunctionsCommand(
case (f, "USER") if showUserFunctions => f.unquotedString
case (f, "SYSTEM") if showSystemFunctions => f.unquotedString
}
// Hard code "<>", "!=", "between", and "case" for now as there is no corresponding functions.
// "<>", "!=", "between", and "case" is SystemFunctions, only show when showSystemFunctions=true
// Hard code "<>", "!=", "between", "case", and "||"
// for now as there are no corresponding functions.
// "<>", "!=", "between", "case", and "||" are SystemFunctions,
// only show them when showSystemFunctions=true
if (showSystemFunctions) {
(functionNames ++
StringUtils.filterPattern(FunctionsCommand.virtualOperators, pattern.getOrElse("*")))

View file

@ -24,6 +24,106 @@ from pyspark.java_gateway import launch_gateway
# Record describing one documented SQL function or operator. All fields are
# plain strings; an empty string means "nothing to render for this section".
ExpressionInfo = namedtuple(
    "ExpressionInfo", "className name usage arguments examples note since deprecated")

# Hand-written documentation entries for the "virtual" operators: "!=", "<>",
# "between", "case" and "||". They are hard-coded here with an empty
# ``className``, and are used as the initial contents of the info list built by
# ``_list_function_infos``.
#
# NOTE: each entry's ``usage`` and ``examples`` must show the operator named in
# ``name``. The "<>" entry previously documented "!=" due to copy-paste.
_virtual_operator_infos = [
    ExpressionInfo(
        className="",
        name="!=",
        usage="expr1 != expr2 - Returns true if `expr1` is not equal to `expr2`, " +
              "or false otherwise.",
        arguments="\n Arguments:\n " +
                  "* expr1, expr2 - the two expressions must be same type or can be casted to\n" +
                  "a common type, and must be a type that can be used in equality comparison.\n" +
                  "Map type is not supported. For complex types such array/struct,\n" +
                  "the data types of fields must be orderable.",
        examples="\n Examples:\n " +
                 "> SELECT 1 != 2;\n " +
                 " true\n " +
                 "> SELECT 1 != '2';\n " +
                 " true\n " +
                 "> SELECT true != NULL;\n " +
                 " NULL\n " +
                 "> SELECT NULL != NULL;\n " +
                 " NULL",
        note="",
        since="1.0.0",
        deprecated=""),
    ExpressionInfo(
        className="",
        name="<>",
        # Same semantics as "!=", but documented with the "<>" spelling so the
        # rendered section matches its heading.
        usage="expr1 <> expr2 - Returns true if `expr1` is not equal to `expr2`, " +
              "or false otherwise.",
        arguments="\n Arguments:\n " +
                  "* expr1, expr2 - the two expressions must be same type or can be casted to\n" +
                  "a common type, and must be a type that can be used in equality comparison.\n" +
                  "Map type is not supported. For complex types such array/struct,\n" +
                  "the data types of fields must be orderable.",
        examples="\n Examples:\n " +
                 "> SELECT 1 <> 2;\n " +
                 " true\n " +
                 "> SELECT 1 <> '2';\n " +
                 " true\n " +
                 "> SELECT true <> NULL;\n " +
                 " NULL\n " +
                 "> SELECT NULL <> NULL;\n " +
                 " NULL",
        note="",
        since="1.0.0",
        deprecated=""),
    ExpressionInfo(
        className="",
        name="between",
        usage="expr1 [NOT] BETWEEN expr2 AND expr3 - " +
              "evaluate if `expr1` is [not] in between `expr2` and `expr3`.",
        arguments="",
        examples="\n Examples:\n " +
                 "> SELECT col1 FROM VALUES 1, 3, 5, 7 WHERE col1 BETWEEN 2 AND 5;\n " +
                 " 3\n " +
                 " 5",
        note="",
        since="1.0.0",
        deprecated=""),
    ExpressionInfo(
        className="",
        name="case",
        usage="CASE expr1 WHEN expr2 THEN expr3 " +
              "[WHEN expr4 THEN expr5]* [ELSE expr6] END - " +
              "When `expr1` = `expr2`, returns `expr3`; " +
              "when `expr1` = `expr4`, return `expr5`; else return `expr6`.",
        arguments="\n Arguments:\n " +
                  "* expr1 - the expression which is one operand of comparison.\n " +
                  "* expr2, expr4 - the expressions each of which is the other " +
                  " operand of comparison.\n " +
                  "* expr3, expr5, expr6 - the branch value expressions and else value expression" +
                  " should all be same type or coercible to a common type.",
        examples="\n Examples:\n " +
                 "> SELECT CASE col1 WHEN 1 THEN 'one' " +
                 "WHEN 2 THEN 'two' ELSE '?' END FROM VALUES 1, 2, 3;\n " +
                 " one\n " +
                 " two\n " +
                 " ?\n " +
                 "> SELECT CASE col1 WHEN 1 THEN 'one' " +
                 "WHEN 2 THEN 'two' END FROM VALUES 1, 2, 3;\n " +
                 " one\n " +
                 " two\n " +
                 " NULL",
        note="",
        since="1.0.1",
        deprecated=""),
    ExpressionInfo(
        className="",
        name="||",
        usage="expr1 || expr2 - Returns the concatenation of `expr1` and `expr2`.",
        arguments="",
        examples="\n Examples:\n " +
                 "> SELECT 'Spark' || 'SQL';\n " +
                 " SparkSQL\n " +
                 "> SELECT array(1, 2, 3) || array(4, 5) || array(6);\n " +
                 " [1,2,3,4,5,6]",
        note="\n || for arrays is available since 2.4.0.\n",
        since="2.3.0",
        deprecated="")
]
def _list_function_infos(jvm):
"""
@ -32,7 +132,7 @@ def _list_function_infos(jvm):
"""
jinfos = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listBuiltinFunctionInfos()
infos = []
infos = _virtual_operator_infos
for jinfo in jinfos:
name = jinfo.getName()
usage = jinfo.getUsage()