2017-07-26 12:38:51 -04:00
|
|
|
#
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
|
|
# this work for additional information regarding copyright ownership.
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
# (the "License"); you may not use this file except in compliance with
|
|
|
|
# the License. You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
#
|
|
|
|
|
|
|
|
import os
|
|
|
|
from collections import namedtuple
|
|
|
|
|
2020-02-09 05:20:47 -05:00
|
|
|
from pyspark.java_gateway import launch_gateway
|
|
|
|
|
2020-04-20 21:55:13 -04:00
|
|
|
|
2017-08-05 13:10:56 -04:00
|
|
|
# Immutable record holding the documentation fields of one SQL expression/function.
ExpressionInfo = namedtuple(
    "ExpressionInfo",
    [
        "className",
        "name",
        "usage",
        "arguments",
        "examples",
        "note",
        "since",
        "deprecated",
    ],
)
|
2017-07-26 12:38:51 -04:00
|
|
|
|
2021-03-18 21:19:26 -04:00
|
|
|
# Hand-written documentation entries for SQL operators and syntax forms
# (`!=`, `<>`, `between`, `case`, `||`). Every entry has an empty `className`.
# `_list_function_infos` uses this list as the starting point and adds the
# function infos obtained from the JVM to it.
#
# NOTE(review): the string contents below are whitespace-sensitive: the
# `_make_pretty_*` helpers match on the "\n Arguments:" / "\n Examples:"
# prefixes and slice a fixed number of leading characters off each line.
_virtual_operator_infos = [
    # Inequality operator, `!=` spelling.
    ExpressionInfo(
        className="",
        name="!=",
        usage="expr1 != expr2 - Returns true if `expr1` is not equal to `expr2`, " +
              "or false otherwise.",
        arguments="\n Arguments:\n " +
                  """* expr1, expr2 - the two expressions must be same type or can be casted to
a common type, and must be a type that can be used in equality comparison.
Map type is not supported. For complex types such array/struct,
the data types of fields must be orderable.""",
        examples="\n Examples:\n " +
                 "> SELECT 1 != 2;\n " +
                 " true\n " +
                 "> SELECT 1 != '2';\n " +
                 " true\n " +
                 "> SELECT true != NULL;\n " +
                 " NULL\n " +
                 "> SELECT NULL != NULL;\n " +
                 " NULL",
        note="",
        since="1.0.0",
        deprecated=""),
    # Inequality operator, `<>` spelling; usage and examples are written with `!=`.
    ExpressionInfo(
        className="",
        name="<>",
        usage="expr1 != expr2 - Returns true if `expr1` is not equal to `expr2`, " +
              "or false otherwise.",
        arguments="\n Arguments:\n " +
                  """* expr1, expr2 - the two expressions must be same type or can be casted to
a common type, and must be a type that can be used in equality comparison.
Map type is not supported. For complex types such array/struct,
the data types of fields must be orderable.""",
        examples="\n Examples:\n " +
                 "> SELECT 1 != 2;\n " +
                 " true\n " +
                 "> SELECT 1 != '2';\n " +
                 " true\n " +
                 "> SELECT true != NULL;\n " +
                 " NULL\n " +
                 "> SELECT NULL != NULL;\n " +
                 " NULL",
        note="",
        since="1.0.0",
        deprecated=""),
    # [NOT] BETWEEN ... AND ... predicate.
    ExpressionInfo(
        className="",
        name="between",
        usage="expr1 [NOT] BETWEEN expr2 AND expr3 - " +
              "evaluate if `expr1` is [not] in between `expr2` and `expr3`.",
        arguments="",
        examples="\n Examples:\n " +
                 "> SELECT col1 FROM VALUES 1, 3, 5, 7 WHERE col1 BETWEEN 2 AND 5;\n " +
                 " 3\n " +
                 " 5",
        note="",
        since="1.0.0",
        deprecated=""),
    # CASE ... WHEN ... THEN ... [ELSE ...] END expression.
    ExpressionInfo(
        className="",
        name="case",
        usage="CASE expr1 WHEN expr2 THEN expr3 " +
              "[WHEN expr4 THEN expr5]* [ELSE expr6] END - " +
              "When `expr1` = `expr2`, returns `expr3`; " +
              "when `expr1` = `expr4`, return `expr5`; else return `expr6`.",
        arguments="\n Arguments:\n " +
                  "* expr1 - the expression which is one operand of comparison.\n " +
                  "* expr2, expr4 - the expressions each of which is the other " +
                  " operand of comparison.\n " +
                  "* expr3, expr5, expr6 - the branch value expressions and else value expression" +
                  " should all be same type or coercible to a common type.",
        examples="\n Examples:\n " +
                 "> SELECT CASE col1 WHEN 1 THEN 'one' " +
                 "WHEN 2 THEN 'two' ELSE '?' END FROM VALUES 1, 2, 3;\n " +
                 " one\n " +
                 " two\n " +
                 " ?\n " +
                 "> SELECT CASE col1 WHEN 1 THEN 'one' " +
                 "WHEN 2 THEN 'two' END FROM VALUES 1, 2, 3;\n " +
                 " one\n " +
                 " two\n " +
                 " NULL",
        note="",
        since="1.0.1",
        deprecated=""),
    # `||` concatenation operator (strings and arrays, per the examples and note).
    ExpressionInfo(
        className="",
        name="||",
        usage="expr1 || expr2 - Returns the concatenation of `expr1` and `expr2`.",
        arguments="",
        examples="\n Examples:\n " +
                 "> SELECT 'Spark' || 'SQL';\n " +
                 " SparkSQL\n " +
                 "> SELECT array(1, 2, 3) || array(4, 5) || array(6);\n " +
                 " [1,2,3,4,5,6]",
        note="\n || for arrays is available since 2.4.0.\n",
        since="2.3.0",
        deprecated="")
]
|
|
|
|
|
2017-07-26 12:38:51 -04:00
|
|
|
|
|
|
|
def _list_function_infos(jvm):
    """
    Returns a list of function information via JVM. Sorts wrapped expression infos by name
    and returns them.

    The result contains the hand-written entries from `_virtual_operator_infos` plus one
    `ExpressionInfo` per entry returned by `PythonSQLUtils.listBuiltinFunctionInfos()`.
    The `_FUNC_` placeholder in usage, arguments, examples and note is replaced with the
    function name.

    :param jvm: JVM view used to call `PythonSQLUtils.listBuiltinFunctionInfos()`.
    :return: a new list of `ExpressionInfo`, sorted by `name`.
    """
    jinfos = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listBuiltinFunctionInfos()

    # Work on a copy: appending to the module-level list directly would make every
    # subsequent call return the JVM entries duplicated once more.
    infos = list(_virtual_operator_infos)

    for jinfo in jinfos:
        name = jinfo.getName()
        usage = jinfo.getUsage()
        # Usage is the only field guarded against None here.
        usage = usage.replace("_FUNC_", name) if usage is not None else usage
        infos.append(ExpressionInfo(
            className=jinfo.getClassName(),
            name=name,
            usage=usage,
            arguments=jinfo.getArguments().replace("_FUNC_", name),
            examples=jinfo.getExamples().replace("_FUNC_", name),
            note=jinfo.getNote().replace("_FUNC_", name),
            since=jinfo.getSince(),
            deprecated=jinfo.getDeprecated()))

    return sorted(infos, key=lambda i: i.name)
|
|
|
|
|
|
|
|
|
|
|
|
def _make_pretty_usage(usage):
|
|
|
|
"""
|
2017-08-05 13:10:56 -04:00
|
|
|
Makes the usage description pretty and returns a formatted string if `usage`
|
|
|
|
is not an empty string. Otherwise, returns None.
|
2017-07-26 12:38:51 -04:00
|
|
|
"""
|
|
|
|
|
|
|
|
if usage is not None and usage.strip() != "":
|
|
|
|
usage = "\n".join(map(lambda u: u.strip(), usage.split("\n")))
|
|
|
|
return "%s\n\n" % usage
|
|
|
|
|
|
|
|
|
2017-08-05 13:10:56 -04:00
|
|
|
def _make_pretty_arguments(arguments):
|
|
|
|
"""
|
|
|
|
Makes the arguments description pretty and returns a formatted string if `arguments`
|
|
|
|
starts with the argument prefix. Otherwise, returns None.
|
|
|
|
|
|
|
|
Expected input:
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
* arg0 - ...
|
|
|
|
...
|
|
|
|
* arg0 - ...
|
|
|
|
...
|
|
|
|
|
|
|
|
Expected output:
|
|
|
|
**Arguments:**
|
|
|
|
|
|
|
|
* arg0 - ...
|
|
|
|
...
|
|
|
|
* arg0 - ...
|
|
|
|
...
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
if arguments.startswith("\n Arguments:"):
|
|
|
|
arguments = "\n".join(map(lambda u: u[6:], arguments.strip().split("\n")[1:]))
|
|
|
|
return "**Arguments:**\n\n%s\n\n" % arguments
|
|
|
|
|
|
|
|
|
|
|
|
def _make_pretty_examples(examples):
|
2017-07-26 12:38:51 -04:00
|
|
|
"""
|
2017-08-05 13:10:56 -04:00
|
|
|
Makes the examples description pretty and returns a formatted string if `examples`
|
|
|
|
starts with the example prefix. Otherwise, returns None.
|
|
|
|
|
|
|
|
Expected input:
|
|
|
|
|
|
|
|
Examples:
|
|
|
|
> SELECT ...;
|
|
|
|
...
|
|
|
|
> SELECT ...;
|
|
|
|
...
|
|
|
|
|
|
|
|
Expected output:
|
|
|
|
**Examples:**
|
|
|
|
|
|
|
|
```
|
|
|
|
> SELECT ...;
|
|
|
|
...
|
|
|
|
> SELECT ...;
|
|
|
|
...
|
|
|
|
```
|
|
|
|
|
2017-07-26 12:38:51 -04:00
|
|
|
"""
|
|
|
|
|
2017-08-05 13:10:56 -04:00
|
|
|
if examples.startswith("\n Examples:"):
|
|
|
|
examples = "\n".join(map(lambda u: u[6:], examples.strip().split("\n")[1:]))
|
|
|
|
return "**Examples:**\n\n```\n%s\n```\n\n" % examples
|
|
|
|
|
|
|
|
|
|
|
|
def _make_pretty_note(note):
|
|
|
|
"""
|
|
|
|
Makes the note description pretty and returns a formatted string if `note` is not
|
|
|
|
an empty string. Otherwise, returns None.
|
|
|
|
|
|
|
|
Expected input:
|
|
|
|
|
|
|
|
...
|
|
|
|
|
|
|
|
Expected output:
|
|
|
|
**Note:**
|
|
|
|
|
|
|
|
...
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
if note != "":
|
|
|
|
note = "\n".join(map(lambda n: n[4:], note.split("\n")))
|
|
|
|
return "**Note:**\n%s\n" % note
|
2017-07-26 12:38:51 -04:00
|
|
|
|
|
|
|
|
2019-04-09 01:49:42 -04:00
|
|
|
def _make_pretty_deprecated(deprecated):
|
|
|
|
"""
|
|
|
|
Makes the deprecated description pretty and returns a formatted string if `deprecated`
|
|
|
|
is not an empty string. Otherwise, returns None.
|
|
|
|
|
|
|
|
Expected input:
|
|
|
|
|
|
|
|
...
|
|
|
|
|
|
|
|
Expected output:
|
|
|
|
**Deprecated:**
|
|
|
|
|
|
|
|
...
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
if deprecated != "":
|
|
|
|
deprecated = "\n".join(map(lambda n: n[4:], deprecated.split("\n")))
|
|
|
|
return "**Deprecated:**\n%s\n" % deprecated
|
|
|
|
|
|
|
|
|
2020-04-20 21:55:13 -04:00
|
|
|
def generate_sql_api_markdown(jvm, path):
    """
    Generates a markdown file after listing the function information. The output file
    is created in `path`.

    For every entry returned by `_list_function_infos(jvm)` a section is written; parts
    whose formatted value is None (or, for `since`, None or empty) are skipped.

    :param jvm: JVM view passed through to `_list_function_infos`.
    :param path: path of the markdown file to (over)write.

    Expected output:
    ### NAME

    USAGE

    **Arguments:**

    ARGUMENTS

    **Examples:**

    ```
    EXAMPLES
    ```

    **Note:**

    NOTE

    **Since:** SINCE

    **Deprecated:**

    DEPRECATED

    <br/>

    """
    # Write UTF-8 explicitly so the output does not depend on the locale's default
    # encoding and non-ASCII characters in the docs cannot fail to encode.
    with open(path, 'w', encoding="utf-8") as mdfile:
        mdfile.write("# Built-in Functions\n\n")

        for info in _list_function_infos(jvm):
            name = info.name
            usage = _make_pretty_usage(info.usage)
            arguments = _make_pretty_arguments(info.arguments)
            examples = _make_pretty_examples(info.examples)
            note = _make_pretty_note(info.note)
            since = info.since
            deprecated = _make_pretty_deprecated(info.deprecated)

            mdfile.write("### %s\n\n" % name)
            if usage is not None:
                # The formatted usage already ends with blank lines; normalise to exactly one.
                mdfile.write("%s\n\n" % usage.strip())
            if arguments is not None:
                mdfile.write(arguments)
            if examples is not None:
                mdfile.write(examples)
            if note is not None:
                mdfile.write(note)
            if since is not None and since != "":
                mdfile.write("**Since:** %s\n\n" % since.strip())
            if deprecated is not None:
                mdfile.write(deprecated)
            # Visual separator between consecutive function sections.
            mdfile.write("<br/>\n\n")
|
2017-07-26 12:38:51 -04:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Py4J gateway and grab the JVM view used to list the built-in functions.
    jvm = launch_gateway().jvm

    # The directory two levels above this file is treated as the Spark root.
    # `__file__` can be a relative path when the script is run directly (before
    # Python 3.9), which would make the output location depend on the current
    # working directory, so resolve it to an absolute path first.
    spark_root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    markdown_file_path = os.path.join(spark_root_dir, "sql/docs/index.md")
    generate_sql_api_markdown(jvm, markdown_file_path)
|