# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import os from collections import namedtuple from pyspark.java_gateway import launch_gateway ExpressionInfo = namedtuple( "ExpressionInfo", "className name usage arguments examples note since deprecated") _virtual_operator_infos = [ ExpressionInfo( className="", name="!=", usage="expr1 != expr2 - Returns true if `expr1` is not equal to `expr2`, " + "or false otherwise.", arguments="\n Arguments:\n " + """* expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be used in equality comparison. Map type is not supported. For complex types such array/struct, the data types of fields must be orderable.""", examples="\n Examples:\n " + "> SELECT 1 != 2;\n " + " true\n " + "> SELECT 1 != '2';\n " + " true\n " + "> SELECT true != NULL;\n " + " NULL\n " + "> SELECT NULL != NULL;\n " + " NULL", note="", since="1.0.0", deprecated=""), ExpressionInfo( className="", name="<>", usage="expr1 != expr2 - Returns true if `expr1` is not equal to `expr2`, " + "or false otherwise.", arguments="\n Arguments:\n " + """* expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be used in equality comparison. Map type is not supported. For complex types such array/struct, the data types of fields must be orderable.""", examples="\n Examples:\n " + "> SELECT 1 != 2;\n " + " true\n " + "> SELECT 1 != '2';\n " + " true\n " + "> SELECT true != NULL;\n " + " NULL\n " + "> SELECT NULL != NULL;\n " + " NULL", note="", since="1.0.0", deprecated=""), ExpressionInfo( className="", name="between", usage="expr1 [NOT] BETWEEN expr2 AND expr3 - " + "evaluate if `expr1` is [not] in between `expr2` and `expr3`.", arguments="", examples="\n Examples:\n " + "> SELECT col1 FROM VALUES 1, 3, 5, 7 WHERE col1 BETWEEN 2 AND 5;\n " + " 3\n " + " 5", note="", since="1.0.0", deprecated=""), ExpressionInfo( className="", name="case", usage="CASE expr1 WHEN expr2 THEN expr3 " + "[WHEN expr4 THEN expr5]* [ELSE expr6] END - " + "When `expr1` = `expr2`, returns `expr3`; " + "when `expr1` = `expr4`, return `expr5`; else return `expr6`.", arguments="\n Arguments:\n " + "* expr1 - the expression which is one operand of comparison.\n " + "* expr2, expr4 - the expressions each of which is the other " + " operand of comparison.\n " + "* expr3, expr5, expr6 - the branch value expressions and else value expression" + " should all be same type or coercible to a common type.", examples="\n Examples:\n " + "> SELECT CASE col1 WHEN 1 THEN 'one' " + "WHEN 2 THEN 'two' ELSE '?' END FROM VALUES 1, 2, 3;\n " + " one\n " + " two\n " + " ?\n " + "> SELECT CASE col1 WHEN 1 THEN 'one' " + "WHEN 2 THEN 'two' END FROM VALUES 1, 2, 3;\n " + " one\n " + " two\n " + " NULL", note="", since="1.0.1", deprecated=""), ExpressionInfo( className="", name="||", usage="expr1 || expr2 - Returns the concatenation of `expr1` and `expr2`.", arguments="", examples="\n Examples:\n " + "> SELECT 'Spark' || 'SQL';\n " + " SparkSQL\n " + "> SELECT array(1, 2, 3) || array(4, 5) || array(6);\n " + " [1,2,3,4,5,6]", note="\n || for arrays is available since 2.4.0.\n", since="2.3.0", deprecated="") ] def _list_function_infos(jvm): """ Returns a list of function information via JVM. Sorts wrapped expression infos by name and returns them. """ jinfos = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listBuiltinFunctionInfos() infos = _virtual_operator_infos for jinfo in jinfos: name = jinfo.getName() usage = jinfo.getUsage() usage = usage.replace("_FUNC_", name) if usage is not None else usage infos.append(ExpressionInfo( className=jinfo.getClassName(), name=name, usage=usage, arguments=jinfo.getArguments().replace("_FUNC_", name), examples=jinfo.getExamples().replace("_FUNC_", name), note=jinfo.getNote().replace("_FUNC_", name), since=jinfo.getSince(), deprecated=jinfo.getDeprecated())) return sorted(infos, key=lambda i: i.name) def _make_pretty_usage(usage): """ Makes the usage description pretty and returns a formatted string if `usage` is not an empty string. Otherwise, returns None. """ if usage is not None and usage.strip() != "": usage = "\n".join(map(lambda u: u.strip(), usage.split("\n"))) return "%s\n\n" % usage def _make_pretty_arguments(arguments): """ Makes the arguments description pretty and returns a formatted string if `arguments` starts with the argument prefix. Otherwise, returns None. Expected input: Arguments: * arg0 - ... ... * arg0 - ... ... Expected output: **Arguments:** * arg0 - ... ... * arg0 - ... ... """ if arguments.startswith("\n Arguments:"): arguments = "\n".join(map(lambda u: u[6:], arguments.strip().split("\n")[1:])) return "**Arguments:**\n\n%s\n\n" % arguments def _make_pretty_examples(examples): """ Makes the examples description pretty and returns a formatted string if `examples` starts with the example prefix. Otherwise, returns None. Expected input: Examples: > SELECT ...; ... > SELECT ...; ... Expected output: **Examples:** ``` > SELECT ...; ... > SELECT ...; ... ``` """ if examples.startswith("\n Examples:"): examples = "\n".join(map(lambda u: u[6:], examples.strip().split("\n")[1:])) return "**Examples:**\n\n```\n%s\n```\n\n" % examples def _make_pretty_note(note): """ Makes the note description pretty and returns a formatted string if `note` is not an empty string. Otherwise, returns None. Expected input: ... Expected output: **Note:** ... """ if note != "": note = "\n".join(map(lambda n: n[4:], note.split("\n"))) return "**Note:**\n%s\n" % note def _make_pretty_deprecated(deprecated): """ Makes the deprecated description pretty and returns a formatted string if `deprecated` is not an empty string. Otherwise, returns None. Expected input: ... Expected output: **Deprecated:** ... """ if deprecated != "": deprecated = "\n".join(map(lambda n: n[4:], deprecated.split("\n"))) return "**Deprecated:**\n%s\n" % deprecated def generate_sql_api_markdown(jvm, path): """ Generates a markdown file after listing the function information. The output file is created in `path`. Expected output: ### NAME USAGE **Arguments:** ARGUMENTS **Examples:** ``` EXAMPLES ``` **Note:** NOTE **Since:** SINCE **Deprecated:** DEPRECATED
""" with open(path, 'w') as mdfile: mdfile.write("# Built-in Functions\n\n") for info in _list_function_infos(jvm): name = info.name usage = _make_pretty_usage(info.usage) arguments = _make_pretty_arguments(info.arguments) examples = _make_pretty_examples(info.examples) note = _make_pretty_note(info.note) since = info.since deprecated = _make_pretty_deprecated(info.deprecated) mdfile.write("### %s\n\n" % name) if usage is not None: mdfile.write("%s\n\n" % usage.strip()) if arguments is not None: mdfile.write(arguments) if examples is not None: mdfile.write(examples) if note is not None: mdfile.write(note) if since is not None and since != "": mdfile.write("**Since:** %s\n\n" % since.strip()) if deprecated is not None: mdfile.write(deprecated) mdfile.write("
\n\n") if __name__ == "__main__": jvm = launch_gateway().jvm spark_root_dir = os.path.dirname(os.path.dirname(__file__)) markdown_file_path = os.path.join(spark_root_dir, "sql/docs/index.md") generate_sql_api_markdown(jvm, markdown_file_path)