2c2062ea7c
### What changes were proposed in this pull request?

Currently, only the non-static public SQL configurations are dumped to the public doc. We'd better also include the static public ones, as the command `set -v` does. This PR forces a call to StaticSQLConf so that the static configurations are built and listed.

### Why are the changes needed?

Fix missing SQL configurations in the doc.

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

Added a unit test and verified locally that the public static SQL configs show up in `docs/sql-config.html`.

Closes #28274 from yaooqinn/SPARK-31498.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
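For local verification, the script below can be run once per group, per its own usage string: `./bin/spark-submit sql/gen-sql-config-docs.py runtime` and `./bin/spark-submit sql/gen-sql-config-docs.py static`, which write `docs/generated-runtime-sql-config-table.html` and `docs/generated-static-sql-config-table.html` respectively.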
136 lines
4.4 KiB
Python
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import re
import sys
from collections import namedtuple
from textwrap import dedent

# To avoid adding a new direct dependency, we import markdown from within mkdocs.
from mkdocs.structure.pages import markdown

from pyspark.java_gateway import launch_gateway


SQLConfEntry = namedtuple(
    "SQLConfEntry", ["name", "default", "description", "version"])


def get_public_sql_configs(jvm, group):
    if group == "static":
        config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listStaticSQLConfigs()
    else:
        config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs()
    sql_configs = [
        SQLConfEntry(
            name=_sql_config._1(),
            default=_sql_config._2(),
            description=_sql_config._3(),
            version=_sql_config._4()
        )
        for _sql_config in config_set
    ]
    return sql_configs
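

# Illustration only (not part of the original script): each element of the list returned
# above is a SQLConfEntry named tuple. Reusing the sample config shown in the docstring of
# generate_sql_configs_table_html below, one entry would look like:
#
#   SQLConfEntry(name="spark.sql.adaptive.enabled", default="false",
#                description="When true, enable adaptive query execution.", version="2.1.0")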


def generate_sql_configs_table_html(sql_configs, path):
    """
    Generates an HTML table at `path` that lists all public SQL
    configuration options.

    The table will look something like this:

    ```html
    <table class="table">
    <tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>

    <tr>
        <td><code>spark.sql.adaptive.enabled</code></td>
        <td>false</td>
        <td><p>When true, enable adaptive query execution.</p></td>
        <td>2.1.0</td>
    </tr>

    ...

    </table>
    ```
    """
    value_reference_pattern = re.compile(r"^<value of (\S*)>$")

    with open(path, 'w') as f:
        f.write(dedent(
            """
            <table class="table">
            <tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
            """
        ))
        for config in sorted(sql_configs, key=lambda x: x.name):
            if config.default == "<undefined>":
                default = "(none)"
            elif config.default.startswith("<value of "):
                referenced_config_name = value_reference_pattern.match(config.default).group(1)
                default = "(value of <code>{}</code>)".format(referenced_config_name)
            else:
                default = config.default

            if default.startswith("<"):
                raise Exception(
                    "Unhandled reference in SQL config docs. Config '{name}' "
                    "has default '{default}' that looks like an HTML tag."
                    .format(
                        name=config.name,
                        default=config.default,
                    )
                )

            f.write(dedent(
                """
                <tr>
                    <td><code>{name}</code></td>
                    <td>{default}</td>
                    <td>{description}</td>
                    <td>{version}</td>
                </tr>
                """
                .format(
                    name=config.name,
                    default=default,
                    description=markdown.markdown(config.description),
                    version=config.version
                )
            ))
        f.write("</table>\n")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: ./bin/spark-submit sql/gen-sql-config-docs.py <static|runtime>")
        sys.exit(-1)
    else:
        group = sys.argv[1]

    jvm = launch_gateway().jvm
    sql_configs = get_public_sql_configs(jvm, group)

    spark_root_dir = os.path.dirname(os.path.dirname(__file__))
    sql_configs_table_path = os.path\
        .join(spark_root_dir, "docs", "generated-" + group + "-sql-config-table.html")

    generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path)