339c0f9a62
### What changes were proposed in this pull request? This PR adds a doc builder for Spark SQL's configuration options. Here's what the new Spark SQL config docs look like ([configuration.html.zip](https://github.com/apache/spark/files/4172109/configuration.html.zip)): ![Screen Shot 2020-02-07 at 12 13 23 PM](https://user-images.githubusercontent.com/1039369/74050007-425b5480-49a3-11ea-818c-42700c54d1fb.png) Compare this to the [current docs](http://spark.apache.org/docs/3.0.0-preview2/configuration.html#spark-sql): ![Screen Shot 2020-02-04 at 4 55 10 PM](https://user-images.githubusercontent.com/1039369/73790828-24a5a980-476f-11ea-998c-12cd613883e8.png) ### Why are the changes needed? There is no visibility into the various Spark SQL configs on [the config docs page](http://spark.apache.org/docs/3.0.0-preview2/configuration.html#spark-sql). ### Does this PR introduce any user-facing change? No, apart from new documentation. ### How was this patch tested? I tested this manually by building the docs and reviewing them in my browser. Closes #27459 from nchammas/SPARK-30510-spark-sql-options. Authored-by: Nicholas Chammas <nicholas.chammas@liveramp.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
118 lines
3.8 KiB
Python
118 lines
3.8 KiB
Python
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
import os
|
|
import re
|
|
from collections import namedtuple
|
|
from textwrap import dedent
|
|
|
|
# To avoid adding a new direct dependency, we import markdown from within mkdocs.
|
|
from mkdocs.structure.pages import markdown
|
|
from pyspark.java_gateway import launch_gateway
|
|
|
|
# Lightweight record for one public SQL configuration option:
# its name, its default value (as a string), and its description.
SQLConfEntry = namedtuple("SQLConfEntry", ["name", "default", "description"])
|
|
|
|
|
|
def get_public_sql_configs(jvm):
    """Return all public SQL configs registered in the running Spark JVM.

    Each config comes back from `PythonSQLUtils.listSQLConfigs()` as a Scala
    tuple accessed via `_1()`/`_2()`/`_3()`; convert each into a
    `SQLConfEntry` namedtuple for easier downstream handling.
    """
    raw_configs = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs()
    entries = []
    for raw in raw_configs:
        entries.append(
            SQLConfEntry(
                name=raw._1(),
                default=raw._2(),
                description=raw._3(),
            )
        )
    return entries
|
|
|
|
|
|
def generate_sql_configs_table(sql_configs, path):
    """
    Generates an HTML table at `path` that lists all public SQL
    configuration options.

    The table will look something like this:

    ```html
    <table class="table">
    <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>

    <tr>
        <td><code>spark.sql.adaptive.enabled</code></td>
        <td>false</td>
        <td><p>When true, enable adaptive query execution.</p></td>
    </tr>

    ...

    </table>
    ```

    Parameters
    ----------
    sql_configs : list of SQLConfEntry
        The configs to document, in any order; rows are emitted sorted by name.
    path : str
        Where to write the generated HTML fragment.

    Raises
    ------
    ValueError
        If a config's default looks like an unhandled placeholder/HTML tag
        (starts with ``<``) that this function doesn't know how to render.
    """
    # Matches placeholder defaults like "<value of spark.buffer.size>".
    value_reference_pattern = re.compile(r"^<value of (\S*)>$")

    # Config descriptions may contain non-ASCII text; pin the encoding so the
    # output doesn't depend on the build machine's locale.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(dedent(
            """
            <table class="table">
            <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
            """
        ))
        for config in sorted(sql_configs, key=lambda x: x.name):
            if config.default == "<undefined>":
                default = "(none)"
            elif config.default.startswith("<value of "):
                # The default refers to another config's value; render it as
                # a cross-reference. If the placeholder doesn't fully parse,
                # fall through so the unhandled-reference check below raises
                # a clear error instead of an AttributeError on a None match.
                match = value_reference_pattern.match(config.default)
                if match is None:
                    default = config.default
                else:
                    default = "(value of <code>{}</code>)".format(match.group(1))
            else:
                default = config.default

            if default.startswith("<"):
                raise ValueError(
                    "Unhandled reference in SQL config docs. Config '{name}' "
                    "has default '{default}' that looks like an HTML tag."
                    .format(
                        name=config.name,
                        default=config.default,
                    )
                )

            f.write(dedent(
                """
                <tr>
                    <td><code>{name}</code></td>
                    <td>{default}</td>
                    <td>{description}</td>
                </tr>
                """
                .format(
                    name=config.name,
                    default=default,
                    # Descriptions are markdown; render them to HTML so
                    # formatting (code spans, lists) survives in the table.
                    description=markdown.markdown(config.description),
                )
            ))
        f.write("</table>\n")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
jvm = launch_gateway().jvm
|
|
sql_configs = get_public_sql_configs(jvm)
|
|
|
|
spark_root_dir = os.path.dirname(os.path.dirname(__file__))
|
|
sql_configs_table_path = os.path.join(spark_root_dir, "docs/sql-configs.html")
|
|
|
|
generate_sql_configs_table(sql_configs, path=sql_configs_table_path)
|