[SPARK-31550][SQL][DOCS] Set nondeterministic configurations with general meanings in sql configuration doc
### What changes were proposed in this pull request? ```scala spark.sql.session.timeZone spark.sql.warehouse.dir ``` these 2 configs are nondeterministic and vary with environments Besides, reflect code in `gen-sql-config-docs.py` via https://github.com/apache/spark/pull/28274#discussion_r412893096 and `configuration.md` via https://github.com/apache/spark/pull/28274#discussion_r412894905 ### Why are the changes needed? doc fix ### Does this PR introduce any user-facing change? no ### How was this patch tested? verify locally ![image](https://user-images.githubusercontent.com/8326978/80179099-5e7da200-8632-11ea-803f-d47a93151869.png) Closes #28322 from yaooqinn/SPARK-31550. Authored-by: Kent Yao <yaooqinn@hotmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
b6509aa502
commit
5ba467ca1d
|
@ -2624,6 +2624,9 @@ Spark subsystems.
|
|||
|
||||
### Spark SQL
|
||||
|
||||
{% for static_file in site.static_files %}
|
||||
{% if static_file.name == 'generated-runtime-sql-config-table.html' %}
|
||||
|
||||
#### Runtime SQL Configuration
|
||||
|
||||
Runtime SQL configurations are per-session, mutable Spark SQL configurations. They can be set with initial values by the config file
|
||||
|
@ -2631,13 +2634,13 @@ and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` th
|
|||
Also, they can be set and queried by SET commands and reset to their initial values by the RESET command,
|
||||
or by `SparkSession.conf`'s setter and getter methods in runtime.
|
||||
|
||||
{% for static_file in site.static_files %}
|
||||
{% if static_file.name == 'generated-runtime-sql-config-table.html' %}
|
||||
{% include_relative generated-runtime-sql-config-table.html %}
|
||||
{% include_relative generated-runtime-sql-config-table.html %}
|
||||
{% break %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
{% for static_file in site.static_files %}
|
||||
{% if static_file.name == 'generated-static-sql-config-table.html' %}
|
||||
|
||||
#### Static SQL Configuration
|
||||
|
||||
|
@ -2645,9 +2648,7 @@ Static SQL configurations are cross-session, immutable Spark SQL configurations.
|
|||
and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` that are used to create `SparkSession`.
|
||||
External users can query the static sql config values via `SparkSession.conf` or via set command, e.g. `SET spark.sql.extensions;`, but cannot set/unset them.
|
||||
|
||||
{% for static_file in site.static_files %}
|
||||
{% if static_file.name == 'generated-static-sql-config-table.html' %}
|
||||
{% include_relative generated-static-sql-config-table.html %}
|
||||
{% include_relative generated-static-sql-config-table.html %}
|
||||
{% break %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
|
|
@ -40,17 +40,20 @@ private[sql] object PythonSQLUtils {
|
|||
FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
|
||||
}
|
||||
|
||||
def listSQLConfigs(): Array[(String, String, String, String)] = {
|
||||
val conf = new SQLConf()
|
||||
// Py4J doesn't seem to translate Seq well, so we convert to an Array.
|
||||
conf.getAllDefinedConfs.filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
|
||||
}
|
||||
|
||||
def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
|
||||
private def listAllSQLConfigs(): Seq[(String, String, String, String)] = {
|
||||
val conf = new SQLConf()
|
||||
// Force to build static SQL configurations
|
||||
StaticSQLConf
|
||||
conf.getAllDefinedConfs.filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
|
||||
conf.getAllDefinedConfs
|
||||
}
|
||||
|
||||
def listRuntimeSQLConfigs(): Array[(String, String, String, String)] = {
|
||||
// Py4J doesn't seem to translate Seq well, so we convert to an Array.
|
||||
listAllSQLConfigs().filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
|
||||
}
|
||||
|
||||
def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
|
||||
listAllSQLConfigs().filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -23,7 +23,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
|
|||
class PythonSQLUtilsSuite extends SparkFunSuite {
|
||||
|
||||
test("listing sql configurations contains runtime ones only") {
|
||||
val configs = PythonSQLUtils.listSQLConfigs()
|
||||
val configs = PythonSQLUtils.listRuntimeSQLConfigs()
|
||||
|
||||
// static sql configurations
|
||||
assert(!configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
|
||||
|
|
|
@ -45,11 +45,8 @@ mkdir docs
|
|||
echo "Generating SQL API Markdown files."
|
||||
"$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py
|
||||
|
||||
echo "Generating runtime SQL runtime configuration table HTML file."
|
||||
"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py runtime
|
||||
|
||||
echo "Generating static SQL configuration table HTML file."
|
||||
"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py static
|
||||
echo "Generating SQL configuration table HTML file."
|
||||
"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py
|
||||
|
||||
echo "Generating HTML files for SQL function table and examples."
|
||||
"$SPARK_HOME/bin/spark-submit" gen-sql-functions-docs.py
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from collections import namedtuple
|
||||
from textwrap import dedent
|
||||
|
||||
|
@ -31,11 +31,11 @@ SQLConfEntry = namedtuple(
|
|||
"SQLConfEntry", ["name", "default", "description", "version"])
|
||||
|
||||
|
||||
def get_public_sql_configs(jvm, group):
|
||||
def get_sql_configs(jvm, group):
|
||||
if group == "static":
|
||||
config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listStaticSQLConfigs()
|
||||
else:
|
||||
config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs()
|
||||
config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listRuntimeSQLConfigs()
|
||||
sql_configs = [
|
||||
SQLConfEntry(
|
||||
name=_sql_config._1(),
|
||||
|
@ -81,7 +81,11 @@ def generate_sql_configs_table_html(sql_configs, path):
|
|||
"""
|
||||
))
|
||||
for config in sorted(sql_configs, key=lambda x: x.name):
|
||||
if config.default == "<undefined>":
|
||||
if config.name == "spark.sql.session.timeZone":
|
||||
default = "(value of local timezone)"
|
||||
elif config.name == "spark.sql.warehouse.dir":
|
||||
default = "(value of <code>$PWD/spark-warehouse</code>)"
|
||||
elif config.default == "<undefined>":
|
||||
default = "(none)"
|
||||
elif config.default.startswith("<value of "):
|
||||
referenced_config_name = value_reference_pattern.match(config.default).group(1)
|
||||
|
@ -119,17 +123,13 @@ def generate_sql_configs_table_html(sql_configs, path):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 2:
|
||||
print("Usage: ./bin/spark-submit sql/gen-sql-config-docs.py <static|runtime>")
|
||||
sys.exit(-1)
|
||||
else:
|
||||
group = sys.argv[1]
|
||||
|
||||
jvm = launch_gateway().jvm
|
||||
sql_configs = get_public_sql_configs(jvm, group)
|
||||
|
||||
spark_root_dir = os.path.dirname(os.path.dirname(__file__))
|
||||
sql_configs_table_path = os.path\
|
||||
.join(spark_root_dir, "docs", "generated-" + group + "-sql-config-table.html")
|
||||
docs_root_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "docs")
|
||||
|
||||
sql_configs = get_sql_configs(jvm, "runtime")
|
||||
sql_configs_table_path = os.path.join(docs_root_dir, "generated-runtime-sql-config-table.html")
|
||||
generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path)
|
||||
|
||||
sql_configs = get_sql_configs(jvm, "static")
|
||||
sql_configs_table_path = os.path.join(docs_root_dir, "generated-static-sql-config-table.html")
|
||||
generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path)
|
||||
|
|
Loading…
Reference in a new issue