[SPARK-31550][SQL][DOCS] Set nondeterministic configurations with general meanings in sql configuration doc

### What changes were proposed in this pull request?

```scala
spark.sql.session.timeZone

spark.sql.warehouse.dir
```
These two configs are nondeterministic and vary across environments.

Besides, this reflects the review feedback on `gen-sql-config-docs.py` from https://github.com/apache/spark/pull/28274#discussion_r412893096 and on `configuration.md` from https://github.com/apache/spark/pull/28274#discussion_r412894905
### Why are the changes needed?

doc fix

### Does this PR introduce any user-facing change?

no
### How was this patch tested?

verify locally
![image](https://user-images.githubusercontent.com/8326978/80179099-5e7da200-8632-11ea-803f-d47a93151869.png)

Closes #28322 from yaooqinn/SPARK-31550.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Kent Yao 2020-04-27 17:08:52 +09:00 committed by HyukjinKwon
parent b6509aa502
commit 5ba467ca1d
5 changed files with 36 additions and 35 deletions

View file

@ -2624,6 +2624,9 @@ Spark subsystems.
### Spark SQL
{% for static_file in site.static_files %}
{% if static_file.name == 'generated-runtime-sql-config-table.html' %}
#### Runtime SQL Configuration
Runtime SQL configurations are per-session, mutable Spark SQL configurations. They can be set with initial values by the config file
@ -2631,13 +2634,13 @@ and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` th
Also, they can be set and queried by SET commands and reset to their initial values by the RESET command,
or by `SparkSession.conf`'s setter and getter methods in runtime.
{% for static_file in site.static_files %}
{% if static_file.name == 'generated-runtime-sql-config-table.html' %}
{% include_relative generated-runtime-sql-config-table.html %}
{% include_relative generated-runtime-sql-config-table.html %}
{% break %}
{% endif %}
{% endfor %}
{% for static_file in site.static_files %}
{% if static_file.name == 'generated-static-sql-config-table.html' %}
#### Static SQL Configuration
@ -2645,9 +2648,7 @@ Static SQL configurations are cross-session, immutable Spark SQL configurations.
and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` that are used to create `SparkSession`.
External users can query the static SQL config values via `SparkSession.conf` or via set command, e.g. `SET spark.sql.extensions;`, but cannot set/unset them.
{% for static_file in site.static_files %}
{% if static_file.name == 'generated-static-sql-config-table.html' %}
{% include_relative generated-static-sql-config-table.html %}
{% include_relative generated-static-sql-config-table.html %}
{% break %}
{% endif %}
{% endfor %}

View file

@ -40,17 +40,20 @@ private[sql] object PythonSQLUtils {
FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
}
def listSQLConfigs(): Array[(String, String, String, String)] = {
val conf = new SQLConf()
// Py4J doesn't seem to translate Seq well, so we convert to an Array.
conf.getAllDefinedConfs.filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
}
def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
private def listAllSQLConfigs(): Seq[(String, String, String, String)] = {
val conf = new SQLConf()
// Force to build static SQL configurations
StaticSQLConf
conf.getAllDefinedConfs.filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
conf.getAllDefinedConfs
}
def listRuntimeSQLConfigs(): Array[(String, String, String, String)] = {
// Py4J doesn't seem to translate Seq well, so we convert to an Array.
listAllSQLConfigs().filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
}
def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
listAllSQLConfigs().filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
}
/**

View file

@ -23,7 +23,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
class PythonSQLUtilsSuite extends SparkFunSuite {
test("listing sql configurations contains runtime ones only") {
val configs = PythonSQLUtils.listSQLConfigs()
val configs = PythonSQLUtils.listRuntimeSQLConfigs()
// static sql configurations
assert(!configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),

View file

@ -45,11 +45,8 @@ mkdir docs
echo "Generating SQL API Markdown files."
"$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py
echo "Generating runtime SQL runtime configuration table HTML file."
"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py runtime
echo "Generating static SQL configuration table HTML file."
"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py static
echo "Generating SQL configuration table HTML file."
"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py
echo "Generating HTML files for SQL function table and examples."
"$SPARK_HOME/bin/spark-submit" gen-sql-functions-docs.py

View file

@ -17,7 +17,7 @@
import os
import re
import sys
from collections import namedtuple
from textwrap import dedent
@ -31,11 +31,11 @@ SQLConfEntry = namedtuple(
"SQLConfEntry", ["name", "default", "description", "version"])
def get_public_sql_configs(jvm, group):
def get_sql_configs(jvm, group):
if group == "static":
config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listStaticSQLConfigs()
else:
config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs()
config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listRuntimeSQLConfigs()
sql_configs = [
SQLConfEntry(
name=_sql_config._1(),
@ -81,7 +81,11 @@ def generate_sql_configs_table_html(sql_configs, path):
"""
))
for config in sorted(sql_configs, key=lambda x: x.name):
if config.default == "<undefined>":
if config.name == "spark.sql.session.timeZone":
default = "(value of local timezone)"
elif config.name == "spark.sql.warehouse.dir":
default = "(value of <code>$PWD/spark-warehouse</code>)"
elif config.default == "<undefined>":
default = "(none)"
elif config.default.startswith("<value of "):
referenced_config_name = value_reference_pattern.match(config.default).group(1)
@ -119,17 +123,13 @@ def generate_sql_configs_table_html(sql_configs, path):
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: ./bin/spark-submit sql/gen-sql-config-docs.py <static|runtime>")
sys.exit(-1)
else:
group = sys.argv[1]
jvm = launch_gateway().jvm
sql_configs = get_public_sql_configs(jvm, group)
spark_root_dir = os.path.dirname(os.path.dirname(__file__))
sql_configs_table_path = os.path\
.join(spark_root_dir, "docs", "generated-" + group + "-sql-config-table.html")
docs_root_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "docs")
sql_configs = get_sql_configs(jvm, "runtime")
sql_configs_table_path = os.path.join(docs_root_dir, "generated-runtime-sql-config-table.html")
generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path)
sql_configs = get_sql_configs(jvm, "static")
sql_configs_table_path = os.path.join(docs_root_dir, "generated-static-sql-config-table.html")
generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path)