[SPARK-31596][SQL][DOCS] Generate SQL Configurations from hive module to configuration doc

### What changes were proposed in this pull request?

This PR adds the `-Phive` profile to the pre-build phase so that the Hive module is built onto the dev classpath.
It then reflects the `HiveUtils` object to dump all SQL configurations defined in that class.
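As context for why a plain reflective load is enough: a SQL config entry is added to the shared `SQLConf` registry as a side effect of initializing the object that defines it, so force-initializing `HiveUtils` exposes its entries. A minimal spark-shell style sketch of that mechanism (not part of the patch, assuming a build with `-Phive` so `spark-hive` is on the classpath):

```scala
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.internal.SQLConf

// Force-initialize HiveUtils by reflection; its config entries get registered
// in the shared SQLConf registry when the object's initializer runs.
val mirror = ScalaReflection.mirror
val hiveUtils = mirror.staticModule("org.apache.spark.sql.hive.HiveUtils")
mirror.reflectModule(hiveUtils).instance

// Entries such as spark.sql.hive.metastore.version now appear in the listing.
new SQLConf().getAllDefinedConfs.foreach(println)
```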

### Why are the changes needed?

Supply the SQL configurations defined in the Hive module to the configuration documentation.

### Does this PR introduce any user-facing change?

NO

### How was this patch tested?

Passing Jenkins and verified locally:

![image](https://user-images.githubusercontent.com/8326978/80492333-6fae1200-8996-11ea-99fd-595ee18c67e5.png)

Closes #28394 from yaooqinn/SPARK-31596.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
Kent Yao authored on 2020-04-29 15:34:45 +09:00; committed by Takeshi Yamamuro
commit 295d866969 (parent 62be65efe4)
2 changed files with 15 additions and 3 deletions

docs/_plugins/copy_api_dirs.rb

@@ -157,8 +157,8 @@ if not (ENV['SKIP_API'] == '1')
     curr_dir = pwd
     cd("..")
 
-    puts "Running 'build/sbt clean package' from " + pwd + "; this may take a few minutes..."
-    system("build/sbt clean package") || raise("SQL doc generation failed")
+    puts "Running 'build/sbt clean package -Phive' from " + pwd + "; this may take a few minutes..."
+    system("build/sbt clean package -Phive") || raise("SQL doc generation failed")
 
     puts "Moving back into docs dir."
     cd("docs")

sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala

@@ -20,10 +20,14 @@ package org.apache.spark.sql.api.python
 import java.io.InputStream
 import java.nio.channels.Channels
 
+import scala.util.control.NonFatal
+
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.api.python.PythonRDDServer
+import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{DataFrame, SQLContext}
+import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
@@ -32,7 +36,7 @@ import org.apache.spark.sql.execution.arrow.ArrowConverters
 import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.types.DataType
 
-private[sql] object PythonSQLUtils {
+private[sql] object PythonSQLUtils extends Logging {
 
   def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText)
 
   // This is needed when generating SQL documentation for built-in functions.
@@ -44,6 +48,14 @@ private[sql] object PythonSQLUtils {
     val conf = new SQLConf()
     // Force to build static SQL configurations
     StaticSQLConf
+    // Force to build SQL configurations from Hive module
+    try {
+      val symbol = ScalaReflection.mirror.staticModule("org.apache.spark.sql.hive.HiveUtils")
+      ScalaReflection.mirror.reflectModule(symbol).instance
+    } catch {
+      case NonFatal(e) =>
+        logWarning("Cannot generate SQL configurations from hive module", e)
+    }
     conf.getAllDefinedConfs
   }
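
For reference, a small sketch of why the `NonFatal` guard matters: when the build skips `-Phive`, the Hive classes are absent, `staticModule` throws, and the catch block lets the config listing proceed without the Hive entries instead of failing doc generation. The helper below is hypothetical and only illustrates that behavior:

```scala
import scala.util.control.NonFatal

import org.apache.spark.sql.catalyst.ScalaReflection

// Hypothetical helper, not part of the patch: returns true if the named object
// could be loaded and initialized, false when it is missing from the classpath.
def tryInitModule(fqcn: String): Boolean =
  try {
    val mirror = ScalaReflection.mirror
    mirror.reflectModule(mirror.staticModule(fqcn)).instance
    true
  } catch {
    case NonFatal(_) => false // e.g. ScalaReflectionException when built without -Phive
  }

// tryInitModule("org.apache.spark.sql.hive.HiveUtils")
// => true when spark-hive is on the classpath, false otherwise
```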