[SPARK-9174][SQL] Add documentation for all public SQLConfs.
Author: Reynold Xin <rxin@databricks.com>

Closes #7500 from rxin/sqlconf and squashes the following commits:

a5726c8 [Reynold Xin] [SPARK-9174][SQL] Add documentation for all public SQLConfs.
parent 6e1e2eba69
commit e16a19a39e
@@ -26,6 +26,11 @@ import org.apache.parquet.hadoop.ParquetOutputCommitter
 import org.apache.spark.sql.catalyst.CatalystConf
 
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// This file defines the configuration options for Spark SQL.
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
 private[spark] object SQLConf {
 
   private val sqlConfEntries = java.util.Collections.synchronizedMap(
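For context on the `booleanConf`/`intConf`/`stringConf` helpers used throughout this diff: each builds a typed entry carrying a key, an optional default, a doc string, and an `isPublic` flag, and records it in the synchronized map shown above. Below is a minimal sketch of that pattern; the names mirror the diff, but the implementation details are illustrative assumptions, not Spark's actual code.

// Minimal sketch of the conf-entry registry pattern (assumed, not Spark's code).
object SQLConfSketch {
  case class ConfEntry[T](
      key: String,
      defaultValue: Option[T],
      doc: String,
      isPublic: Boolean)

  // Registry of all defined entries, keyed by conf name.
  private val entries = java.util.Collections.synchronizedMap(
    new java.util.HashMap[String, ConfEntry[_]]())

  private def register[T](entry: ConfEntry[T]): ConfEntry[T] = {
    entries.put(entry.key, entry)  // later lookups can render key, default, doc
    entry
  }

  def booleanConf(
      key: String,
      defaultValue: Option[Boolean] = None,
      doc: String = "",
      isPublic: Boolean = true): ConfEntry[Boolean] =
    register(ConfEntry(key, defaultValue, doc, isPublic))

  def intConf(
      key: String,
      defaultValue: Option[Int] = None,
      doc: String = "",
      isPublic: Boolean = true): ConfEntry[Int] =
    register(ConfEntry(key, defaultValue, doc, isPublic))
}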
@@ -184,17 +189,20 @@ private[spark] object SQLConf {
   val COMPRESS_CACHED = booleanConf("spark.sql.inMemoryColumnarStorage.compressed",
     defaultValue = Some(true),
     doc = "When set to true Spark SQL will automatically select a compression codec for each " +
-      "column based on statistics of the data.")
+      "column based on statistics of the data.",
+    isPublic = false)
 
   val COLUMN_BATCH_SIZE = intConf("spark.sql.inMemoryColumnarStorage.batchSize",
     defaultValue = Some(10000),
     doc = "Controls the size of batches for columnar caching. Larger batch sizes can improve " +
-      "memory utilization and compression, but risk OOMs when caching data.")
+      "memory utilization and compression, but risk OOMs when caching data.",
+    isPublic = false)
 
   val IN_MEMORY_PARTITION_PRUNING =
     booleanConf("spark.sql.inMemoryColumnarStorage.partitionPruning",
       defaultValue = Some(false),
-      doc = "<TODO>")
+      doc = "When true, enable partition pruning for in-memory columnar tables.",
+      isPublic = false)
 
   val AUTO_BROADCASTJOIN_THRESHOLD = intConf("spark.sql.autoBroadcastJoinThreshold",
     defaultValue = Some(10 * 1024 * 1024),
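A hedged usage sketch for the caching and broadcast options documented in this hunk, written against the Spark 1.4-era API this file belongs to; the application name, values, and table name are illustrative assumptions.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object SQLConfUsage {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("sqlconf-demo").setMaster("local[2]"))
    val sqlContext = new SQLContext(sc)

    // Larger columnar batches improve compression but raise OOM risk (per the doc above).
    sqlContext.setConf("spark.sql.inMemoryColumnarStorage.batchSize", "20000")
    // Tables smaller than 50MB may be broadcast in joins; -1 disables auto broadcast.
    sqlContext.setConf("spark.sql.autoBroadcastJoinThreshold", (50 * 1024 * 1024).toString)

    val df = sqlContext.range(0, 10000)
    df.registerTempTable("events")
    sqlContext.cacheTable("events")  // cached using the batch size set above

    sc.stop()
  }
}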
@@ -203,29 +211,35 @@ private[spark] object SQLConf {
       "Note that currently statistics are only supported for Hive Metastore tables where the " +
       "command<code>ANALYZE TABLE <tableName> COMPUTE STATISTICS noscan</code> has been run.")
 
-  val DEFAULT_SIZE_IN_BYTES = longConf("spark.sql.defaultSizeInBytes", isPublic = false)
+  val DEFAULT_SIZE_IN_BYTES = longConf(
+    "spark.sql.defaultSizeInBytes",
+    doc = "The default table size used in query planning. By default, it is set to a larger " +
+      "value than `spark.sql.autoBroadcastJoinThreshold` to be more conservative. That is to say " +
+      "by default the optimizer will not choose to broadcast a table unless it knows for sure its" +
+      "size is small enough.",
+    isPublic = false)
 
   val SHUFFLE_PARTITIONS = intConf("spark.sql.shuffle.partitions",
     defaultValue = Some(200),
-    doc = "Configures the number of partitions to use when shuffling data for joins or " +
-      "aggregations.")
+    doc = "The default number of partitions to use when shuffling data for joins or aggregations.")
 
   val CODEGEN_ENABLED = booleanConf("spark.sql.codegen",
     defaultValue = Some(true),
     doc = "When true, code will be dynamically generated at runtime for expression evaluation in" +
-      " a specific query. For some queries with complicated expression this option can lead to " +
-      "significant speed-ups. However, for simple queries this can actually slow down query " +
-      "execution.")
+      " a specific query.")
 
   val UNSAFE_ENABLED = booleanConf("spark.sql.unsafe.enabled",
     defaultValue = Some(false),
-    doc = "<TDDO>")
+    doc = "When true, use the new optimized Tungsten physical execution backend.")
 
-  val DIALECT = stringConf("spark.sql.dialect", defaultValue = Some("sql"), doc = "<TODO>")
+  val DIALECT = stringConf(
+    "spark.sql.dialect",
+    defaultValue = Some("sql"),
+    doc = "The default SQL dialect to use.")
 
   val CASE_SENSITIVE = booleanConf("spark.sql.caseSensitive",
     defaultValue = Some(true),
-    doc = "<TODO>")
+    doc = "Whether the query analyzer should be case sensitive or not.")
 
   val PARQUET_SCHEMA_MERGING_ENABLED = booleanConf("spark.sql.parquet.mergeSchema",
     defaultValue = Some(true),
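Reusing the `sqlContext` and `events` table from the sketch above, the shuffle-partition setting documented here can be changed either programmatically or through SQL; the two forms below are equivalent.

// Both forms change spark.sql.shuffle.partitions for subsequent queries.
sqlContext.setConf("spark.sql.shuffle.partitions", "400")
sqlContext.sql("SET spark.sql.shuffle.partitions=400")

// Any join/aggregation shuffle now produces 400 partitions.
val buckets = sqlContext.sql(
  "SELECT id % 10 AS bucket, COUNT(*) AS cnt FROM events GROUP BY id % 10")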
@@ -273,9 +287,8 @@ private[spark] object SQLConf {
   val PARQUET_FOLLOW_PARQUET_FORMAT_SPEC = booleanConf(
     key = "spark.sql.parquet.followParquetFormatSpec",
     defaultValue = Some(false),
-    doc = "Whether to stick to Parquet format specification when converting Parquet schema to " +
-      "Spark SQL schema and vice versa. Sticks to the specification if set to true; falls back " +
-      "to compatible mode if set to false.",
+    doc = "Whether to follow Parquet's format specification when converting Parquet schema to " +
+      "Spark SQL schema and vice versa.",
     isPublic = false)
 
   val PARQUET_OUTPUT_COMMITTER_CLASS = stringConf(
@@ -290,7 +303,7 @@ private[spark] object SQLConf {
 
   val ORC_FILTER_PUSHDOWN_ENABLED = booleanConf("spark.sql.orc.filterPushdown",
     defaultValue = Some(false),
-    doc = "<TODO>")
+    doc = "When true, enable filter pushdown for ORC files.")
 
   val HIVE_VERIFY_PARTITION_PATH = booleanConf("spark.sql.hive.verifyPartitionPath",
     defaultValue = Some(true),
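A sketch of the ORC filter-pushdown option in use. ORC support lives in the Hive module, so this assumes a `HiveContext` (called `hiveContext` here); the path and predicate are hypothetical.

// Enable predicate pushdown into the ORC reader before scanning.
hiveContext.setConf("spark.sql.orc.filterPushdown", "true")
val recent = hiveContext.read.format("orc").load("/data/logs.orc").filter("ts > 100")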
@@ -302,7 +315,7 @@ private[spark] object SQLConf {
 
   val BROADCAST_TIMEOUT = intConf("spark.sql.broadcastTimeout",
     defaultValue = Some(5 * 60),
-    doc = "<TODO>")
+    doc = "Timeout in seconds for the broadcast wait time in broadcast joins.")
 
   // Options that control which operators can be chosen by the query planner. These should be
   // considered hints and may be ignored by future versions of Spark SQL.
@@ -313,7 +326,7 @@ private[spark] object SQLConf {
 
   val SORTMERGE_JOIN = booleanConf("spark.sql.planner.sortMergeJoin",
     defaultValue = Some(false),
-    doc = "<TODO>")
+    doc = "When true, use sort merge join (as opposed to hash join) by default for large joins.")
 
   // This is only used for the thriftserver
   val THRIFTSERVER_POOL = stringConf("spark.sql.thriftserver.scheduler.pool",
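The two planner options documented in the hunks above can be set the same way on the shared `sqlContext`; the values below are illustrative.

sqlContext.setConf("spark.sql.broadcastTimeout", "600")        // 10-minute broadcast wait
sqlContext.setConf("spark.sql.planner.sortMergeJoin", "true")  // prefer sort merge join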
@@ -321,16 +334,16 @@ private[spark] object SQLConf {
 
   val THRIFTSERVER_UI_STATEMENT_LIMIT = intConf("spark.sql.thriftserver.ui.retainedStatements",
     defaultValue = Some(200),
-    doc = "<TODO>")
+    doc = "The number of SQL statements kept in the JDBC/ODBC web UI history.")
 
   val THRIFTSERVER_UI_SESSION_LIMIT = intConf("spark.sql.thriftserver.ui.retainedSessions",
     defaultValue = Some(200),
-    doc = "<TODO>")
+    doc = "The number of SQL client sessions kept in the JDBC/ODBC web UI history.")
 
   // This is used to set the default data source
   val DEFAULT_DATA_SOURCE_NAME = stringConf("spark.sql.sources.default",
     defaultValue = Some("org.apache.spark.sql.parquet"),
-    doc = "<TODO>")
+    doc = "The default data source to use in input/output.")
 
   // This is used to control the when we will split a schema's JSON string to multiple pieces
   // in order to fit the JSON string in metastore's table property (by default, the value has
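With `spark.sql.sources.default` set, format-less `save()`/`load()` calls use that data source. A sketch reusing the earlier `sqlContext` and `df`, with a hypothetical path:

sqlContext.setConf("spark.sql.sources.default", "org.apache.spark.sql.parquet")
df.write.save("/tmp/events_parquet")                        // written as Parquet
val restored = sqlContext.read.load("/tmp/events_parquet")  // read back as Parquet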
@@ -338,18 +351,20 @@ private[spark] object SQLConf {
   // to its length exceeds the threshold.
   val SCHEMA_STRING_LENGTH_THRESHOLD = intConf("spark.sql.sources.schemaStringLengthThreshold",
     defaultValue = Some(4000),
-    doc = "<TODO>")
+    doc = "The maximum length allowed in a single cell when " +
+      "storing additional schema information in Hive's metastore.",
+    isPublic = false)
 
   // Whether to perform partition discovery when loading external data sources. Default to true.
   val PARTITION_DISCOVERY_ENABLED = booleanConf("spark.sql.sources.partitionDiscovery.enabled",
     defaultValue = Some(true),
-    doc = "<TODO>")
+    doc = "When true, automtically discover data partitions.")
 
   // Whether to perform partition column type inference. Default to true.
   val PARTITION_COLUMN_TYPE_INFERENCE =
     booleanConf("spark.sql.sources.partitionColumnTypeInference.enabled",
       defaultValue = Some(true),
-      doc = "<TODO>")
+      doc = "When true, automatically infer the data types for partitioned columns.")
 
   // The output committer class used by HadoopFsRelation. The specified class needs to be a
   // subclass of org.apache.hadoop.mapreduce.OutputCommitter.
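An illustration of the two partition-handling options above: with both left at their `true` defaults, a Hive-style directory layout (hypothetical here) yields typed partition columns.

// Given /data/events/year=2015/month=07/part-*.parquet and similar directories,
// partition discovery adds year and month as columns, with their types inferred.
val partitioned = sqlContext.read.parquet("/data/events")
partitioned.printSchema()  // includes year: integer, month: integer from the paths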
@@ -363,22 +378,28 @@ private[spark] object SQLConf {
 
   // Whether to perform eager analysis when constructing a dataframe.
   // Set to false when debugging requires the ability to look at invalid query plans.
-  val DATAFRAME_EAGER_ANALYSIS = booleanConf("spark.sql.eagerAnalysis",
+  val DATAFRAME_EAGER_ANALYSIS = booleanConf(
+    "spark.sql.eagerAnalysis",
     defaultValue = Some(true),
-    doc = "<TODO>")
+    doc = "When true, eagerly applies query analysis on DataFrame operations.",
+    isPublic = false)
 
   // Whether to automatically resolve ambiguity in join conditions for self-joins.
   // See SPARK-6231.
-  val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY =
-    booleanConf("spark.sql.selfJoinAutoResolveAmbiguity", defaultValue = Some(true), doc = "<TODO>")
+  val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY = booleanConf(
+    "spark.sql.selfJoinAutoResolveAmbiguity",
+    defaultValue = Some(true),
+    isPublic = false)
 
   // Whether to retain group by columns or not in GroupedData.agg.
-  val DATAFRAME_RETAIN_GROUP_COLUMNS = booleanConf("spark.sql.retainGroupColumns",
+  val DATAFRAME_RETAIN_GROUP_COLUMNS = booleanConf(
+    "spark.sql.retainGroupColumns",
     defaultValue = Some(true),
-    doc = "<TODO>")
+    isPublic = false)
 
-  val USE_SQL_SERIALIZER2 = booleanConf("spark.sql.useSerializer2",
-    defaultValue = Some(true), doc = "<TODO>")
+  val USE_SQL_SERIALIZER2 = booleanConf(
+    "spark.sql.useSerializer2",
+    defaultValue = Some(true), isPublic = false)
 
   val USE_JACKSON_STREAMING_API = booleanConf("spark.sql.json.useJacksonStreamingAPI",
     defaultValue = Some(true), doc = "<TODO>")
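The self-join flag above (SPARK-6231) is what lets a literal self-join resolve instead of failing on ambiguous column references; a sketch with a hypothetical input path:

val people = sqlContext.read.json("/tmp/people.json")
// Both sides reference the same DataFrame; with the flag on (the default),
// Spark disambiguates the join condition automatically.
val joined = people.join(people, people("name") === people("name"))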
@@ -422,112 +443,53 @@ private[sql] class SQLConf extends Serializable with CatalystConf {
    */
   private[spark] def dialect: String = getConf(DIALECT)
 
-  /** When true tables cached using the in-memory columnar caching will be compressed. */
   private[spark] def useCompression: Boolean = getConf(COMPRESS_CACHED)
 
-  /** The compression codec for writing to a Parquetfile */
   private[spark] def parquetCompressionCodec: String = getConf(PARQUET_COMPRESSION)
 
   private[spark] def parquetCacheMetadata: Boolean = getConf(PARQUET_CACHE_METADATA)
 
-  /** The number of rows that will be */
   private[spark] def columnBatchSize: Int = getConf(COLUMN_BATCH_SIZE)
 
-  /** Number of partitions to use for shuffle operators. */
   private[spark] def numShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS)
 
-  /** When true predicates will be passed to the parquet record reader when possible. */
   private[spark] def parquetFilterPushDown: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_ENABLED)
 
-  /** When true uses Parquet implementation based on data source API */
   private[spark] def parquetUseDataSourceApi: Boolean = getConf(PARQUET_USE_DATA_SOURCE_API)
 
   private[spark] def orcFilterPushDown: Boolean = getConf(ORC_FILTER_PUSHDOWN_ENABLED)
 
-  /** When true uses verifyPartitionPath to prune the path which is not exists. */
   private[spark] def verifyPartitionPath: Boolean = getConf(HIVE_VERIFY_PARTITION_PATH)
 
-  /** When true the planner will use the external sort, which may spill to disk. */
   private[spark] def externalSortEnabled: Boolean = getConf(EXTERNAL_SORT)
 
-  /**
-   * Sort merge join would sort the two side of join first, and then iterate both sides together
-   * only once to get all matches. Using sort merge join can save a lot of memory usage compared
-   * to HashJoin.
-   */
   private[spark] def sortMergeJoinEnabled: Boolean = getConf(SORTMERGE_JOIN)
 
-  /**
-   * When set to true, Spark SQL will use the Janino at runtime to generate custom bytecode
-   * that evaluates expressions found in queries. In general this custom code runs much faster
-   * than interpreted evaluation, but there are some start-up costs (5-10ms) due to compilation.
-   */
   private[spark] def codegenEnabled: Boolean = getConf(CODEGEN_ENABLED)
 
-  /**
-   * caseSensitive analysis true by default
-   */
   def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE)
 
-  /**
-   * When set to true, Spark SQL will use managed memory for certain operations. This option only
-   * takes effect if codegen is enabled.
-   *
-   * Defaults to false as this feature is currently experimental.
-   */
   private[spark] def unsafeEnabled: Boolean = getConf(UNSAFE_ENABLED)
 
   private[spark] def useSqlSerializer2: Boolean = getConf(USE_SQL_SERIALIZER2)
 
-  /**
-   * Selects between the new (true) and old (false) JSON handlers, to be removed in Spark 1.5.0
-   */
   private[spark] def useJacksonStreamingAPI: Boolean = getConf(USE_JACKSON_STREAMING_API)
 
-  /**
-   * Upper bound on the sizes (in bytes) of the tables qualified for the auto conversion to
-   * a broadcast value during the physical executions of join operations. Setting this to -1
-   * effectively disables auto conversion.
-   *
-   * Hive setting: hive.auto.convert.join.noconditionaltask.size, whose default value is 10000.
-   */
   private[spark] def autoBroadcastJoinThreshold: Int = getConf(AUTO_BROADCASTJOIN_THRESHOLD)
 
-  /**
-   * The default size in bytes to assign to a logical operator's estimation statistics. By default,
-   * it is set to a larger value than `autoBroadcastJoinThreshold`, hence any logical operator
-   * without a properly implemented estimation of this statistic will not be incorrectly broadcasted
-   * in joins.
-   */
   private[spark] def defaultSizeInBytes: Long =
     getConf(DEFAULT_SIZE_IN_BYTES, autoBroadcastJoinThreshold + 1L)
 
-  /**
-   * When set to true, we always treat byte arrays in Parquet files as strings.
-   */
   private[spark] def isParquetBinaryAsString: Boolean = getConf(PARQUET_BINARY_AS_STRING)
 
-  /**
-   * When set to true, we always treat INT96Values in Parquet files as timestamp.
-   */
   private[spark] def isParquetINT96AsTimestamp: Boolean = getConf(PARQUET_INT96_AS_TIMESTAMP)
 
-  /**
-   * When set to true, sticks to Parquet format spec when converting Parquet schema to Spark SQL
-   * schema and vice versa. Otherwise, falls back to compatible mode.
-   */
   private[spark] def followParquetFormatSpec: Boolean = getConf(PARQUET_FOLLOW_PARQUET_FORMAT_SPEC)
 
-  /**
-   * When set to true, partition pruning for in-memory columnar tables is enabled.
-   */
   private[spark] def inMemoryPartitionPruning: Boolean = getConf(IN_MEMORY_PARTITION_PRUNING)
 
   private[spark] def columnNameOfCorruptRecord: String = getConf(COLUMN_NAME_OF_CORRUPT_RECORD)
 
-  /**
-   * Timeout in seconds for the broadcast wait time in hash join
-   */
   private[spark] def broadcastTimeout: Int = getConf(BROADCAST_TIMEOUT)
 
   private[spark] def defaultDataSourceName: String = getConf(DEFAULT_DATA_SOURCE_NAME)
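A sketch of the defaulting behavior `defaultSizeInBytes` implements above, expressed with the string-based two-argument `getConf` overload on `SQLContext` (values illustrative):

// Fall back to one byte above the broadcast threshold when unset, so tables of
// unknown size are never auto-broadcast in joins.
val threshold = sqlContext.getConf(
  "spark.sql.autoBroadcastJoinThreshold", (10 * 1024 * 1024).toString).toLong
val defaultSize = sqlContext.getConf(
  "spark.sql.defaultSizeInBytes", (threshold + 1L).toString).toLong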