[SPARK-34481][SQL] Refactor dataframe reader/writer optionsWithPath logic
### What changes were proposed in this pull request? Extract optionsWithPath logic into their own function. ### Why are the changes needed? Reduce the code duplication and improve modularity. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Just some refactoring. Existing tests. Closes #31599 from yuchenhuo/SPARK-34481. Authored-by: Yuchen Huo <yuchen.huo@databricks.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
82b33a3041
commit
7de49a8fc0
|
@ -268,14 +268,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
|
||||||
val sessionOptions = DataSourceV2Utils.extractSessionConfigs(
|
val sessionOptions = DataSourceV2Utils.extractSessionConfigs(
|
||||||
source = provider, conf = sparkSession.sessionState.conf)
|
source = provider, conf = sparkSession.sessionState.conf)
|
||||||
|
|
||||||
val optionsWithPath = if (paths.isEmpty) {
|
val optionsWithPath = getOptionsWithPaths(paths: _*)
|
||||||
extraOptions
|
|
||||||
} else if (paths.length == 1) {
|
|
||||||
extraOptions + ("path" -> paths.head)
|
|
||||||
} else {
|
|
||||||
val objectMapper = new ObjectMapper()
|
|
||||||
extraOptions + ("paths" -> objectMapper.writeValueAsString(paths.toArray))
|
|
||||||
}
|
|
||||||
|
|
||||||
val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++
|
val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++
|
||||||
optionsWithPath.originalMap
|
optionsWithPath.originalMap
|
||||||
|
@ -308,6 +301,17 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
|
||||||
}.getOrElse(loadV1Source(paths: _*))
|
}.getOrElse(loadV1Source(paths: _*))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private def getOptionsWithPaths(paths: String*): CaseInsensitiveMap[String] = {
|
||||||
|
if (paths.isEmpty) {
|
||||||
|
extraOptions
|
||||||
|
} else if (paths.length == 1) {
|
||||||
|
extraOptions + ("path" -> paths.head)
|
||||||
|
} else {
|
||||||
|
val objectMapper = new ObjectMapper()
|
||||||
|
extraOptions + ("paths" -> objectMapper.writeValueAsString(paths.toArray))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private def loadV1Source(paths: String*) = {
|
private def loadV1Source(paths: String*) = {
|
||||||
val legacyPathOptionBehavior = sparkSession.sessionState.conf.legacyPathOptionBehavior
|
val legacyPathOptionBehavior = sparkSession.sessionState.conf.legacyPathOptionBehavior
|
||||||
val (finalPaths, finalOptions) = if (!legacyPathOptionBehavior && paths.length == 1) {
|
val (finalPaths, finalOptions) = if (!legacyPathOptionBehavior && paths.length == 1) {
|
||||||
|
|
|
@ -314,11 +314,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
|
||||||
val sessionOptions = DataSourceV2Utils.extractSessionConfigs(
|
val sessionOptions = DataSourceV2Utils.extractSessionConfigs(
|
||||||
provider, df.sparkSession.sessionState.conf)
|
provider, df.sparkSession.sessionState.conf)
|
||||||
|
|
||||||
val optionsWithPath = if (path.isEmpty) {
|
val optionsWithPath = getOptionsWithPath(path)
|
||||||
extraOptions
|
|
||||||
} else {
|
|
||||||
extraOptions + ("path" -> path.get)
|
|
||||||
}
|
|
||||||
|
|
||||||
val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++
|
val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++
|
||||||
optionsWithPath.originalMap
|
optionsWithPath.originalMap
|
||||||
|
@ -416,6 +412,14 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private def getOptionsWithPath(path: Option[String]): CaseInsensitiveMap[String] = {
|
||||||
|
if (path.isEmpty) {
|
||||||
|
extraOptions
|
||||||
|
} else {
|
||||||
|
extraOptions + ("path" -> path.get)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private def saveToV1Source(path: Option[String]): Unit = {
|
private def saveToV1Source(path: Option[String]): Unit = {
|
||||||
partitioningColumns.foreach { columns =>
|
partitioningColumns.foreach { columns =>
|
||||||
extraOptions = extraOptions + (
|
extraOptions = extraOptions + (
|
||||||
|
@ -423,11 +427,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
|
||||||
DataSourceUtils.encodePartitioningColumns(columns))
|
DataSourceUtils.encodePartitioningColumns(columns))
|
||||||
}
|
}
|
||||||
|
|
||||||
val optionsWithPath = if (path.isEmpty) {
|
val optionsWithPath = getOptionsWithPath(path)
|
||||||
extraOptions
|
|
||||||
} else {
|
|
||||||
extraOptions + ("path" -> path.get)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Code path for data source v1.
|
// Code path for data source v1.
|
||||||
runCommand(df.sparkSession, "save") {
|
runCommand(df.sparkSession, "save") {
|
||||||
|
|
Loading…
Reference in a new issue