[SPARK-36454][SQL] Not push down partition filter to ORCScan for DSv2
### What changes were proposed in this pull request?
Do not push down partition filters to `ORCScan` for DSv2.

### Why are the changes needed?
It seems that partition filters are only used for partition pruning and shouldn't be pushed down to `ORCScan`. We don't push down partition filters to the ORC scan in DSv1:
```
== Physical Plan ==
*(1) Filter (isnotnull(value#19) AND NOT (value#19 = a))
+- *(1) ColumnarToRow
   +- FileScan orc [value#19,p1#20,p2#21] Batched: true, DataFilters: [isnotnull(value#19), NOT (value#19 = a)], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/pt/_5f4sxy56x70dv9zpz032f0m0000gn/T/spark-c1..., PartitionFilters: [isnotnull(p1#20), isnotnull(p2#21), (p1#20 = 1), (p2#21 = 2)], PushedFilters: [IsNotNull(value), Not(EqualTo(value,a))], ReadSchema: struct<value:string>
```
Also, we don't push down partition filters for Parquet in DSv2; see https://github.com/apache/spark/pull/30652.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Existing test suites.

Closes #33680 from huaxingao/orc_filter.

Authored-by: Huaxin Gao <huaxin_gao@apple.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
33c6d1168c
commit
b04330cd38
|
@ -53,7 +53,8 @@ case class OrcScanBuilder(
|
|||
|
||||
override def pushFilters(filters: Array[Filter]): Array[Filter] = {
|
||||
if (sparkSession.sessionState.conf.orcFilterPushDown) {
|
||||
val dataTypeMap = OrcFilters.getSearchableTypeMap(schema, SQLConf.get.caseSensitiveAnalysis)
|
||||
val dataTypeMap = OrcFilters.getSearchableTypeMap(
|
||||
readDataSchema(), SQLConf.get.caseSensitiveAnalysis)
|
||||
_pushedFilters = OrcFilters.convertibleFilters(dataTypeMap, filters).toArray
|
||||
}
|
||||
filters
|
||||
|
|
|
@ -460,7 +460,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
|
|||
"parquet" ->
|
||||
"|PushedFilters: \\[IsNotNull\\(value\\), GreaterThan\\(value,2\\)\\]",
|
||||
"orc" ->
|
||||
"|PushedFilters: \\[.*\\(id\\), .*\\(value\\), .*\\(id,1\\), .*\\(value,2\\)\\]",
|
||||
"|PushedFilters: \\[IsNotNull\\(value\\), GreaterThan\\(value,2\\)\\]",
|
||||
"csv" ->
|
||||
"|PushedFilters: \\[IsNotNull\\(value\\), GreaterThan\\(value,2\\)\\]",
|
||||
"json" ->
|
||||
|
|
Loading…
Reference in a new issue