[SPARK-33540][SQL] Subexpression elimination for interpreted predicate
### What changes were proposed in this pull request? This patch adds support for subexpression elimination in the interpreted predicate. ### Why are the changes needed? Similar to interpreted projection, there are use cases where the codegen predicate cannot be used, e.g. an overly complex schema, non-codegen expressions, etc. When the same expressions (subexpressions) occur repeatedly within a predicate expression, performance is quite bad because the same expressions have to be re-computed each time. We should support subexpression elimination for the interpreted predicate, as we already do for interpreted projection. ### Does this PR introduce _any_ user-facing change? No, this doesn't change user behavior. ### How was this patch tested? Unit tests and a benchmark. Closes #30497 from viirya/SPARK-33540. Authored-by: Liang-Chi Hsieh <viirya@gmail.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
d691d85701
commit
9643eab53e
|
@ -46,11 +46,26 @@ abstract class BasePredicate {
|
|||
}
|
||||
|
||||
case class InterpretedPredicate(expression: Expression) extends BasePredicate {
|
||||
override def eval(r: InternalRow): Boolean = expression.eval(r).asInstanceOf[Boolean]
|
||||
private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled
|
||||
private[this] lazy val runtime =
|
||||
new SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries)
|
||||
private[this] val expr = if (subExprEliminationEnabled) {
|
||||
runtime.proxyExpressions(Seq(expression)).head
|
||||
} else {
|
||||
expression
|
||||
}
|
||||
|
||||
override def eval(r: InternalRow): Boolean = {
|
||||
if (subExprEliminationEnabled) {
|
||||
runtime.setInput(r)
|
||||
}
|
||||
|
||||
expr.eval(r).asInstanceOf[Boolean]
|
||||
}
|
||||
|
||||
override def initialize(partitionIndex: Int): Unit = {
|
||||
super.initialize(partitionIndex)
|
||||
expression.foreach {
|
||||
expr.foreach {
|
||||
case n: Nondeterministic => n.initialize(partitionIndex)
|
||||
case _ =>
|
||||
}
|
||||
|
|
|
@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6
|
|||
Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
|
||||
from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
subExprElimination false, codegen: true 26447 27127 605 0.0 264467933.4 1.0X
|
||||
subExprElimination false, codegen: false 25673 26035 546 0.0 256732419.1 1.0X
|
||||
subExprElimination true, codegen: true 1384 1448 102 0.0 13842910.3 19.1X
|
||||
subExprElimination true, codegen: false 1244 1347 123 0.0 12442389.3 21.3X
|
||||
subExprElimination false, codegen: true 24827 25398 562 0.0 248271027.2 1.0X
|
||||
subExprElimination false, codegen: false 25052 25704 625 0.0 250518603.6 1.0X
|
||||
subExprElimination true, codegen: true 1540 1606 92 0.0 15403083.7 16.1X
|
||||
subExprElimination true, codegen: false 1487 1535 53 0.0 14865051.6 16.7X
|
||||
|
||||
Preparing data for benchmarking ...
|
||||
OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6
|
||||
Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
|
||||
from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
subexpressionElimination off, codegen on 34631 35449 833 0.0 346309884.0 1.0X
|
||||
subexpressionElimination off, codegen on 34480 34851 353 0.0 344798490.4 1.0X
|
||||
subexpressionElimination off, codegen on 16618 16811 291 0.0 166176642.6 2.1X
|
||||
subexpressionElimination off, codegen on 34316 34667 310 0.0 343157094.7 1.0X
|
||||
subexpressionElimination off, codegen on 37327 38261 809 0.0 373266387.0 1.0X
|
||||
subexpressionElimination off, codegen on 36126 37445 1575 0.0 361263987.0 1.0X
|
||||
subexpressionElimination off, codegen on 20152 21596 1263 0.0 201522903.8 1.9X
|
||||
subexpressionElimination off, codegen on 20799 20940 233 0.0 207993923.0 1.8X
|
||||
|
||||
|
||||
|
|
|
@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6
|
|||
Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
|
||||
from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
subExprElimination false, codegen: true 22767 23240 424 0.0 227665316.7 1.0X
|
||||
subExprElimination false, codegen: false 22869 23351 465 0.0 228693464.1 1.0X
|
||||
subExprElimination true, codegen: true 1328 1340 10 0.0 13280056.2 17.1X
|
||||
subExprElimination true, codegen: false 1248 1276 31 0.0 12476135.1 18.2X
|
||||
subExprElimination false, codegen: true 23094 23763 585 0.0 230939301.2 1.0X
|
||||
subExprElimination false, codegen: false 23161 24087 844 0.0 231611379.8 1.0X
|
||||
subExprElimination true, codegen: true 1492 1517 30 0.0 14921022.9 15.5X
|
||||
subExprElimination true, codegen: false 1300 1361 93 0.0 12996167.7 17.8X
|
||||
|
||||
Preparing data for benchmarking ...
|
||||
OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6
|
||||
Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
|
||||
from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
subexpressionElimination off, codegen on 37691 38846 1004 0.0 376913767.9 1.0X
|
||||
subexpressionElimination off, codegen on 37852 39124 1103 0.0 378517745.5 1.0X
|
||||
subexpressionElimination off, codegen on 22900 23085 202 0.0 229000242.5 1.6X
|
||||
subexpressionElimination off, codegen on 38298 38598 374 0.0 382978731.3 1.0X
|
||||
subexpressionElimination off, codegen on 37069 37767 985 0.0 370694301.5 1.0X
|
||||
subexpressionElimination off, codegen on 37095 37970 1008 0.0 370945081.6 1.0X
|
||||
subexpressionElimination off, codegen on 20618 21443 715 0.0 206175173.8 1.8X
|
||||
subexpressionElimination off, codegen on 21563 21887 307 0.0 215626274.7 1.7X
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue