[SPARK-21787][SQL] Support for pushing down filters for DateType in native OrcFileFormat
## What changes were proposed in this pull request?
This PR adds support for pushing down filters for DateType in ORC.
## How was this patch tested?
Passes Jenkins with newly added and updated test cases.
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #18995 from dongjoon-hyun/SPARK-21787.
This commit is contained in:
parent
aa1764ba1a
commit
0ba8f4b211
|
@ -82,8 +82,7 @@ private[orc] object OrcFilters {
|
|||
* Both CharType and VarcharType are cleaned at AstBuilder.
|
||||
*/
|
||||
private def isSearchableType(dataType: DataType) = dataType match {
|
||||
// TODO: SPARK-21787 Support for pushing down filters for DateType in ORC
|
||||
case BinaryType | DateType => false
|
||||
case BinaryType => false
|
||||
case _: AtomicType => true
|
||||
case _ => false
|
||||
}
|
||||
|
|
|
@ -316,6 +316,30 @@ class OrcFilterSuite extends OrcTest with SharedSQLContext {
|
|||
}
|
||||
}
|
||||
|
||||
test("filter pushdown - date") {
|
||||
val dates = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21").map { day =>
|
||||
Date.valueOf(day)
|
||||
}
|
||||
withOrcDataFrame(dates.map(Tuple1(_))) { implicit df =>
|
||||
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
|
||||
|
||||
checkFilterPredicate('_1 === dates(0), PredicateLeaf.Operator.EQUALS)
|
||||
checkFilterPredicate('_1 <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
|
||||
|
||||
checkFilterPredicate('_1 < dates(1), PredicateLeaf.Operator.LESS_THAN)
|
||||
checkFilterPredicate('_1 > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
|
||||
checkFilterPredicate('_1 <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
|
||||
checkFilterPredicate('_1 >= dates(3), PredicateLeaf.Operator.LESS_THAN)
|
||||
|
||||
checkFilterPredicate(Literal(dates(0)) === '_1, PredicateLeaf.Operator.EQUALS)
|
||||
checkFilterPredicate(Literal(dates(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
|
||||
checkFilterPredicate(Literal(dates(1)) > '_1, PredicateLeaf.Operator.LESS_THAN)
|
||||
checkFilterPredicate(Literal(dates(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
|
||||
checkFilterPredicate(Literal(dates(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
|
||||
checkFilterPredicate(Literal(dates(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN)
|
||||
}
|
||||
}
|
||||
|
||||
test("no filter pushdown - non-supported types") {
|
||||
implicit class IntToBinary(int: Int) {
|
||||
def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8)
|
||||
|
@ -328,11 +352,6 @@ class OrcFilterSuite extends OrcTest with SharedSQLContext {
|
|||
withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
|
||||
checkNoFilterPredicate('_1 <=> 1.b)
|
||||
}
|
||||
// DateType
|
||||
val stringDate = "2015-01-01"
|
||||
withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df =>
|
||||
checkNoFilterPredicate('_1 === Date.valueOf(stringDate))
|
||||
}
|
||||
// MapType
|
||||
withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df =>
|
||||
checkNoFilterPredicate('_1.isNotNull)
|
||||
|
|
Loading…
Reference in a new issue