[SPARK-21787][SQL] Support for pushing down filters for DateType in native OrcFileFormat

## What changes were proposed in this pull request?

This PR adds support for pushing down filters on DateType columns in ORC.

## How was this patch tested?

Passes Jenkins with newly added and updated test cases.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #18995 from dongjoon-hyun/SPARK-21787.
This commit is contained in:
Dongjoon Hyun 2017-12-08 09:52:16 +08:00 committed by Wenchen Fan
parent aa1764ba1a
commit 0ba8f4b211
2 changed files with 25 additions and 7 deletions

View file

@ -82,8 +82,7 @@ private[orc] object OrcFilters {
* Both CharType and VarcharType are cleaned at AstBuilder.
*/
private def isSearchableType(dataType: DataType) = dataType match {
  // BinaryType is rejected explicitly and listed first, so it never reaches the
  // AtomicType case below.
  case BinaryType => false
  // Every remaining atomic type is searchable. DateType is no longer excluded here,
  // so it is expected to be handled by this case (SPARK-21787).
  case _: AtomicType => true
  // Anything that is not an AtomicType is not searchable.
  case _ => false
}

View file

@ -316,6 +316,30 @@ class OrcFilterSuite extends OrcTest with SharedSQLContext {
}
}
test("filter pushdown - date") {
  // Four consecutive days; the ORC data written for this test holds one row per day.
  val dates = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21")
    .map(text => Date.valueOf(text))
  // Name each day once so the predicates below read without positional indexing.
  val Seq(d0, d1, d2, d3) = dates

  withOrcDataFrame(dates.map(d => Tuple1(d))) { implicit df =>
    // Null check on the date column.
    checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)

    // Column on the left-hand side of the comparison.
    // Note that the expected leaf operator for `>` is LESS_THAN_EQUALS and for `>=` is
    // LESS_THAN (presumably the leaf is negated by the filter builder; confirm against
    // checkFilterPredicate).
    checkFilterPredicate('_1 === d0, PredicateLeaf.Operator.EQUALS)
    checkFilterPredicate('_1 <=> d0, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
    checkFilterPredicate('_1 < d1, PredicateLeaf.Operator.LESS_THAN)
    checkFilterPredicate('_1 > d2, PredicateLeaf.Operator.LESS_THAN_EQUALS)
    checkFilterPredicate('_1 <= d0, PredicateLeaf.Operator.LESS_THAN_EQUALS)
    checkFilterPredicate('_1 >= d3, PredicateLeaf.Operator.LESS_THAN)

    // Literal on the left-hand side: the same expected leaf operators as the
    // equivalent column-first comparisons above.
    checkFilterPredicate(Literal(d0) === '_1, PredicateLeaf.Operator.EQUALS)
    checkFilterPredicate(Literal(d0) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
    checkFilterPredicate(Literal(d1) > '_1, PredicateLeaf.Operator.LESS_THAN)
    checkFilterPredicate(Literal(d2) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
    checkFilterPredicate(Literal(d0) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
    checkFilterPredicate(Literal(d3) <= '_1, PredicateLeaf.Operator.LESS_THAN)
  }
}
test("no filter pushdown - non-supported types") {
implicit class IntToBinary(int: Int) {
def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8)
@ -328,11 +352,6 @@ class OrcFilterSuite extends OrcTest with SharedSQLContext {
withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
checkNoFilterPredicate('_1 <=> 1.b)
}
// DateType
val stringDate = "2015-01-01"
withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df =>
checkNoFilterPredicate('_1 === Date.valueOf(stringDate))
}
// MapType
withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df =>
checkNoFilterPredicate('_1.isNotNull)