[SPARK-33582][SQL] Hive Metastore support filter by not-equals

### What changes were proposed in this pull request?

This PR makes partition predicate pushdown into the Hive metastore support the not-equals operator.

Hive related changes:
b8bd4594be/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java (L2194-L2207)
https://issues.apache.org/jira/browse/HIVE-2702

### Why are the changes needed?

Improve query performance.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

Closes #30534 from wangyum/SPARK-33582.

Authored-by: Yuming Wang <yumwang@ebay.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Yuming Wang 2020-11-30 11:24:15 +09:00 committed by HyukjinKwon
parent f93d4395b2
commit a5e13acd19
3 changed files with 36 additions and 0 deletions

View file

@ -812,6 +812,14 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
right <- convert(expr2)
} yield s"($left or $right)"
// Not-equals with the attribute on the left: a negated equality between a supported
// attribute and an extractable literal becomes the filter string `name != value`.
// The `useAdvanced` guard means this case is skipped when that flag is false, so the
// expression falls through to the cases below.
case Not(EqualTo(
ExtractAttribute(SupportedAttribute(name)), ExtractableLiteral(value))) if useAdvanced =>
Some(s"$name != $value")
// Same predicate with the literal on the left; the operand order of the original
// expression is kept in the generated string (`value != name`).
case Not(EqualTo(
ExtractableLiteral(value), ExtractAttribute(SupportedAttribute(name)))) if useAdvanced =>
Some(s"$value != $name")
case _ => None
}

View file

@ -100,6 +100,14 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest {
(a("intcol", IntegerType) in (Literal(1), Literal(null))) :: Nil,
"(intcol = 1)")
// Not-equals on two columns, once with the attribute on the left and once with the
// literal on the left. The expected string keeps each operand order and joins the two
// converted filters with " and ".
// NOTE(review): `strcol` is declared with IntegerType here — confirm this is intentional.
filterTest("NOT: int and string filters",
(a("intcol", IntegerType) =!= Literal(1)) :: (Literal("a") =!= a("strcol", IntegerType)) :: Nil,
"""intcol != 1 and "a" != strcol""")
// Not-equals against a date literal; the expected filter string contains the date
// unquoted as 2019-01-01.
filterTest("NOT: date filter",
(a("datecol", DateType) =!= Literal(Date.valueOf("2019-01-01"))) :: Nil,
"datecol != 2019-01-01")
// Applying the predicate `x IN (NULL)` should return an empty set, but since this optimization
// will be applied by Catalyst, this filter converter does not need to account for this.
filterTest("SPARK-24879 IN predicates with only NULLs will not cause a NPE",

View file

@ -352,6 +352,26 @@ class HivePartitionFilteringSuite(version: String)
dateStrValue)
}
// Filters partitions with `ds != 20170101`.
// The arguments after the filter are presumably the expected values for the ds, h, chunk,
// d and date-string partition columns, in that order — confirm against
// testMetastorePartitionFiltering's signature. Only the ds expectation is narrowed
// (to 20170102..20170103); the other columns use the suite-level value sets.
test("getPartitionsByFilter: ds<>20170101") {
testMetastorePartitionFiltering(
attr("ds") =!= 20170101,
20170102 to 20170103,
hValue,
chunkValue,
dateValue,
dateStrValue)
}
// Combines three not-equals predicates (int, string and date columns) with `&&`.
// The arguments after the filter are presumably the expected ds, h, chunk, d and
// date-string values, in that order — confirm against testMetastorePartitionFiltering's
// signature. Here h is expected to be 1..4, chunk to be "aa"/"ba"/"bb" and d to be
// 2019-01-02/2019-01-03, while ds and the date-string column use the suite-level value sets.
test("getPartitionsByFilter: h<>0 and chunk<>ab and d<>2019-01-01") {
testMetastorePartitionFiltering(
attr("h") =!= 0 && attr("chunk") =!= "ab" && attr("d") =!= Date.valueOf("2019-01-01"),
dsValue,
1 to 4,
Seq("aa", "ba", "bb"),
Seq("2019-01-02", "2019-01-03"),
dateStrValue)
}
test("getPartitionsByFilter: d=2019-01-01") {
testMetastorePartitionFiltering(
attr("d") === Date.valueOf("2019-01-01"),