From a5e13acd19871831a93a5bdcbc99a9eb9f1aba07 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 30 Nov 2020 11:24:15 +0900 Subject: [PATCH] [SPARK-33582][SQL] Hive Metastore support filter by not-equals ### What changes were proposed in this pull request? This PR makes partition predicate pushdown into the Hive metastore support the not-equals operator. Hive related changes: https://github.com/apache/hive/blob/b8bd4594bef718b1eeac9fceb437d7df7b480ed1/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java#L2194-L2207 https://issues.apache.org/jira/browse/HIVE-2702 ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30534 from wangyum/SPARK-33582. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../spark/sql/hive/client/HiveShim.scala | 8 ++++++++ .../spark/sql/hive/client/FiltersSuite.scala | 8 ++++++++ .../client/HivePartitionFilteringSuite.scala | 20 +++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 17a64a67df..ed088648bc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -812,6 +812,14 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { right <- convert(expr2) } yield s"($left or $right)" + case Not(EqualTo( + ExtractAttribute(SupportedAttribute(name)), ExtractableLiteral(value))) if useAdvanced => + Some(s"$name != $value") + + case Not(EqualTo( + ExtractableLiteral(value), ExtractAttribute(SupportedAttribute(name)))) if useAdvanced => + Some(s"$value != $name") + case _ => None } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala index 6c0531182e..12ed0e5305 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala @@ -100,6 +100,14 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { (a("intcol", IntegerType) in (Literal(1), Literal(null))) :: Nil, "(intcol = 1)") + filterTest("NOT: int and string filters", + (a("intcol", IntegerType) =!= Literal(1)) :: (Literal("a") =!= a("strcol", IntegerType)) :: Nil, + """intcol != 1 and "a" != strcol""") + + filterTest("NOT: date filter", + (a("datecol", DateType) =!= Literal(Date.valueOf("2019-01-01"))) :: Nil, + "datecol != 2019-01-01") + // Applying the predicate `x IN (NULL)` should return an empty set, but since this optimization // will be applied by Catalyst, this filter converter does not need to account for this. filterTest("SPARK-24879 IN predicates with only NULLs will not cause a NPE", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index e07fbc29ee..dc56e6bc4d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -352,6 +352,26 @@ class HivePartitionFilteringSuite(version: String) dateStrValue) } + test("getPartitionsByFilter: ds<>20170101") { + testMetastorePartitionFiltering( + attr("ds") =!= 20170101, + 20170102 to 20170103, + hValue, + chunkValue, + dateValue, + dateStrValue) + } + + test("getPartitionsByFilter: h<>0 and chunk<>ab and d<>2019-01-01") { + testMetastorePartitionFiltering( + attr("h") =!= 0 && attr("chunk") =!= "ab" && attr("d") =!= Date.valueOf("2019-01-01"), + dsValue, + 1 to 4, + Seq("aa", "ba", "bb"), + 
Seq("2019-01-02", "2019-01-03"), + dateStrValue) + } + test("getPartitionsByFilter: d=2019-01-01") { testMetastorePartitionFiltering( attr("d") === Date.valueOf("2019-01-01"),