[SPARK-26576][SQL] Broadcast hint not applied to partitioned table

## What changes were proposed in this pull request?

Make sure the broadcast hint is applied to partitioned tables.
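
For context, here is a minimal reproduction distilled from the regression test added below, runnable in a `spark-shell` session (the table name `tbl` and its layout are illustrative; `spark.sql.autoBroadcastJoinThreshold` is assumed to be set to `-1` so that only the explicit hint can trigger a broadcast):

```scala
import org.apache.spark.sql.functions.broadcast

// Create a small partitioned table.
spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl")

val df = spark.table("tbl")

// Self-join with an explicit broadcast hint on the right side. Before this
// fix the hint was lost for partitioned tables, so the physical plan contained
// no BroadcastHashJoinExec; with the fix, explain() shows a broadcast join.
df.join(broadcast(df), "p").explain()
```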

## How was this patch tested?

- A new unit test in PruneFileSourcePartitionsSuite
- Unit test suites touched by SPARK-14581: JoinOptimizationSuite, FilterPushdownSuite, ColumnPruningSuite, and PruneFiltersSuite
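
These can be run locally with, for example, `build/sbt "hive/testOnly *PruneFileSourcePartitionsSuite"` (assuming the standard Spark sbt build; the suite lives in the `hive` module).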

Closes #23507 from jzhuge/SPARK-26576.

Closes #23530 from jzhuge/SPARK-26576-master.

Authored-by: John Zhuge <jzhuge@apache.org>
Signed-off-by: gatorsmile <gatorsmile@gmail.com>

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala

@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.hive.execution
 
+import org.scalatest.Matchers._
+
 import org.apache.spark.sql.QueryTest
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.dsl.expressions._
@@ -25,7 +27,10 @@ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions}
 import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
+import org.apache.spark.sql.functions.broadcast
 import org.apache.spark.sql.hive.test.TestHiveSingleton
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types.StructType
 
@@ -91,4 +96,15 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       assert(size2 < tableStats.get.sizeInBytes)
     }
   }
+
+  test("SPARK-26576 Broadcast hint not applied to partitioned table") {
+    withTable("tbl") {
+      withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+        spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl")
+        val df = spark.table("tbl")
+        val qe = df.join(broadcast(df), "p").queryExecution
+        qe.sparkPlan.collect { case j: BroadcastHashJoinExec => j } should have size 1
+      }
+    }
+  }
 }
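
A note on the assertion: setting `spark.sql.autoBroadcastJoinThreshold` to `-1` disables size-based automatic broadcasting, so a `BroadcastHashJoinExec` can only appear in the physical plan if the explicit `broadcast` hint survives planning. Collecting exactly one such node from `sparkPlan` therefore pins down the regression: before the fix the collected sequence was empty for partitioned tables.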