[SPARK-6743] [SQL] Fix empty projections of cached data

Author: Michael Armbrust <michael@databricks.com> Closes #6165 from marmbrus/wrongColumn and squashes the following commits: 4fad158 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into wrongColumn aad7eab [Michael Armbrust] rxins comments f1e8df1 [Michael Armbrust] [SPARK-6743][SQL] Fix empty projections of cached data
2015-05-22 09:43:46 -07:00 · 2015-05-22 09:43:46 -07:00 · 3b68cb0430
parent 4e5220c317
commit 3b68cb0430
4 changed files with 20 additions and 3 deletions
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -324,6 +324,7 @@ object Hive {
        |import org.apache.spark.sql.functions._
        |import org.apache.spark.sql.hive._
        |import org.apache.spark.sql.hive.test.TestHive._
        |import org.apache.spark.sql.hive.test.TestHive.implicits._
        |import org.apache.spark.sql.types._""".stripMargin,
    cleanupCommands in console := "sparkContext.stop()",
    // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -55,6 +55,9 @@ object Row {
    // TODO: Improve the performance of this if used in performance critical part.
    new GenericRow(rows.flatMap(_.toSeq).toArray)
  }
  /**
   * An empty row, i.e. the result of calling `apply()` with no arguments.
   *
   * Defined as a `val`, so it is built once when the enclosing object is initialized
   * and the same instance is handed out on every access.
   */
  val empty = apply()
 }
--- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala
@@ -314,7 +314,7 @@ private[sql] case class InMemoryColumnarTableScan(
                columnAccessors(i).extractTo(nextRow, i)
                i += 1
              }
-              nextRow
+              if (attributes.isEmpty) Row.empty else nextRow
            }
            // More rows remain exactly as long as the first column accessor has more values;
            // only index 0 is consulted here.
            // NOTE(review): this presumes columnAccessors is non-empty even when `attributes`
            // is empty (the Row.empty case in next()) -- confirm against where it is built.
            override def hasNext: Boolean = columnAccessors(0).hasNext
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -39,6 +39,19 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
  import org.apache.spark.sql.test.TestSQLContext.implicits._
  val sqlCtx = TestSQLContext
  test("SPARK-6743: no columns from cache") {
    // Register a small three-column table and cache it, so the query below is
    // answered from the cached copy.
    val sampleRows = Seq((83, 0, 38), (26, 0, 79), (43, 81, 24))
    sampleRows.toDF("a", "b", "c").registerTempTable("cachedData")
    cacheTable("cachedData")

    // Self-join in which only the aliased side (t1) contributes a column; the
    // un-aliased side is referenced without selecting any of its columns.
    val distinctB = sql("SELECT t1.b FROM cachedData, cachedData t1 GROUP BY t1.b")

    // Column b holds the values 0, 0 and 81, so grouping leaves two rows.
    val expectedRows = List(Row(0), Row(81))
    checkAnswer(distinctB, expectedRows)
  }
  test("self join with aliases") {
    Seq(1,2,3).map(i => (i, i.toString)).toDF("int", "str").registerTempTable("df")