[SPARK-33244][SQL] Unify the code paths for spark.table and spark.read.table

### What changes were proposed in this pull request?

- Call `spark.read.table` in `spark.table` so both APIs share one code path (see the sketch below).
- Add comments to `spark.table` to emphasize that it also supports reading streaming temp views.
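
A minimal sketch of the resulting equivalence (the temp view name `my_table` and the plan comparison are illustrative, not part of this PR):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()
spark.range(3).createOrReplaceTempView("my_table")

val df1 = spark.table("my_table")       // now delegates to spark.read.table
val df2 = spark.read.table("my_table")  // DataFrameReader path

// Both entry points build the same plan, so resolution and
// option handling can no longer diverge between them.
assert(df1.queryExecution.analyzed.sameResult(df2.queryExecution.analyzed))
```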

### Why are the changes needed?
The code paths of `spark.table` and `spark.read.table` should be the same. This invariant was broken in SPARK-32592, which changed `spark.read.table` to respect the options set on the reader.
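
For context, a hedged sketch of the options-respecting behavior from SPARK-32592 (the option key and table name are placeholders; whether an option takes effect depends on the underlying source):

```scala
// Options set on the reader are forwarded to the relation since
// SPARK-32592, which is why the two entry points must share one path.
val df = spark.read
  .option("mergeSchema", "true") // honored only if the source supports it
  .table("my_table")
```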

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Existing UT.

Closes #30148 from xuanyuanking/SPARK-33244.

Authored-by: Yuanjian Li <yuanjian.li@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
Commit ad02ceda29 (parent 90f6f39e42), authored 2020-11-10 05:46:45 +00:00
2 changed files with 14 additions and 7 deletions

DataFrameReader.scala

@@ -825,8 +825,16 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   def orc(paths: String*): DataFrame = format("orc").load(paths: _*)
 
   /**
-   * Returns the specified table as a `DataFrame`.
+   * Returns the specified table/view as a `DataFrame`. If it's a table, it must support batch
+   * reading and the returned DataFrame is the batch scan query plan of this table. If it's a view,
+   * the returned DataFrame is simply the query plan of the view, which can either be a batch or
+   * streaming query plan.
    *
+   * @param tableName is either a qualified or unqualified name that designates a table or view.
+   *                  If a database is specified, it identifies the table/view from the database.
+   *                  Otherwise, it first attempts to find a temporary view with the given name
+   *                  and then match the table/view from the current database.
+   *                  Note that, the global temporary view database is also valid here.
    * @since 1.4.0
    */
   def table(tableName: String): DataFrame = {
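
The new doc comment above notes that the global temp view database is also valid here; a minimal sketch (assumes the default `global_temp` database name; the view name is a placeholder):

```scala
// Global temp views live in the reserved `global_temp` database and
// can be resolved by DataFrameReader.table with a qualified name.
spark.range(5).createOrReplaceGlobalTempView("nums")
val nums = spark.read.table("global_temp.nums")
```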

SparkSession.scala

@@ -573,7 +573,10 @@ class SparkSession private(
   @transient lazy val catalog: Catalog = new CatalogImpl(self)
 
   /**
-   * Returns the specified table/view as a `DataFrame`.
+   * Returns the specified table/view as a `DataFrame`. If it's a table, it must support batch
+   * reading and the returned DataFrame is the batch scan query plan of this table. If it's a view,
+   * the returned DataFrame is simply the query plan of the view, which can either be a batch or
+   * streaming query plan.
    *
    * @param tableName is either a qualified or unqualified name that designates a table or view.
    *                  If a database is specified, it identifies the table/view from the database.
@@ -583,11 +586,7 @@ class SparkSession private(
    * @since 2.0.0
    */
   def table(tableName: String): DataFrame = {
-    table(sessionState.sqlParser.parseMultipartIdentifier(tableName))
-  }
-
-  private[sql] def table(multipartIdentifier: Seq[String]): DataFrame = {
-    Dataset.ofRows(self, UnresolvedRelation(multipartIdentifier))
+    read.table(tableName)
   }
 
   private[sql] def table(tableIdent: TableIdentifier): DataFrame = {
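
As the updated comment says, a view can resolve to a streaming plan. A minimal sketch of reading a streaming temp view through `spark.table` (the rate source and view name are illustrative):

```scala
// Register a streaming temp view and read it back via spark.table,
// which now routes through spark.read.table.
val stream = spark.readStream.format("rate").load()
stream.createOrReplaceTempView("rate_view")

val df = spark.table("rate_view")
assert(df.isStreaming) // the returned DataFrame is a streaming plan
```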