[SPARK-34215][SQL] Keep tables cached after truncation
### What changes were proposed in this pull request? Invoke `CatalogImpl.refreshTable()` instead of a combination of `SessionCatalog.refreshTable()` + `uncacheQuery()`. This allows clearing cached table data while keeping the table cached. ### Why are the changes needed? 1. To improve user experience with Spark SQL 2. To be consistent with other commands, see https://github.com/apache/spark/pull/31206 ### Does this PR introduce _any_ user-facing change? Yes. Before: ```scala scala> sql("CREATE TABLE tbl (c0 int)") res1: org.apache.spark.sql.DataFrame = [] scala> sql("INSERT INTO tbl SELECT 0") res2: org.apache.spark.sql.DataFrame = [] scala> sql("CACHE TABLE tbl") res3: org.apache.spark.sql.DataFrame = [] scala> sql("SELECT * FROM tbl").show(false) +---+ |c0 | +---+ |0 | +---+ scala> spark.catalog.isCached("tbl") res5: Boolean = true scala> sql("TRUNCATE TABLE tbl") res6: org.apache.spark.sql.DataFrame = [] scala> spark.catalog.isCached("tbl") res7: Boolean = false ``` After: ```scala scala> sql("TRUNCATE TABLE tbl") res6: org.apache.spark.sql.DataFrame = [] scala> spark.catalog.isCached("tbl") res7: Boolean = true ``` ### How was this patch tested? Added new test to `CachedTableSuite`: ``` $ build/sbt -Phive -Phive-thriftserver "test:testOnly *CachedTableSuite" $ build/sbt -Phive -Phive-thriftserver "test:testOnly *CatalogedDDLSuite" ``` Closes #31308 from MaxGekk/truncate-table-cached. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
dd88eff820
commit
ac8307d75c
|
@ -49,6 +49,7 @@ license: |
|
* `MSCK REPAIR TABLE`
* `LOAD DATA`
* `REFRESH TABLE`
* `TRUNCATE TABLE`
* and the method `spark.catalog.refreshTable`

In Spark 3.1 and earlier, table refreshing leaves dependents uncached.
|
||||
|
|
|
@ -561,16 +561,9 @@ case class TruncateTableCommand(
|
|||
}
|
||||
}
|
||||
}
|
||||
// After deleting the data, invalidate the table to make sure we don't keep around a stale
|
||||
// file relation in the metastore cache.
|
||||
spark.sessionState.refreshTable(tableName.unquotedString)
|
||||
// Also try to drop the contents of the table from the columnar cache
|
||||
try {
|
||||
spark.sharedState.cacheManager.uncacheQuery(spark.table(table.identifier), cascade = true)
|
||||
} catch {
|
||||
case NonFatal(e) =>
|
||||
log.warn(s"Exception when attempting to uncache table $tableIdentWithDB", e)
|
||||
}
|
||||
// After deleting the data, refresh the table to make sure we don't keep around a stale
|
||||
// file relation in the metastore cache and cached table data in the cache manager.
|
||||
spark.catalog.refreshTable(tableIdentWithDB)
|
||||
|
||||
if (table.stats.nonEmpty) {
|
||||
// empty table after truncation
|
||||
|
|
|
@ -501,4 +501,17 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SPARK-34215: TRUNCATE TABLE must clear the cached data while the table
// itself remains cached, so later reads still go through the cache.
test("SPARK-34215: keep table cached after truncation") {
  withTable("tbl") {
    sql("CREATE TABLE tbl (c0 int)")
    sql("INSERT INTO tbl SELECT 0")
    sql("CACHE TABLE tbl")

    // Sanity check: the table is cached and holds the inserted row.
    assert(spark.catalog.isCached("tbl"))
    checkAnswer(sql("SELECT * FROM tbl"), Row(0))

    sql("TRUNCATE TABLE tbl")

    // Still cached after truncation, but the cached contents are now empty.
    assert(spark.catalog.isCached("tbl"))
    checkAnswer(sql("SELECT * FROM tbl"), Seq.empty)
  }
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue