Revert "[SPARK-32481][CORE][SQL] Support truncate table to move data to trash"

This reverts commit 5c077f0580.
2020-08-26 11:24:35 -07:00 · 2020-08-26 11:24:35 -07:00 · 2dee4352a0
parent d3304268d3
commit 2dee4352a0
4 changed files with 2 additions and 110 deletions
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@ -50,7 +50,7 @@ import com.google.common.net.InetAddresses
 import org.apache.commons.codec.binary.Hex
 import org.apache.commons.lang3.SystemUtils
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileSystem, FileUtil, Path, Trash}
+import org.apache.hadoop.fs.{FileSystem, FileUtil, Path}
 import org.apache.hadoop.io.compress.{CompressionCodecFactory, SplittableCompressionCodec}
 import org.apache.hadoop.security.UserGroupInformation
 import org.apache.hadoop.yarn.conf.YarnConfiguration
@ -269,27 +269,6 @@ private[spark] object Utils extends Logging {
    file.setExecutable(true, true)
  }
  /**
   * Move data to trash if 'spark.sql.truncate.trash.enabled' is true
   */
  def moveToTrashIfEnabled(
      fs: FileSystem,
      partitionPath: Path,
      isTrashEnabled: Boolean,
      hadoopConf: Configuration): Boolean = {
    if (isTrashEnabled) {
      logDebug(s"will move data ${partitionPath.toString} to trash")
      val isSuccess = Trash.moveToAppropriateTrash(fs, partitionPath, hadoopConf)
      if (!isSuccess) {
        logWarning(s"Failed to move data ${partitionPath.toString} to trash")
        return fs.delete(partitionPath, true)
      }
      isSuccess
    } else {
      fs.delete(partitionPath, true)
    }
  }
  /**
   * Create a directory given the abstract pathname
   * @return true, if the directory is successfully created; otherwise, return false.
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@ -2722,17 +2722,6 @@ object SQLConf {
      .booleanConf
      .createWithDefault(false)
  val TRUNCATE_TRASH_ENABLED =
    buildConf("spark.sql.truncate.trash.enabled")
      .doc("This configuration decides when truncating table, whether data files will be moved " +
        "to trash directory or deleted permanently. The trash retention time is controlled by " +
        "fs.trash.interval, and in default, the server side configuration value takes " +
        "precedence over the client-side one. Note that if fs.trash.interval is non-positive, " +
        "this will be a no-op and log a warning message.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)
  /**
   * Holds information about keys that have been deprecated.
   *
@ -3345,8 +3334,6 @@ class SQLConf extends Serializable with Logging {
  def legacyPathOptionBehavior: Boolean = getConf(SQLConf.LEGACY_PATH_OPTION_BEHAVIOR)
  def truncateTrashEnabled: Boolean = getConf(SQLConf.TRUNCATE_TRASH_ENABLED)
  /** ********************** SQLConf functionality methods ************ */
  /** Set Spark SQL configuration properties. */
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@ -48,7 +48,6 @@ import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2
 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.SchemaUtils
 import org.apache.spark.util.Utils
 /**
 * A command to create a table with the same definition of the given existing table.
@ -490,7 +489,6 @@ case class TruncateTableCommand(
      }
    val hadoopConf = spark.sessionState.newHadoopConf()
    val ignorePermissionAcl = SQLConf.get.truncateTableIgnorePermissionAcl
    val isTrashEnabled = SQLConf.get.truncateTrashEnabled
    locations.foreach { location =>
      if (location.isDefined) {
        val path = new Path(location.get)
@ -515,7 +513,7 @@ case class TruncateTableCommand(
            }
          }
-          Utils.moveToTrashIfEnabled(fs, path, isTrashEnabled, hadoopConf)
+          fs.delete(path, true)
          // We should keep original permission/acl of the path.
          // For owner/group, only super-user can set it, for example on HDFS. Because
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@ -3101,78 +3101,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
      assert(spark.sessionState.catalog.isRegisteredFunction(rand))
    }
  }
  test("SPARK-32481 Move data to trash on truncate table if enabled") {
    val trashIntervalKey = "fs.trash.interval"
    withTable("tab1") {
      withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "true") {
        sql("CREATE TABLE tab1 (col INT) USING parquet")
        sql("INSERT INTO tab1 SELECT 1")
        // scalastyle:off hadoopconfiguration
        val hadoopConf = spark.sparkContext.hadoopConfiguration
        // scalastyle:on hadoopconfiguration
        val originalValue = hadoopConf.get(trashIntervalKey, "0")
        val tablePath = new Path(spark.sessionState.catalog
          .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get)
        val fs = tablePath.getFileSystem(hadoopConf)
        val trashRoot = fs.getTrashRoot(tablePath)
        assert(!fs.exists(trashRoot))
        try {
          hadoopConf.set(trashIntervalKey, "5")
          sql("TRUNCATE TABLE tab1")
        } finally {
          hadoopConf.set(trashIntervalKey, originalValue)
        }
        assert(fs.exists(trashRoot))
        fs.delete(trashRoot, true)
      }
    }
  }
  test("SPARK-32481 delete data permanently on truncate table if trash interval is non-positive") {
    val trashIntervalKey = "fs.trash.interval"
    withTable("tab1") {
      withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "true") {
        sql("CREATE TABLE tab1 (col INT) USING parquet")
        sql("INSERT INTO tab1 SELECT 1")
        // scalastyle:off hadoopconfiguration
        val hadoopConf = spark.sparkContext.hadoopConfiguration
        // scalastyle:on hadoopconfiguration
        val originalValue = hadoopConf.get(trashIntervalKey, "0")
        val tablePath = new Path(spark.sessionState.catalog
          .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get)
        val fs = tablePath.getFileSystem(hadoopConf)
        val trashRoot = fs.getTrashRoot(tablePath)
        assert(!fs.exists(trashRoot))
        try {
          hadoopConf.set(trashIntervalKey, "0")
          sql("TRUNCATE TABLE tab1")
        } finally {
          hadoopConf.set(trashIntervalKey, originalValue)
        }
        assert(!fs.exists(trashRoot))
      }
    }
  }
  test("SPARK-32481 Do not move data to trash on truncate table if disabled") {
    withTable("tab1") {
      withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "false") {
        sql("CREATE TABLE tab1 (col INT) USING parquet")
        sql("INSERT INTO tab1 SELECT 1")
        val hadoopConf = spark.sessionState.newHadoopConf()
        val tablePath = new Path(spark.sessionState.catalog
          .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get)
        val fs = tablePath.getFileSystem(hadoopConf)
        val trashRoot = fs.getTrashRoot(tablePath)
        sql("TRUNCATE TABLE tab1")
        assert(!fs.exists(trashRoot))
      }
    }
  }
 }
 object FakeLocalFsFileSystem {