[SPARK-14089][CORE][MLLIB] Remove methods that have been deprecated since 1.1, 1.2, 1.3, 1.4, and 1.5

## What changes were proposed in this pull request?

Removed methods that have been deprecated since Spark 1.1, 1.2, 1.3, 1.4, and 1.5.

## How was this patch tested?

- manually checked that no code in Spark calls these methods anymore
- existing test suites

Author: Liwei Lin <lwlin7@gmail.com>
Author: proflin <proflin.me@gmail.com>

Closes #11910 from lw-lin/remove-deprecates.
Commit 62a85eb09f (parent 1808465855)
Authored by Liwei Lin on 2016-03-26 12:41:34 +00:00; committed by Sean Owen
9 changed files with 13 additions and 158 deletions

core/src/main/java/org/apache/spark/api/java/StorageLevels.java

@@ -36,19 +36,6 @@ public class StorageLevels {
public static final StorageLevel MEMORY_AND_DISK_SER_2 = create(true, true, false, false, 2);
public static final StorageLevel OFF_HEAP = create(false, false, true, false, 1);
/**
* Create a new StorageLevel object.
* @param useDisk saved to disk, if true
* @param useMemory saved to memory, if true
* @param deserialized saved as deserialized objects, if true
* @param replication replication factor
*/
@Deprecated
public static StorageLevel create(boolean useDisk, boolean useMemory, boolean deserialized,
int replication) {
return StorageLevel.apply(useDisk, useMemory, false, deserialized, replication);
}
/**
* Create a new StorageLevel object.
* @param useDisk saved to disk, if true
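For callers still on the removed four-argument `create` (which had no off-heap flag), the migration is to pass `useOffHeap` explicitly. A minimal sketch in Scala, using the companion's `apply` with illustrative argument values:

```scala
import org.apache.spark.storage.StorageLevel

// Before (removed): StorageLevels.create(true, true, false, 2)
// After: the off-heap flag is spelled out as the third argument.
val level = StorageLevel(
  useDisk = true,
  useMemory = true,
  useOffHeap = false,
  deserialized = false,
  replication = 2)
```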

core/src/main/scala/org/apache/spark/SparkEnv.scala

@@ -156,14 +156,6 @@ object SparkEnv extends Logging {
env
}
/**
* Returns the ThreadLocal SparkEnv.
*/
@deprecated("Use SparkEnv.get instead", "1.2.0")
def getThreadLocal: SparkEnv = {
env
}
/**
* Create a SparkEnv for the driver.
*/
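Since 1.2 `getThreadLocal` had simply returned the process-wide env rather than a thread-local one, so the replacement is a direct rename:

```scala
import org.apache.spark.SparkEnv

// Before (removed): val env = SparkEnv.getThreadLocal
val env = SparkEnv.get // returns the same SparkEnv the removed method did
```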

mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala

@@ -63,14 +63,6 @@ class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override va
@Since("1.5.0")
def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value)
/**
* @group setParam
* @deprecated use [[setRawPredictionCol()]] instead
*/
@deprecated("use setRawPredictionCol instead", "1.5.0")
@Since("1.2.0")
def setScoreCol(value: String): this.type = set(rawPredictionCol, value)
/** @group setParam */
@Since("1.2.0")
def setLabelCol(value: String): this.type = set(labelCol, value)
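`setScoreCol` already wrote to `rawPredictionCol`, so migrating is a rename; the column names below are illustrative:

```scala
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

// Before (removed): evaluator.setScoreCol("rawPrediction")
val evaluator = new BinaryClassificationEvaluator()
  .setRawPredictionCol("rawPrediction") // sets the same param setScoreCol did
  .setLabelCol("label")
```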

mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala

@@ -78,15 +78,6 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater)
this.convergenceTol
}
/**
* Set the maximal number of iterations for L-BFGS. Default 100.
* @deprecated use [[LBFGS#setNumIterations]] instead
*/
@deprecated("use setNumIterations instead", "1.1.0")
def setMaxNumIterations(iters: Int): this.type = {
this.setNumIterations(iters)
}
/**
* Set the maximal number of iterations for L-BFGS. Default 100.
*/
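`setMaxNumIterations` forwarded straight to `setNumIterations`, so the rename is mechanical. A sketch with an illustrative gradient/updater pair:

```scala
import org.apache.spark.mllib.optimization.{LBFGS, LogisticGradient, SquaredL2Updater}

// Before (removed): lbfgs.setMaxNumIterations(50)
val lbfgs = new LBFGS(new LogisticGradient(), new SquaredL2Updater())
  .setNumIterations(50) // identical behavior to the removed setter
```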

mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala

@@ -51,30 +51,6 @@ class RDDFunctions[T: ClassTag](self: RDD[T]) extends Serializable {
*/
def sliding(windowSize: Int): RDD[Array[T]] = sliding(windowSize, 1)
/**
* Reduces the elements of this RDD in a multi-level tree pattern.
*
* @param depth suggested depth of the tree (default: 2)
* @see [[org.apache.spark.rdd.RDD#treeReduce]]
* @deprecated Use [[org.apache.spark.rdd.RDD#treeReduce]] instead.
*/
@deprecated("Use RDD.treeReduce instead.", "1.3.0")
def treeReduce(f: (T, T) => T, depth: Int = 2): T = self.treeReduce(f, depth)
/**
* Aggregates the elements of this RDD in a multi-level tree pattern.
*
* @param depth suggested depth of the tree (default: 2)
* @see [[org.apache.spark.rdd.RDD#treeAggregate]]
* @deprecated Use [[org.apache.spark.rdd.RDD#treeAggregate]] instead.
*/
@deprecated("Use RDD.treeAggregate instead.", "1.3.0")
def treeAggregate[U: ClassTag](zeroValue: U)(
seqOp: (U, T) => U,
combOp: (U, U) => U,
depth: Int = 2): U = {
self.treeAggregate(zeroValue)(seqOp, combOp, depth)
}
}
@DeveloperApi
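Both wrappers forwarded to methods that have lived on `RDD` itself since 1.3, so callers can drop the `RDDFunctions` import entirely. A sketch assuming an `RDD[Int]` named `rdd`:

```scala
// Before (removed): import org.apache.spark.mllib.rdd.RDDFunctions._
// After: treeReduce/treeAggregate are plain RDD methods.
val sum = rdd.treeReduce(_ + _, depth = 2)
val total = rdd.treeAggregate(0)(
  seqOp = (acc, x) => acc + x,
  combOp = (a, b) => a + b,
  depth = 2)
```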

mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala

@@ -202,8 +202,4 @@ object Strategy {
numClasses = 0)
}
@deprecated("Use Strategy.defaultStrategy instead.", "1.5.0")
@Since("1.2.0")
def defaultStategy(algo: Algo): Strategy = defaultStrategy(algo)
}
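The surviving method fixes the spelling of the removed alias:

```scala
import org.apache.spark.mllib.tree.configuration.{Algo, Strategy}

// Before (removed): Strategy.defaultStategy(Algo.Classification) -- note the typo
val strategy = Strategy.defaultStrategy(Algo.Classification)
```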

mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala

@@ -56,27 +56,6 @@ class Node @Since("1.2.0") (
s"split = $split, stats = $stats"
}
/**
* build the left node and right nodes if not leaf
* @param nodes array of nodes
*/
@Since("1.0.0")
@deprecated("build should no longer be used since trees are constructed on-the-fly in training",
"1.2.0")
def build(nodes: Array[Node]): Unit = {
logDebug("building node " + id + " at level " + Node.indexToLevel(id))
logDebug("id = " + id + ", split = " + split)
logDebug("stats = " + stats)
logDebug("predict = " + predict)
logDebug("impurity = " + impurity)
if (!isLeaf) {
leftNode = Some(nodes(Node.leftChildIndex(id)))
rightNode = Some(nodes(Node.rightChildIndex(id)))
leftNode.get.build(nodes)
rightNode.get.build(nodes)
}
}
/**
* predict value if node is not leaf
* @param features feature value
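There is no direct replacement for `build`: since 1.2 the trainers assemble the `Node` structure internally, so user code only touches the public train entry points. A minimal sketch, where the data set and every hyperparameter value are illustrative:

```scala
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.rdd.RDD

// The tree, including all of its Nodes, is constructed during training.
def trainTree(data: RDD[LabeledPoint]) =
  DecisionTree.trainClassifier(
    data,
    numClasses = 2,
    categoricalFeaturesInfo = Map.empty[Int, Int],
    impurity = "gini",
    maxDepth = 5,
    maxBins = 32)
```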

mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala

@@ -110,18 +110,6 @@ object MLUtils {
}
}
// Convenient methods for `loadLibSVMFile`.
@Since("1.0.0")
@deprecated("use method without multiclass argument, which no longer has effect", "1.1.0")
def loadLibSVMFile(
sc: SparkContext,
path: String,
multiclass: Boolean,
numFeatures: Int,
minPartitions: Int): RDD[LabeledPoint] =
loadLibSVMFile(sc, path, numFeatures, minPartitions)
/**
* Loads labeled data in the LIBSVM format into an RDD[LabeledPoint], with the default number of
* partitions.
@@ -133,23 +121,6 @@ object MLUtils {
numFeatures: Int): RDD[LabeledPoint] =
loadLibSVMFile(sc, path, numFeatures, sc.defaultMinPartitions)
@Since("1.0.0")
@deprecated("use method without multiclass argument, which no longer has effect", "1.1.0")
def loadLibSVMFile(
sc: SparkContext,
path: String,
multiclass: Boolean,
numFeatures: Int): RDD[LabeledPoint] =
loadLibSVMFile(sc, path, numFeatures)
@Since("1.0.0")
@deprecated("use method without multiclass argument, which no longer has effect", "1.1.0")
def loadLibSVMFile(
sc: SparkContext,
path: String,
multiclass: Boolean): RDD[LabeledPoint] =
loadLibSVMFile(sc, path)
/**
* Loads binary labeled data in the LIBSVM format into an RDD[LabeledPoint], with number of
* features determined automatically and the default number of partitions.
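The `multiclass` flag has been a no-op since 1.1, so every removed overload was a pure forwarder; callers just drop the argument. A sketch assuming `sc: SparkContext`, a `path` string, and a known `numFeatures`:

```scala
import org.apache.spark.mllib.util.MLUtils

// Before (removed): MLUtils.loadLibSVMFile(sc, path, multiclass = true)
val data = MLUtils.loadLibSVMFile(sc, path)               // numFeatures inferred
val dataN = MLUtils.loadLibSVMFile(sc, path, numFeatures) // or passed explicitly
```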
@@ -216,48 +187,6 @@ object MLUtils {
def loadLabeledPoints(sc: SparkContext, dir: String): RDD[LabeledPoint] =
loadLabeledPoints(sc, dir, sc.defaultMinPartitions)
/**
* Load labeled data from a file. The data format used here is
* L, f1 f2 ...
* where f1, f2 are feature values in Double and L is the corresponding label as Double.
*
* @param sc SparkContext
* @param dir Directory to the input data files.
* @return An RDD of LabeledPoint. Each labeled point has two elements: the first element is
* the label, and the second element represents the feature values (an array of Double).
*
* @deprecated Should use [[org.apache.spark.rdd.RDD#saveAsTextFile]] for saving and
* [[org.apache.spark.mllib.util.MLUtils#loadLabeledPoints]] for loading.
*/
@Since("1.0.0")
@deprecated("Should use MLUtils.loadLabeledPoints instead.", "1.0.1")
def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = {
sc.textFile(dir).map { line =>
val parts = line.split(',')
val label = parts(0).toDouble
val features = Vectors.dense(parts(1).trim().split(' ').map(_.toDouble))
LabeledPoint(label, features)
}
}
/**
* Save labeled data to a file. The data format used here is
* L, f1 f2 ...
* where f1, f2 are feature values in Double and L is the corresponding label as Double.
*
* @param data An RDD of LabeledPoints containing data to be saved.
* @param dir Directory to save the data.
*
* @deprecated Should use [[org.apache.spark.rdd.RDD#saveAsTextFile]] for saving and
* [[org.apache.spark.mllib.util.MLUtils#loadLabeledPoints]] for loading.
*/
@Since("1.0.0")
@deprecated("Should use RDD[LabeledPoint].saveAsTextFile instead.", "1.0.1")
def saveLabeledData(data: RDD[LabeledPoint], dir: String) {
val dataStr = data.map(x => x.label + "," + x.features.toArray.mkString(" "))
dataStr.saveAsTextFile(dir)
}
/**
* Return a k element array of pairs of RDDs with the first element of each pair
* containing the training data, a complement of the validation data and the second
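As the deprecation notes say, the replacement pair is `RDD#saveAsTextFile` plus `MLUtils.loadLabeledPoints`. Note this changes the on-disk format from the old `L, f1 f2 ...` lines to `LabeledPoint.toString`, so it is not byte-compatible with data written by the removed `saveLabeledData`. A sketch assuming `sc: SparkContext`, a directory `dir`, and `points: RDD[LabeledPoint]`:

```scala
import org.apache.spark.mllib.util.MLUtils

// Before (removed): MLUtils.saveLabeledData(points, dir); MLUtils.loadLabeledData(sc, dir)
points.saveAsTextFile(dir)                      // writes one LabeledPoint.toString per line
val loaded = MLUtils.loadLabeledPoints(sc, dir) // parses that same format back
```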

project/MimaExcludes.scala

@@ -576,6 +576,19 @@ object MimaExcludes {
) ++ Seq(
// [SPARK-13990] Automatically pick serializer when caching RDDs
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.network.netty.NettyBlockTransferService.uploadBlock")
) ++ Seq(
// [SPARK-14089][CORE][MLLIB] Remove methods that have been deprecated since 1.1, 1.2, 1.3, 1.4, and 1.5
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkEnv.getThreadLocal"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.rdd.RDDFunctions.treeReduce"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.rdd.RDDFunctions.treeAggregate"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.tree.configuration.Strategy.defaultStategy"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.util.MLUtils.loadLibSVMFile"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.util.MLUtils.loadLibSVMFile"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.util.MLUtils.loadLibSVMFile"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.util.MLUtils.saveLabeledData"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.util.MLUtils.loadLabeledData"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.optimization.LBFGS.setMaxNumIterations"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.evaluation.BinaryClassificationEvaluator.setScoreCol")
)
case v if v.startsWith("1.6") =>
Seq(