[SPARK-3850] Trim trailing spaces for MLlib.

Author: Reynold Xin <rxin@databricks.com>

Closes #6534 from rxin/whitespace-mllib and squashes the following commits:

38926e3 [Reynold Xin] [SPARK-3850] Trim trailing spaces for MLlib.
parent d1d2def2f5
commit e1067d0ad1
@@ -35,13 +35,13 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with

  /**
   * Centers the data with mean before scaling.
   * It will build a dense output, so this does not work on sparse input
   * and will raise an exception.
   * Default: false
   * @group param
   */
  val withMean: BooleanParam = new BooleanParam(this, "withMean", "Center data with mean")

  /**
   * Scales the data to unit standard deviation.
   * Default: true
@@ -68,13 +68,13 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM

  /** @group setParam */
  def setOutputCol(value: String): this.type = set(outputCol, value)

  /** @group setParam */
  def setWithMean(value: Boolean): this.type = set(withMean, value)

  /** @group setParam */
  def setWithStd(value: Boolean): this.type = set(withStd, value)

  override def fit(dataset: DataFrame): StandardScalerModel = {
    transformSchema(dataset.schema, logging = true)
    val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v }
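For orientation, a minimal usage sketch of the params and setters above; the spark.ml API of this era is assumed, and the DataFrame `df` with a Vector column named "features" is illustrative, not part of the diff:

  import org.apache.spark.ml.feature.StandardScaler

  val scaler = new StandardScaler()
    .setInputCol("features")
    .setOutputCol("scaledFeatures")
    .setWithMean(false) // default; centering densifies sparse input
    .setWithStd(true)   // default
  val scalerModel = scaler.fit(df)       // invokes the fit shown above
  val scaled = scalerModel.transform(df)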
@@ -321,7 +321,7 @@ private class LeastSquaresAggregator(
    }
    (weightsArray, -sum + labelMean / labelStd, weightsArray.length)
  }

  private val effectiveWeightsVector = Vectors.dense(effectiveWeightsArray)

  private val gradientSumArray = Array.ofDim[Double](dim)
@@ -399,7 +399,7 @@ private[python] class PythonMLLibAPI extends Serializable {
      val sigma = si.map(_.asInstanceOf[DenseMatrix])
      val gaussians = Array.tabulate(weight.length){
        i => new MultivariateGaussian(mean(i), sigma(i))
      }
      val model = new GaussianMixtureModel(weight, gaussians)
      model.predictSoft(data).map(Vectors.dense)
    }
@@ -494,7 +494,7 @@ private[python] class PythonMLLibAPI extends Serializable {
  def normalizeVector(p: Double, rdd: JavaRDD[Vector]): JavaRDD[Vector] = {
    new Normalizer(p).transform(rdd)
  }

  /**
   * Java stub for StandardScaler.fit(). This stub returns a
   * handle to the Java object instead of the content of the Java object.
@@ -36,11 +36,11 @@ import org.apache.spark.util.Utils
 * independent Gaussian distributions with associated "mixing" weights
 * specifying each's contribution to the composite.
 *
 * Given a set of sample points, this class will maximize the log-likelihood
 * for a mixture of k Gaussians, iterating until the log-likelihood changes by
 * less than convergenceTol, or until it has reached the max number of iterations.
 * While this process is generally guaranteed to converge, it is not guaranteed
 * to find a global optimum.
 *
 * Note: For high-dimensional data (with many features), this algorithm may perform poorly.
 * This is due to high-dimensional data (a) making it difficult to cluster at all (based
@@ -53,24 +53,24 @@ import org.apache.spark.util.Utils
 */
@Experimental
class GaussianMixture private (
    private var k: Int,
    private var convergenceTol: Double,
    private var maxIterations: Int,
    private var seed: Long) extends Serializable {

  /**
   * Constructs a default instance. The default parameters are {k: 2, convergenceTol: 0.01,
   * maxIterations: 100, seed: random}.
   */
  def this() = this(2, 0.01, 100, Utils.random.nextLong())

  // number of samples per cluster to use when initializing Gaussians
  private val nSamples = 5

  // an initializing GMM can be provided rather than using the
  // default random starting point
  private var initialModel: Option[GaussianMixtureModel] = None

  /** Set the initial GMM starting point, bypassing the random initialization.
   * You must call setK() prior to calling this method, and the condition
   * (model.k == this.k) must be met; failure will result in an IllegalArgumentException
@@ -83,37 +83,37 @@ class GaussianMixture private (
    }
    this
  }

  /** Return the user supplied initial GMM, if supplied */
  def getInitialModel: Option[GaussianMixtureModel] = initialModel

  /** Set the number of Gaussians in the mixture model. Default: 2 */
  def setK(k: Int): this.type = {
    this.k = k
    this
  }

  /** Return the number of Gaussians in the mixture model */
  def getK: Int = k

  /** Set the maximum number of iterations to run. Default: 100 */
  def setMaxIterations(maxIterations: Int): this.type = {
    this.maxIterations = maxIterations
    this
  }

  /** Return the maximum number of iterations to run */
  def getMaxIterations: Int = maxIterations

  /**
   * Set the largest change in log-likelihood at which convergence is
   * considered to have occurred.
   */
  def setConvergenceTol(convergenceTol: Double): this.type = {
    this.convergenceTol = convergenceTol
    this
  }

  /**
   * Return the largest change in log-likelihood at which convergence is
   * considered to have occurred.
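Taken together, these fluent setters configure the estimator before run(). A sketch with illustrative values (defaults per the constructor doc above are k: 2, convergenceTol: 0.01, maxIterations: 100):

  import org.apache.spark.mllib.clustering.GaussianMixture

  val gmm = new GaussianMixture()
    .setK(3)
    .setConvergenceTol(0.001)
    .setMaxIterations(200)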
@@ -132,41 +132,41 @@ class GaussianMixture private (
  /** Perform expectation maximization */
  def run(data: RDD[Vector]): GaussianMixtureModel = {
    val sc = data.sparkContext

    // we will operate on the data as breeze data
    val breezeData = data.map(_.toBreeze).cache()

    // Get length of the input vectors
    val d = breezeData.first().length

    // Determine initial weights and corresponding Gaussians.
    // If the user supplied an initial GMM, we use those values, otherwise
    // we start with uniform weights, a random mean from the data, and
    // diagonal covariance matrices using component variances
    // derived from the samples
    val (weights, gaussians) = initialModel match {
      case Some(gmm) => (gmm.weights, gmm.gaussians)

      case None => {
        val samples = breezeData.takeSample(withReplacement = true, k * nSamples, seed)
        (Array.fill(k)(1.0 / k), Array.tabulate(k) { i =>
          val slice = samples.view(i * nSamples, (i + 1) * nSamples)
          new MultivariateGaussian(vectorMean(slice), initCovariance(slice))
        })
      }
    }

    var llh = Double.MinValue // current log-likelihood
    var llhp = 0.0 // previous log-likelihood

    var iter = 0
    while (iter < maxIterations && math.abs(llh-llhp) > convergenceTol) {
      // create and broadcast curried cluster contribution function
      val compute = sc.broadcast(ExpectationSum.add(weights, gaussians)_)

      // aggregate the cluster contribution for all sample points
      val sums = breezeData.aggregate(ExpectationSum.zero(k, d))(compute.value, _ += _)

      // Create new distributions based on the partial assignments
      // (often referred to as the "M" step in literature)
      val sumWeights = sums.weights.sum
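The loop above is the standard EM recursion for Gaussian mixtures. For reference, the updates it realizes can be written as (textbook EM, not quoted from the diff):

  \gamma_{ik} = \frac{w_k \,\mathcal{N}(x_i \mid \mu_k, \Sigma_k)}{\sum_j w_j \,\mathcal{N}(x_i \mid \mu_j, \Sigma_j)}, \qquad
  w_k \leftarrow \frac{1}{n} \sum_i \gamma_{ik}, \qquad
  \mu_k \leftarrow \frac{\sum_i \gamma_{ik}\, x_i}{\sum_i \gamma_{ik}}, \qquad
  \Sigma_k \leftarrow \frac{\sum_i \gamma_{ik}\, (x_i - \mu_k)(x_i - \mu_k)^\top}{\sum_i \gamma_{ik}}

ExpectationSum accumulates the per-point terms, and the M step divides by sums.weights(i), as shown in the next hunk.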
@@ -179,22 +179,22 @@ class GaussianMixture private (
        gaussians(i) = new MultivariateGaussian(mu, sums.sigmas(i) / sums.weights(i))
        i = i + 1
      }

      llhp = llh // current becomes previous
      llh = sums.logLikelihood // this is the freshly computed log-likelihood
      iter += 1
    }

    new GaussianMixtureModel(weights, gaussians)
  }

  /** Average of dense breeze vectors */
  private def vectorMean(x: IndexedSeq[BV[Double]]): BDV[Double] = {
    val v = BDV.zeros[Double](x(0).length)
    x.foreach(xi => v += xi)
    v / x.length.toDouble
  }

  /**
   * Construct matrix where diagonal entries are element-wise
   * variance of input vectors (computes biased variance)
@@ -210,14 +210,14 @@ class GaussianMixture private (
// companion class to provide zero constructor for ExpectationSum
private object ExpectationSum {
  def zero(k: Int, d: Int): ExpectationSum = {
    new ExpectationSum(0.0, Array.fill(k)(0.0),
      Array.fill(k)(BDV.zeros(d)), Array.fill(k)(BreezeMatrix.zeros(d, d)))
  }

  // compute cluster contributions for each input point
  // (U, T) => U for aggregation
  def add(
      weights: Array[Double],
      dists: Array[MultivariateGaussian])
      (sums: ExpectationSum, x: BV[Double]): ExpectationSum = {
    val p = weights.zip(dists).map {
@@ -235,7 +235,7 @@ private object ExpectationSum {
      i = i + 1
    }
    sums
  }
}

// Aggregation class for partial expectation results
@@ -244,9 +244,9 @@ private class ExpectationSum(
    val weights: Array[Double],
    val means: Array[BDV[Double]],
    val sigmas: Array[BreezeMatrix[Double]]) extends Serializable {

  val k = weights.length

  def +=(x: ExpectationSum): ExpectationSum = {
    var i = 0
    while (i < k) {
@@ -257,5 +257,5 @@ private class ExpectationSum(
    }
    logLikelihood += x.logLikelihood
    this
  }
}
@@ -34,10 +34,10 @@ import org.apache.spark.sql.{SQLContext, Row}
/**
 * :: Experimental ::
 *
 * Multivariate Gaussian Mixture Model (GMM) consisting of k Gaussians, where points
 * are drawn from each Gaussian i=1..k with probability w(i); mu(i) and sigma(i) are
 * the respective mean and covariance for each Gaussian distribution i=1..k.
 *
 * @param weights Weights for each Gaussian distribution in the mixture, where weights(i) is
 *                the weight for Gaussian i, and weights.sum == 1
 * @param gaussians Array of MultivariateGaussian where gaussians(i) represents
@@ -45,9 +45,9 @@ import org.apache.spark.sql.{SQLContext, Row}
 */
@Experimental
class GaussianMixtureModel(
  val weights: Array[Double],
  val gaussians: Array[MultivariateGaussian]) extends Serializable with Saveable {

  require(weights.length == gaussians.length, "Length of weight and Gaussian arrays must match")

  override protected def formatVersion = "1.0"
@@ -64,20 +64,20 @@ class GaussianMixtureModel(
    val responsibilityMatrix = predictSoft(points)
    responsibilityMatrix.map(r => r.indexOf(r.max))
  }

  /**
   * Given the input vectors, return the membership value of each vector
   * to all mixture components.
   */
  def predictSoft(points: RDD[Vector]): RDD[Array[Double]] = {
    val sc = points.sparkContext
    val bcDists = sc.broadcast(gaussians)
    val bcWeights = sc.broadcast(weights)
    points.map { x =>
      computeSoftAssignments(x.toBreeze.toDenseVector, bcDists.value, bcWeights.value, k)
    }
  }

  /**
   * Compute the partial assignments for each vector
   */
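A minimal sketch of both prediction paths (variable names are illustrative):

  val model = new GaussianMixture().setK(2).run(data) // data: RDD[Vector]
  val hard = model.predict(data)      // RDD[Int]: index of the most likely component
  val soft = model.predictSoft(data)  // RDD[Array[Double]]: per-component memberships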
@@ -89,7 +89,7 @@ class GaussianMixtureModel(
    val p = weights.zip(dists).map {
      case (weight, dist) => MLUtils.EPSILON + weight * dist.pdf(pt)
    }
    val pSum = p.sum
    for (i <- 0 until k) {
      p(i) /= pSum
    }
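Spelled out, the soft assignment computed above for component k is

  \gamma_k(x) = \frac{\epsilon + w_k\, p_k(x)}{\sum_j \left(\epsilon + w_j\, p_j(x)\right)}

with \epsilon = MLUtils.EPSILON guarding against all densities underflowing to zero.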
@@ -121,7 +121,7 @@ class PowerIterationClustering private[clustering] (
  import org.apache.spark.mllib.clustering.PowerIterationClustering._

  /** Constructs a PIC instance with default parameters: {k: 2, maxIterations: 100,
   * initMode: "random"}.
   */
  def this() = this(k = 2, maxIterations = 100, initMode = "random")
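For context, a sketch of configuring and running PIC against this API (the similarity triplets RDD is an assumption):

  import org.apache.spark.mllib.clustering.PowerIterationClustering

  // similarities: RDD[(Long, Long, Double)] of (srcId, dstId, similarity) entries
  val picModel = new PowerIterationClustering()
    .setK(2)
    .setMaxIterations(20)
    .setInitializationMode("degree") // default: "random", per the constructor above
    .run(similarities)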
@@ -243,7 +243,7 @@ object PowerIterationClustering extends Logging {

  /**
   * Generates random vertex properties (v0) to start power iteration.
   *
   * @param g a graph representing the normalized affinity matrix (W)
   * @return a graph with edges representing W and vertices representing a random vector
   *         with unit 1-norm
@@ -266,7 +266,7 @@ object PowerIterationClustering extends Logging {
   * Generates the degree vector as the vertex properties (v0) to start power iteration.
   * It is not exactly the node degrees but just the normalized sum similarities. Call it
   * as degree vector because it is used in the PIC paper.
   *
   * @param g a graph representing the normalized affinity matrix (W)
   * @return a graph with edges representing W and vertices representing the degree vector
   */
@@ -276,7 +276,7 @@ object PowerIterationClustering extends Logging {
    val v0 = g.vertices.mapValues(_ / sum)
    GraphImpl.fromExistingRDDs(VertexRDD(v0), g.edges)
  }

  /**
   * Runs power iteration.
   * @param g input graph with edges representing the normalized affinity matrix (W) and vertices
@@ -42,7 +42,7 @@ import org.apache.spark.util.random.XORShiftRandom
import org.apache.spark.sql.{SQLContext, Row}

/**
 * Entry in vocabulary
 */
private case class VocabWord(
  var word: String,
@@ -56,18 +56,18 @@ private case class VocabWord(
 * :: Experimental ::
 * Word2Vec creates vector representation of words in a text corpus.
 * The algorithm first constructs a vocabulary from the corpus
 * and then learns vector representation of words in the vocabulary.
 * The vector representation can be used as features in
 * natural language processing and machine learning algorithms.
 *
 * We used skip-gram model in our implementation and hierarchical softmax
 * method to train the model. The variable names in the implementation
 * matches the original C implementation.
 *
 * For original C implementation, see https://code.google.com/p/word2vec/
 * For research papers, see
 * Efficient Estimation of Word Representations in Vector Space
 * and
 * Distributed Representations of Words and Phrases and their Compositionality.
 */
@Experimental
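A minimal end-to-end sketch of this API (the corpus construction and the query word are assumptions):

  import org.apache.spark.mllib.feature.Word2Vec

  // corpus: RDD[Seq[String]] of tokenized sentences
  val w2vModel = new Word2Vec()
    .setVectorSize(100) // default: 100
    .setMinCount(5)     // default: 5
    .fit(corpus)
  val vector = w2vModel.transform("spark")          // Vector for one word
  val synonyms = w2vModel.findSynonyms("spark", 5)  // Array[(String, Double)]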
@@ -79,7 +79,7 @@ class Word2Vec extends Serializable with Logging {
  private var numIterations = 1
  private var seed = Utils.random.nextLong()
  private var minCount = 5

  /**
   * Sets vector size (default: 100).
   */
@@ -122,15 +122,15 @@ class Word2Vec extends Serializable with Logging {
    this
  }

  /**
   * Sets minCount, the minimum number of times a token must appear to be included in the word2vec
   * model's vocabulary (default: 5).
   */
  def setMinCount(minCount: Int): this.type = {
    this.minCount = minCount
    this
  }

  private val EXP_TABLE_SIZE = 1000
  private val MAX_EXP = 6
  private val MAX_CODE_LENGTH = 40
@@ -150,13 +150,13 @@ class Word2Vec extends Serializable with Logging {
      .map(x => VocabWord(
        x._1,
        x._2,
        new Array[Int](MAX_CODE_LENGTH),
        new Array[Int](MAX_CODE_LENGTH),
        0))
      .filter(_.cn >= minCount)
      .collect()
      .sortWith((a, b) => a.cn > b.cn)

    vocabSize = vocab.length
    require(vocabSize > 0, "The vocabulary size should be > 0. You may need to check " +
      "the setting of minCount, which could be large enough to remove all your words in sentences.")
@@ -198,8 +198,8 @@ class Word2Vec extends Serializable with Logging {
    }
    var pos1 = vocabSize - 1
    var pos2 = vocabSize

    var min1i = 0
    var min2i = 0

    a = 0
@@ -268,15 +268,15 @@ class Word2Vec extends Serializable with Logging {
    val words = dataset.flatMap(x => x)

    learnVocab(words)

    createBinaryTree()

    val sc = dataset.context

    val expTable = sc.broadcast(createExpTable())
    val bcVocab = sc.broadcast(vocab)
    val bcVocabHash = sc.broadcast(vocabHash)

    val sentences: RDD[Array[Int]] = words.mapPartitions { iter =>
      new Iterator[Array[Int]] {
        def hasNext: Boolean = iter.hasNext
@@ -297,7 +297,7 @@ class Word2Vec extends Serializable with Logging {
        }
      }
    }

    val newSentences = sentences.repartition(numPartitions).cache()
    val initRandom = new XORShiftRandom(seed)
@@ -402,7 +402,7 @@ class Word2Vec extends Serializable with Logging {
      }
    }
    newSentences.unpersist()

    val word2VecMap = mutable.HashMap.empty[String, Array[Float]]
    var i = 0
    while (i < vocabSize) {
@@ -480,7 +480,7 @@ class Word2VecModel private[mllib] (

  /**
   * Transforms a word to its vector representation
   * @param word a word
   * @return vector representation of word
   */
  def transform(word: String): Vector = {
@@ -495,7 +495,7 @@ class Word2VecModel private[mllib] (
  /**
   * Find synonyms of a word
   * @param word a word
   * @param num number of synonyms to find
   * @return array of (word, cosineSimilarity)
   */
  def findSynonyms(word: String, num: Int): Array[(String, Double)] = {
@@ -506,7 +506,7 @@ class Word2VecModel private[mllib] (
  /**
   * Find synonyms of the vector representation of a word
   * @param vector vector representation of a word
   * @param num number of synonyms to find
   * @return array of (word, cosineSimilarity)
   */
  def findSynonyms(vector: Vector, num: Int): Array[(String, Double)] = {
@@ -228,7 +228,7 @@ private[spark] object BLAS extends Serializable with Logging {
    }
    _nativeBLAS
  }

  /**
   * A := alpha * x * x^T^ + A
   * @param alpha a real scalar that will be multiplied to x * x^T^.
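Element-wise, this rank-1 symmetric update (BLAS syr) is simply

  A_{ij} \leftarrow A_{ij} + \alpha\, x_i\, x_j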
@@ -264,7 +264,7 @@ private[spark] object BLAS extends Serializable with Logging {
        j += 1
      }
      i += 1
    }
  }

  private def syr(alpha: Double, x: SparseVector, A: DenseMatrix) {
@@ -505,7 +505,7 @@ private[spark] object BLAS extends Serializable with Logging {
      nativeBLAS.dgemv(tStrA, mA, nA, alpha, A.values, mA, x.values, 1, beta,
        y.values, 1)
    }

  /**
   * y := alpha * A * x + beta * y
   * For `DenseMatrix` A and `SparseVector` x.
@@ -557,7 +557,7 @@ private[spark] object BLAS extends Serializable with Logging {
      }
    }
  }

  /**
   * y := alpha * A * x + beta * y
   * For `SparseMatrix` A and `SparseVector` x.
@@ -81,7 +81,7 @@ private[mllib] object EigenValueDecomposition {

    require(n * ncv.toLong <= Integer.MAX_VALUE && ncv * (ncv.toLong + 8) <= Integer.MAX_VALUE,
      s"k = $k and/or n = $n are too large to compute an eigendecomposition")

    var ido = new intW(0)
    var info = new intW(0)
    var resid = new Array[Double](n)
@@ -27,10 +27,10 @@ import org.apache.spark.mllib.regression.GeneralizedLinearModel
 * PMML Model Export for GeneralizedLinearModel class with binary ClassificationModel
 */
private[mllib] class BinaryClassificationPMMLModelExport(
    model : GeneralizedLinearModel,
    description : String,
    normalizationMethod : RegressionNormalizationMethodType,
    threshold: Double)
  extends PMMLModelExport {

  populateBinaryClassificationPMML()
@@ -72,7 +72,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
        .withUsageType(FieldUsageType.ACTIVE))
      regressionTableYES.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
    }

    // add target field
    val targetField = FieldName.create("target")
    dataDictionary
@@ -80,9 +80,9 @@ private[mllib] class BinaryClassificationPMMLModelExport(
    miningSchema
      .withMiningFields(new MiningField(targetField)
      .withUsageType(FieldUsageType.TARGET))

    dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)

    pmml.setDataDictionary(dataDictionary)
    pmml.withModels(regressionModel)
  }
@@ -25,7 +25,7 @@ import scala.beans.BeanProperty
import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  /**
   * Holder of the exported model in PMML format
   */
@@ -33,7 +33,7 @@ private[mllib] trait PMMLModelExport {
  val pmml: PMML = new PMML

  setHeader(pmml)

  private def setHeader(pmml: PMML): Unit = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application().withName("Apache Spark MLlib").withVersion(version)
@@ -27,9 +27,9 @@ import org.apache.spark.mllib.regression.LinearRegressionModel
import org.apache.spark.mllib.regression.RidgeRegressionModel

private[mllib] object PMMLModelExportFactory {

  /**
   * Factory object to help creating the necessary PMMLModelExport implementation
   * taking as input the machine learning model (for example KMeansModel).
   */
  def createPMMLModelExport(model: Any): PMMLModelExport = {
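From user code this factory is reached indirectly; in the Spark 1.4 API, exportable models expose a toPMML family of helpers (this is my reading of the surrounding API, worth verifying against the source):

  // kmeansModel: a trained org.apache.spark.mllib.clustering.KMeansModel
  val pmmlXml: String = kmeansModel.toPMML()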
@@ -44,7 +44,7 @@ private[mllib] object PMMLModelExportFactory {
        new GeneralizedLinearPMMLModelExport(lasso, "lasso regression")
      case svm: SVMModel =>
        new BinaryClassificationPMMLModelExport(
          svm, "linear SVM", RegressionNormalizationMethodType.NONE,
          svm.getThreshold.getOrElse(0.0))
      case logistic: LogisticRegressionModel =>
        if (logistic.numClasses == 2) {
@@ -60,5 +60,5 @@ private[mllib] object PMMLModelExportFactory {
          "PMML Export not supported for model: " + model.getClass.getName)
    }
  }

}
@@ -234,7 +234,7 @@ object RandomRDDs {
   *
   * @param sc SparkContext used to create the RDD.
   * @param shape shape parameter (> 0) for the gamma distribution
   * @param scale scale parameter (> 0) for the gamma distribution
   * @param size Size of the RDD.
   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`).
   * @param seed Random seed (default: a random long integer).
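A sketch matching these parameters (the values are illustrative):

  import org.apache.spark.mllib.random.RandomRDDs

  // one million draws from Gamma(shape = 2.0, scale = 1.5) in 4 partitions, fixed seed
  val gammaData = RandomRDDs.gammaRDD(sc, 2.0, 1.5, 1000000L, 4, seed = 11L)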
@@ -293,7 +293,7 @@ object RandomRDDs {
   *
   * @param sc SparkContext used to create the RDD.
   * @param mean mean for the log normal distribution
   * @param std standard deviation for the log normal distribution
   * @param size Size of the RDD.
   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`).
   * @param seed Random seed (default: a random long integer).
@@ -671,7 +671,7 @@ object RandomRDDs {
   *
   * @param sc SparkContext used to create the RDD.
   * @param shape shape parameter (> 0) for the gamma distribution.
   * @param scale scale parameter (> 0) for the gamma distribution.
   * @param numRows Number of Vectors in the RDD.
   * @param numCols Number of elements in each Vector.
   * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`)
@@ -175,7 +175,7 @@ class ALS private (
  /**
   * :: DeveloperApi ::
   * Sets storage level for final RDDs (user/product used in MatrixFactorizationModel). The default
   * value is `MEMORY_AND_DISK`. Users can change it to a serialized storage, e.g.
   * `MEMORY_AND_DISK_SER` and set `spark.rdd.compress` to `true` to reduce the space requirement,
   * at the cost of speed.
   */
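A sketch of the trade-off this doc comment describes; the setter name setFinalRDDStorageLevel is my reading of this DeveloperApi and should be checked against the source:

  import org.apache.spark.mllib.recommendation.ALS
  import org.apache.spark.storage.StorageLevel

  // note: spark.rdd.compress must be set on the SparkConf before the context is created
  val als = new ALS().setRank(10).setIterations(10)
  als.setFinalRDDStorageLevel(StorageLevel.MEMORY_AND_DISK_SER) // serialized: less space, slower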
@@ -170,15 +170,15 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
    case class Data(boundary: Double, prediction: Double)

    def save(
        sc: SparkContext,
        path: String,
        boundaries: Array[Double],
        predictions: Array[Double],
        isotonic: Boolean): Unit = {
      val sqlContext = new SQLContext(sc)

      val metadata = compact(render(
        ("class" -> thisClassName) ~ ("version" -> thisFormatVersion) ~
          ("isotonic" -> isotonic)))
      sc.parallelize(Seq(metadata), 1).saveAsTextFile(metadataPath(path))
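From user code, this private save helper sits behind the public save/load pair (the path is a placeholder):

  // model: a trained org.apache.spark.mllib.regression.IsotonicRegressionModel
  model.save(sc, "hdfs:///models/isotonic")
  val restored = IsotonicRegressionModel.load(sc, "hdfs:///models/isotonic")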
@@ -29,102 +29,102 @@ import org.apache.spark.mllib.util.MLUtils
 * the event that the covariance matrix is singular, the density will be computed in a
 * reduced dimensional subspace under which the distribution is supported.
 * (see [[http://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case]])
 *
 * @param mu The mean vector of the distribution
 * @param sigma The covariance matrix of the distribution
 */
@DeveloperApi
class MultivariateGaussian (
    val mu: Vector,
    val sigma: Matrix) extends Serializable {

  require(sigma.numCols == sigma.numRows, "Covariance matrix must be square")
  require(mu.size == sigma.numCols, "Mean vector length must match covariance matrix size")

  private val breezeMu = mu.toBreeze.toDenseVector

  /**
   * private[mllib] constructor
   *
   * @param mu The mean vector of the distribution
   * @param sigma The covariance matrix of the distribution
   */
  private[mllib] def this(mu: DBV[Double], sigma: DBM[Double]) = {
    this(Vectors.fromBreeze(mu), Matrices.fromBreeze(sigma))
  }

  /**
   * Compute distribution dependent constants:
   *    rootSigmaInv = D^(-1/2)^ * U, where sigma = U * D * U.t
   *    u = log((2*pi)^(-k/2)^ * det(sigma)^(-1/2)^)
   */
  private val (rootSigmaInv: DBM[Double], u: Double) = calculateCovarianceConstants

  /** Returns density of this multivariate Gaussian at given point, x */
  def pdf(x: Vector): Double = {
    pdf(x.toBreeze)
  }

  /** Returns the log-density of this multivariate Gaussian at given point, x */
  def logpdf(x: Vector): Double = {
    logpdf(x.toBreeze)
  }

  /** Returns density of this multivariate Gaussian at given point, x */
  private[mllib] def pdf(x: BV[Double]): Double = {
    math.exp(logpdf(x))
  }

  /** Returns the log-density of this multivariate Gaussian at given point, x */
  private[mllib] def logpdf(x: BV[Double]): Double = {
    val delta = x - breezeMu
    val v = rootSigmaInv * delta
    u + v.t * v * -0.5
  }

  /**
   * Calculate distribution dependent components used for the density function:
   *    pdf(x) = (2*pi)^(-k/2)^ * det(sigma)^(-1/2)^ * exp((-1/2) * (x-mu).t * inv(sigma) * (x-mu))
   * where k is length of the mean vector.
   *
   * We here compute distribution-fixed parts
   *    log((2*pi)^(-k/2)^ * det(sigma)^(-1/2)^)
   * and
   *    D^(-1/2)^ * U, where sigma = U * D * U.t
   *
   * Both the determinant and the inverse can be computed from the singular value decomposition
   * of sigma. Noting that covariance matrices are always symmetric and positive semi-definite,
   * we can use the eigendecomposition. We also do not compute the inverse directly; noting
   * that
   *
   *    sigma = U * D * U.t
   *    inv(Sigma) = U * inv(D) * U.t
   *               = (D^{-1/2}^ * U).t * (D^{-1/2}^ * U)
   *
   * and thus
   *
   *    -0.5 * (x-mu).t * inv(Sigma) * (x-mu) = -0.5 * norm(D^{-1/2}^ * U * (x-mu))^2^
   *
   * To guard against singular covariance matrices, this method computes both the
   * pseudo-determinant and the pseudo-inverse (Moore-Penrose). Singular values are considered
   * to be non-zero only if they exceed a tolerance based on machine precision, matrix size, and
   * relation to the maximum singular value (same tolerance used by, e.g., Octave).
   */
  private def calculateCovarianceConstants: (DBM[Double], Double) = {
    val eigSym.EigSym(d, u) = eigSym(sigma.toBreeze.toDenseMatrix) // sigma = u * diag(d) * u.t

    // For numerical stability, values are considered to be non-zero only if they exceed tol.
    // This prevents any inverted value from exceeding (eps * n * max(d))^-1
    val tol = MLUtils.EPSILON * max(d) * d.length

    try {
      // log(pseudo-determinant) is sum of the logs of all non-zero singular values
      val logPseudoDetSigma = d.activeValuesIterator.filter(_ > tol).map(math.log).sum

      // calculate the root-pseudo-inverse of the diagonal matrix of singular values
      // by inverting the square root of all non-zero values
      val pinvS = diag(new DBV(d.map(v => if (v > tol) math.sqrt(1.0 / v) else 0.0).toArray))

      (pinvS * u, -0.5 * (mu.size * math.log(2.0 * math.Pi) + logPseudoDetSigma))
    } catch {
      case uex: UnsupportedOperationException =>
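A self-contained check of pdf, consistent with this change's own test suite further below (a standard bivariate normal has density 1/(2π) ≈ 0.15915 at the origin); the public class path is assumed:

  import org.apache.spark.mllib.linalg.{Matrices, Vectors}
  import org.apache.spark.mllib.stat.distribution.MultivariateGaussian

  val g = new MultivariateGaussian(
    Vectors.dense(0.0, 0.0),
    Matrices.dense(2, 2, Array(1.0, 0.0, 0.0, 1.0)))
  g.pdf(Vectors.dense(0.0, 0.0)) // ≈ 0.15915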
@@ -270,7 +270,7 @@ object GradientBoostedTrees extends Logging {
    logInfo(s"$timer")

    if (persistedInput) input.unpersist()

    if (validate) {
      new GradientBoostedTreesModel(
        boostingStrategy.treeStrategy.algo,
@@ -474,7 +474,7 @@ object RandomForest extends Serializable with Logging {
      val (treeIndex, node) = nodeQueue.head
      // Choose subset of features for node (if subsampling).
      val featureSubset: Option[Array[Int]] = if (metadata.subsamplingFeatures) {
        Some(SamplingUtils.reservoirSampleAndCount(Range(0,
          metadata.numFeatures).iterator, metadata.numFeaturesPerNode, rng.nextLong)._1)
      } else {
        None
@@ -265,7 +265,7 @@ object MLUtils {
    }
    Vectors.fromBreeze(vector1)
  }

  /**
   * Returns the squared Euclidean distance between two vectors. The following formula will be used
   * if it does not introduce too much numerical error:
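The formula that comment refers to is the standard expansion

  \|a - b\|_2^2 = \|a\|_2^2 + \|b\|_2^2 - 2\, a^\top b

which avoids materializing a − b but loses precision through cancellation when the two vectors are nearly equal, hence the error guard.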
@@ -38,7 +38,7 @@ class RegressionEvaluatorSuite extends SparkFunSuite with MLlibTestSparkContext
    val dataset = sqlContext.createDataFrame(
      sc.parallelize(LinearDataGenerator.generateLinearInput(
        6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 100, 42, 0.1), 2))

    /**
     * Using the following R code to load the data, train the model and evaluate metrics.
     *
@@ -47,7 +47,7 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext {

  test("Binarize continuous features with setter") {
    val threshold: Double = 0.2
    val thresholdBinarized: Array[Double] = data.map(x => if (x > threshold) 1.0 else 0.0)
    val dataFrame: DataFrame = sqlContext.createDataFrame(
      data.zip(thresholdBinarized)).toDF("feature", "expected")
@@ -46,7 +46,7 @@ class GaussianMixtureSuite extends SparkFunSuite with MLlibTestSparkContext {
    }
  }

  test("two clusters") {
    val data = sc.parallelize(GaussianTestData.data)
@@ -62,7 +62,7 @@ class GaussianMixtureSuite extends SparkFunSuite with MLlibTestSparkContext {
    val Ew = Array(1.0 / 3.0, 2.0 / 3.0)
    val Emu = Array(Vectors.dense(-4.3673), Vectors.dense(5.1604))
    val Esigma = Array(Matrices.dense(1, 1, Array(1.1098)), Matrices.dense(1, 1, Array(0.86644)))

    val gmm = new GaussianMixture()
      .setK(2)
      .setInitialModel(initialGmm)
@@ -56,7 +56,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite with MLlibTestSparkCon
      predictions(a.cluster) += a.id
    }
    assert(predictions.toSet == Set((0 to 3).toSet, (4 to 15).toSet))

    val model2 = new PowerIterationClustering()
      .setK(2)
      .setInitializationMode("degree")
@@ -139,7 +139,7 @@ class BLASSuite extends SparkFunSuite {
    syr(alpha, x, dA)

    assert(dA ~== expected absTol 1e-15)

    val dB =
      new DenseMatrix(3, 4, Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0))
@@ -148,7 +148,7 @@ class BLASSuite extends SparkFunSuite {
        syr(alpha, x, dB)
      }
    }

    val dC =
      new DenseMatrix(3, 3, Array(0.0, 1.2, 2.2, 1.2, 3.2, 5.3, 2.2, 5.3, 1.8))
@@ -157,7 +157,7 @@ class BLASSuite extends SparkFunSuite {
        syr(alpha, x, dC)
      }
    }

    val y = new DenseVector(Array(0.0, 2.7, 3.5, 2.1, 1.5))

    withClue("Size of vector must match the rank of matrix") {
@@ -255,13 +255,13 @@ class BLASSuite extends SparkFunSuite {
    val dA =
      new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
    val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))

    val dA2 =
      new DenseMatrix(4, 3, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0), true)
    val sA2 =
      new SparseMatrix(4, 3, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0),
        true)

    val dx = new DenseVector(Array(1.0, 2.0, 3.0))
    val sx = dx.toSparse
    val expected = new DenseVector(Array(4.0, 1.0, 2.0, 9.0))
@@ -270,7 +270,7 @@ class BLASSuite extends SparkFunSuite {
    assert(sA.multiply(dx) ~== expected absTol 1e-15)
    assert(dA.multiply(sx) ~== expected absTol 1e-15)
    assert(sA.multiply(sx) ~== expected absTol 1e-15)

    val y1 = new DenseVector(Array(1.0, 3.0, 1.0, 0.0))
    val y2 = y1.copy
    val y3 = y1.copy
@@ -287,7 +287,7 @@ class BLASSuite extends SparkFunSuite {
    val y14 = y1.copy
    val y15 = y1.copy
    val y16 = y1.copy

    val expected2 = new DenseVector(Array(6.0, 7.0, 4.0, 9.0))
    val expected3 = new DenseVector(Array(10.0, 8.0, 6.0, 18.0))
@@ -295,42 +295,42 @@ class BLASSuite extends SparkFunSuite {
    gemv(1.0, sA, dx, 2.0, y2)
    gemv(1.0, dA, sx, 2.0, y3)
    gemv(1.0, sA, sx, 2.0, y4)

    gemv(1.0, dA2, dx, 2.0, y5)
    gemv(1.0, sA2, dx, 2.0, y6)
    gemv(1.0, dA2, sx, 2.0, y7)
    gemv(1.0, sA2, sx, 2.0, y8)

    gemv(2.0, dA, dx, 2.0, y9)
    gemv(2.0, sA, dx, 2.0, y10)
    gemv(2.0, dA, sx, 2.0, y11)
    gemv(2.0, sA, sx, 2.0, y12)

    gemv(2.0, dA2, dx, 2.0, y13)
    gemv(2.0, sA2, dx, 2.0, y14)
    gemv(2.0, dA2, sx, 2.0, y15)
    gemv(2.0, sA2, sx, 2.0, y16)

    assert(y1 ~== expected2 absTol 1e-15)
    assert(y2 ~== expected2 absTol 1e-15)
    assert(y3 ~== expected2 absTol 1e-15)
    assert(y4 ~== expected2 absTol 1e-15)

    assert(y5 ~== expected2 absTol 1e-15)
    assert(y6 ~== expected2 absTol 1e-15)
    assert(y7 ~== expected2 absTol 1e-15)
    assert(y8 ~== expected2 absTol 1e-15)

    assert(y9 ~== expected3 absTol 1e-15)
    assert(y10 ~== expected3 absTol 1e-15)
    assert(y11 ~== expected3 absTol 1e-15)
    assert(y12 ~== expected3 absTol 1e-15)

    assert(y13 ~== expected3 absTol 1e-15)
    assert(y14 ~== expected3 absTol 1e-15)
    assert(y15 ~== expected3 absTol 1e-15)
    assert(y16 ~== expected3 absTol 1e-15)

    withClue("columns of A don't match the rows of B") {
      intercept[Exception] {
        gemv(1.0, dA.transpose, dx, 2.0, y1)
@@ -345,12 +345,12 @@ class BLASSuite extends SparkFunSuite {
        gemv(1.0, sA.transpose, sx, 2.0, y1)
      }
    }

    val dAT =
      new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
    val sAT =
      new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))

    val dATT = dAT.transpose
    val sATT = sAT.transpose
@@ -214,13 +214,13 @@ class VectorsSuite extends SparkFunSuite {

      val squaredDist = breezeSquaredDistance(sparseVector1.toBreeze, sparseVector2.toBreeze)

      // SparseVector vs. SparseVector
      assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8)
      // DenseVector vs. SparseVector
      assert(Vectors.sqdist(denseVector1, sparseVector2) ~== squaredDist relTol 1E-8)
      // DenseVector vs. DenseVector
      assert(Vectors.sqdist(denseVector1, denseVector2) ~== squaredDist relTol 1E-8)
    }
  }

  test("foreachActive") {
@@ -53,13 +53,13 @@ class BinaryClassificationPMMLModelExportSuite extends SparkFunSuite {
    // ensure logistic regression has normalization method set to LOGIT
    assert(pmmlRegressionModel.getNormalizationMethod() == RegressionNormalizationMethodType.LOGIT)
  }

  test("linear SVM PMML export") {
    val linearInput = LinearDataGenerator.generateLinearInput(3.0, Array(10.0, 10.0), 1, 17)
    val svmModel = new SVMModel(linearInput(0).features, linearInput(0).label)

    val svmModelExport = PMMLModelExportFactory.createPMMLModelExport(svmModel)

    // assert that the PMML format is as expected
    assert(svmModelExport.isInstanceOf[PMMLModelExport])
    val pmml = svmModelExport.getPmml
@@ -80,5 +80,5 @@ class BinaryClassificationPMMLModelExportSuite extends SparkFunSuite {
    // ensure linear SVM has normalization method set to NONE
    assert(pmmlRegressionModel.getNormalizationMethod() == RegressionNormalizationMethodType.NONE)
  }

}
@@ -45,5 +45,5 @@ class KMeansPMMLModelExportSuite extends SparkFunSuite {
    val pmmlClusteringModel = pmml.getModels.get(0).asInstanceOf[ClusteringModel]
    assert(pmmlClusteringModel.getNumberOfClusters === clusterCenters.length)
  }

}
@@ -60,25 +60,25 @@ class PMMLModelExportFactorySuite extends SparkFunSuite {
  test("PMMLModelExportFactory create BinaryClassificationPMMLModelExport "
    + "when passing a LogisticRegressionModel or SVMModel") {
    val linearInput = LinearDataGenerator.generateLinearInput(3.0, Array(10.0, 10.0), 1, 17)

    val logisticRegressionModel =
      new LogisticRegressionModel(linearInput(0).features, linearInput(0).label)
    val logisticRegressionModelExport =
      PMMLModelExportFactory.createPMMLModelExport(logisticRegressionModel)
    assert(logisticRegressionModelExport.isInstanceOf[BinaryClassificationPMMLModelExport])

    val svmModel = new SVMModel(linearInput(0).features, linearInput(0).label)
    val svmModelExport = PMMLModelExportFactory.createPMMLModelExport(svmModel)
    assert(svmModelExport.isInstanceOf[BinaryClassificationPMMLModelExport])
  }

  test("PMMLModelExportFactory throw IllegalArgumentException "
    + "when passing a Multinomial Logistic Regression") {
    /** 3 classes, 2 features */
    val multiclassLogisticRegressionModel = new LogisticRegressionModel(
      weights = Vectors.dense(0.1, 0.2, 0.3, 0.4), intercept = 1.0,
      numFeatures = 2, numClasses = 3)

    intercept[IllegalArgumentException] {
      PMMLModelExportFactory.createPMMLModelExport(multiclassLogisticRegressionModel)
    }
@@ -26,39 +26,39 @@ class MultivariateGaussianSuite extends SparkFunSuite with MLlibTestSparkContext
  test("univariate") {
    val x1 = Vectors.dense(0.0)
    val x2 = Vectors.dense(1.5)

    val mu = Vectors.dense(0.0)
    val sigma1 = Matrices.dense(1, 1, Array(1.0))
    val dist1 = new MultivariateGaussian(mu, sigma1)
    assert(dist1.pdf(x1) ~== 0.39894 absTol 1E-5)
    assert(dist1.pdf(x2) ~== 0.12952 absTol 1E-5)

    val sigma2 = Matrices.dense(1, 1, Array(4.0))
    val dist2 = new MultivariateGaussian(mu, sigma2)
    assert(dist2.pdf(x1) ~== 0.19947 absTol 1E-5)
    assert(dist2.pdf(x2) ~== 0.15057 absTol 1E-5)
  }

  test("multivariate") {
    val x1 = Vectors.dense(0.0, 0.0)
    val x2 = Vectors.dense(1.0, 1.0)

    val mu = Vectors.dense(0.0, 0.0)
    val sigma1 = Matrices.dense(2, 2, Array(1.0, 0.0, 0.0, 1.0))
    val dist1 = new MultivariateGaussian(mu, sigma1)
    assert(dist1.pdf(x1) ~== 0.15915 absTol 1E-5)
    assert(dist1.pdf(x2) ~== 0.05855 absTol 1E-5)

    val sigma2 = Matrices.dense(2, 2, Array(4.0, -1.0, -1.0, 2.0))
    val dist2 = new MultivariateGaussian(mu, sigma2)
    assert(dist2.pdf(x1) ~== 0.060155 absTol 1E-5)
    assert(dist2.pdf(x2) ~== 0.033971 absTol 1E-5)
  }

  test("multivariate degenerate") {
    val x1 = Vectors.dense(0.0, 0.0)
    val x2 = Vectors.dense(1.0, 1.0)

    val mu = Vectors.dense(0.0, 0.0)
    val sigma = Matrices.dense(2, 2, Array(1.0, 1.0, 1.0, 1.0))
    val dist = new MultivariateGaussian(mu, sigma)
@@ -62,7 +62,7 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
      val fastSquaredDist3 =
        fastSquaredDistance(v2, norm2, v3, norm3, precision)
      assert((fastSquaredDist3 - squaredDist2) <= precision * squaredDist2, s"failed with m = $m")
      if (m > 10) {
        val v4 = Vectors.sparse(n, indices.slice(0, m - 10),
          indices.map(i => a(i) + 0.5).slice(0, m - 10))
        val norm4 = Vectors.norm(v4, 2.0)