diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/impl/Utils.scala b/mllib-local/src/main/scala/org/apache/spark/ml/impl/Utils.scala
index 8ff5b6a51a..abe1d4b8e4 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/impl/Utils.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/impl/Utils.scala
@@ -99,30 +99,42 @@ private[spark] object Utils {
   /**
    * Perform in-place softmax conversion.
    */
-  def softmax(values: Array[Double]): Unit = {
+  def softmax(array: Array[Double]): Unit =
+    softmax(array, array.length, 0, 1, array)
+
+  /**
+   * Perform softmax conversion.
+   */
+  def softmax(
+      input: Array[Double],
+      n: Int,
+      offset: Int,
+      step: Int,
+      output: Array[Double]): Unit = {
     var maxValue = Double.MinValue
-    var i = 0
-    while (i < values.length) {
-      val value = values(i)
-      if (value.isPosInfinity) {
-        java.util.Arrays.fill(values, 0)
-        values(i) = 1.0
+    var i = offset
+    val end = offset + step * n
+    while (i < end) {
+      val v = input(i)
+      if (v.isPosInfinity) {
+        BLAS.javaBLAS.dscal(n, 0.0, output, offset, step)
+        output(i) = 1.0
         return
-      } else if (value > maxValue) {
-        maxValue = value
+      } else if (v > maxValue) {
+        maxValue = v
       }
-      i += 1
+      i += step
     }
 
     var sum = 0.0
-    i = 0
-    while (i < values.length) {
-      val exp = math.exp(values(i) - maxValue)
-      values(i) = exp
+    i = offset
+    while (i < end) {
+      val exp = math.exp(input(i) - maxValue)
+      output(i) = exp
       sum += exp
-      i += 1
+      i += step
     }
 
-    BLAS.javaBLAS.dscal(values.length, 1.0 / sum, values, 1)
+    BLAS.javaBLAS.dscal(n, 1.0 / sum, output, offset, step)
   }
 }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/LossFunction.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/LossFunction.scala
index 3aea568cd6..37e7b53cfd 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/ann/LossFunction.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/ann/LossFunction.scala
@@ -22,6 +22,8 @@ import java.util.Random
 import breeze.linalg.{sum => Bsum, DenseMatrix => BDM, DenseVector => BDV}
 import breeze.numerics.{log => brzlog}
 
+import org.apache.spark.ml.impl.Utils
+
 /**
  * Trait for loss function
  */
@@ -79,30 +81,10 @@ private[ann] class SoftmaxLayerModelWithCrossEntropyLoss extends LayerModel with
   val weights = new BDV[Double](0)
 
   override def eval(data: BDM[Double], output: BDM[Double]): Unit = {
+    require(!data.isTranspose && !output.isTranspose)
     var j = 0
-    // find max value to make sure later that exponent is computable
     while (j < data.cols) {
-      var i = 0
-      var max = Double.MinValue
-      while (i < data.rows) {
-        if (data(i, j) > max) {
-          max = data(i, j)
-        }
-        i += 1
-      }
-      var sum = 0.0
-      i = 0
-      while (i < data.rows) {
-        val res = math.exp(data(i, j) - max)
-        output(i, j) = res
-        sum += res
-        i += 1
-      }
-      i = 0
-      while (i < data.rows) {
-        output(i, j) /= sum
-        i += 1
-      }
+      Utils.softmax(data.data, data.rows, j * data.rows, 1, output.data)
       j += 1
     }
   }
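The new overload generalizes the in-place version: it normalizes the n entries input(offset), input(offset + step), ..., input(offset + (n - 1) * step) and writes the results to the same positions of output. With a column-major layout (as in Breeze and Spark DenseMatrix), a caller can thus run softmax over a column (offset = j * rows, step = 1, n = rows) or over a row (offset = i, step = rows, n = cols) without copying; the eval change above uses the column pattern, and the aggregator change below uses the row pattern. A minimal sketch of the two access patterns, assuming it runs somewhere under org.apache.spark.ml where the private[spark] Utils object is visible, with made-up matrix values:

    import org.apache.spark.ml.impl.Utils

    // A 2 x 3 matrix stored column-major:
    //   1.0  3.0  5.0
    //   2.0  4.0  6.0
    val arr = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
    val rows = 2
    val cols = 3

    // In-place softmax over column j = 1 (indices 2, 3): the ANN call pattern.
    Utils.softmax(arr, rows, 1 * rows, 1, arr)

    // In-place softmax over row i = 0 (indices 0, 2, 4): the aggregator call pattern.
    Utils.softmax(arr, cols, 0, rows, arr)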
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
index 6b1537bcc5..fd19ec3976 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
@@ -23,6 +23,7 @@ import org.json4s.DefaultFormats
 import org.apache.spark.annotation.Since
 import org.apache.spark.ml.PredictorParams
 import org.apache.spark.ml.functions.checkNonNegativeWeight
+import org.apache.spark.ml.impl.Utils
 import org.apache.spark.ml.linalg._
 import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators}
 import org.apache.spark.ml.param.shared.HasWeightCol
@@ -527,19 +528,7 @@ class NaiveBayesModel private[ml] (
   override protected def raw2probabilityInPlace(rawPrediction: Vector): Vector = {
     rawPrediction match {
       case dv: DenseVector =>
-        var i = 0
-        val size = dv.size
-        val maxLog = dv.values.max
-        while (i < size) {
-          dv.values(i) = math.exp(dv.values(i) - maxLog)
-          i += 1
-        }
-        val probSum = dv.values.sum
-        i = 0
-        while (i < size) {
-          dv.values(i) = dv.values(i) / probSum
-          i += 1
-        }
+        Utils.softmax(dv.values)
         dv
       case sv: SparseVector =>
         throw new RuntimeException("Unexpected error in NaiveBayesModel:" +
diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/MultinomialLogisticBlockAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/MultinomialLogisticBlockAggregator.scala
index 0ee5488763..73db4941b3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/MultinomialLogisticBlockAggregator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/MultinomialLogisticBlockAggregator.scala
@@ -74,7 +74,7 @@ private[ml] class MultinomialLogisticBlockAggregator(
     new DenseMatrix(numClasses, numFeatures, coefficientsArray)
   }
 
-  private lazy val intercept = if (fitIntercept) {
+  @transient private lazy val intercept = if (fitIntercept) {
     new DenseVector(coefficientsArray.takeRight(numClasses))
   } else {
     null
@@ -83,8 +83,8 @@ private[ml] class MultinomialLogisticBlockAggregator(
   // pre-computed margin of an empty vector.
   // with this variable as an offset, for a sparse vector, we only need to
   // deal with non-zero values in prediction.
-  private val marginOffset = if (fitWithMean) {
-    val offset = intercept.copy
+  @transient private lazy val marginOffset = if (fitWithMean) {
+    val offset = new DenseVector(coefficientsArray.takeRight(numClasses)) // intercept
     BLAS.gemv(-1.0, linear, Vectors.dense(bcScaledMean.value), 1.0, offset)
     offset
   } else {
@@ -115,7 +115,7 @@ private[ml] class MultinomialLogisticBlockAggregator(
       val offset = if (fitWithMean) marginOffset else intercept
       var j = 0
       while (j < numClasses) {
-        java.util.Arrays.fill(arr, j * size, (j + 1) * size, offset(j))
+        if (offset(j) != 0) java.util.Arrays.fill(arr, j * size, (j + 1) * size, offset(j))
         j += 1
       }
     }
@@ -126,28 +126,17 @@ private[ml] class MultinomialLogisticBlockAggregator(
     var localLossSum = 0.0
     var localWeightSum = 0.0
     var i = 0
-    val probs = Array.ofDim[Double](numClasses)
-    val multiplierSum = Array.ofDim[Double](numClasses)
     while (i < size) {
       val weight = block.getWeight(i)
       localWeightSum += weight
       if (weight > 0) {
-        val label = block.getLabel(i)
-        var j = 0
-        while (j < numClasses) { probs(j) = mat(i, j); j += 1 }
-        Utils.softmax(probs)
-
-        j = 0
-        while (j < numClasses) {
-          val multiplier = weight * (probs(j) - (if (label == j) 1.0 else 0.0))
-          mat.update(i, j, multiplier)
-          multiplierSum(j) += multiplier
-          j += 1
-        }
-
-        localLossSum -= weight * math.log(probs(label.toInt))
+        val labelIndex = i + block.getLabel(i).toInt * size
+        Utils.softmax(arr, numClasses, i, size, arr) // prob
+        localLossSum -= weight * math.log(arr(labelIndex))
+        if (weight != 1) BLAS.javaBLAS.dscal(numClasses, weight, arr, i, size)
+        arr(labelIndex) -= weight
       } else {
-        var j = 0; while (j < numClasses) { mat.update(i, j, 0.0); j += 1 }
+        BLAS.javaBLAS.dscal(numClasses, 0, arr, i, size)
       }
       i += 1
     }
@@ -173,16 +162,25 @@ private[ml] class MultinomialLogisticBlockAggregator(
       linearGradSumMat.foreachActive { (i, j, v) => gradientSumArray(i * numClasses + j) += v }
     }
 
-    if (fitWithMean) {
-      // above update of the linear part of gradientSumArray does NOT take the centering
-      // into account, here we need to adjust this part.
-      // following BLAS.dger operation equals to: gradientSumArray[0 : F X C] -= mat.T X _mm_,
-      // where _mm_ is a matrix of size S X F with each row equals to array ScaledMean.
-      BLAS.nativeBLAS.dger(numClasses, numFeatures, -1.0, multiplierSum, 1,
-        bcScaledMean.value, 1, gradientSumArray, numClasses)
-    }
-
     if (fitIntercept) {
+      val multiplierSum = Array.ofDim[Double](numClasses)
+      var j = 0
+      while (j < numClasses) {
+        var i = j * size
+        val end = i + size
+        while (i < end) { multiplierSum(j) += arr(i); i += 1 }
+        j += 1
+      }
+
+      if (fitWithMean) {
+        // above update of the linear part of gradientSumArray does NOT take the centering
+        // into account, here we need to adjust this part.
+        // following BLAS.dger operation equals to: gradientSumArray[0 : F X C] -= mat.T X _mm_,
+        // where _mm_ is a matrix of size S X F with each row equals to array ScaledMean.
+        BLAS.nativeBLAS.dger(numClasses, numFeatures, -1.0, multiplierSum, 1,
+          bcScaledMean.value, 1, gradientSumArray, numClasses)
+      }
+
       BLAS.javaBLAS.daxpy(numClasses, 1.0, multiplierSum, 0, 1,
         gradientSumArray, numClasses * numFeatures, 1)
     }
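Inside the block loop, the aggregator now updates the column-major margin array arr in place: softmax over the row slice (offset i, stride size), scale the row by the sample weight via dscal, then subtract the weight at the label's entry. That fused form reproduces the old explicit multiplier weight * (probs(j) - (if (label == j) 1.0 else 0.0)). A standalone sketch checking the equivalence for one row, with made-up values:

    // Made-up probabilities for one row with 3 classes.
    val probs = Array(0.2, 0.3, 0.5) // softmax of the margins
    val weight = 2.0
    val label = 2

    // Fused form: scale the whole row, then fix up the label entry.
    val fused = probs.map(_ * weight)
    fused(label) -= weight

    // Old explicit form.
    val explicit = probs.indices.map { j =>
      weight * (probs(j) - (if (j == label) 1.0 else 0.0))
    }
    assert(fused.indices.forall(j => math.abs(fused(j) - explicit(j)) < 1e-12))

The multiplierSum accumulation that previously ran inside the loop moves after it, summing each size-long column of arr, and since its only consumers (dger and daxpy) are guarded by fitIntercept, it now lives inside that branch.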
diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py
index 35ce48b926..8861fbf2b5 100644
--- a/python/pyspark/ml/tests/test_algorithms.py
+++ b/python/pyspark/ml/tests/test_algorithms.py
@@ -83,7 +83,7 @@ class MultilayerPerceptronClassifierTest(SparkSessionTestCase):
         result = model.transform(test).head()
         expected_prediction = 2.0
         expected_probability = [0.0, 0.0, 1.0]
-        expected_rawPrediction = [-11.6081922998, -8.15827998691, 22.17757045]
+        expected_rawPrediction = [-11.824, -8.298, 22.5299]
         self.assertTrue(result.prediction, expected_prediction)
         self.assertTrue(np.allclose(result.probability, expected_probability, atol=1E-4))
         self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, rtol=0.1))
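The updated literals stay consistent with the test's other assertions: softmax of the new expected raw predictions still collapses onto the expected probability vector. A quick standalone check of that arithmetic (values copied from the test):

    val raw = Array(-11.824, -8.298, 22.5299)
    val max = raw.max
    val exps = raw.map(v => math.exp(v - max))
    val sum = exps.sum
    val probs = exps.map(_ / sum)
    // probs ≈ Array(1.2e-15, 4.1e-14, 1.0), i.e. [0.0, 0.0, 1.0]
    // within the test's atol of 1e-4.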