[SPARK-7620] [ML] [MLLIB] Removed calling size, length in while condition to avoid extra JVM call
Author: DB Tsai <dbt@netflix.com>
Closes #6137 from dbtsai/clean and squashes the following commits:
185816d [DB Tsai] fix compilation issue
f418d08 [DB Tsai] first commit
(cherry picked from commit d3db2fd667)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
This commit is contained in:
parent
82f387fe23
commit
9ab4db29ff
|
@ -258,7 +258,8 @@ class LogisticRegressionModel private[ml] (
|
|||
rawPrediction match {
|
||||
case dv: DenseVector =>
|
||||
var i = 0
|
||||
while (i < dv.size) {
|
||||
val size = dv.size
|
||||
while (i < size) {
|
||||
dv.values(i) = 1.0 / (1.0 + math.exp(-dv.values(i)))
|
||||
i += 1
|
||||
}
|
||||
|
@ -357,7 +358,8 @@ private[classification] class MultiClassSummarizer extends Serializable {
|
|||
def histogram: Array[Long] = {
|
||||
val result = Array.ofDim[Long](numClasses)
|
||||
var i = 0
|
||||
while (i < result.length) {
|
||||
val len = result.length
|
||||
while (i < len) {
|
||||
result(i) = distinctMap.getOrElse(i, 0L)
|
||||
i += 1
|
||||
}
|
||||
|
@ -480,7 +482,8 @@ private class LogisticAggregator(
|
|||
var i = 0
|
||||
val localThisGradientSumArray = this.gradientSumArray
|
||||
val localOtherGradientSumArray = other.gradientSumArray
|
||||
while (i < localThisGradientSumArray.length) {
|
||||
val len = localThisGradientSumArray.length
|
||||
while (i < len) {
|
||||
localThisGradientSumArray(i) += localOtherGradientSumArray(i)
|
||||
i += 1
|
||||
}
|
||||
|
|
|
@ -98,7 +98,8 @@ private[feature] object Bucketizer {
|
|||
false
|
||||
} else {
|
||||
var i = 0
|
||||
while (i < splits.length - 1) {
|
||||
val n = splits.length - 1
|
||||
while (i < n) {
|
||||
if (splits(i) >= splits(i + 1)) return false
|
||||
i += 1
|
||||
}
|
||||
|
|
|
@ -189,7 +189,8 @@ private object VectorIndexer {
|
|||
|
||||
private def addDenseVector(dv: DenseVector): Unit = {
|
||||
var i = 0
|
||||
while (i < dv.size) {
|
||||
val size = dv.size
|
||||
while (i < size) {
|
||||
if (featureValueSets(i).size <= maxCategories) {
|
||||
featureValueSets(i).add(dv(i))
|
||||
}
|
||||
|
@ -201,7 +202,8 @@ private object VectorIndexer {
|
|||
// TODO: This might be able to handle 0's more efficiently.
|
||||
var vecIndex = 0 // index into vector
|
||||
var k = 0 // index into non-zero elements
|
||||
while (vecIndex < sv.size) {
|
||||
val size = sv.size
|
||||
while (vecIndex < size) {
|
||||
val featureValue = if (k < sv.indices.length && vecIndex == sv.indices(k)) {
|
||||
k += 1
|
||||
sv.values(k - 1)
|
||||
|
|
|
@ -167,7 +167,8 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress
|
|||
val weights = {
|
||||
val rawWeights = state.x.toArray.clone()
|
||||
var i = 0
|
||||
while (i < rawWeights.length) {
|
||||
val len = rawWeights.length
|
||||
while (i < len) {
|
||||
rawWeights(i) *= { if (featuresStd(i) != 0.0) yStd / featuresStd(i) else 0.0 }
|
||||
i += 1
|
||||
}
|
||||
|
@ -307,7 +308,8 @@ private class LeastSquaresAggregator(
|
|||
val weightsArray = weights.toArray.clone()
|
||||
var sum = 0.0
|
||||
var i = 0
|
||||
while (i < weightsArray.length) {
|
||||
val len = weightsArray.length
|
||||
while (i < len) {
|
||||
if (featuresStd(i) != 0.0) {
|
||||
weightsArray(i) /= featuresStd(i)
|
||||
sum += weightsArray(i) * featuresMean(i)
|
||||
|
|
|
@ -38,7 +38,8 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf
|
|||
|
||||
protected def isSorted(array: Array[Int]): Boolean = {
|
||||
var i = 1
|
||||
while (i < array.length) {
|
||||
val len = array.length
|
||||
while (i < len) {
|
||||
if (array(i) < array(i-1)) return false
|
||||
i += 1
|
||||
}
|
||||
|
|
|
@ -116,7 +116,8 @@ class L1Updater extends Updater {
|
|||
// Apply proximal operator (soft thresholding)
|
||||
val shrinkageVal = regParam * thisIterStepSize
|
||||
var i = 0
|
||||
while (i < brzWeights.length) {
|
||||
val len = brzWeights.length
|
||||
while (i < len) {
|
||||
val wi = brzWeights(i)
|
||||
brzWeights(i) = signum(wi) * max(0.0, abs(wi) - shrinkageVal)
|
||||
i += 1
|
||||
|
|
|
@ -69,7 +69,8 @@ class IsotonicRegressionModel (
|
|||
/** Asserts the input array is monotone with the given ordering. */
|
||||
private def assertOrdered(xs: Array[Double])(implicit ord: Ordering[Double]): Unit = {
|
||||
var i = 1
|
||||
while (i < xs.length) {
|
||||
val len = xs.length
|
||||
while (i < len) {
|
||||
require(ord.compare(xs(i - 1), xs(i)) <= 0,
|
||||
s"Elements (${xs(i - 1)}, ${xs(i)}) are not ordered.")
|
||||
i += 1
|
||||
|
@ -329,11 +330,12 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
|
|||
}
|
||||
|
||||
var i = 0
|
||||
while (i < input.length) {
|
||||
val len = input.length
|
||||
while (i < len) {
|
||||
var j = i
|
||||
|
||||
// Find monotonicity violating sequence, if any.
|
||||
while (j < input.length - 1 && input(j)._1 > input(j + 1)._1) {
|
||||
while (j < len - 1 && input(j)._1 > input(j + 1)._1) {
|
||||
j = j + 1
|
||||
}
|
||||
|
||||
|
|
|
@ -70,23 +70,30 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
|
|||
require(n == sample.size, s"Dimensions mismatch when adding new sample." +
|
||||
s" Expecting $n but got ${sample.size}.")
|
||||
|
||||
val localCurrMean= currMean
|
||||
val localCurrM2n = currM2n
|
||||
val localCurrM2 = currM2
|
||||
val localCurrL1 = currL1
|
||||
val localNnz = nnz
|
||||
val localCurrMax = currMax
|
||||
val localCurrMin = currMin
|
||||
sample.foreachActive { (index, value) =>
|
||||
if (value != 0.0) {
|
||||
if (currMax(index) < value) {
|
||||
currMax(index) = value
|
||||
if (localCurrMax(index) < value) {
|
||||
localCurrMax(index) = value
|
||||
}
|
||||
if (currMin(index) > value) {
|
||||
currMin(index) = value
|
||||
if (localCurrMin(index) > value) {
|
||||
localCurrMin(index) = value
|
||||
}
|
||||
|
||||
val prevMean = currMean(index)
|
||||
val prevMean = localCurrMean(index)
|
||||
val diff = value - prevMean
|
||||
currMean(index) = prevMean + diff / (nnz(index) + 1.0)
|
||||
currM2n(index) += (value - currMean(index)) * diff
|
||||
currM2(index) += value * value
|
||||
currL1(index) += math.abs(value)
|
||||
localCurrMean(index) = prevMean + diff / (localNnz(index) + 1.0)
|
||||
localCurrM2n(index) += (value - localCurrMean(index)) * diff
|
||||
localCurrM2(index) += value * value
|
||||
localCurrL1(index) += math.abs(value)
|
||||
|
||||
nnz(index) += 1.0
|
||||
localNnz(index) += 1.0
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -130,14 +137,14 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
|
|||
}
|
||||
} else if (totalCnt == 0 && other.totalCnt != 0) {
|
||||
this.n = other.n
|
||||
this.currMean = other.currMean.clone
|
||||
this.currM2n = other.currM2n.clone
|
||||
this.currM2 = other.currM2.clone
|
||||
this.currL1 = other.currL1.clone
|
||||
this.currMean = other.currMean.clone()
|
||||
this.currM2n = other.currM2n.clone()
|
||||
this.currM2 = other.currM2.clone()
|
||||
this.currL1 = other.currL1.clone()
|
||||
this.totalCnt = other.totalCnt
|
||||
this.nnz = other.nnz.clone
|
||||
this.currMax = other.currMax.clone
|
||||
this.currMin = other.currMin.clone
|
||||
this.nnz = other.nnz.clone()
|
||||
this.currMax = other.currMax.clone()
|
||||
this.currMin = other.currMin.clone()
|
||||
}
|
||||
this
|
||||
}
|
||||
|
@ -165,7 +172,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
|
|||
if (denominator > 0.0) {
|
||||
val deltaMean = currMean
|
||||
var i = 0
|
||||
while (i < currM2n.size) {
|
||||
val len = currM2n.length
|
||||
while (i < len) {
|
||||
realVariance(i) =
|
||||
currM2n(i) + deltaMean(i) * deltaMean(i) * nnz(i) * (totalCnt - nnz(i)) / totalCnt
|
||||
realVariance(i) /= denominator
|
||||
|
@ -211,7 +219,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
|
|||
val realMagnitude = Array.ofDim[Double](n)
|
||||
|
||||
var i = 0
|
||||
while (i < currM2.size) {
|
||||
val len = currM2.length
|
||||
while (i < len) {
|
||||
realMagnitude(i) = math.sqrt(currM2(i))
|
||||
i += 1
|
||||
}
|
||||
|
|
|
@ -205,8 +205,10 @@ private[stat] object ChiSqTest extends Logging {
|
|||
val colSums = new Array[Double](numCols)
|
||||
val rowSums = new Array[Double](numRows)
|
||||
val colMajorArr = counts.toArray
|
||||
val colMajorArrLen = colMajorArr.length
|
||||
|
||||
var i = 0
|
||||
while (i < colMajorArr.size) {
|
||||
while (i < colMajorArrLen) {
|
||||
val elem = colMajorArr(i)
|
||||
if (elem < 0.0) {
|
||||
throw new IllegalArgumentException("Contingency table cannot contain negative entries.")
|
||||
|
@ -220,7 +222,7 @@ private[stat] object ChiSqTest extends Logging {
|
|||
// second pass to collect statistic
|
||||
var statistic = 0.0
|
||||
var j = 0
|
||||
while (j < colMajorArr.size) {
|
||||
while (j < colMajorArrLen) {
|
||||
val col = j / numRows
|
||||
val colSum = colSums(col)
|
||||
if (colSum == 0.0) {
|
||||
|
|
|
@ -111,11 +111,12 @@ private[tree] abstract class ImpurityCalculator(val stats: Array[Double]) {
|
|||
* Add the stats from another calculator into this one, modifying and returning this calculator.
|
||||
*/
|
||||
def add(other: ImpurityCalculator): ImpurityCalculator = {
|
||||
require(stats.size == other.stats.size,
|
||||
require(stats.length == other.stats.length,
|
||||
s"Two ImpurityCalculator instances cannot be added with different counts sizes." +
|
||||
s" Sizes are ${stats.size} and ${other.stats.size}.")
|
||||
s" Sizes are ${stats.length} and ${other.stats.length}.")
|
||||
var i = 0
|
||||
while (i < other.stats.size) {
|
||||
val len = other.stats.length
|
||||
while (i < len) {
|
||||
stats(i) += other.stats(i)
|
||||
i += 1
|
||||
}
|
||||
|
@ -127,11 +128,12 @@ private[tree] abstract class ImpurityCalculator(val stats: Array[Double]) {
|
|||
* calculator.
|
||||
*/
|
||||
def subtract(other: ImpurityCalculator): ImpurityCalculator = {
|
||||
require(stats.size == other.stats.size,
|
||||
require(stats.length == other.stats.length,
|
||||
s"Two ImpurityCalculator instances cannot be subtracted with different counts sizes." +
|
||||
s" Sizes are ${stats.size} and ${other.stats.size}.")
|
||||
s" Sizes are ${stats.length} and ${other.stats.length}.")
|
||||
var i = 0
|
||||
while (i < other.stats.size) {
|
||||
val len = other.stats.length
|
||||
while (i < len) {
|
||||
stats(i) -= other.stats(i)
|
||||
i += 1
|
||||
}
|
||||
|
|
|
@ -107,7 +107,8 @@ object LinearDataGenerator {
|
|||
|
||||
x.foreach { v =>
|
||||
var i = 0
|
||||
while (i < v.length) {
|
||||
val len = v.length
|
||||
while (i < len) {
|
||||
v(i) = (v(i) - 0.5) * math.sqrt(12.0 * xVariance(i)) + xMean(i)
|
||||
i += 1
|
||||
}
|
||||
|
|
|
@ -122,7 +122,8 @@ private object BucketizerSuite extends FunSuite {
|
|||
def linearSearchForBuckets(splits: Array[Double], feature: Double): Double = {
|
||||
require(feature >= splits.head)
|
||||
var i = 0
|
||||
while (i < splits.length - 1) {
|
||||
val n = splits.length - 1
|
||||
while (i < n) {
|
||||
if (feature < splits(i + 1)) return i
|
||||
i += 1
|
||||
}
|
||||
|
@ -138,7 +139,8 @@ private object BucketizerSuite extends FunSuite {
|
|||
s" ${splits.mkString(", ")}")
|
||||
}
|
||||
var i = 0
|
||||
while (i < splits.length - 1) {
|
||||
val n = splits.length - 1
|
||||
while (i < n) {
|
||||
// Split i should fall in bucket i.
|
||||
testFeature(splits(i), i)
|
||||
// Value between splits i,i+1 should be in i, which is also true if the (i+1)-th split is inf.
|
||||
|
|
|
@ -101,7 +101,8 @@ object LogisticRegressionSuite {
|
|||
// This doesn't work if `vector` is a sparse vector.
|
||||
val vectorArray = vector.toArray
|
||||
var i = 0
|
||||
while (i < vectorArray.length) {
|
||||
val len = vectorArray.length
|
||||
while (i < len) {
|
||||
vectorArray(i) = vectorArray(i) * math.sqrt(xVariance(i)) + xMean(i)
|
||||
i += 1
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue