[SPARK-34860][ML] Multinomial Logistic Regression with intercept support centering
### What changes were proposed in this pull request? 1, use the new `MultinomialLogisticBlockAggregator`, which supports virtual centering 2, remove the unused `BlockLogisticAggregator` ### Why are the changes needed? 1, for better convergence; 2, its solution is much closer to GLMNET's; ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? updated and new test suites Closes #31985 from zhengruifeng/mlr_center. Authored-by: Ruifeng Zheng <ruifengz@foxmail.com> Signed-off-by: Sean Owen <srowen@gmail.com>
This commit is contained in:
parent
c902f77b42
commit
d372e6e094
|
@ -941,19 +941,21 @@ class LogisticRegression @Since("1.2.0") (
|
||||||
optimizer: FirstOrderMinimizer[BDV[Double], DiffFunction[BDV[Double]]]) = {
|
optimizer: FirstOrderMinimizer[BDV[Double], DiffFunction[BDV[Double]]]) = {
|
||||||
val multinomial = checkMultinomial(numClasses)
|
val multinomial = checkMultinomial(numClasses)
|
||||||
|
|
||||||
// for binary LR, we can center the input vector, if and only if:
|
// for LR, we can center the input vector, if and only if:
|
||||||
// 1, fitIntercept is true;
|
// 1, fitIntercept is true;
|
||||||
// 2, no penalty on the intercept, which is always true in existing impl;
|
// 2, no penalty on the intercept, which is always true in existing impl;
|
||||||
// 3, no bounds on the intercept.
|
// 3, no bounds on the intercept.
|
||||||
val fitWithMean = !multinomial && $(fitIntercept) &&
|
val fitWithMean = $(fitIntercept) &&
|
||||||
(!isSet(lowerBoundsOnIntercepts) || $(lowerBoundsOnIntercepts)(0).isNegInfinity) &&
|
(!isSet(lowerBoundsOnIntercepts) ||
|
||||||
(!isSet(upperBoundsOnIntercepts) || $(upperBoundsOnIntercepts)(0).isPosInfinity)
|
$(lowerBoundsOnIntercepts).toArray.forall(_.isNegInfinity)) &&
|
||||||
|
(!isSet(upperBoundsOnIntercepts) ||
|
||||||
|
$(upperBoundsOnIntercepts).toArray.forall(_.isPosInfinity))
|
||||||
|
|
||||||
val numFeatures = featuresStd.length
|
val numFeatures = featuresStd.length
|
||||||
val inverseStd = featuresStd.map(std => if (std != 0) 1.0 / std else 0.0)
|
val inverseStd = featuresStd.map(std => if (std != 0) 1.0 / std else 0.0)
|
||||||
val scaledMean = Array.tabulate(numFeatures)(i => inverseStd(i) * featuresMean(i))
|
val scaledMean = Array.tabulate(numFeatures)(i => inverseStd(i) * featuresMean(i))
|
||||||
val bcInverseStd = instances.context.broadcast(inverseStd)
|
val bcInverseStd = instances.context.broadcast(inverseStd)
|
||||||
var bcObjects = Seq(bcInverseStd)
|
val bcScaledMean = instances.context.broadcast(scaledMean)
|
||||||
|
|
||||||
val scaled = instances.mapPartitions { iter =>
|
val scaled = instances.mapPartitions { iter =>
|
||||||
val func = StandardScalerModel.getTransformFunc(Array.empty, bcInverseStd.value, false, true)
|
val func = StandardScalerModel.getTransformFunc(Array.empty, bcInverseStd.value, false, true)
|
||||||
|
@ -966,25 +968,30 @@ class LogisticRegression @Since("1.2.0") (
|
||||||
.setName(s"$uid: training blocks (blockSizeInMB=$actualBlockSizeInMB)")
|
.setName(s"$uid: training blocks (blockSizeInMB=$actualBlockSizeInMB)")
|
||||||
|
|
||||||
val costFun = if (multinomial) {
|
val costFun = if (multinomial) {
|
||||||
// TODO: create a separate MultinomialLogisticBlockAggregator for clearness
|
val getAggregatorFunc = new MultinomialLogisticBlockAggregator(bcInverseStd, bcScaledMean,
|
||||||
val getAggregatorFunc = new BlockLogisticAggregator(numFeatures, numClasses,
|
$(fitIntercept), fitWithMean)(_)
|
||||||
$(fitIntercept), true)(_)
|
|
||||||
new RDDLossFunction(blocks, getAggregatorFunc, regularization, $(aggregationDepth))
|
new RDDLossFunction(blocks, getAggregatorFunc, regularization, $(aggregationDepth))
|
||||||
} else {
|
} else {
|
||||||
val bcScaledMean = instances.context.broadcast(scaledMean)
|
|
||||||
bcObjects +:= bcScaledMean
|
|
||||||
val getAggregatorFunc = new BinaryLogisticBlockAggregator(bcInverseStd, bcScaledMean,
|
val getAggregatorFunc = new BinaryLogisticBlockAggregator(bcInverseStd, bcScaledMean,
|
||||||
$(fitIntercept), fitWithMean)(_)
|
$(fitIntercept), fitWithMean)(_)
|
||||||
new RDDLossFunction(blocks, getAggregatorFunc, regularization, $(aggregationDepth))
|
new RDDLossFunction(blocks, getAggregatorFunc, regularization, $(aggregationDepth))
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fitWithMean) {
|
if (fitWithMean) {
|
||||||
// original `initialCoefWithInterceptArray` is for problem:
|
if (multinomial) {
|
||||||
// y = f(w1 * x1 / std_x1, w2 * x2 / std_x2, ..., intercept)
|
val adapt = Array.ofDim[Double](numClasses)
|
||||||
// we should adjust it to the initial solution for problem:
|
BLAS.f2jBLAS.dgemv("N", numClasses, numFeatures, 1.0,
|
||||||
// y = f(w1 * (x1 - avg_x1) / std_x1, w2 * (x2 - avg_x2) / std_x2, ..., intercept)
|
initialSolution, numClasses, scaledMean, 1, 0.0, adapt, 1)
|
||||||
val adapt = BLAS.getBLAS(numFeatures).ddot(numFeatures, initialSolution, 1, scaledMean, 1)
|
BLAS.getBLAS(numFeatures).daxpy(numClasses, 1.0, adapt, 0, 1,
|
||||||
initialSolution(numFeatures) += adapt
|
initialSolution, numClasses * numFeatures, 1)
|
||||||
|
} else {
|
||||||
|
// original `initialCoefWithInterceptArray` is for problem:
|
||||||
|
// y = f(w1 * x1 / std_x1, w2 * x2 / std_x2, ..., intercept)
|
||||||
|
// we should adjust it to the initial solution for problem:
|
||||||
|
// y = f(w1 * (x1 - avg_x1) / std_x1, w2 * (x2 - avg_x2) / std_x2, ..., intercept)
|
||||||
|
val adapt = BLAS.getBLAS(numFeatures).ddot(numFeatures, initialSolution, 1, scaledMean, 1)
|
||||||
|
initialSolution(numFeatures) += adapt
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val states = optimizer.iterations(new CachedDiffFunction(costFun),
|
val states = optimizer.iterations(new CachedDiffFunction(costFun),
|
||||||
|
@ -1002,16 +1009,25 @@ class LogisticRegression @Since("1.2.0") (
|
||||||
arrayBuilder += state.adjustedValue
|
arrayBuilder += state.adjustedValue
|
||||||
}
|
}
|
||||||
blocks.unpersist()
|
blocks.unpersist()
|
||||||
bcObjects.foreach(_.destroy())
|
bcInverseStd.destroy()
|
||||||
|
bcScaledMean.destroy()
|
||||||
|
|
||||||
val solution = if (state == null) null else state.x.toArray
|
val solution = if (state == null) null else state.x.toArray
|
||||||
if (fitWithMean && solution != null) {
|
if (fitWithMean && solution != null) {
|
||||||
// the final solution is for problem:
|
if (multinomial) {
|
||||||
// y = f(w1 * (x1 - avg_x1) / std_x1, w2 * (x2 - avg_x2) / std_x2, ..., intercept)
|
val adapt = Array.ofDim[Double](numClasses)
|
||||||
// we should adjust it back for original problem:
|
BLAS.f2jBLAS.dgemv("N", numClasses, numFeatures, 1.0,
|
||||||
// y = f(w1 * x1 / std_x1, w2 * x2 / std_x2, ..., intercept)
|
solution, numClasses, scaledMean, 1, 0.0, adapt, 1)
|
||||||
val adapt = BLAS.getBLAS(numFeatures).ddot(numFeatures, solution, 1, scaledMean, 1)
|
BLAS.getBLAS(numFeatures).daxpy(numClasses, -1.0, adapt, 0, 1,
|
||||||
solution(numFeatures) -= adapt
|
solution, numClasses * numFeatures, 1)
|
||||||
|
} else {
|
||||||
|
// the final solution is for problem:
|
||||||
|
// y = f(w1 * (x1 - avg_x1) / std_x1, w2 * (x2 - avg_x2) / std_x2, ..., intercept)
|
||||||
|
// we should adjust it back for original problem:
|
||||||
|
// y = f(w1 * x1 / std_x1, w2 * x2 / std_x2, ..., intercept)
|
||||||
|
val adapt = BLAS.getBLAS(numFeatures).ddot(numFeatures, solution, 1, scaledMean, 1)
|
||||||
|
solution(numFeatures) -= adapt
|
||||||
|
}
|
||||||
}
|
}
|
||||||
(solution, arrayBuilder.result)
|
(solution, arrayBuilder.result)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,264 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.spark.ml.optim.aggregator
|
|
||||||
|
|
||||||
import org.apache.spark.broadcast.Broadcast
|
|
||||||
import org.apache.spark.internal.Logging
|
|
||||||
import org.apache.spark.ml.feature.InstanceBlock
|
|
||||||
import org.apache.spark.ml.impl.Utils
|
|
||||||
import org.apache.spark.ml.linalg._
|
|
||||||
|
|
||||||
/**
|
|
||||||
* BlockLogisticAggregator computes the gradient and loss used in Logistic classification
|
|
||||||
* for blocks in sparse or dense matrix in an online fashion.
|
|
||||||
*
|
|
||||||
* Two BlockLogisticAggregators can be merged together to have a summary of loss and gradient of
|
|
||||||
* the corresponding joint dataset.
|
|
||||||
*
|
|
||||||
* NOTE: The feature values are expected to be standardized before computation.
|
|
||||||
*
|
|
||||||
* @param bcCoefficients The coefficients corresponding to the features.
|
|
||||||
* @param fitIntercept Whether to fit an intercept term.
|
|
||||||
*/
|
|
||||||
private[ml] class BlockLogisticAggregator(
|
|
||||||
numFeatures: Int,
|
|
||||||
numClasses: Int,
|
|
||||||
fitIntercept: Boolean,
|
|
||||||
multinomial: Boolean)(bcCoefficients: Broadcast[Vector])
|
|
||||||
extends DifferentiableLossAggregator[InstanceBlock, BlockLogisticAggregator] with Logging {
|
|
||||||
|
|
||||||
if (multinomial && numClasses <= 2) {
|
|
||||||
logInfo(s"Multinomial logistic regression for binary classification yields separate " +
|
|
||||||
s"coefficients for positive and negative classes. When no regularization is applied, the" +
|
|
||||||
s"result will be effectively the same as binary logistic regression. When regularization" +
|
|
||||||
s"is applied, multinomial loss will produce a result different from binary loss.")
|
|
||||||
}
|
|
||||||
|
|
||||||
private val numFeaturesPlusIntercept = if (fitIntercept) numFeatures + 1 else numFeatures
|
|
||||||
private val coefficientSize = bcCoefficients.value.size
|
|
||||||
protected override val dim: Int = coefficientSize
|
|
||||||
|
|
||||||
if (multinomial) {
|
|
||||||
require(numClasses == coefficientSize / numFeaturesPlusIntercept, s"The number of " +
|
|
||||||
s"coefficients should be ${numClasses * numFeaturesPlusIntercept} but was $coefficientSize")
|
|
||||||
} else {
|
|
||||||
require(coefficientSize == numFeaturesPlusIntercept, s"Expected $numFeaturesPlusIntercept " +
|
|
||||||
s"coefficients but got $coefficientSize")
|
|
||||||
require(numClasses == 1 || numClasses == 2, s"Binary logistic aggregator requires numClasses " +
|
|
||||||
s"in {1, 2} but found $numClasses.")
|
|
||||||
}
|
|
||||||
|
|
||||||
@transient private lazy val coefficientsArray = bcCoefficients.value match {
|
|
||||||
case DenseVector(values) => values
|
|
||||||
case _ => throw new IllegalArgumentException(s"coefficients only supports dense vector but " +
|
|
||||||
s"got type ${bcCoefficients.value.getClass}.)")
|
|
||||||
}
|
|
||||||
|
|
||||||
@transient private lazy val binaryLinear = (multinomial, fitIntercept) match {
|
|
||||||
case (false, true) => Vectors.dense(coefficientsArray.take(numFeatures))
|
|
||||||
case (false, false) => Vectors.dense(coefficientsArray)
|
|
||||||
case _ => null
|
|
||||||
}
|
|
||||||
|
|
||||||
@transient private lazy val multinomialLinear = (multinomial, fitIntercept) match {
|
|
||||||
case (true, true) =>
|
|
||||||
Matrices.dense(numClasses, numFeatures,
|
|
||||||
coefficientsArray.take(numClasses * numFeatures)).toDense
|
|
||||||
case (true, false) =>
|
|
||||||
Matrices.dense(numClasses, numFeatures, coefficientsArray).toDense
|
|
||||||
case _ => null
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Add a new training instance block to this BlockLogisticAggregator, and update the loss and
|
|
||||||
* gradient of the objective function.
|
|
||||||
*
|
|
||||||
* @param block The instance block of data point to be added.
|
|
||||||
* @return This BlockLogisticAggregator object.
|
|
||||||
*/
|
|
||||||
def add(block: InstanceBlock): this.type = {
|
|
||||||
require(block.matrix.isTransposed)
|
|
||||||
require(numFeatures == block.numFeatures, s"Dimensions mismatch when adding new " +
|
|
||||||
s"instance. Expecting $numFeatures but got ${block.numFeatures}.")
|
|
||||||
require(block.weightIter.forall(_ >= 0),
|
|
||||||
s"instance weights ${block.weightIter.mkString("[", ",", "]")} has to be >= 0.0")
|
|
||||||
|
|
||||||
if (block.weightIter.forall(_ == 0)) return this
|
|
||||||
|
|
||||||
if (multinomial) {
|
|
||||||
multinomialUpdateInPlace(block)
|
|
||||||
} else {
|
|
||||||
binaryUpdateInPlace(block)
|
|
||||||
}
|
|
||||||
|
|
||||||
this
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Update gradient and loss using binary loss function. */
|
|
||||||
private def binaryUpdateInPlace(block: InstanceBlock): Unit = {
|
|
||||||
val size = block.size
|
|
||||||
|
|
||||||
// vec here represents margins or negative dotProducts
|
|
||||||
val vec = if (fitIntercept) {
|
|
||||||
Vectors.dense(Array.fill(size)(coefficientsArray.last)).toDense
|
|
||||||
} else {
|
|
||||||
Vectors.zeros(size).toDense
|
|
||||||
}
|
|
||||||
BLAS.gemv(-1.0, block.matrix, binaryLinear, -1.0, vec)
|
|
||||||
|
|
||||||
// in-place convert margins to multiplier
|
|
||||||
// then, vec represents multiplier
|
|
||||||
var localLossSum = 0.0
|
|
||||||
var i = 0
|
|
||||||
while (i < size) {
|
|
||||||
val weight = block.getWeight(i)
|
|
||||||
if (weight > 0) {
|
|
||||||
val label = block.getLabel(i)
|
|
||||||
val margin = vec(i)
|
|
||||||
if (label > 0) {
|
|
||||||
// The following is equivalent to log(1 + exp(margin)) but more numerically stable.
|
|
||||||
localLossSum += weight * Utils.log1pExp(margin)
|
|
||||||
} else {
|
|
||||||
localLossSum += weight * (Utils.log1pExp(margin) - margin)
|
|
||||||
}
|
|
||||||
val multiplier = weight * (1.0 / (1.0 + math.exp(margin)) - label)
|
|
||||||
vec.values(i) = multiplier
|
|
||||||
} else { vec.values(i) = 0.0 }
|
|
||||||
i += 1
|
|
||||||
}
|
|
||||||
lossSum += localLossSum
|
|
||||||
weightSum += block.weightIter.sum
|
|
||||||
|
|
||||||
// predictions are all correct, no gradient signal
|
|
||||||
if (vec.values.forall(_ == 0)) return
|
|
||||||
|
|
||||||
block.matrix match {
|
|
||||||
case dm: DenseMatrix =>
|
|
||||||
BLAS.nativeBLAS.dgemv("N", dm.numCols, dm.numRows, 1.0, dm.values, dm.numCols,
|
|
||||||
vec.values, 1, 1.0, gradientSumArray, 1)
|
|
||||||
|
|
||||||
case sm: SparseMatrix if fitIntercept =>
|
|
||||||
val linearGradSumVec = Vectors.zeros(numFeatures).toDense
|
|
||||||
BLAS.gemv(1.0, sm.transpose, vec, 0.0, linearGradSumVec)
|
|
||||||
BLAS.getBLAS(numFeatures).daxpy(numFeatures, 1.0, linearGradSumVec.values, 1,
|
|
||||||
gradientSumArray, 1)
|
|
||||||
|
|
||||||
case sm: SparseMatrix if !fitIntercept =>
|
|
||||||
val gradSumVec = new DenseVector(gradientSumArray)
|
|
||||||
BLAS.gemv(1.0, sm.transpose, vec, 1.0, gradSumVec)
|
|
||||||
|
|
||||||
case m =>
|
|
||||||
throw new IllegalArgumentException(s"Unknown matrix type ${m.getClass}.")
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fitIntercept) gradientSumArray(numFeatures) += vec.values.sum
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Update gradient and loss using multinomial (softmax) loss function. */
|
|
||||||
private def multinomialUpdateInPlace(block: InstanceBlock): Unit = {
|
|
||||||
val size = block.size
|
|
||||||
|
|
||||||
// mat here represents margins, shape: S X C
|
|
||||||
val mat = DenseMatrix.zeros(size, numClasses)
|
|
||||||
if (fitIntercept) {
|
|
||||||
val localCoefficientsArray = coefficientsArray
|
|
||||||
val offset = numClasses * numFeatures
|
|
||||||
var j = 0
|
|
||||||
while (j < numClasses) {
|
|
||||||
val intercept = localCoefficientsArray(offset + j)
|
|
||||||
var i = 0
|
|
||||||
while (i < size) { mat.update(i, j, intercept); i += 1 }
|
|
||||||
j += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
BLAS.gemm(1.0, block.matrix, multinomialLinear.transpose, 1.0, mat)
|
|
||||||
|
|
||||||
// in-place convert margins to multipliers
|
|
||||||
// then, mat represents multipliers
|
|
||||||
var localLossSum = 0.0
|
|
||||||
var i = 0
|
|
||||||
val tmp = Array.ofDim[Double](numClasses)
|
|
||||||
val interceptGradSumArr = if (fitIntercept) Array.ofDim[Double](numClasses) else null
|
|
||||||
while (i < size) {
|
|
||||||
val weight = block.getWeight(i)
|
|
||||||
if (weight > 0) {
|
|
||||||
val label = block.getLabel(i)
|
|
||||||
|
|
||||||
var maxMargin = Double.NegativeInfinity
|
|
||||||
var j = 0
|
|
||||||
while (j < numClasses) {
|
|
||||||
tmp(j) = mat(i, j)
|
|
||||||
maxMargin = math.max(maxMargin, tmp(j))
|
|
||||||
j += 1
|
|
||||||
}
|
|
||||||
|
|
||||||
// marginOfLabel is margins(label) in the formula
|
|
||||||
val marginOfLabel = tmp(label.toInt)
|
|
||||||
|
|
||||||
var sum = 0.0
|
|
||||||
j = 0
|
|
||||||
while (j < numClasses) {
|
|
||||||
if (maxMargin > 0) tmp(j) -= maxMargin
|
|
||||||
val exp = math.exp(tmp(j))
|
|
||||||
sum += exp
|
|
||||||
tmp(j) = exp
|
|
||||||
j += 1
|
|
||||||
}
|
|
||||||
|
|
||||||
j = 0
|
|
||||||
while (j < numClasses) {
|
|
||||||
val multiplier = weight * (tmp(j) / sum - (if (label == j) 1.0 else 0.0))
|
|
||||||
mat.update(i, j, multiplier)
|
|
||||||
if (fitIntercept) interceptGradSumArr(j) += multiplier
|
|
||||||
j += 1
|
|
||||||
}
|
|
||||||
|
|
||||||
if (maxMargin > 0) {
|
|
||||||
localLossSum += weight * (math.log(sum) - marginOfLabel + maxMargin)
|
|
||||||
} else {
|
|
||||||
localLossSum += weight * (math.log(sum) - marginOfLabel)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
var j = 0; while (j < numClasses) { mat.update(i, j, 0.0); j += 1 }
|
|
||||||
}
|
|
||||||
i += 1
|
|
||||||
}
|
|
||||||
lossSum += localLossSum
|
|
||||||
weightSum += block.weightIter.sum
|
|
||||||
|
|
||||||
// mat (multipliers): S X C, dense N
|
|
||||||
// mat.transpose (multipliers): C X S, dense T
|
|
||||||
// block.matrix: S X F, unknown type T
|
|
||||||
// gradSumMat(gradientSumArray): C X FPI (numFeaturesPlusIntercept), dense N
|
|
||||||
block.matrix match {
|
|
||||||
case dm: DenseMatrix =>
|
|
||||||
BLAS.nativeBLAS.dgemm("T", "T", numClasses, numFeatures, size, 1.0,
|
|
||||||
mat.values, size, dm.values, numFeatures, 1.0, gradientSumArray, numClasses)
|
|
||||||
|
|
||||||
case sm: SparseMatrix =>
|
|
||||||
// linearGradSumMat = matrix.T X mat
|
|
||||||
val linearGradSumMat = DenseMatrix.zeros(numFeatures, numClasses)
|
|
||||||
BLAS.gemm(1.0, sm.transpose, mat, 0.0, linearGradSumMat)
|
|
||||||
linearGradSumMat.foreachActive { (i, j, v) => gradientSumArray(i * numClasses + j) += v }
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fitIntercept) {
|
|
||||||
BLAS.getBLAS(numClasses).daxpy(numClasses, 1.0, interceptGradSumArr, 0, 1,
|
|
||||||
gradientSumArray, numClasses * numFeatures, 1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -46,6 +46,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
|
||||||
@transient var binaryDataset: DataFrame = _
|
@transient var binaryDataset: DataFrame = _
|
||||||
@transient var binaryDatasetWithSmallVar: DataFrame = _
|
@transient var binaryDatasetWithSmallVar: DataFrame = _
|
||||||
@transient var multinomialDataset: DataFrame = _
|
@transient var multinomialDataset: DataFrame = _
|
||||||
|
@transient var multinomialDatasetWithSmallVar: DataFrame = _
|
||||||
@transient var multinomialDatasetWithZeroVar: DataFrame = _
|
@transient var multinomialDatasetWithZeroVar: DataFrame = _
|
||||||
private val eps: Double = 1e-5
|
private val eps: Double = 1e-5
|
||||||
|
|
||||||
|
@ -118,6 +119,23 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
|
||||||
df
|
df
|
||||||
}
|
}
|
||||||
|
|
||||||
|
multinomialDatasetWithSmallVar = {
|
||||||
|
val nPoints = 50000
|
||||||
|
val coefficients = Array(
|
||||||
|
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
|
||||||
|
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
|
||||||
|
|
||||||
|
val xMean = Array(5.843, 3.057, 3.758, 10.199)
|
||||||
|
val xVariance = Array(0.6856, 0.1899, 3.116, 0.001)
|
||||||
|
|
||||||
|
val testData = generateMultinomialLogisticInput(
|
||||||
|
coefficients, xMean, xVariance, addIntercept = true, nPoints, seed)
|
||||||
|
|
||||||
|
val df = sc.parallelize(testData, 4).toDF().withColumn("weight", rand(seed))
|
||||||
|
df.cache()
|
||||||
|
df
|
||||||
|
}
|
||||||
|
|
||||||
multinomialDatasetWithZeroVar = {
|
multinomialDatasetWithZeroVar = {
|
||||||
val nPoints = 100
|
val nPoints = 100
|
||||||
val coefficients = Array(
|
val coefficients = Array(
|
||||||
|
@ -141,18 +159,21 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
|
||||||
* so we can validate the training accuracy compared with R's glmnet package.
|
* so we can validate the training accuracy compared with R's glmnet package.
|
||||||
*/
|
*/
|
||||||
ignore("export test data into CSV format") {
|
ignore("export test data into CSV format") {
|
||||||
binaryDataset.rdd.map { case Row(label: Double, features: Vector, weight: Double) =>
|
binaryDataset.rdd.map { case Row(l: Double, f: Vector, w: Double) =>
|
||||||
label + "," + weight + "," + features.toArray.mkString(",")
|
l + "," + w + "," + f.toArray.mkString(",")
|
||||||
}.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/binaryDataset")
|
}.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/binaryDataset")
|
||||||
binaryDatasetWithSmallVar.rdd.map { case Row(label: Double, features: Vector, weight: Double) =>
|
binaryDatasetWithSmallVar.rdd.map { case Row(l: Double, f: Vector, w: Double) =>
|
||||||
label + "," + weight + "," + features.toArray.mkString(",")
|
l + "," + w + "," + f.toArray.mkString(",")
|
||||||
}.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/binaryDatasetWithSmallVar")
|
}.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/binaryDatasetWithSmallVar")
|
||||||
multinomialDataset.rdd.map { case Row(label: Double, features: Vector, weight: Double) =>
|
multinomialDataset.rdd.map { case Row(l: Double, f: Vector, w: Double) =>
|
||||||
label + "," + weight + "," + features.toArray.mkString(",")
|
l + "," + w + "," + f.toArray.mkString(",")
|
||||||
}.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDataset")
|
}.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDataset")
|
||||||
multinomialDatasetWithZeroVar.rdd.map {
|
multinomialDatasetWithSmallVar.rdd.map { case Row(l: Double, f: Vector, w: Double) =>
|
||||||
case Row(label: Double, features: Vector, weight: Double) =>
|
l + "," + w + "," + f.toArray.mkString(",")
|
||||||
label + "," + weight + "," + features.toArray.mkString(",")
|
}.repartition(1)
|
||||||
|
.saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDatasetWithSmallVar")
|
||||||
|
multinomialDatasetWithZeroVar.rdd.map { case Row(l: Double, f: Vector, w: Double) =>
|
||||||
|
l + "," + w + "," + f.toArray.mkString(",")
|
||||||
}.repartition(1)
|
}.repartition(1)
|
||||||
.saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDatasetWithZeroVar")
|
.saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDatasetWithZeroVar")
|
||||||
}
|
}
|
||||||
|
@ -1863,21 +1884,125 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
|
||||||
0.0, 0.0, 0.0, 0.09064661,
|
0.0, 0.0, 0.0, 0.09064661,
|
||||||
-0.1144333, 0.3204703, -0.1621061, -0.2308192,
|
-0.1144333, 0.3204703, -0.1621061, -0.2308192,
|
||||||
0.0, -0.4832131, 0.0, 0.0), isTransposed = true)
|
0.0, -0.4832131, 0.0, 0.0), isTransposed = true)
|
||||||
val interceptsRStd = Vectors.dense(-0.72638218, -0.01737265, 0.74375484)
|
val interceptsRStd = Vectors.dense(-0.69265374, -0.2260274, 0.9186811)
|
||||||
val coefficientsR = new DenseMatrix(3, 4, Array(
|
val coefficientsR = new DenseMatrix(3, 4, Array(
|
||||||
0.0, 0.0, 0.01641412, 0.03570376,
|
0.0, 0.0, 0.01641412, 0.03570376,
|
||||||
-0.05110822, 0.0, -0.21595670, -0.16162836,
|
-0.05110822, 0.0, -0.21595670, -0.16162836,
|
||||||
0.0, 0.0, 0.0, 0.0), isTransposed = true)
|
0.0, 0.0, 0.0, 0.0), isTransposed = true)
|
||||||
val interceptsR = Vectors.dense(-0.44707756, 0.75180900, -0.3047314)
|
val interceptsR = Vectors.dense(-0.44707756, 0.75180900, -0.3047314)
|
||||||
|
|
||||||
assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.05)
|
assert(model1.coefficientMatrix ~== coefficientsRStd absTol 1e-3)
|
||||||
assert(model1.interceptVector ~== interceptsRStd relTol 0.1)
|
assert(model1.interceptVector ~== interceptsRStd relTol 1e-3)
|
||||||
assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps)
|
assert(model1.interceptVector.toArray.sum ~== 0.0 absTol eps)
|
||||||
assert(model2.coefficientMatrix ~== coefficientsR absTol 0.02)
|
assert(model2.coefficientMatrix ~== coefficientsR absTol 1e-3)
|
||||||
assert(model2.interceptVector ~== interceptsR relTol 0.1)
|
assert(model2.interceptVector ~== interceptsR relTol 1e-3)
|
||||||
assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps)
|
assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("SPARK-34860: multinomial logistic regression with intercept, with small var") {
|
||||||
|
val trainer1 = new LogisticRegression().setFitIntercept(true).setStandardization(true)
|
||||||
|
.setWeightCol("weight")
|
||||||
|
val trainer2 = new LogisticRegression().setFitIntercept(true).setStandardization(false)
|
||||||
|
.setWeightCol("weight")
|
||||||
|
val trainer3 = new LogisticRegression().setFitIntercept(true).setStandardization(true)
|
||||||
|
.setElasticNetParam(0.0001).setRegParam(0.5).setWeightCol("weight")
|
||||||
|
|
||||||
|
val model1 = trainer1.fit(multinomialDatasetWithSmallVar)
|
||||||
|
val model2 = trainer2.fit(multinomialDatasetWithSmallVar)
|
||||||
|
val model3 = trainer3.fit(multinomialDatasetWithSmallVar)
|
||||||
|
|
||||||
|
/*
|
||||||
|
Use the following R code to load the data and train the model using glmnet package.
|
||||||
|
library("glmnet")
|
||||||
|
data <- read.csv("path", header=FALSE)
|
||||||
|
label = factor(data$V1)
|
||||||
|
w = data$V2
|
||||||
|
features = as.matrix(data.frame(data$V3, data$V4, data$V5, data$V6))
|
||||||
|
coefficients = coef(glmnet(features, label, weights=w, family="multinomial", alpha = 0,
|
||||||
|
lambda = 0))
|
||||||
|
coefficients
|
||||||
|
$`0`
|
||||||
|
5 x 1 sparse Matrix of class "dgCMatrix"
|
||||||
|
s0
|
||||||
|
2.91748298
|
||||||
|
data.V3 0.21755977
|
||||||
|
data.V4 0.01647541
|
||||||
|
data.V5 0.16507778
|
||||||
|
data.V6 -0.14016680
|
||||||
|
|
||||||
|
$`1`
|
||||||
|
5 x 1 sparse Matrix of class "dgCMatrix"
|
||||||
|
s0
|
||||||
|
-17.5107460
|
||||||
|
data.V3 -0.2443600
|
||||||
|
data.V4 0.7564655
|
||||||
|
data.V5 -0.2955698
|
||||||
|
data.V6 1.3262009
|
||||||
|
|
||||||
|
$`2`
|
||||||
|
5 x 1 sparse Matrix of class "dgCMatrix"
|
||||||
|
s0
|
||||||
|
14.59326301
|
||||||
|
data.V3 0.02680026
|
||||||
|
data.V4 -0.77294095
|
||||||
|
data.V5 0.13049206
|
||||||
|
data.V6 -1.18603411
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
coefficientsStd = coef(glmnet(features, label, weights=w, family="multinomial",
|
||||||
|
alpha = 0.0001, lambda = 0.5, standardize=T))
|
||||||
|
coefficientsStd
|
||||||
|
$`0`
|
||||||
|
5 x 1 sparse Matrix of class "dgCMatrix"
|
||||||
|
s0
|
||||||
|
1.751626027
|
||||||
|
data.V3 0.019970169
|
||||||
|
data.V4 0.079611293
|
||||||
|
data.V5 0.003959452
|
||||||
|
data.V6 0.110024399
|
||||||
|
|
||||||
|
$`1`
|
||||||
|
5 x 1 sparse Matrix of class "dgCMatrix"
|
||||||
|
s0
|
||||||
|
-3.9297124987
|
||||||
|
data.V3 -0.0004788494
|
||||||
|
data.V4 0.0010097453
|
||||||
|
data.V5 -0.0005832701
|
||||||
|
data.V6 .
|
||||||
|
|
||||||
|
$`2`
|
||||||
|
5 x 1 sparse Matrix of class "dgCMatrix"
|
||||||
|
s0
|
||||||
|
2.178086472
|
||||||
|
data.V3 -0.019369990
|
||||||
|
data.V4 -0.080851149
|
||||||
|
data.V5 -0.003319687
|
||||||
|
data.V6 -0.112435972
|
||||||
|
*/
|
||||||
|
val interceptsR = Vectors.dense(2.91748298, -17.5107460, 14.59326301)
|
||||||
|
val coefficientsR = new DenseMatrix(3, 4, Array(
|
||||||
|
0.21755977, 0.01647541, 0.16507778, -0.14016680,
|
||||||
|
-0.2443600, 0.7564655, -0.2955698, 1.3262009,
|
||||||
|
0.02680026, -0.77294095, 0.13049206, -1.18603411), isTransposed = true)
|
||||||
|
|
||||||
|
assert(model1.interceptVector ~== interceptsR relTol 1e-2)
|
||||||
|
assert(model1.coefficientMatrix ~= coefficientsR relTol 1e-1)
|
||||||
|
|
||||||
|
// Without regularization, training with or without standardization converges to the same solution.
|
||||||
|
assert(model2.interceptVector ~== interceptsR relTol 1e-2)
|
||||||
|
assert(model2.coefficientMatrix ~= coefficientsR relTol 1e-1)
|
||||||
|
|
||||||
|
val interceptsR2 = Vectors.dense(1.751626027, -3.9297124987, 2.178086472)
|
||||||
|
val coefficientsR2 = new DenseMatrix(3, 4, Array(
|
||||||
|
0.019970169, 0.079611293, 0.003959452, 0.110024399,
|
||||||
|
-0.0004788494, 0.0010097453, -0.0005832701, 0.0,
|
||||||
|
-0.019369990, -0.080851149, -0.003319687, -0.112435972), isTransposed = true)
|
||||||
|
|
||||||
|
assert(model3.interceptVector ~== interceptsR2 relTol 1e-3)
|
||||||
|
assert(model3.coefficientMatrix ~= coefficientsR2 relTol 1e-2)
|
||||||
|
}
|
||||||
|
|
||||||
test("multinomial logistic regression without intercept with L1 regularization") {
|
test("multinomial logistic regression without intercept with L1 regularization") {
|
||||||
val trainer1 = (new LogisticRegression).setFitIntercept(false)
|
val trainer1 = (new LogisticRegression).setFitIntercept(false)
|
||||||
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true).setWeightCol("weight")
|
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true).setWeightCol("weight")
|
||||||
|
|
Loading…
Reference in a new issue