[SPARK-9847] [ML] Modified copyValues to distinguish between default, explicit param values

From JIRA: Currently, Params.copyValues copies default parameter values to the paramMap of the target instance, rather than the defaultParamMap. It should copy to the defaultParamMap because explicitly setting a parameter can change the semantics.
This issue arose in SPARK-9789, where 2 params "threshold" and "thresholds" for LogisticRegression can have mutually exclusive values. If thresholds is set, then fit() will copy the default value of threshold as well, easily resulting in inconsistent settings for the 2 params.

CC: mengxr

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #8115 from jkbradley/copyvalues-fix.
This commit is contained in:
Joseph K. Bradley 2015-08-12 10:48:52 -07:00 committed by Xiangrui Meng
parent 57ec27dd77
commit 70fe558867
2 changed files with 24 additions and 3 deletions

View file

@ -559,13 +559,26 @@ trait Params extends Identifiable with Serializable {
/**
* Copies param values from this instance to another instance for params shared by them.
* @param to the target instance
* @param extra extra params to be copied
*
* This handles default Params and explicitly set Params separately.
* Default Params are copied from and to [[defaultParamMap]], and explicitly set Params are
* copied from and to [[paramMap]].
* Warning: This implicitly assumes that this [[Params]] instance and the target instance
* share the same set of default Params.
*
* @param to the target instance, which should work with the same set of default Params as this
* source instance
* @param extra extra params to be copied to the target's [[paramMap]]
* @return the target instance with param values copied
*/
protected def copyValues[T <: Params](to: T, extra: ParamMap = ParamMap.empty): T = {
val map = extractParamMap(extra)
val map = paramMap ++ extra
params.foreach { param =>
// copy default Params
if (defaultParamMap.contains(param) && to.hasParam(param.name)) {
to.defaultParamMap.put(to.getParam(param.name), defaultParamMap(param))
}
// copy explicitly set Params
if (map.contains(param) && to.hasParam(param.name)) {
to.set(param.name, map(param))
}

View file

@ -200,6 +200,14 @@ class ParamsSuite extends SparkFunSuite {
val inArray = ParamValidators.inArray[Int](Array(1, 2))
assert(inArray(1) && inArray(2) && !inArray(0))
}
test("Params.copyValues") {
  // NOTE(review): assumes TestParams.copy delegates to Params.copyValues — confirm.
  val source = new TestParams()

  // Copying with an empty extra map must not mark maxIter as explicitly set on the copy.
  val plainCopy = source.copy(ParamMap.empty)
  assert(!plainCopy.isSet(plainCopy.maxIter))

  // A value passed through the extra ParamMap must be reported as explicitly set on the copy.
  val overriddenCopy = source.copy(ParamMap(source.maxIter -> 20))
  assert(overriddenCopy.isSet(overriddenCopy.maxIter))
}
}
object ParamsSuite extends SparkFunSuite {