[SPARK-12349][ML] Make spark.ml PCAModel load backwards compatible
Only load explainedVariance in PCAModel if it was written with Spark > 1.6.x. jkbradley, is this kind of what you had in mind? Author: Sean Owen <sowen@cloudera.com>. Closes #10327 from srowen/SPARK-12349.
This commit is contained in:
parent
ce1798b3af
commit
d0f695089e
|
@ -167,14 +167,37 @@ object PCAModel extends MLReadable[PCAModel] {
|
|||
|
||||
// Fully qualified class name of PCAModel; passed to DefaultParamsReader.loadMetadata in load().
private val className = classOf[PCAModel].getName
|
||||
|
||||
/**
|
||||
* Loads a [[PCAModel]] from data located at the input path. Note that the model includes an
|
||||
* `explainedVariance` member that is not recorded by Spark 1.6 and earlier. A model
|
||||
* can be loaded from such older data but will have an empty vector for
|
||||
* `explainedVariance`.
|
||||
*
|
||||
* @param path path to serialized model data
|
||||
* @return a [[PCAModel]]
|
||||
*/
|
||||
override def load(path: String): PCAModel = {
  val metadata = DefaultParamsReader.loadMetadata(path, sc, className)

  // explainedVariance field is not present in Spark <= 1.6, so use the Spark version
  // recorded in the metadata to decide whether that column can be read.
  // Both capture groups must contain their own `+`: with the quantifier outside the
  // group, `([0-9])+` keeps only the LAST digit of the minor version, so "1.10.0"
  // would be parsed as minor = 0 and wrongly treated as pre-1.7 data.
  val versionRegex = "([0-9]+)\\.([0-9]+).*".r
  val hasExplainedVariance = metadata.sparkVersion match {
    case versionRegex(major, minor) =>
      major.toInt >= 2 || (major.toInt == 1 && minor.toInt > 6)
    // A version string that does not match is handled like old data: only "pc" is read.
    case _ => false
  }

  val dataPath = new Path(path, "data").toString
  val model = if (hasExplainedVariance) {
    val Row(pc: DenseMatrix, explainedVariance: DenseVector) =
      sqlContext.read.parquet(dataPath)
        .select("pc", "explainedVariance")
        .head()
    new PCAModel(metadata.uid, pc, explainedVariance)
  } else {
    // Older data: read only "pc" and substitute an empty explainedVariance vector.
    val Row(pc: DenseMatrix) = sqlContext.read.parquet(dataPath).select("pc").head()
    new PCAModel(metadata.uid, pc, Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector])
  }
  DefaultParamsReader.getAndSetParams(model, metadata)
  model
}
|
||||
|
|
Loading…
Reference in a new issue