[SPARK-8043] [MLLIB] [DOC] update NaiveBayes and SVM examples in doc
jira: https://issues.apache.org/jira/browse/SPARK-8043 I found some issues while testing the save/load examples in the Markdown documents, as part of the 1.4 QA plan. Author: Yuhao Yang <hhbyyh@gmail.com> Closes #6584 from hhbyyh/naiveDocExample and squashes the following commits: a01a206 [Yuhao Yang] fix for Gaussian mixture 2fb8b96 [Yuhao Yang] update NaiveBayes and SVM examples in doc
This commit is contained in:
parent
ccaa823290
commit
43adbd5611
|
@ -249,11 +249,11 @@ public class GaussianMixtureExample {
|
|||
GaussianMixtureModel gmm = new GaussianMixture().setK(2).run(parsedData.rdd());
|
||||
|
||||
// Save and load GaussianMixtureModel
|
||||
gmm.save(sc, "myGMMModel")
|
||||
GaussianMixtureModel sameModel = GaussianMixtureModel.load(sc, "myGMMModel")
|
||||
gmm.save(sc.sc(), "myGMMModel");
|
||||
GaussianMixtureModel sameModel = GaussianMixtureModel.load(sc.sc(), "myGMMModel");
|
||||
// Output the parameters of the mixture model
|
||||
for(int j=0; j<gmm.k(); j++) {
|
||||
System.out.println("weight=%f\nmu=%s\nsigma=\n%s\n",
|
||||
System.out.printf("weight=%f\nmu=%s\nsigma=\n%s\n",
|
||||
gmm.weights()[j], gmm.gaussians()[j].mu(), gmm.gaussians()[j].sigma());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -163,11 +163,8 @@ object, and make predictions with the resulting model to compute the training
|
|||
error.
|
||||
|
||||
{% highlight scala %}
|
||||
import org.apache.spark.SparkContext
|
||||
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
|
||||
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
|
||||
import org.apache.spark.mllib.regression.LabeledPoint
|
||||
import org.apache.spark.mllib.linalg.Vectors
|
||||
import org.apache.spark.mllib.util.MLUtils
|
||||
|
||||
// Load training data in LIBSVM format.
|
||||
|
@ -231,15 +228,13 @@ calling `.rdd()` on your `JavaRDD` object. A self-contained application example
|
|||
that is equivalent to the provided example in Scala is given below:
|
||||
|
||||
{% highlight java %}
|
||||
import java.util.Random;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
import org.apache.spark.api.java.*;
|
||||
import org.apache.spark.api.java.function.Function;
|
||||
import org.apache.spark.mllib.classification.*;
|
||||
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
|
||||
import org.apache.spark.mllib.linalg.Vector;
|
||||
|
||||
import org.apache.spark.mllib.regression.LabeledPoint;
|
||||
import org.apache.spark.mllib.util.MLUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
@ -282,8 +277,8 @@ public class SVMClassifier {
|
|||
System.out.println("Area under ROC = " + auROC);
|
||||
|
||||
// Save and load model
|
||||
model.save(sc.sc(), "myModelPath");
|
||||
SVMModel sameModel = SVMModel.load(sc.sc(), "myModelPath");
|
||||
model.save(sc, "myModelPath");
|
||||
SVMModel sameModel = SVMModel.load(sc, "myModelPath");
|
||||
}
|
||||
}
|
||||
{% endhighlight %}
|
||||
|
@ -315,15 +310,12 @@ a dependency.
|
|||
</div>
|
||||
|
||||
<div data-lang="python" markdown="1">
|
||||
The following example shows how to load a sample dataset, build Logistic Regression model,
|
||||
The following example shows how to load a sample dataset, build an SVM model,
|
||||
and make predictions with the resulting model to compute the training error.
|
||||
|
||||
Note that the Python API does not yet support model save/load but will in the future.
|
||||
|
||||
{% highlight python %}
|
||||
from pyspark.mllib.classification import LogisticRegressionWithSGD
|
||||
from pyspark.mllib.classification import SVMWithSGD, SVMModel
|
||||
from pyspark.mllib.regression import LabeledPoint
|
||||
from numpy import array
|
||||
|
||||
# Load and parse the data
|
||||
def parsePoint(line):
|
||||
|
@ -334,12 +326,16 @@ data = sc.textFile("data/mllib/sample_svm_data.txt")
|
|||
parsedData = data.map(parsePoint)
|
||||
|
||||
# Build the model
|
||||
model = LogisticRegressionWithSGD.train(parsedData)
|
||||
model = SVMWithSGD.train(parsedData, iterations=100)
|
||||
|
||||
# Evaluating the model on training data
|
||||
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
|
||||
trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
|
||||
print("Training Error = " + str(trainErr))
|
||||
|
||||
# Save and load model
|
||||
model.save(sc, "myModelPath")
|
||||
sameModel = SVMModel.load(sc, "myModelPath")
|
||||
{% endhighlight %}
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -53,7 +53,7 @@ val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L)
|
|||
val training = splits(0)
|
||||
val test = splits(1)
|
||||
|
||||
val model = NaiveBayes.train(training, lambda = 1.0, model = "multinomial")
|
||||
val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")
|
||||
|
||||
val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
|
||||
val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()
|
||||
|
|
Loading…
Reference in a new issue