[SPARK-15771][ML][EXAMPLES] Use 'accuracy' rather than 'precision' in many ML examples
## What changes were proposed in this pull request?

Since [SPARK-15617](https://issues.apache.org/jira/browse/SPARK-15617) deprecated `precision` in `MulticlassClassificationEvaluator`, many ML examples are broken and fail with:

```python
pyspark.sql.utils.IllegalArgumentException: u'MulticlassClassificationEvaluator_4c3bb1d73d8cc0cedae6 parameter metricName given invalid value precision.'
```

We should use `accuracy` to replace `precision` in these examples.

## How was this patch tested?

Offline tests.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #13519 from yanboliang/spark-15771.
This commit is contained in:
parent
fd8af39713
commit
a95252823e
|
@ -90,7 +90,7 @@ public class JavaDecisionTreeClassificationExample {
|
|||
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
|
||||
.setLabelCol("indexedLabel")
|
||||
.setPredictionCol("prediction")
|
||||
.setMetricName("precision");
|
||||
.setMetricName("accuracy");
|
||||
double accuracy = evaluator.evaluate(predictions);
|
||||
System.out.println("Test Error = " + (1.0 - accuracy));
|
||||
|
||||
|
|
|
@ -92,7 +92,7 @@ public class JavaGradientBoostedTreeClassifierExample {
|
|||
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
|
||||
.setLabelCol("indexedLabel")
|
||||
.setPredictionCol("prediction")
|
||||
.setMetricName("precision");
|
||||
.setMetricName("accuracy");
|
||||
double accuracy = evaluator.evaluate(predictions);
|
||||
System.out.println("Test Error = " + (1.0 - accuracy));
|
||||
|
||||
|
|
|
@ -57,12 +57,12 @@ public class JavaMultilayerPerceptronClassifierExample {
|
|||
.setMaxIter(100);
|
||||
// train the model
|
||||
MultilayerPerceptronClassificationModel model = trainer.fit(train);
|
||||
// compute precision on the test set
|
||||
// compute accuracy on the test set
|
||||
Dataset<Row> result = model.transform(test);
|
||||
Dataset<Row> predictionAndLabels = result.select("prediction", "label");
|
||||
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
|
||||
.setMetricName("precision");
|
||||
System.out.println("Precision = " + evaluator.evaluate(predictionAndLabels));
|
||||
.setMetricName("accuracy");
|
||||
System.out.println("Accuracy = " + evaluator.evaluate(predictionAndLabels));
|
||||
// $example off$
|
||||
|
||||
spark.stop();
|
||||
|
|
|
@ -50,12 +50,12 @@ public class JavaNaiveBayesExample {
|
|||
NaiveBayes nb = new NaiveBayes();
|
||||
// train the model
|
||||
NaiveBayesModel model = nb.fit(train);
|
||||
// compute precision on the test set
|
||||
// compute accuracy on the test set
|
||||
Dataset<Row> result = model.transform(test);
|
||||
Dataset<Row> predictionAndLabels = result.select("prediction", "label");
|
||||
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
|
||||
.setMetricName("precision");
|
||||
System.out.println("Precision = " + evaluator.evaluate(predictionAndLabels));
|
||||
.setMetricName("accuracy");
|
||||
System.out.println("Accuracy = " + evaluator.evaluate(predictionAndLabels));
|
||||
// $example off$
|
||||
|
||||
spark.stop();
|
||||
|
|
|
@ -71,11 +71,11 @@ public class JavaOneVsRestExample {
|
|||
|
||||
// obtain evaluator.
|
||||
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
|
||||
.setMetricName("precision");
|
||||
.setMetricName("accuracy");
|
||||
|
||||
// compute the classification error on test data.
|
||||
double precision = evaluator.evaluate(predictions);
|
||||
System.out.println("Test Error : " + (1 - precision));
|
||||
double accuracy = evaluator.evaluate(predictions);
|
||||
System.out.println("Test Error : " + (1 - accuracy));
|
||||
// $example off$
|
||||
|
||||
spark.stop();
|
||||
|
|
|
@ -88,7 +88,7 @@ public class JavaRandomForestClassifierExample {
|
|||
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
|
||||
.setLabelCol("indexedLabel")
|
||||
.setPredictionCol("prediction")
|
||||
.setMetricName("precision");
|
||||
.setMetricName("accuracy");
|
||||
double accuracy = evaluator.evaluate(predictions);
|
||||
System.out.println("Test Error = " + (1.0 - accuracy));
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ if __name__ == "__main__":
|
|||
|
||||
# Select (prediction, true label) and compute test error
|
||||
evaluator = MulticlassClassificationEvaluator(
|
||||
labelCol="indexedLabel", predictionCol="prediction", metricName="precision")
|
||||
labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy")
|
||||
accuracy = evaluator.evaluate(predictions)
|
||||
print("Test Error = %g " % (1.0 - accuracy))
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ if __name__ == "__main__":
|
|||
|
||||
# Select (prediction, true label) and compute test error
|
||||
evaluator = MulticlassClassificationEvaluator(
|
||||
labelCol="indexedLabel", predictionCol="prediction", metricName="precision")
|
||||
labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy")
|
||||
accuracy = evaluator.evaluate(predictions)
|
||||
print("Test Error = %g" % (1.0 - accuracy))
|
||||
|
||||
|
|
|
@ -43,11 +43,11 @@ if __name__ == "__main__":
|
|||
trainer = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128, seed=1234)
|
||||
# train the model
|
||||
model = trainer.fit(train)
|
||||
# compute precision on the test set
|
||||
# compute accuracy on the test set
|
||||
result = model.transform(test)
|
||||
predictionAndLabels = result.select("prediction", "label")
|
||||
evaluator = MulticlassClassificationEvaluator(metricName="precision")
|
||||
print("Precision:" + str(evaluator.evaluate(predictionAndLabels)))
|
||||
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
|
||||
print("Accuracy: " + str(evaluator.evaluate(predictionAndLabels)))
|
||||
# $example off$
|
||||
|
||||
spark.stop()
|
||||
|
|
|
@ -43,11 +43,11 @@ if __name__ == "__main__":
|
|||
|
||||
# train the model
|
||||
model = nb.fit(train)
|
||||
# compute precision on the test set
|
||||
# compute accuracy on the test set
|
||||
result = model.transform(test)
|
||||
predictionAndLabels = result.select("prediction", "label")
|
||||
evaluator = MulticlassClassificationEvaluator(metricName="precision")
|
||||
print("Precision:" + str(evaluator.evaluate(predictionAndLabels)))
|
||||
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
|
||||
print("Accuracy: " + str(evaluator.evaluate(predictionAndLabels)))
|
||||
# $example off$
|
||||
|
||||
spark.stop()
|
||||
|
|
|
@ -58,11 +58,11 @@ if __name__ == "__main__":
|
|||
predictions = ovrModel.transform(test)
|
||||
|
||||
# obtain evaluator.
|
||||
evaluator = MulticlassClassificationEvaluator(metricName="precision")
|
||||
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
|
||||
|
||||
# compute the classification error on test data.
|
||||
precision = evaluator.evaluate(predictions)
|
||||
print("Test Error : " + str(1 - precision))
|
||||
accuracy = evaluator.evaluate(predictions)
|
||||
print("Test Error : " + str(1 - accuracy))
|
||||
# $example off$
|
||||
|
||||
spark.stop()
|
||||
|
|
|
@ -66,7 +66,7 @@ if __name__ == "__main__":
|
|||
|
||||
# Select (prediction, true label) and compute test error
|
||||
evaluator = MulticlassClassificationEvaluator(
|
||||
labelCol="indexedLabel", predictionCol="prediction", metricName="precision")
|
||||
labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy")
|
||||
accuracy = evaluator.evaluate(predictions)
|
||||
print("Test Error = %g" % (1.0 - accuracy))
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@ object DecisionTreeClassificationExample {
|
|||
val evaluator = new MulticlassClassificationEvaluator()
|
||||
.setLabelCol("indexedLabel")
|
||||
.setPredictionCol("prediction")
|
||||
.setMetricName("precision")
|
||||
.setMetricName("accuracy")
|
||||
val accuracy = evaluator.evaluate(predictions)
|
||||
println("Test Error = " + (1.0 - accuracy))
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ object GradientBoostedTreeClassifierExample {
|
|||
val evaluator = new MulticlassClassificationEvaluator()
|
||||
.setLabelCol("indexedLabel")
|
||||
.setPredictionCol("prediction")
|
||||
.setMetricName("precision")
|
||||
.setMetricName("accuracy")
|
||||
val accuracy = evaluator.evaluate(predictions)
|
||||
println("Test Error = " + (1.0 - accuracy))
|
||||
|
||||
|
|
|
@ -55,12 +55,12 @@ object MultilayerPerceptronClassifierExample {
|
|||
.setMaxIter(100)
|
||||
// train the model
|
||||
val model = trainer.fit(train)
|
||||
// compute precision on the test set
|
||||
// compute accuracy on the test set
|
||||
val result = model.transform(test)
|
||||
val predictionAndLabels = result.select("prediction", "label")
|
||||
val evaluator = new MulticlassClassificationEvaluator()
|
||||
.setMetricName("precision")
|
||||
println("Precision:" + evaluator.evaluate(predictionAndLabels))
|
||||
.setMetricName("accuracy")
|
||||
println("Accuracy: " + evaluator.evaluate(predictionAndLabels))
|
||||
// $example off$
|
||||
|
||||
spark.stop()
|
||||
|
|
|
@ -49,9 +49,9 @@ object NaiveBayesExample {
|
|||
val evaluator = new MulticlassClassificationEvaluator()
|
||||
.setLabelCol("label")
|
||||
.setPredictionCol("prediction")
|
||||
.setMetricName("precision")
|
||||
val precision = evaluator.evaluate(predictions)
|
||||
println("Precision:" + precision)
|
||||
.setMetricName("accuracy")
|
||||
val accuracy = evaluator.evaluate(predictions)
|
||||
println("Accuracy: " + accuracy)
|
||||
// $example off$
|
||||
|
||||
spark.stop()
|
||||
|
|
|
@ -65,11 +65,11 @@ object OneVsRestExample {
|
|||
|
||||
// obtain evaluator.
|
||||
val evaluator = new MulticlassClassificationEvaluator()
|
||||
.setMetricName("precision")
|
||||
.setMetricName("accuracy")
|
||||
|
||||
// compute the classification error on test data.
|
||||
val precision = evaluator.evaluate(predictions)
|
||||
println(s"Test Error : ${1 - precision}")
|
||||
val accuracy = evaluator.evaluate(predictions)
|
||||
println(s"Test Error : ${1 - accuracy}")
|
||||
// $example off$
|
||||
|
||||
spark.stop()
|
||||
|
|
|
@ -83,7 +83,7 @@ object RandomForestClassifierExample {
|
|||
val evaluator = new MulticlassClassificationEvaluator()
|
||||
.setLabelCol("indexedLabel")
|
||||
.setPredictionCol("prediction")
|
||||
.setMetricName("precision")
|
||||
.setMetricName("accuracy")
|
||||
val accuracy = evaluator.evaluate(predictions)
|
||||
println("Test Error = " + (1.0 - accuracy))
|
||||
|
||||
|
|
|
@ -265,7 +265,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
|
|||
"""
|
||||
metricName = Param(Params._dummy(), "metricName",
|
||||
"metric name in evaluation "
|
||||
"(f1|precision|recall|weightedPrecision|weightedRecall|accuracy)",
|
||||
"(f1|weightedPrecision|weightedRecall|accuracy)",
|
||||
typeConverter=TypeConverters.toString)
|
||||
|
||||
@keyword_only
|
||||
|
|
Loading…
Reference in a new issue