[SPARK-13257][IMPROVEMENT] Refine naive Bayes example by checking model after loading it

Refine naive Bayes example by checking model after loading it. Author: movelikeriver <mars.lenjoy@gmail.com>. Closes #11125 from movelikeriver/naive_bayes.
2016-02-22 23:58:54 -08:00 · 2016-02-22 23:58:54 -08:00 · 5cd3e6f60b
parent 764ca18037
commit 5cd3e6f60b
1 changed file with 15 additions and 2 deletions
--- a/examples/src/main/python/mllib/naive_bayes_example.py
+++ b/examples/src/main/python/mllib/naive_bayes_example.py
@@ -17,9 +17,15 @@

 """
 NaiveBayes Example.
+
+Usage:
+  `spark-submit --master local[4] examples/src/main/python/mllib/naive_bayes_example.py`
 """
+
 from __future__ import print_function

+import shutil
+
 from pyspark import SparkContext
 # $example on$
 from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel
@@ -50,8 +56,15 @@ if __name__ == "__main__":
    # Make prediction and test accuracy.
    predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label))
    accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
+    print('model accuracy {}'.format(accuracy))

    # Save and load model
-    model.save(sc, "target/tmp/myNaiveBayesModel")
-    sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
+    output_dir = 'target/tmp/myNaiveBayesModel'
+    shutil.rmtree(output_dir, ignore_errors=True)
+    model.save(sc, output_dir)
+    sameModel = NaiveBayesModel.load(sc, output_dir)
+    predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label))
+    accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
+    print('sameModel accuracy {}'.format(accuracy))
+
    # $example off$