[MINOR] [MLLIB] rename some functions of PythonMLLibAPI

Keep the same naming conventions for PythonMLLibAPI. Only the following three functions are named differently from the others:
```scala
trainNaiveBayes
trainGaussianMixture
trainWord2Vec
```
So rename them to:
```scala
trainNaiveBayesModel
trainGaussianMixtureModel
trainWord2VecModel
```
This does not affect any users or public APIs; it only makes the code easier to understand for developers and code hackers.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #7011 from yanboliang/py-mllib-api-rename and squashes the following commits:

771ffec [Yanbo Liang] rename some functions of PythonMLLibAPI
2015-06-25 08:13:17 -07:00 · 2015-06-25 08:13:17 -07:00 · 2519dcc33b
parent f9b397f54d
commit 2519dcc33b
4 changed files with 8 additions and 8 deletions
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@ -278,7 +278,7 @@ private[python] class PythonMLLibAPI extends Serializable {
  /**
   * Java stub for NaiveBayes.train()
   */
-  def trainNaiveBayes(
+  def trainNaiveBayesModel(
      data: JavaRDD[LabeledPoint],
      lambda: Double): JList[Object] = {
    val model = NaiveBayes.train(data.rdd, lambda)
@ -346,7 +346,7 @@ private[python] class PythonMLLibAPI extends Serializable {
   * Java stub for Python mllib GaussianMixture.run()
   * Returns a list containing weights, mean and covariance of each mixture component.
   */
-  def trainGaussianMixture(
+  def trainGaussianMixtureModel(
      data: JavaRDD[Vector],
      k: Int,
      convergenceTol: Double,
@ -553,7 +553,7 @@ private[python] class PythonMLLibAPI extends Serializable {
   * @param seed initial seed for random generator
   * @return A handle to java Word2VecModelWrapper instance at python side
   */
-  def trainWord2Vec(
+  def trainWord2VecModel(
      dataJRDD: JavaRDD[java.util.ArrayList[String]],
      vectorSize: Int,
      learningRate: Double,
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@ -581,7 +581,7 @@ class NaiveBayes(object):
        first = data.first()
        if not isinstance(first, LabeledPoint):
            raise ValueError("`data` should be an RDD of LabeledPoint")
-        labels, pi, theta = callMLlibFunc("trainNaiveBayes", data, lambda_)
+        labels, pi, theta = callMLlibFunc("trainNaiveBayesModel", data, lambda_)
        return NaiveBayesModel(labels.toArray(), pi.toArray(), numpy.array(theta))


--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@ -265,9 +265,9 @@ class GaussianMixture(object):
            initialModelWeights = initialModel.weights
            initialModelMu = [initialModel.gaussians[i].mu for i in range(initialModel.k)]
            initialModelSigma = [initialModel.gaussians[i].sigma for i in range(initialModel.k)]
-        weight, mu, sigma = callMLlibFunc("trainGaussianMixture", rdd.map(_convert_to_vector), k,
-                                          convergenceTol, maxIterations, seed, initialModelWeights,
-                                          initialModelMu, initialModelSigma)
+        weight, mu, sigma = callMLlibFunc("trainGaussianMixtureModel", rdd.map(_convert_to_vector),
+                                          k, convergenceTol, maxIterations, seed,
+                                          initialModelWeights, initialModelMu, initialModelSigma)
        mvg_obj = [MultivariateGaussian(mu[i], sigma[i]) for i in range(k)]
        return GaussianMixtureModel(weight, mvg_obj)

--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@ -549,7 +549,7 @@ class Word2Vec(object):
        """
        if not isinstance(data, RDD):
            raise TypeError("data should be an RDD of list of string")
-        jmodel = callMLlibFunc("trainWord2Vec", data, int(self.vectorSize),
+        jmodel = callMLlibFunc("trainWord2VecModel", data, int(self.vectorSize),
                               float(self.learningRate), int(self.numPartitions),
                               int(self.numIterations), int(self.seed),
                               int(self.minCount))