spark-instrumented-optimizer/python/pyspark/ml/evaluation.py
Sean Owen eb037a8180 [SPARK-28855][CORE][ML][SQL][STREAMING] Remove outdated usages of Experimental, Evolving annotations
### What changes were proposed in this pull request?

The Experimental and Evolving annotations are both (like Unstable) used to express that an API may change. However, many things in the code have been marked that way since as far back as Spark 1.x. Per the dev thread, anything introduced at or before Spark 2.3.0 is effectively 'stable' in that it would not change without a deprecation cycle. Therefore I'd like to remove most of these annotations, along with the `:: Experimental ::` scaladoc tag, and likewise for Python and R.

The changes below can be summarized as:
- Generally, anything introduced at or before Spark 2.3.0 is no longer marked as Evolving or Experimental
- Obviously experimental items like DSv2, Barrier mode, ExperimentalMethods are untouched
- I _did_ unmark a few MLlib classes introduced in 2.4, as I am quite confident they're not going to change (e.g. KolmogorovSmirnovTest, PowerIterationClustering)

It's a big change to review, so I'd suggest scanning the list of _files_ changed to see if any area seems like it should remain partly experimental and examine those.

### Why are the changes needed?

Many of these annotations are incorrect; the APIs are de facto stable. Leaving them also makes legitimate usages of the annotations less meaningful.

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

Existing tests.

Closes #25558 from srowen/SPARK-28855.

Authored-by: Sean Owen <sean.owen@databricks.com>
Signed-off-by: Sean Owen <sean.owen@databricks.com>
2019-09-01 10:15:00 -05:00


#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
from abc import abstractmethod, ABCMeta
from pyspark import since, keyword_only
from pyspark.ml.wrapper import JavaParams
from pyspark.ml.param import Param, Params, TypeConverters
from pyspark.ml.param.shared import HasLabelCol, HasPredictionCol, HasRawPredictionCol, \
HasFeaturesCol, HasWeightCol
from pyspark.ml.common import inherit_doc
from pyspark.ml.util import JavaMLReadable, JavaMLWritable
__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator',
'MulticlassClassificationEvaluator', 'MultilabelClassificationEvaluator',
'ClusteringEvaluator', 'RankingEvaluator']
@inherit_doc
class Evaluator(Params):
"""
Base class for evaluators that compute metrics from predictions.
.. versionadded:: 1.4.0
"""
__metaclass__ = ABCMeta
@abstractmethod
def _evaluate(self, dataset):
"""
Evaluates the output.
:param dataset: a dataset that contains labels/observations and
predictions
:return: metric
"""
raise NotImplementedError()
@since("1.4.0")
def evaluate(self, dataset, params=None):
"""
Evaluates the output with optional parameters.
:param dataset: a dataset that contains labels/observations and
predictions
:param params: an optional param map that overrides embedded
params
:return: metric
"""
if params is None:
params = dict()
if isinstance(params, dict):
if params:
return self.copy(params)._evaluate(dataset)
else:
return self._evaluate(dataset)
else:
raise ValueError("Params must be a param map but got %s." % type(params))
@since("1.5.0")
def isLargerBetter(self):
"""
Indicates whether the metric returned by :py:meth:`evaluate` should be maximized
(True, default) or minimized (False).
A given evaluator may support multiple metrics which may be maximized or minimized.
"""
return True
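# A minimal sketch of a pure-Python evaluator (illustrative only, not part of
# the API): subclass ``Evaluator``, implement ``_evaluate``, and override
# ``isLargerBetter`` when the metric is minimized. The class name and column
# names below are hypothetical.
#
#     from pyspark.sql import functions as F
#
#     class MeanAbsErrorEvaluator(Evaluator):
#         def _evaluate(self, dataset):
#             # mean absolute error over "label"/"prediction" columns
#             return dataset.select(
#                 F.avg(F.abs(F.col("label") - F.col("prediction")))).first()[0]
#         def isLargerBetter(self):
#             return False  # an error metric is minimized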
@inherit_doc
class JavaEvaluator(JavaParams, Evaluator):
"""
Base class for :py:class:`Evaluator`s that wrap Java/Scala
implementations.
"""
__metaclass__ = ABCMeta
def _evaluate(self, dataset):
"""
Evaluates the output.
:param dataset: a dataset that contains labels/observations and predictions.
:return: evaluation metric
"""
self._transfer_params_to_java()
return self._java_obj.evaluate(dataset._jdf)
def isLargerBetter(self):
self._transfer_params_to_java()
return self._java_obj.isLargerBetter()
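# The wrappers below delegate ``evaluate`` and ``isLargerBetter`` to their
# Scala counterparts; ``_transfer_params_to_java`` pushes any Python-side
# param values to the JVM object first so both sides stay consistent.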
@inherit_doc
class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPredictionCol, HasWeightCol,
JavaMLReadable, JavaMLWritable):
"""
Evaluator for binary classification, which expects two input columns: rawPrediction and label.
The rawPrediction column can be of type double (binary 0/1 prediction, or probability of label
1) or of type vector (length-2 vector of raw predictions, scores, or label probabilities).
>>> from pyspark.ml.linalg import Vectors
>>> scoreAndLabels = map(lambda x: (Vectors.dense([1.0 - x[0], x[0]]), x[1]),
... [(0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)])
>>> dataset = spark.createDataFrame(scoreAndLabels, ["raw", "label"])
...
>>> evaluator = BinaryClassificationEvaluator(rawPredictionCol="raw")
>>> evaluator.evaluate(dataset)
0.70...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"})
0.83...
>>> bce_path = temp_path + "/bce"
>>> evaluator.save(bce_path)
>>> evaluator2 = BinaryClassificationEvaluator.load(bce_path)
>>> str(evaluator2.getRawPredictionCol())
'raw'
>>> scoreAndLabelsAndWeight = map(lambda x: (Vectors.dense([1.0 - x[0], x[0]]), x[1], x[2]),
... [(0.1, 0.0, 1.0), (0.1, 1.0, 0.9), (0.4, 0.0, 0.7), (0.6, 0.0, 0.9),
... (0.6, 1.0, 1.0), (0.6, 1.0, 0.3), (0.8, 1.0, 1.0)])
>>> dataset = spark.createDataFrame(scoreAndLabelsAndWeight, ["raw", "label", "weight"])
...
>>> evaluator = BinaryClassificationEvaluator(rawPredictionCol="raw", weightCol="weight")
>>> evaluator.evaluate(dataset)
0.70...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"})
0.82...
.. versionadded:: 1.4.0
"""
metricName = Param(Params._dummy(), "metricName",
"metric name in evaluation (areaUnderROC|areaUnderPR)",
typeConverter=TypeConverters.toString)
@keyword_only
def __init__(self, rawPredictionCol="rawPrediction", labelCol="label",
metricName="areaUnderROC", weightCol=None):
"""
__init__(self, rawPredictionCol="rawPrediction", labelCol="label", \
metricName="areaUnderROC", weightCol=None)
"""
super(BinaryClassificationEvaluator, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.BinaryClassificationEvaluator", self.uid)
self._setDefault(metricName="areaUnderROC")
kwargs = self._input_kwargs
self._set(**kwargs)
@since("1.4.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
"""
return self._set(metricName=value)
@since("1.4.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
"""
return self.getOrDefault(self.metricName)
@keyword_only
@since("1.4.0")
def setParams(self, rawPredictionCol="rawPrediction", labelCol="label",
metricName="areaUnderROC", weightCol=None):
"""
setParams(self, rawPredictionCol="rawPrediction", labelCol="label", \
metricName="areaUnderROC", weightCol=None)
Sets params for binary classification evaluator.
"""
kwargs = self._input_kwargs
return self._set(**kwargs)
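# As noted in the class docstring, rawPrediction may also be a plain double
# column (e.g. the probability of label 1), with no vector assembly needed.
# A hedged sketch, assuming hypothetical column names "prob" and "label":
#
#     df = spark.createDataFrame([(0.1, 0.0), (0.8, 1.0)], ["prob", "label"])
#     BinaryClassificationEvaluator(rawPredictionCol="prob").evaluate(df)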
@inherit_doc
class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol,
JavaMLReadable, JavaMLWritable):
"""
Evaluator for Regression, which expects input columns prediction, label
and an optional weight column.
>>> scoreAndLabels = [(-28.98343821, -27.0), (20.21491975, 21.5),
... (-25.98418959, -22.0), (30.69731842, 33.0), (74.69283752, 71.0)]
>>> dataset = spark.createDataFrame(scoreAndLabels, ["raw", "label"])
...
>>> evaluator = RegressionEvaluator(predictionCol="raw")
>>> evaluator.evaluate(dataset)
2.842...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "r2"})
0.993...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "mae"})
2.649...
>>> re_path = temp_path + "/re"
>>> evaluator.save(re_path)
>>> evaluator2 = RegressionEvaluator.load(re_path)
>>> str(evaluator2.getPredictionCol())
'raw'
>>> scoreAndLabelsAndWeight = [(-28.98343821, -27.0, 1.0), (20.21491975, 21.5, 0.8),
... (-25.98418959, -22.0, 1.0), (30.69731842, 33.0, 0.6), (74.69283752, 71.0, 0.2)]
>>> dataset = spark.createDataFrame(scoreAndLabelsAndWeight, ["raw", "label", "weight"])
...
>>> evaluator = RegressionEvaluator(predictionCol="raw", weightCol="weight")
>>> evaluator.evaluate(dataset)
2.740...
.. versionadded:: 1.4.0
"""
metricName = Param(Params._dummy(), "metricName",
"""metric name in evaluation - one of:
rmse - root mean squared error (default)
mse - mean squared error
r2 - r^2 metric
mae - mean absolute error.""",
typeConverter=TypeConverters.toString)
@keyword_only
def __init__(self, predictionCol="prediction", labelCol="label",
metricName="rmse", weightCol=None):
"""
__init__(self, predictionCol="prediction", labelCol="label", \
metricName="rmse", weightCol=None)
"""
super(RegressionEvaluator, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.RegressionEvaluator", self.uid)
self._setDefault(metricName="rmse")
kwargs = self._input_kwargs
self._set(**kwargs)
@since("1.4.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
"""
return self._set(metricName=value)
@since("1.4.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
"""
return self.getOrDefault(self.metricName)
@keyword_only
@since("1.4.0")
def setParams(self, predictionCol="prediction", labelCol="label",
metricName="rmse", weightCol=None):
"""
setParams(self, predictionCol="prediction", labelCol="label", \
metricName="rmse", weightCol=None)
Sets params for regression evaluator.
"""
kwargs = self._input_kwargs
return self._set(**kwargs)
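# Because rmse, mse and mae are error metrics, ``isLargerBetter()`` returns
# False for them (and True for r2), so model-selection tools such as
# CrossValidator minimize or maximize the metric accordingly. A sketch:
#
#     RegressionEvaluator(metricName="r2").isLargerBetter()    # True
#     RegressionEvaluator(metricName="rmse").isLargerBetter()  # False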
@inherit_doc
class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol,
JavaMLReadable, JavaMLWritable):
"""
Evaluator for Multiclass Classification, which expects two input
columns: prediction and label.
>>> scoreAndLabels = [(0.0, 0.0), (0.0, 1.0), (0.0, 0.0),
... (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)]
>>> dataset = spark.createDataFrame(scoreAndLabels, ["prediction", "label"])
...
>>> evaluator = MulticlassClassificationEvaluator(predictionCol="prediction")
>>> evaluator.evaluate(dataset)
0.66...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "accuracy"})
0.66...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "truePositiveRateByLabel",
... evaluator.metricLabel: 1.0})
0.75...
>>> mce_path = temp_path + "/mce"
>>> evaluator.save(mce_path)
>>> evaluator2 = MulticlassClassificationEvaluator.load(mce_path)
>>> str(evaluator2.getPredictionCol())
'prediction'
>>> scoreAndLabelsAndWeight = [(0.0, 0.0, 1.0), (0.0, 1.0, 1.0), (0.0, 0.0, 1.0),
... (1.0, 0.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),
... (2.0, 2.0, 1.0), (2.0, 0.0, 1.0)]
>>> dataset = spark.createDataFrame(scoreAndLabelsAndWeight, ["prediction", "label", "weight"])
...
>>> evaluator = MulticlassClassificationEvaluator(predictionCol="prediction",
... weightCol="weight")
>>> evaluator.evaluate(dataset)
0.66...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "accuracy"})
0.66...
.. versionadded:: 1.5.0
"""
metricName = Param(Params._dummy(), "metricName",
"metric name in evaluation "
"(f1|accuracy|weightedPrecision|weightedRecall|weightedTruePositiveRate|"
"weightedFalsePositiveRate|weightedFMeasure|truePositiveRateByLabel|"
"falsePositiveRateByLabel|precisionByLabel|recallByLabel|fMeasureByLabel)",
typeConverter=TypeConverters.toString)
metricLabel = Param(Params._dummy(), "metricLabel",
"The class whose metric will be computed in truePositiveRateByLabel|"
"falsePositiveRateByLabel|precisionByLabel|recallByLabel|fMeasureByLabel."
" Must be >= 0. The default value is 0.",
typeConverter=TypeConverters.toFloat)
beta = Param(Params._dummy(), "beta",
"The beta value used in weightedFMeasure|fMeasureByLabel."
" Must be > 0. The default value is 1.",
typeConverter=TypeConverters.toFloat)
@keyword_only
def __init__(self, predictionCol="prediction", labelCol="label",
metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0):
"""
__init__(self, predictionCol="prediction", labelCol="label", \
metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0)
"""
super(MulticlassClassificationEvaluator, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator", self.uid)
self._setDefault(metricName="f1", metricLabel=0.0, beta=1.0)
kwargs = self._input_kwargs
self._set(**kwargs)
@since("1.5.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
"""
return self._set(metricName=value)
@since("1.5.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
"""
return self.getOrDefault(self.metricName)
@since("3.0.0")
def setMetricLabel(self, value):
"""
Sets the value of :py:attr:`metricLabel`.
"""
return self._set(metricLabel=value)
@since("3.0.0")
def getMetricLabel(self):
"""
Gets the value of metricLabel or its default value.
"""
return self.getOrDefault(self.metricLabel)
@since("3.0.0")
def setBeta(self, value):
"""
Sets the value of :py:attr:`beta`.
"""
return self._set(beta=value)
@since("3.0.0")
def getBeta(self):
"""
Gets the value of beta or its default value.
"""
return self.getOrDefault(self.beta)
@keyword_only
@since("1.5.0")
def setParams(self, predictionCol="prediction", labelCol="label",
metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0):
"""
setParams(self, predictionCol="prediction", labelCol="label", \
metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0)
Sets params for multiclass classification evaluator.
"""
kwargs = self._input_kwargs
return self._set(**kwargs)
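# Per-label metrics consult ``metricLabel``, and the F-measure variants also
# consult ``beta``. A sketch reusing the doctest ``dataset`` above:
#
#     evaluator.evaluate(dataset, {evaluator.metricName: "fMeasureByLabel",
#                                  evaluator.metricLabel: 2.0,
#                                  evaluator.beta: 0.5})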
@inherit_doc
class MultilabelClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol,
JavaMLReadable, JavaMLWritable):
"""
.. note:: Experimental
Evaluator for Multilabel Classification, which expects two input
columns: prediction and label.
>>> scoreAndLabels = [([0.0, 1.0], [0.0, 2.0]), ([0.0, 2.0], [0.0, 1.0]),
... ([], [0.0]), ([2.0], [2.0]), ([2.0, 0.0], [2.0, 0.0]),
... ([0.0, 1.0, 2.0], [0.0, 1.0]), ([1.0], [1.0, 2.0])]
>>> dataset = spark.createDataFrame(scoreAndLabels, ["prediction", "label"])
...
>>> evaluator = MultilabelClassificationEvaluator(predictionCol="prediction")
>>> evaluator.evaluate(dataset)
0.63...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "accuracy"})
0.54...
>>> mlce_path = temp_path + "/mlce"
>>> evaluator.save(mlce_path)
>>> evaluator2 = MultilabelClassificationEvaluator.load(mlce_path)
>>> str(evaluator2.getPredictionCol())
'prediction'
.. versionadded:: 3.0.0
"""
metricName = Param(Params._dummy(), "metricName",
"metric name in evaluation "
"(subsetAccuracy|accuracy|hammingLoss|precision|recall|f1Measure|"
"precisionByLabel|recallByLabel|f1MeasureByLabel|microPrecision|"
"microRecall|microF1Measure)",
typeConverter=TypeConverters.toString)
metricLabel = Param(Params._dummy(), "metricLabel",
"The class whose metric will be computed in precisionByLabel|"
"recallByLabel|f1MeasureByLabel. "
"Must be >= 0. The default value is 0.",
typeConverter=TypeConverters.toFloat)
@keyword_only
def __init__(self, predictionCol="prediction", labelCol="label",
metricName="f1Measure", metricLabel=0.0):
"""
__init__(self, predictionCol="prediction", labelCol="label", \
metricName="f1Measure", metricLabel=0.0)
"""
super(MultilabelClassificationEvaluator, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.MultilabelClassificationEvaluator", self.uid)
self._setDefault(metricName="f1Measure", metricLabel=0.0)
kwargs = self._input_kwargs
self._set(**kwargs)
@since("3.0.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
"""
return self._set(metricName=value)
@since("3.0.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
"""
return self.getOrDefault(self.metricName)
@since("3.0.0")
def setMetricLabel(self, value):
"""
Sets the value of :py:attr:`metricLabel`.
"""
return self._set(metricLabel=value)
@since("3.0.0")
def getMetricLabel(self):
"""
Gets the value of metricLabel or its default value.
"""
return self.getOrDefault(self.metricLabel)
@keyword_only
@since("3.0.0")
def setParams(self, predictionCol="prediction", labelCol="label",
metricName="f1Measure", metricLabel=0.0):
"""
setParams(self, predictionCol="prediction", labelCol="label", \
metricName="f1Measure", metricLabel=0.0)
Sets params for multilabel classification evaluator.
"""
kwargs = self._input_kwargs
return self._set(**kwargs)
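# The per-label metrics here work the same way: set ``metricLabel`` to pick
# the class of interest. A sketch reusing the doctest ``dataset`` above:
#
#     evaluator.evaluate(dataset, {evaluator.metricName: "precisionByLabel",
#                                  evaluator.metricLabel: 0.0})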
@inherit_doc
class ClusteringEvaluator(JavaEvaluator, HasPredictionCol, HasFeaturesCol,
JavaMLReadable, JavaMLWritable):
"""
Evaluator for Clustering results, which expects two input
columns: prediction and features. The metric computes the Silhouette
measure using the squared Euclidean distance.
The Silhouette is a measure for the validation of the consistency
within clusters. It ranges between 1 and -1, where a value close to
1 means that the points in a cluster are close to the other points
in the same cluster and far from the points of the other clusters.
>>> from pyspark.ml.linalg import Vectors
>>> featureAndPredictions = map(lambda x: (Vectors.dense(x[0]), x[1]),
... [([0.0, 0.5], 0.0), ([0.5, 0.0], 0.0), ([10.0, 11.0], 1.0),
... ([10.5, 11.5], 1.0), ([1.0, 1.0], 0.0), ([8.0, 6.0], 1.0)])
>>> dataset = spark.createDataFrame(featureAndPredictions, ["features", "prediction"])
...
>>> evaluator = ClusteringEvaluator(predictionCol="prediction")
>>> evaluator.evaluate(dataset)
0.9079...
>>> ce_path = temp_path + "/ce"
>>> evaluator.save(ce_path)
>>> evaluator2 = ClusteringEvaluator.load(ce_path)
>>> str(evaluator2.getPredictionCol())
'prediction'
.. versionadded:: 2.3.0
"""
metricName = Param(Params._dummy(), "metricName",
"metric name in evaluation (silhouette)",
typeConverter=TypeConverters.toString)
distanceMeasure = Param(Params._dummy(), "distanceMeasure", "The distance measure. " +
"Supported options: 'squaredEuclidean' and 'cosine'.",
typeConverter=TypeConverters.toString)
@keyword_only
def __init__(self, predictionCol="prediction", featuresCol="features",
metricName="silhouette", distanceMeasure="squaredEuclidean"):
"""
__init__(self, predictionCol="prediction", featuresCol="features", \
metricName="silhouette", distanceMeasure="squaredEuclidean")
"""
super(ClusteringEvaluator, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.ClusteringEvaluator", self.uid)
self._setDefault(metricName="silhouette", distanceMeasure="squaredEuclidean")
kwargs = self._input_kwargs
self._set(**kwargs)
@since("2.3.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
"""
return self._set(metricName=value)
@since("2.3.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
"""
return self.getOrDefault(self.metricName)
@keyword_only
@since("2.3.0")
def setParams(self, predictionCol="prediction", featuresCol="features",
metricName="silhouette", distanceMeasure="squaredEuclidean"):
"""
setParams(self, predictionCol="prediction", featuresCol="features", \
metricName="silhouette", distanceMeasure="squaredEuclidean")
Sets params for clustering evaluator.
"""
kwargs = self._input_kwargs
return self._set(**kwargs)
@since("2.4.0")
def setDistanceMeasure(self, value):
"""
Sets the value of :py:attr:`distanceMeasure`.
"""
return self._set(distanceMeasure=value)
@since("2.4.0")
def getDistanceMeasure(self):
"""
        Gets the value of `distanceMeasure` or its default value.
"""
return self.getOrDefault(self.distanceMeasure)
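# The Silhouette can also be computed with the cosine distance instead of the
# default squared Euclidean one. A sketch:
#
#     ClusteringEvaluator(distanceMeasure="cosine").evaluate(dataset)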
@inherit_doc
class RankingEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol,
JavaMLReadable, JavaMLWritable):
"""
.. note:: Experimental
Evaluator for Ranking, which expects two input
columns: prediction and label.
>>> scoreAndLabels = [([1.0, 6.0, 2.0, 7.0, 8.0, 3.0, 9.0, 10.0, 4.0, 5.0],
... [1.0, 2.0, 3.0, 4.0, 5.0]),
... ([4.0, 1.0, 5.0, 6.0, 2.0, 7.0, 3.0, 8.0, 9.0, 10.0], [1.0, 2.0, 3.0]),
... ([1.0, 2.0, 3.0, 4.0, 5.0], [])]
>>> dataset = spark.createDataFrame(scoreAndLabels, ["prediction", "label"])
...
>>> evaluator = RankingEvaluator(predictionCol="prediction")
>>> evaluator.evaluate(dataset)
0.35...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "precisionAtK", evaluator.k: 2})
0.33...
>>> ranke_path = temp_path + "/ranke"
>>> evaluator.save(ranke_path)
>>> evaluator2 = RankingEvaluator.load(ranke_path)
>>> str(evaluator2.getPredictionCol())
'prediction'
.. versionadded:: 3.0.0
"""
metricName = Param(Params._dummy(), "metricName",
"metric name in evaluation "
"(meanAveragePrecision|meanAveragePrecisionAtK|"
"precisionAtK|ndcgAtK|recallAtK)",
typeConverter=TypeConverters.toString)
k = Param(Params._dummy(), "k",
"The ranking position value used in meanAveragePrecisionAtK|precisionAtK|"
"ndcgAtK|recallAtK. Must be > 0. The default value is 10.",
typeConverter=TypeConverters.toInt)
@keyword_only
def __init__(self, predictionCol="prediction", labelCol="label",
metricName="meanAveragePrecision", k=10):
"""
__init__(self, predictionCol="prediction", labelCol="label", \
metricName="meanAveragePrecision", k=10)
"""
super(RankingEvaluator, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.RankingEvaluator", self.uid)
self._setDefault(metricName="meanAveragePrecision", k=10)
kwargs = self._input_kwargs
self._set(**kwargs)
@since("3.0.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
"""
return self._set(metricName=value)
@since("3.0.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
"""
return self.getOrDefault(self.metricName)
@since("3.0.0")
def setK(self, value):
"""
Sets the value of :py:attr:`k`.
"""
return self._set(k=value)
@since("3.0.0")
def getK(self):
"""
Gets the value of k or its default value.
"""
return self.getOrDefault(self.k)
@keyword_only
@since("3.0.0")
def setParams(self, predictionCol="prediction", labelCol="label",
metricName="meanAveragePrecision", k=10):
"""
setParams(self, predictionCol="prediction", labelCol="label", \
metricName="meanAveragePrecision", k=10)
Sets params for ranking evaluator.
"""
kwargs = self._input_kwargs
return self._set(**kwargs)
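# The ...AtK metrics consult ``k``; plain meanAveragePrecision does not.
# A sketch reusing the doctest ``dataset`` above:
#
#     evaluator.evaluate(dataset, {evaluator.metricName: "ndcgAtK",
#                                  evaluator.k: 5})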
if __name__ == "__main__":
import doctest
import tempfile
import pyspark.ml.evaluation
from pyspark.sql import SparkSession
globs = pyspark.ml.evaluation.__dict__.copy()
    # Create a small local SparkSession for running this module's doctests:
spark = SparkSession.builder\
.master("local[2]")\
.appName("ml.evaluation tests")\
.getOrCreate()
globs['spark'] = spark
temp_path = tempfile.mkdtemp()
globs['temp_path'] = temp_path
try:
(failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
spark.stop()
finally:
from shutil import rmtree
try:
rmtree(temp_path)
except OSError:
pass
if failure_count:
sys.exit(-1)