2015-01-28 20:14:23 -05:00
|
|
|
#
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
|
|
# this work for additional information regarding copyright ownership.
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
# (the "License"); you may not use this file except in compliance with
|
|
|
|
# the License. You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
#
|
|
|
|
|
|
|
|
from abc import ABCMeta
|
2015-05-18 15:02:18 -04:00
|
|
|
import copy
|
2015-01-28 20:14:23 -05:00
|
|
|
|
|
|
|
from pyspark.ml.util import Identifiable
|
|
|
|
|
|
|
|
|
|
|
|
__all__ = ['Param', 'Params']
|
|
|
|
|
|
|
|
|
|
|
|
class Param(object):
    """
    A param with self-contained documentation.

    A param is identified by the uid of its parent together with its
    own name; the documentation string does not take part in equality
    or hashing.
    """

    def __init__(self, parent, name, doc):
        """
        :param parent: the :py:class:`Identifiable` that owns this
                       param; only its ``uid`` is stored
        :param name: name of the param (converted with :py:func:`str`)
        :param doc: documentation of the param (converted with
                    :py:func:`str`)
        :raises TypeError: if ``parent`` is not an ``Identifiable``
        """
        if not isinstance(parent, Identifiable):
            raise TypeError("Parent must be an Identifiable but got type %s." % type(parent))
        self.parent = parent.uid
        self.name = str(name)
        self.doc = str(doc)

    def __str__(self):
        """
        Returns ``<parent uid>__<name>``; this string is also what
        :py:meth:`__hash__` hashes.
        """
        return str(self.parent) + "__" + self.name

    def __repr__(self):
        return "Param(parent=%r, name=%r, doc=%r)" % (self.parent, self.name, self.doc)

    def __hash__(self):
        # Hash on the same (parent, name) identity that __eq__ compares,
        # so that equal params land in the same dict slot.
        return hash(str(self))

    def __eq__(self, other):
        """
        Two params are equal if they have the same parent uid and the
        same name. Anything that is not a :py:class:`Param` is unequal.
        """
        if isinstance(other, Param):
            return self.parent == other.parent and self.name == other.name
        else:
            return False

    def __ne__(self, other):
        """
        Negation of :py:meth:`__eq__`.
        """
        # Python 2 does not derive ``!=`` from ``__eq__``; without this
        # method two equal params would also compare as "not equal".
        return not self.__eq__(other)
|
|
|
|
|
2015-01-28 20:14:23 -05:00
|
|
|
|
|
|
|
class Params(Identifiable):
    """
    Components that take parameters. This also provides an internal
    param map to store parameter values attached to the instance.

    Every instance owns its own user-supplied and default param maps,
    so values set on one instance are never visible through another.
    """

    __metaclass__ = ABCMeta

    #: internal param map for user-supplied values param map
    #: (class-level fallback only; it is never written to, see
    #: :py:meth:`_ownedMap`)
    _paramMap = {}

    #: internal param map for default values
    #: (class-level fallback only; it is never written to, see
    #: :py:meth:`_ownedMap`)
    _defaultParamMap = {}

    #: value returned by :py:func:`params`
    _params = None

    def __init__(self):
        super(Params, self).__init__()
        # Give the instance its own maps up front so that code writing
        # to them directly does not mutate the class-level dicts that
        # are shared by all instances.
        self._ownedMap("_paramMap")
        self._ownedMap("_defaultParamMap")

    def _ownedMap(self, attr):
        """
        Returns the param map stored under ``attr`` in this instance's
        own ``__dict__``. If the instance does not have one yet, it is
        first created as a copy of the inherited (class-level) map.

        :param attr: ``"_paramMap"`` or ``"_defaultParamMap"``
        :return: a dict that belongs to this instance only
        """
        state = self.__dict__
        if attr not in state:
            state[attr] = dict(getattr(self, attr))
        return state[attr]

    @property
    def params(self):
        """
        Returns all params ordered by name. The default implementation
        uses :py:func:`dir` to get all attributes of type
        :py:class:`Param`. The result is computed on first access and
        cached in ``_params``.
        """
        if self._params is None:
            # "params" itself is skipped, otherwise reading it here
            # would recurse into this property.
            attrs = (getattr(self, name) for name in dir(self) if name != "params")
            self._params = [attr for attr in attrs if isinstance(attr, Param)]
        return self._params

    def explainParam(self, param):
        """
        Explains a single param and returns its name, doc, and optional
        default value and user-supplied value in a string.

        :param param: param name or param instance belonging to this
                      instance
        :return: a string of the form ``name: doc (default: ..., current: ...)``
                 or ``name: doc (undefined)``
        """
        param = self._resolveParam(param)
        values = []
        if self.isDefined(param):
            if param in self._defaultParamMap:
                values.append("default: %s" % self._defaultParamMap[param])
            if param in self._paramMap:
                values.append("current: %s" % self._paramMap[param])
        else:
            values.append("undefined")
        valueStr = "(" + ", ".join(values) + ")"
        return "%s: %s %s" % (param.name, param.doc, valueStr)

    def explainParams(self):
        """
        Returns the documentation of all params with their optionally
        default values and user-supplied values, one param per line.
        """
        return "\n".join([self.explainParam(param) for param in self.params])

    def getParam(self, paramName):
        """
        Gets a param by its name.

        :param paramName: name of the attribute holding the param
        :return: the :py:class:`Param` stored under that name
        :raises AttributeError: if this instance has no such attribute
        :raises ValueError: if the attribute is not a :py:class:`Param`
        """
        param = getattr(self, paramName)
        if isinstance(param, Param):
            return param
        else:
            raise ValueError("Cannot find param with name %s." % paramName)

    def isSet(self, param):
        """
        Checks whether a param is explicitly set by user.
        """
        param = self._resolveParam(param)
        return param in self._paramMap

    def hasDefault(self, param):
        """
        Checks whether a param has a default value.
        """
        param = self._resolveParam(param)
        return param in self._defaultParamMap

    def isDefined(self, param):
        """
        Checks whether a param is explicitly set by user or has
        a default value.
        """
        return self.isSet(param) or self.hasDefault(param)

    def hasParam(self, paramName):
        """
        Tests whether this instance contains a param with a given
        (string) name.

        :param paramName: name of the param to look for
        :return: True if an attribute with that name exists and is a
                 :py:class:`Param`, False otherwise
        """
        if isinstance(paramName, str):
            # A missing attribute or a non-Param attribute means "no
            # such param"; it must not raise, because _shouldOwn and
            # _copyValues rely on getting False back.
            return isinstance(getattr(self, paramName, None), Param)
        # Non-string input keeps the previous behavior: resolve it
        # (which validates ownership or raises) and look it up.
        return self._resolveParam(paramName) in self.params

    def getOrDefault(self, param):
        """
        Gets the value of a param in the user-supplied param map or its
        default value. Raises an error if neither is set.

        :param param: param name or param instance belonging to this
                      instance
        :raises KeyError: if the param has neither a user-supplied
                          value nor a default value
        """
        param = self._resolveParam(param)
        if param in self._paramMap:
            return self._paramMap[param]
        else:
            return self._defaultParamMap[param]

    def extractParamMap(self, extra=None):
        """
        Extracts the embedded default param values and user-supplied
        values, and then merges them with extra values from input into
        a flat param map, where the latter value is used if there exist
        conflicts, i.e., with ordering: default param values <
        user-supplied values < extra.

        :param extra: extra param values; None means no extra values
        :return: merged param map (a new dict)
        """
        if extra is None:
            extra = {}
        paramMap = self._defaultParamMap.copy()
        paramMap.update(self._paramMap)
        paramMap.update(extra)
        return paramMap

    def copy(self, extra=None):
        """
        Creates a copy of this instance with the same uid and some
        extra params. The default implementation creates a
        shallow copy using :py:func:`copy.copy`, and then copies the
        embedded and extra parameters over and returns the copy.
        The copy's user-supplied map is the merged map returned by
        :py:meth:`extractParamMap`.
        Subclasses should override this method if the default approach
        is not sufficient.

        :param extra: Extra parameters to copy to the new instance;
                      None means no extra values
        :return: Copy of this instance
        """
        that = copy.copy(self)
        that._paramMap = self.extractParamMap(extra)
        # The shallow copy still points at this instance's default map;
        # give the copy its own so later _setDefault calls on either
        # object do not affect the other.
        that._defaultParamMap = self._defaultParamMap.copy()
        return that

    def _shouldOwn(self, param):
        """
        Validates that the input param belongs to this Params instance.

        :raises ValueError: if the param's parent is not this instance's
                            uid or this instance has no param of that
                            name
        """
        if not (self.uid == param.parent and self.hasParam(param.name)):
            raise ValueError("Param %r does not belong to %r." % (param, self))

    def _resolveParam(self, param):
        """
        Resolves a param and validates the ownership.

        :param param: param name or the param instance, which must
                      belong to this Params instance
        :return: resolved param instance
        :raises ValueError: if ``param`` is neither a string nor a
                            :py:class:`Param`, or does not belong to
                            this instance
        """
        if isinstance(param, Param):
            self._shouldOwn(param)
            return param
        elif isinstance(param, str):
            return self.getParam(param)
        else:
            raise ValueError("Cannot resolve %r as a param." % param)

    @staticmethod
    def _dummy():
        """
        Returns a dummy Params instance used as a placeholder to
        generate docs. Its uid is the string ``"undefined"``.
        """
        dummy = Params()
        dummy.uid = "undefined"
        return dummy

    def _set(self, **kwargs):
        """
        Sets user-supplied params.

        :param kwargs: param values keyed by param (attribute) name
        :return: this instance
        """
        # Always write to a map owned by this instance, never to the
        # class-level dict shared by all instances.
        paramMap = self._ownedMap("_paramMap")
        for param, value in kwargs.items():
            paramMap[getattr(self, param)] = value
        return self

    def _setDefault(self, **kwargs):
        """
        Sets default params.

        :param kwargs: default values keyed by param (attribute) name
        :return: this instance
        """
        # Always write to a map owned by this instance, never to the
        # class-level dict shared by all instances.
        defaultParamMap = self._ownedMap("_defaultParamMap")
        for param, value in kwargs.items():
            defaultParamMap[getattr(self, param)] = value
        return self

    def _copyValues(self, to, extra=None):
        """
        Copies param values from this instance to another instance for
        params shared by them.

        :param to: the target instance
        :param extra: extra params to be copied; None means no extra
                      values
        :return: the target instance with param values copied
        """
        paramMap = self.extractParamMap(extra)
        for p in self.params:
            # Only params that have a value here and that also exist
            # (by name) on the target are transferred.
            if p in paramMap and to.hasParam(p.name):
                to._set(**{p.name: paramMap[p]})
        return to
|