spark-instrumented-optimizer/python/pyspark/resource/profile.py
HyukjinKwon 6fb22aa42d
[SPARK-31748][PYTHON] Document resource module in PySpark doc and rename/move classes
### What changes were proposed in this pull request?

This PR is a follow-up to SPARK-29641 and SPARK-28234. It proposes:

1. Document the new `pyspark.resource` module, introduced in 95aec091e4, in the PySpark API docs.

2. Move classes into fewer and simpler modules.

Before:

```
pyspark
├── resource
│   ├── executorrequests.py
│   │   ├── class ExecutorResourceRequest
│   │   └── class ExecutorResourceRequests
│   ├── taskrequests.py
│   │   ├── class TaskResourceRequest
│   │   └── class TaskResourceRequests
│   ├── resourceprofilebuilder.py
│   │   └── class ResourceProfileBuilder
│   ├── resourceprofile.py
│   │   └── class ResourceProfile
└── resourceinformation
    └── class ResourceInformation
```

After:

```
pyspark
└── resource
    ├── requests.py
    │   ├── class ExecutorResourceRequest
    │   ├── class ExecutorResourceRequests
    │   ├── class TaskResourceRequest
    │   └── class TaskResourceRequests
    ├── profile.py
    │   ├── class ResourceProfileBuilder
    │   └── class ResourceProfile
    └── information.py
        └── class ResourceInformation
```

3. Minor docstring fixes, e.g.:

```diff
-     param name the name of the resource
-     param addresses an array of strings describing the addresses of the resource
+     :param name: the name of the resource
+     :param addresses: an array of strings describing the addresses of the resource
+
+     .. versionadded:: 3.0.0
```

### Why are the changes needed?

To document the APIs and to consolidate the Python classes into fewer, simpler modules.

### Does this PR introduce _any_ user-facing change?

No, the changes are in unreleased branches.

### How was this patch tested?

Manually tested via:

```bash
cd python
./run-tests --python-executables=python3 --modules=pyspark-core
./run-tests --python-executables=python3 --modules=pyspark-resource
```

Closes #28569 from HyukjinKwon/SPARK-28234-SPARK-29641-followup.

Authored-by: HyukjinKwon <gurwls223@apache.org>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
2020-05-19 17:09:37 -07:00

169 lines
6.9 KiB
Python

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.resource.requests import TaskResourceRequest, TaskResourceRequests, \
ExecutorResourceRequests, ExecutorResourceRequest
class ResourceProfile(object):

    """
    .. note:: Evolving

    Resource profile to associate with an RDD. A :class:`pyspark.resource.ResourceProfile`
    allows the user to specify executor and task requirements for an RDD that will get
    applied during a stage. This allows the user to change the resource requirements between
    stages. This is meant to be immutable so user cannot change it after building.

    .. versionadded:: 3.1.0
    """

    def __init__(self, _java_resource_profile=None, _exec_req=None, _task_req=None):
        """
        :param _java_resource_profile: JVM-side profile object to wrap, or None when
            the profile is held purely on the Python side
        :param _exec_req: executor requests returned by :attr:`executorResources`
            when no JVM profile is wrapped; a fresh empty dict is used when None
        :param _task_req: task requests returned by :attr:`taskResources` when no
            JVM profile is wrapped; a fresh empty dict is used when None
        """
        # The defaults are None rather than ``{}``: a dict literal in the signature
        # is evaluated once, so every profile created without explicit requests
        # would otherwise share (and be able to mutate) the very same dict.
        self._java_resource_profile = _java_resource_profile
        self._executor_resource_requests = {} if _exec_req is None else _exec_req
        self._task_resource_requests = {} if _task_req is None else _task_req

    @property
    def id(self):
        """
        Id reported by the wrapped JVM profile.

        :raises RuntimeError: if this profile does not wrap a JVM profile
        """
        if self._java_resource_profile is not None:
            return self._java_resource_profile.id()
        else:
            raise RuntimeError("SparkContext must be created to get the id, get the id "
                               "after adding the ResourceProfile to an RDD")

    @property
    def taskResources(self):
        """
        Task resource requests of this profile, keyed as in the underlying map.

        With a JVM profile, a new dict of :class:`TaskResourceRequest` objects is
        built from ``taskResourcesJMap()`` on every access; otherwise the
        Python-side dict given at construction is returned.
        """
        if self._java_resource_profile is not None:
            taskRes = self._java_resource_profile.taskResourcesJMap()
            return {k: TaskResourceRequest(v.resourceName(), v.amount())
                    for k, v in taskRes.items()}
        else:
            return self._task_resource_requests

    @property
    def executorResources(self):
        """
        Executor resource requests of this profile, keyed as in the underlying map.

        With a JVM profile, a new dict of :class:`ExecutorResourceRequest` objects is
        built from ``executorResourcesJMap()`` on every access; otherwise the
        Python-side dict given at construction is returned.
        """
        if self._java_resource_profile is not None:
            execRes = self._java_resource_profile.executorResourcesJMap()
            return {k: ExecutorResourceRequest(v.resourceName(), v.amount(),
                                               v.discoveryScript(), v.vendor())
                    for k, v in execRes.items()}
        else:
            return self._executor_resource_requests
class ResourceProfileBuilder(object):

    """
    .. note:: Evolving

    Resource profile Builder to build a resource profile to associate with an RDD.
    A ResourceProfile allows the user to specify executor and task requirements for
    an RDD that will get applied during a stage. This allows the user to change the
    resource requirements between stages.

    .. versionadded:: 3.1.0
    """

    def __init__(self):
        """
        Create an empty builder.

        When ``SparkContext._jvm`` is set, a JVM ``ResourceProfileBuilder`` is created
        and all requests are forwarded to it; otherwise requests are collected in
        Python-side dicts.
        """
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid a circular import -- confirm before moving it).
        from pyspark.context import SparkContext
        _jvm = SparkContext._jvm
        self._jvm = _jvm
        if _jvm is not None:
            self._java_resource_profile_builder = \
                _jvm.org.apache.spark.resource.ResourceProfileBuilder()
        else:
            self._java_resource_profile_builder = None
        # Python-side storage; only consulted when there is no JVM builder.
        self._executor_resource_requests = {}
        self._task_resource_requests = {}

    def require(self, resourceRequest):
        """
        Add the given resource requests to this builder.

        :param resourceRequest: a :class:`TaskResourceRequests`; any other object is
            handled as executor resource requests
        :return: this builder, so calls can be chained
        """
        if isinstance(resourceRequest, TaskResourceRequests):
            if self._java_resource_profile_builder is not None:
                if resourceRequest._java_task_resource_requests is not None:
                    self._java_resource_profile_builder.require(
                        resourceRequest._java_task_resource_requests)
                else:
                    # The request has no JVM counterpart yet: rebuild it with the JVM
                    # handle so that there is a Java object to forward.
                    taskReqs = TaskResourceRequests(self._jvm, resourceRequest.requests)
                    self._java_resource_profile_builder.require(
                        taskReqs._java_task_resource_requests)
            else:
                self._task_resource_requests.update(resourceRequest.requests)
        else:
            if self._java_resource_profile_builder is not None:
                if resourceRequest._java_executor_resource_requests is not None:
                    self._java_resource_profile_builder.require(
                        resourceRequest._java_executor_resource_requests)
                else:
                    # Same as above, for executor requests.
                    execReqs = ExecutorResourceRequests(self._jvm, resourceRequest.requests)
                    self._java_resource_profile_builder.require(
                        execReqs._java_executor_resource_requests)
            else:
                self._executor_resource_requests.update(resourceRequest.requests)
        return self

    def clearExecutorResourceRequests(self):
        """Remove all executor resource requests added to this builder so far."""
        if self._java_resource_profile_builder is not None:
            self._java_resource_profile_builder.clearExecutorResourceRequests()
        else:
            self._executor_resource_requests = {}

    def clearTaskResourceRequests(self):
        """Remove all task resource requests added to this builder so far."""
        if self._java_resource_profile_builder is not None:
            self._java_resource_profile_builder.clearTaskResourceRequests()
        else:
            self._task_resource_requests = {}

    @property
    def taskResources(self):
        """
        Task resource requests collected so far.

        With a JVM builder, a new dict of :class:`TaskResourceRequest` objects is built
        from ``taskResourcesJMap()`` on every access; otherwise the builder's own
        Python-side dict is returned.
        """
        if self._java_resource_profile_builder is not None:
            taskRes = self._java_resource_profile_builder.taskResourcesJMap()
            return {k: TaskResourceRequest(v.resourceName(), v.amount())
                    for k, v in taskRes.items()}
        else:
            return self._task_resource_requests

    @property
    def executorResources(self):
        """
        Executor resource requests collected so far.

        With a JVM builder, a new dict of :class:`ExecutorResourceRequest` objects is
        built from ``executorResourcesJMap()`` on every access; otherwise the builder's
        own Python-side dict is returned.
        """
        if self._java_resource_profile_builder is not None:
            execRes = self._java_resource_profile_builder.executorResourcesJMap()
            return {k: ExecutorResourceRequest(v.resourceName(), v.amount(),
                                               v.discoveryScript(), v.vendor())
                    for k, v in execRes.items()}
        else:
            return self._executor_resource_requests

    @property
    def build(self):
        """
        Build a :class:`ResourceProfile` from the requests collected so far.

        This is a property, so it is accessed as ``builder.build`` without parentheses.
        """
        if self._java_resource_profile_builder is not None:
            jresourceProfile = self._java_resource_profile_builder.build()
            return ResourceProfile(_java_resource_profile=jresourceProfile)
        else:
            # Give the profile its own copies of the dicts: a later ``require`` on
            # this builder updates the builder's dicts in place and must not change
            # a profile that has already been built.
            return ResourceProfile(_exec_req=dict(self._executor_resource_requests),
                                   _task_req=dict(self._task_resource_requests))