ba84bcb2c4
## What changes were proposed in this pull request? Introducing R Bindings for SparkR on K8s - [x] Running SparkR Job ## How was this patch tested? This patch was tested with - [x] Unit Tests - [x] Integration Tests ## Example: Commands to run example spark job: 1. `dev/make-distribution.sh --pip --r --tgz -Psparkr -Phadoop-2.7 -Pkubernetes` 2. `bin/docker-image-tool.sh -m -t testing build` 3. ``` bin/spark-submit \ --master k8s://https://192.168.64.33:8443 \ --deploy-mode cluster \ --name spark-r \ --conf spark.executor.instances=1 \ --conf spark.kubernetes.container.image=spark-r:testing \ local:///opt/spark/examples/src/main/r/dataframe.R ``` The above spark-submit command works given the distribution. (Will include this integration test in the PR once PRB is ready.) Author: Ilan Filonenko <if56@cornell.edu> Closes #21584 from ifilonenko/spark-r.
182 lines
5.2 KiB
Bash
Executable file
182 lines
5.2 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
# This script builds and pushes docker images when run from a release of Spark
|
|
# with Kubernetes support.
|
|
|
|
# Report a fatal problem: echo every argument to stderr as one message line,
# then terminate the script with exit status 1.
error() {
  echo "$@" >&2
  exit 1
}
|
|
|
|
# Keep a caller-supplied SPARK_HOME; otherwise derive it as the parent of the
# directory that contains this script.
[[ -n "${SPARK_HOME}" ]] || SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)"

# Load ${SPARK_HOME}/bin/load-spark-env.sh into the current shell.
# NOTE(review): presumably this is what defines SPARK_SCALA_VERSION, which the
# build function reads - confirm against load-spark-env.sh.
source "${SPARK_HOME}/bin/load-spark-env.sh"
|
|
|
|
# Builds the full Docker image reference for an image name.
#
# Globals:
#   REPO (read) - repository prefix; omitted when empty.
#   TAG  (read) - tag suffix; omitted when empty.
# Arguments:
#   $1 - bare image name, e.g. "spark".
#   $2 - optional; the repository prefix is added only when this is "1",
#        which is also the default when the argument is omitted or empty.
# Outputs:
#   Writes "[REPO/]name[:TAG]" to stdout.
function image_ref {
  local image="$1"
  local add_repo="${2:-1}"
  # Quoted so an unexpected value (whitespace, glob characters) simply counts
  # as "not 1" instead of raising a test-syntax error on stderr.
  if [ "$add_repo" = 1 ] && [ -n "$REPO" ]; then
    image="$REPO/$image"
  fi
  if [ -n "$TAG" ]; then
    image="$image:$TAG"
  fi
  # printf instead of echo so a name that looks like an echo option is still
  # printed verbatim.
  printf '%s\n' "$image"
}
|
|
|
|
# Builds the Spark base image, then the PySpark and SparkR binding images.
#
# Globals (read):
#   SPARK_HOME          - a RELEASE file in this directory selects the
#                         distribution layout; otherwise a source checkout
#                         layout is assumed.
#   SPARK_SCALA_VERSION - used to locate the assembly jars in a source checkout.
#   BUILD_PARAMS        - extra "--build-arg ..." words collected from -b.
#   NOCACHEARG          - "--no-cache" or empty.
#   BASEDOCKERFILE, PYDOCKERFILE, RDOCKERFILE - optional Dockerfile overrides.
# Notes:
#   All paths are relative, and "." is used as the docker build context, so
#   the current directory must be the Spark root.
#   Exits through error() when the Dockerfile directory is missing or when any
#   docker build fails, so binding images are never built on top of a base
#   image that failed to build.
function build {
  local BUILD_ARGS
  local IMG_PATH

  if [ ! -f "$SPARK_HOME/RELEASE" ]; then
    # Set image build arguments accordingly if this is a source repo and not a distribution archive.
    IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles
    # BUILD_PARAMS is deliberately unquoted: it is a whitespace-separated list
    # of words that must be split into separate array elements.
    BUILD_ARGS=(
      ${BUILD_PARAMS}
      --build-arg
      "img_path=$IMG_PATH"
      --build-arg
      "spark_jars=assembly/target/scala-$SPARK_SCALA_VERSION/jars"
    )
  else
    # Not passed as an argument to docker, but used to validate the Spark directory.
    IMG_PATH="kubernetes/dockerfiles"
    BUILD_ARGS=(${BUILD_PARAMS})
  fi

  if [ ! -d "$IMG_PATH" ]; then
    error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
  fi

  # Resolve the base image reference once; it is both the tag of the first
  # build and the base_img argument of the binding builds.
  local base_ref
  base_ref="$(image_ref spark)"

  local BINDING_BUILD_ARGS=(
    ${BUILD_PARAMS}
    --build-arg
    "base_img=$base_ref"
  )
  local base_dockerfile="${BASEDOCKERFILE:-$IMG_PATH/spark/Dockerfile}"
  local py_dockerfile="${PYDOCKERFILE:-$IMG_PATH/spark/bindings/python/Dockerfile}"
  local r_dockerfile="${RDOCKERFILE:-$IMG_PATH/spark/bindings/R/Dockerfile}"

  # NOCACHEARG is deliberately unquoted so that an empty value contributes no
  # argument at all.
  docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
    -t "$base_ref" \
    -f "$base_dockerfile" . \
    || error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."

  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
    -t "$(image_ref spark-py)" \
    -f "$py_dockerfile" . \
    || error "Failed to build PySpark Docker image, please refer to Docker build output for details."

  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
    -t "$(image_ref spark-r)" \
    -f "$r_dockerfile" . \
    || error "Failed to build SparkR Docker image, please refer to Docker build output for details."
}
|
|
|
|
# Pushes the spark, spark-py and spark-r images, in that order.
#
# Each image reference is resolved through image_ref. The first failing
# `docker push` aborts the script through error(), so a partial publish is
# reported instead of being hidden behind the status of the last push.
function push {
  local name
  local ref
  for name in spark spark-py spark-r; do
    ref="$(image_ref "$name")"
    docker push "$ref" || error "Failed to push $ref Docker image."
  done
}
|
|
|
|
# Writes the command-line help (commands, options, minikube notes, examples)
# to stdout. $0 is expanded inside the text, so the examples show the name the
# script was invoked with.
usage() {
  cat <<USAGE_TEXT
Usage: $0 [options] [command]
Builds or pushes the built-in Spark Docker image.

Commands:
build Build image. Requires a repository address to be provided if the image will be
pushed to a different registry.
push Push a pre-built image to a registry. Requires a repository address to be provided.

Options:
-f file Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
-p file Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
-R file Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
-r repo Repository address.
-t tag Tag to apply to the built image, or to identify the image to be pushed.
-m Use minikube's Docker daemon.
-n Build docker image with --no-cache
-b arg Build arg to build or push the image. For multiple build args, this option needs to
be used separately for each build arg.

Using minikube when building images will do so directly into minikube's Docker daemon.
There is no need to push the images into minikube in that case, they'll be automatically
available when running applications inside the minikube cluster.

Check the following documentation for more information on using the minikube Docker daemon:

https://kubernetes.io/docs/getting-started-guides/minikube/#reusing-the-docker-daemon

Examples:
- Build image in minikube with tag "testing"
$0 -m -t testing build

- Build and push image with tag "v2.3.0" to docker.io/myrepo
$0 -r docker.io/myrepo -t v2.3.0 build
$0 -r docker.io/myrepo -t v2.3.0 push
USAGE_TEXT
}
|
|
|
|
# Returns 0 when any argument is exactly -h or --help, 1 otherwise.
#
# Each argument is compared on its own, so the help flag is recognised in any
# position and a value that merely ends in "-h" (for example a tag such as
# "v1-h") is not mistaken for it.
function wants_help {
  local arg
  for arg in "$@"; do
    case "$arg" in
      -h|--help) return 0 ;;
    esac
  done
  return 1
}

# Show the help text and stop successfully before any option parsing happens.
if wants_help "$@"; then
  usage
  exit 0
fi
|
|
|
|
# Parses the command-line options into the globals used by the rest of the
# script.
#
# Globals (written): REPO, TAG, BASEDOCKERFILE, PYDOCKERFILE, RDOCKERFILE,
#   NOCACHEARG, BUILD_PARAMS. All are reset to empty first, so values inherited
#   from the environment are discarded.
# Arguments: the script's command-line arguments.
# Side effects: -m evaluates the output of `minikube docker-env` in the current
#   shell, and aborts through error() when minikube is not on PATH.
function parse_options {
  local option

  REPO=
  TAG=
  BASEDOCKERFILE=
  PYDOCKERFILE=
  RDOCKERFILE=
  NOCACHEARG=
  BUILD_PARAMS=

  # getopts keeps its position in the global OPTIND; start from the first word.
  OPTIND=1
  # -n is a plain flag, so it must not carry a trailing ':' in the optstring;
  # otherwise it would consume the following argument as its value.
  while getopts f:p:R:mr:t:nb: option; do
    case "${option}" in
      f) BASEDOCKERFILE=${OPTARG} ;;
      p) PYDOCKERFILE=${OPTARG} ;;
      R) RDOCKERFILE=${OPTARG} ;;
      r) REPO=${OPTARG} ;;
      t) TAG=${OPTARG} ;;
      n) NOCACHEARG="--no-cache" ;;
      # Accumulated as a flat string; the build step word-splits it.
      b) BUILD_PARAMS="${BUILD_PARAMS} --build-arg ${OPTARG}" ;;
      m)
        if ! command -v minikube >/dev/null; then
          error "Cannot find minikube."
        fi
        eval "$(minikube docker-env)"
        ;;
    esac
  done
}

parse_options "$@"
|
|
|
|
# The command is taken from the last word of the command line.
requested_command="${@: -1}"

if [[ "$requested_command" == "build" ]]; then
  build
elif [[ "$requested_command" == "push" ]]; then
  # Pushing requires a repository address.
  if [[ -z "$REPO" ]]; then
    usage
    exit 1
  fi
  push
else
  # Anything else: show the help text and signal failure.
  usage
  exit 1
fi
|