#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This script builds and pushes docker images when run from a release of Spark
# with Kubernetes support.
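
# For example (repository and tag below are placeholders, mirroring the usage examples
# at the bottom of this script):
#
#   ./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.3.0 build
#   ./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.3.0 push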

function error {
  echo "$@" 1>&2
  exit 1
}

if [ -z "${SPARK_HOME}" ]; then
  SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi
. "${SPARK_HOME}/bin/load-spark-env.sh"
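# Scratch directory where the trimmed-down Docker build contexts for dev builds are
# assembled; it is removed again by the EXIT trap registered below.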
CTX_DIR="$SPARK_HOME/target/tmp/docker"
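# A "dev build" is any checkout that does not contain the RELEASE marker file created
# when a Spark distribution is packaged.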
function is_dev_build {
  [ ! -f "$SPARK_HOME/RELEASE" ]
}

function cleanup_ctx_dir {
  if is_dev_build; then
    rm -rf "$CTX_DIR"
  fi
}

trap cleanup_ctx_dir EXIT
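# Echoes the full reference for the given image name, prefixed with $REPO (unless the
# second argument disables that) and suffixed with :$TAG when a tag was supplied.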
function image_ref {
  local image="$1"
  local add_repo="${2:-1}"
  if [ $add_repo = 1 ] && [ -n "$REPO" ]; then
    image="$REPO/$image"
  fi
  if [ -n "$TAG" ]; then
    image="$image:$TAG"
  fi
  echo "$image"
}
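# Pushes the given image to the configured repository if it exists locally; images that
# were never built are skipped with a message instead of failing the push.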
function docker_push {
  local image_name="$1"
  if [ -n "$(docker images -q "$(image_ref "${image_name}")")" ]; then
    docker push "$(image_ref "${image_name}")"
    if [ $? -ne 0 ]; then
      error "Failed to push $image_name Docker image."
    fi
  else
    echo "$(image_ref "${image_name}") image not found. Skipping push for this image."
  fi
}
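# Resolves the given path to an absolute path. Used for the -f/-p/-R Dockerfile options,
# since the docker builds below run from inside a per-image context directory.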
function resolve_file {
  local FILE=$1
  if [ -n "$FILE" ]; then
    local DIR=$(dirname $FILE)
    DIR=$(cd $DIR && pwd)
    FILE="${DIR}/$(basename $FILE)"
  fi
  echo $FILE
}
# Create a smaller build context for docker in dev builds to make the build faster. Docker
# uploads all of the current directory to the daemon, and it can get pretty big with dev
# builds that contain test log files and other artifacts.
#
# Three build contexts are created, one for each image: base, pyspark, and sparkr. For them
# to have the desired effect, the docker command needs to be executed inside the appropriate
# context directory.
#
# Note: docker does not support symlinks in the build context.
function create_dev_build_context {(
  set -e
  local BASE_CTX="$CTX_DIR/base"
  mkdir -p "$BASE_CTX/kubernetes"
  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
    "$BASE_CTX/kubernetes/dockerfiles"

  cp -r "assembly/target/scala-$SPARK_SCALA_VERSION/jars" "$BASE_CTX/jars"
  cp -r "resource-managers/kubernetes/integration-tests/tests" \
    "$BASE_CTX/kubernetes/tests"

  mkdir "$BASE_CTX/examples"
  cp -r "examples/src" "$BASE_CTX/examples/src"
  # Copy just needed examples jars instead of everything.
  mkdir "$BASE_CTX/examples/jars"
  for i in examples/target/scala-$SPARK_SCALA_VERSION/jars/*; do
    if [ ! -f "$BASE_CTX/jars/$(basename $i)" ]; then
      cp $i "$BASE_CTX/examples/jars"
    fi
  done

  for other in bin sbin data; do
    cp -r "$other" "$BASE_CTX/$other"
  done

  local PYSPARK_CTX="$CTX_DIR/pyspark"
  mkdir -p "$PYSPARK_CTX/kubernetes"
  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
    "$PYSPARK_CTX/kubernetes/dockerfiles"
  mkdir "$PYSPARK_CTX/python"
  cp -r "python/lib" "$PYSPARK_CTX/python/lib"
  cp -r "python/pyspark" "$PYSPARK_CTX/python/pyspark"

  local R_CTX="$CTX_DIR/sparkr"
  mkdir -p "$R_CTX/kubernetes"
  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
    "$R_CTX/kubernetes/dockerfiles"
  cp -r "R" "$R_CTX/R"
)}
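# Echoes the directory the docker build should run from: the per-image dev context for
# dev builds, otherwise the Spark distribution root.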
function img_ctx_dir {
  if is_dev_build; then
    echo "$CTX_DIR/$1"
  else
    echo "$SPARK_HOME"
  fi
}
function build {
  local SPARK_ROOT="$SPARK_HOME"

  if is_dev_build; then
    create_dev_build_context || error "Failed to create docker build context."
    SPARK_ROOT="$CTX_DIR/base"
  fi

  # Verify that the Docker image content directory is present
  if [ ! -d "$SPARK_ROOT/kubernetes/dockerfiles" ]; then
    error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
  fi

  # Verify that Spark has actually been built/is a runnable distribution,
  # i.e. the Spark JARs that the Docker files will place into the image are present.
  local TOTAL_JARS=$(ls $SPARK_ROOT/jars/spark-* | wc -l)
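  # Arithmetic expansion normalizes the count, stripping any whitespace printed by wc.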
  TOTAL_JARS=$(( $TOTAL_JARS ))
  if [ "${TOTAL_JARS}" -eq 0 ]; then
    error "Cannot find Spark JARs. This script assumes that Apache Spark has first been built locally or this is a runnable distribution."
  fi

  local BUILD_ARGS=(${BUILD_PARAMS})

  # If a custom SPARK_UID was set, add it to the build arguments.
  if [ -n "$SPARK_UID" ]; then
    BUILD_ARGS+=(--build-arg spark_uid=$SPARK_UID)
  fi

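  # The PySpark and SparkR images are built on top of the JVM image, so pass its
  # reference to those builds as the base_img build argument.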
  local BINDING_BUILD_ARGS=(
    ${BUILD_ARGS[@]}
    --build-arg
    base_img=$(image_ref spark)
  )

  local BASEDOCKERFILE=${BASEDOCKERFILE:-"kubernetes/dockerfiles/spark/Dockerfile"}
  local PYDOCKERFILE=${PYDOCKERFILE:-false}
  local RDOCKERFILE=${RDOCKERFILE:-false}

  (cd $(img_ctx_dir base) && docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
    -t $(image_ref spark) \
    -f "$BASEDOCKERFILE" .)
  if [ $? -ne 0 ]; then
    error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
  fi

  if [ "${PYDOCKERFILE}" != "false" ]; then
    (cd $(img_ctx_dir pyspark) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
      -t $(image_ref spark-py) \
      -f "$PYDOCKERFILE" .)
    if [ $? -ne 0 ]; then
      error "Failed to build PySpark Docker image, please refer to Docker build output for details."
    fi
  fi

  if [ "${RDOCKERFILE}" != "false" ]; then
    (cd $(img_ctx_dir sparkr) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
      -t $(image_ref spark-r) \
      -f "$RDOCKERFILE" .)
    if [ $? -ne 0 ]; then
      error "Failed to build SparkR Docker image, please refer to Docker build output for details."
    fi
  fi
}
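# Pushes every image this script can build; docker_push skips any image that was not
# built locally (for example when -p or -R were not used).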
function push {
  docker_push "spark"
  docker_push "spark-py"
  docker_push "spark-r"
}
function usage {
  cat <<EOF
Usage: $0 [options] [command]
Builds or pushes the built-in Spark Docker image.

Commands:
  build       Build image. Requires a repository address to be provided if the image will be
              pushed to a different registry.
  push        Push a pre-built image to a registry. Requires a repository address to be provided.

Options:
  -f file     Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
  -p file     (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
              Skips building PySpark docker image if not specified.
  -R file     (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
              Skips building SparkR docker image if not specified.
  -r repo     Repository address.
  -t tag      Tag to apply to the built image, or to identify the image to be pushed.
  -m          Use minikube's Docker daemon.
  -n          Build docker image with --no-cache
  -u uid      UID to use in the USER directive to set the user the main Spark process runs as inside the
              resulting container
  -b arg      Build arg to build or push the image. For multiple build args, this option needs to
              be used separately for each build arg.

Using minikube when building images will do so directly into minikube's Docker daemon.
There is no need to push the images into minikube in that case, they'll be automatically
available when running applications inside the minikube cluster.

Check the following documentation for more information on using the minikube Docker daemon:

  https://kubernetes.io/docs/getting-started-guides/minikube/#reusing-the-docker-daemon

Examples:
  - Build image in minikube with tag "testing"
    $0 -m -t testing build

  - Build PySpark docker image
    $0 -r docker.io/myrepo -t v2.3.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build

  - Build and push image with tag "v2.3.0" to docker.io/myrepo
    $0 -r docker.io/myrepo -t v2.3.0 build
    $0 -r docker.io/myrepo -t v2.3.0 push
EOF
}
if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
|
|
|
|
usage
|
|
|
|
exit 0
|
|
|
|
fi
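# Option defaults; the getopts loop below overrides them from the command line.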
REPO=
TAG=
BASEDOCKERFILE=
PYDOCKERFILE=
RDOCKERFILE=
NOCACHEARG=
BUILD_PARAMS=
SPARK_UID=
while getopts f:p:R:mr:t:nb:u: option
do
  case "${option}"
  in
    f) BASEDOCKERFILE=$(resolve_file ${OPTARG});;
    p) PYDOCKERFILE=$(resolve_file ${OPTARG});;
    R) RDOCKERFILE=$(resolve_file ${OPTARG});;
    r) REPO=${OPTARG};;
    t) TAG=${OPTARG};;
    n) NOCACHEARG="--no-cache";;
    b) BUILD_PARAMS=${BUILD_PARAMS}" --build-arg "${OPTARG};;
    m)
      if ! which minikube 1>/dev/null; then
        error "Cannot find minikube."
      fi
      if ! minikube status 1>/dev/null; then
        error "Cannot contact minikube. Make sure it's running."
      fi
      eval $(minikube docker-env)
      ;;
    u) SPARK_UID=${OPTARG};;
  esac
done
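# The last command line argument selects the command; anything else prints usage.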
case "${@: -1}" in
|
|
|
|
build)
|
|
|
|
build
|
|
|
|
;;
|
|
|
|
push)
|
|
|
|
if [ -z "$REPO" ]; then
|
|
|
|
usage
|
|
|
|
exit 1
|
|
|
|
fi
|
|
|
|
push
|
|
|
|
;;
|
|
|
|
*)
|
2017-12-21 20:21:11 -05:00
|
|
|
usage
|
2018-01-04 19:34:56 -05:00
|
|
|
exit 1
|
|
|
|
;;
|
|
|
|
esac
|