#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Creates a Spark release candidate. The script will update versions, tag the branch,
# build Spark binary packages and documentation, and upload maven artifacts to a staging
# repository. There is also a dry run mode where only local builds are performed, and
# nothing is uploaded to the ASF repos.
#
# Run with "-h" for options.
#
set -e
# Resolve the directory containing this script. Quote both $0 and the command
# substitution so the script still works when its path contains spaces.
SELF=$(cd "$(dirname "$0")" && pwd)
# Shared helpers: error, run_silent, fcreate_secure, get_release_info, $GPG, ...
. "$SELF/release-util.sh"
# Print command-line help for this script to stdout.
function usage {
  # Declare and assign separately: "local NAME=$(cmd)" would mask the exit
  # status of the command substitution. Quote $0 against paths with spaces.
  local NAME
  NAME=$(basename "$0")
  cat <<EOF
Usage: $NAME [options]
This script runs the release scripts inside a docker image. The image is hardcoded to be called
"spark-rm" and will be re-generated (as needed) on every invocation of this script.
Options are:
-d [path] : required: working directory (output will be written to an "output" directory in
the working directory).
-n : dry run mode. Performs checks and local builds, but do not upload anything.
-t [tag] : tag for the spark-rm docker image to use for building (default: "latest").
-j [path] : path to local JDK installation to use for building. By default the script will
use openjdk8 installed in the docker image.
-s [step] : runs a single step of the process; valid steps are: tag, build, docs, publish
EOF
}
WORKDIR=
IMGTAG=latest
JAVA=
RELEASE_STEP=
# Leading ":" puts getopts in silent mode: missing arguments set opt to ":"
# and unknown options set it to "?", both handled explicitly below.
while getopts ":d:hj:ns:t:" opt; do
  case $opt in
    d) WORKDIR="$OPTARG" ;;
    n) DRY_RUN=1 ;;
    t) IMGTAG="$OPTARG" ;;
    j) JAVA="$OPTARG" ;;
    s) RELEASE_STEP="$OPTARG" ;;
    # Exit after printing help; otherwise -h falls through to the
    # "work directory must be defined" error below.
    h) usage; exit 0 ;;
    :) error "Option -$OPTARG requires an argument. Run with -h for help." ;;
    \?) error "Invalid option. Run with -h for help." ;;
  esac
done
# Validate the working directory and (re)create its output subdirectory.
[[ -n "$WORKDIR" && -d "$WORKDIR" ]] || \
  error "Work directory (-d) must be defined and exist. Run with -h for help."

if [[ -d "$WORKDIR/output" ]]; then
  # A previous run left results behind; confirm before wiping them.
  read -p "Output directory already exists. Overwrite and continue? [y/n] " ANSWER
  [[ "$ANSWER" == "y" ]] || error "Exiting."
fi

cd "$WORKDIR"
rm -rf "$WORKDIR/output"
mkdir "$WORKDIR/output"

# Interactively collect release parameters (defined in release-util.sh).
get_release_info
# Place all RM scripts and necessary data in a local directory that must be defined in the command
# line. This directory is mounted into the image.
for script_file in "$SELF"/*; do
  # Only regular files; subdirectories (e.g. the docker context) are skipped.
  [[ -f "$script_file" ]] || continue
  cp "$script_file" "$WORKDIR"
done
# Export the release signing key so the container can import it.
GPG_KEY_FILE="$WORKDIR/gpg.key"
# fcreate_secure comes from release-util.sh; presumably it creates the file
# with owner-only permissions before the secret key is written -- verify there.
fcreate_secure "$GPG_KEY_FILE"
# --pinentry-mode loopback (GnuPG 2.1+) allows the passphrase to be supplied
# non-interactively via --passphrase instead of a pinentry prompt.
$GPG --export-secret-key --armor --pinentry-mode loopback --passphrase "$GPG_PASSPHRASE" "$GPG_KEY" > "$GPG_KEY_FILE"
# Build (or refresh) the release image; run_silent (release-util.sh) sends the
# build output to docker-build.log. UID is passed so files created inside the
# container are owned by the invoking user.
run_silent "Building spark-rm image with tag $IMGTAG..." "docker-build.log" \
docker build -t "spark-rm:$IMGTAG" --build-arg UID=$UID "$SELF/spark-rm"
# Write the release information to a file with environment variables to be used when running the
# image.
ENVFILE="$WORKDIR/env.list"
# The env file will hold ASF and GPG credentials; fcreate_secure (release-util.sh)
# presumably creates it with restrictive permissions -- verify there.
fcreate_secure "$ENVFILE"
# Remove the files holding release credentials on any exit path.
function cleanup {
  rm -f "$ENVFILE" "$GPG_KEY_FILE"
}
trap cleanup EXIT
# Write the release configuration consumed by "docker run --env-file": one
# VAR=value per line, with the values expanded now, in this shell. (A stray
# commit-message blob had been pasted inside this heredoc, which would have
# been written verbatim into env.list and broken the --env-file parsing.)
cat > "$ENVFILE" <<EOF
DRY_RUN=$DRY_RUN
SKIP_TAG=$SKIP_TAG
RUNNING_IN_DOCKER=1
GIT_BRANCH=$GIT_BRANCH
NEXT_VERSION=$NEXT_VERSION
RELEASE_VERSION=$RELEASE_VERSION
RELEASE_TAG=$RELEASE_TAG
GIT_REF=$GIT_REF
SPARK_PACKAGE_VERSION=$SPARK_PACKAGE_VERSION
ASF_USERNAME=$ASF_USERNAME
GIT_NAME=$GIT_NAME
GIT_EMAIL=$GIT_EMAIL
GPG_KEY=$GPG_KEY
ASF_PASSWORD=$ASF_PASSWORD
GPG_PASSPHRASE=$GPG_PASSPHRASE
RELEASE_STEP=$RELEASE_STEP
USER=$USER
EOF
# Optionally mount a local JDK into the container and point JAVA_HOME at it;
# otherwise the image's bundled openjdk is used. Use an array rather than an
# unquoted scalar so a JDK path containing spaces survives word-splitting,
# while an empty array expands to zero arguments (a quoted empty scalar would
# pass a spurious "" argument to docker).
JAVA_VOL=()
if [ -n "$JAVA" ]; then
  echo "JAVA_HOME=/opt/spark-java" >> "$ENVFILE"
  JAVA_VOL=(--volume "$JAVA:/opt/spark-java")
fi

echo "Building $RELEASE_TAG; output will be at $WORKDIR/output"
# Run the release inside the image; the work dir is mounted so all output
# (and the scripts copied earlier) are visible on the host.
docker run -ti \
  --env-file "$ENVFILE" \
  --volume "$WORKDIR:/opt/spark-rm" \
  "${JAVA_VOL[@]}" \
  "spark-rm:$IMGTAG"