#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Creates a Spark release candidate. The script will update versions, tag the branch,
# build Spark binary packages and documentation, and upload maven artifacts to a staging
# repository. There is also a dry run mode where only local builds are performed, and
# nothing is uploaded to the ASF repos.
#
# Run with "-h" for options.
#
set -e
# Resolve the directory containing this script. Quote both $0 and the command
# substitution so the script still works when its path contains spaces.
SELF=$(cd "$(dirname "$0")" && pwd)
# Shared helpers: error, run_silent, fcreate_secure, get_release_info, $GPG, ...
. "$SELF/release-util.sh"
# Print command-line help for this script to stdout.
function usage {
  # Declare and assign separately: "local NAME=$(cmd)" would mask the exit
  # status of the command substitution. Quote $0 against paths with spaces.
  local NAME
  NAME=$(basename "$0")
  cat <<EOF
Usage: $NAME [options]
This script runs the release scripts inside a docker image. The image is hardcoded to be called
"spark-rm" and will be re-generated (as needed) on every invocation of this script.
Options are:
-d [path] : required: working directory (output will be written to an "output" directory in
the working directory).
-n : dry run mode. Performs checks and local builds, but do not upload anything.
-t [tag] : tag for the spark-rm docker image to use for building (default: "latest").
-j [path] : path to local JDK installation to use for building. By default the script will
use openjdk8 installed in the docker image.
-s [step] : runs a single step of the process; valid steps are: tag, build, docs, publish
EOF
}
WORKDIR=
IMGTAG=latest
JAVA=
RELEASE_STEP=
# Leading ":" puts getopts in silent mode: missing arguments set opt to ":"
# and unknown options set it to "?", both handled explicitly below.
while getopts ":d:hj:ns:t:" opt; do
  case $opt in
    d) WORKDIR="$OPTARG" ;;
    n) DRY_RUN=1 ;;
    t) IMGTAG="$OPTARG" ;;
    j) JAVA="$OPTARG" ;;
    s) RELEASE_STEP="$OPTARG" ;;
    # Exit after printing help; otherwise -h falls through to the
    # "work directory must be defined" error below.
    h) usage; exit 0 ;;
    :) error "Option -$OPTARG requires an argument. Run with -h for help." ;;
    \?) error "Invalid option. Run with -h for help." ;;
  esac
done
# Validate the working directory and (re)create its output subdirectory.
[[ -n "$WORKDIR" && -d "$WORKDIR" ]] || \
  error "Work directory (-d) must be defined and exist. Run with -h for help."

if [[ -d "$WORKDIR/output" ]]; then
  # A previous run left results behind; confirm before wiping them.
  read -p "Output directory already exists. Overwrite and continue? [y/n] " ANSWER
  [[ "$ANSWER" == "y" ]] || error "Exiting."
fi

cd "$WORKDIR"
rm -rf "$WORKDIR/output"
mkdir "$WORKDIR/output"

# Interactively collect release parameters (defined in release-util.sh).
get_release_info
# Place all RM scripts and necessary data in a local directory that must be defined in the command
# line. This directory is mounted into the image.
for script_file in "$SELF"/*; do
  # Only regular files; subdirectories (e.g. the docker context) are skipped.
  [[ -f "$script_file" ]] || continue
  cp "$script_file" "$WORKDIR"
done
# Export the release signing key so the container can import it.
GPG_KEY_FILE="$WORKDIR/gpg.key"
# fcreate_secure comes from release-util.sh; presumably it creates the file
# with owner-only permissions before the secret key is written -- verify there.
fcreate_secure "$GPG_KEY_FILE"
# --pinentry-mode loopback (GnuPG 2.1+) allows the passphrase to be supplied
# non-interactively via --passphrase instead of a pinentry prompt.
$GPG --export-secret-key --armor --pinentry-mode loopback --passphrase "$GPG_PASSPHRASE" "$GPG_KEY" > "$GPG_KEY_FILE"
# Build (or refresh) the release image; run_silent (release-util.sh) sends the
# build output to docker-build.log. UID is passed so files created inside the
# container are owned by the invoking user.
run_silent "Building spark-rm image with tag $IMGTAG..." "docker-build.log" \
docker build -t "spark-rm:$IMGTAG" --build-arg UID=$UID "$SELF/spark-rm"
# Write the release information to a file with environment variables to be used when running the
# image.
ENVFILE="$WORKDIR/env.list"
# The env file will hold ASF and GPG credentials; fcreate_secure (release-util.sh)
# presumably creates it with restrictive permissions -- verify there.
fcreate_secure "$ENVFILE"
# Remove the files holding release credentials on any exit path.
function cleanup {
  rm -f "$ENVFILE" "$GPG_KEY_FILE"
}
trap cleanup EXIT
# Write the release configuration consumed by "docker run --env-file": one
# VAR=value per line, with the values expanded now, in this shell. (A stray
# commit-message blob had been pasted inside this heredoc, which would have
# been written verbatim into env.list and broken the --env-file parsing.)
cat > "$ENVFILE" <<EOF
DRY_RUN=$DRY_RUN
SKIP_TAG=$SKIP_TAG
RUNNING_IN_DOCKER=1
GIT_BRANCH=$GIT_BRANCH
NEXT_VERSION=$NEXT_VERSION
RELEASE_VERSION=$RELEASE_VERSION
RELEASE_TAG=$RELEASE_TAG
GIT_REF=$GIT_REF
SPARK_PACKAGE_VERSION=$SPARK_PACKAGE_VERSION
ASF_USERNAME=$ASF_USERNAME
GIT_NAME=$GIT_NAME
GIT_EMAIL=$GIT_EMAIL
GPG_KEY=$GPG_KEY
ASF_PASSWORD=$ASF_PASSWORD
GPG_PASSPHRASE=$GPG_PASSPHRASE
RELEASE_STEP=$RELEASE_STEP
USER=$USER
EOF
# Optionally mount a local JDK into the container and point JAVA_HOME at it;
# otherwise the image's bundled openjdk is used. Use an array rather than an
# unquoted scalar so a JDK path containing spaces survives word-splitting,
# while an empty array expands to zero arguments (a quoted empty scalar would
# pass a spurious "" argument to docker).
JAVA_VOL=()
if [ -n "$JAVA" ]; then
  echo "JAVA_HOME=/opt/spark-java" >> "$ENVFILE"
  JAVA_VOL=(--volume "$JAVA:/opt/spark-java")
fi

echo "Building $RELEASE_TAG; output will be at $WORKDIR/output"
# Run the release inside the image; the work dir is mounted so all output
# (and the scripts copied earlier) are visible on the host.
docker run -ti \
  --env-file "$ENVFILE" \
  --volume "$WORKDIR:/opt/spark-rm" \
  "${JAVA_VOL[@]}" \
  "spark-rm:$IMGTAG"