a2927cb28b
### What changes were proposed in this pull request? Currently, the benchmark workflow fails at `git diff --name-only` when new benchmarks are added, see https://github.com/HyukjinKwon/spark/actions/runs/808870999 We should include untracked files (new benchmark result files) in the uploaded archive so that developers can download the results. ### Why are the changes needed? So the new benchmark results can be added and uploaded. ### Does this PR introduce _any_ user-facing change? No, dev-only ### How was this patch tested? Tested at: https://github.com/HyukjinKwon/spark/actions/runs/808867285 Closes #32428 from HyukjinKwon/include-new-benchmarks. Authored-by: HyukjinKwon <gurwls223@apache.org> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
101 lines
3.9 KiB
YAML
101 lines
3.9 KiB
YAML
# Manually triggered workflow that runs the Spark benchmarks.
name: Run benchmarks

# Only started by hand (workflow_dispatch); every input is required and
# carries a default value.
on:
  workflow_dispatch:
    inputs:
      # Handed to the benchmark launcher as the benchmark class to run.
      class:
        description: 'Benchmark class'
        required: true
        default: '*'
      # JDK version installed before the benchmarks are run.
      jdk:
        description: 'JDK version: 8 or 11'
        required: true
        default: '8'
      # Exported to the benchmark job as SPARK_BENCHMARK_FAILFAST.
      failfast:
        description: 'Failfast: true or false'
        required: true
        default: 'true'
      # Number of parallel jobs the benchmark run is split into.
      num-splits:
        description: 'Number of job splits'
        required: true
        default: '1'
|
|
|
|
jobs:
  # Turns the requested number of splits into a JSON array ("[1,2,...,N]")
  # that the benchmark job consumes as its build matrix.
  matrix-gen:
    name: Generate matrix for job splits
    runs-on: ubuntu-20.04
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    env:
      SPARK_BENCHMARK_NUM_SPLITS: ${{ github.event.inputs.num-splits }}
    steps:
      - name: Generate matrix
        id: set-matrix
        # The output is written through $GITHUB_OUTPUT because the
        # `::set-output` workflow command is deprecated.
        run: |
          echo "matrix=[$(seq -s, 1 "$SPARK_BENCHMARK_NUM_SPLITS")]" >> "$GITHUB_OUTPUT"

  # Runs one split of the benchmarks per matrix entry and uploads the changed
  # and newly created files as a tar artifact.
  benchmark:
    name: "Run benchmarks: ${{ github.event.inputs.class }} (JDK ${{ github.event.inputs.jdk }}, ${{ matrix.split }} out of ${{ github.event.inputs.num-splits }} splits)"
    needs: matrix-gen
    # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        split: ${{fromJSON(needs.matrix-gen.outputs.matrix)}}
    env:
      SPARK_BENCHMARK_FAILFAST: ${{ github.event.inputs.failfast }}
      SPARK_BENCHMARK_NUM_SPLITS: ${{ github.event.inputs.num-splits }}
      SPARK_BENCHMARK_CUR_SPLIT: ${{ matrix.split }}
      SPARK_GENERATE_BENCHMARK_FILES: 1
      SPARK_LOCAL_IP: localhost
    steps:
      - name: Checkout Spark repository
        uses: actions/checkout@v2
        # In order to get diff files
        with:
          fetch-depth: 0
      - name: Cache Scala, SBT and Maven
        uses: actions/cache@v2
        with:
          path: |
            build/apache-maven-*
            build/scala-*
            build/*.jar
            ~/.sbt
          key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
          restore-keys: |
            build-
      - name: Cache Coursier local repository
        uses: actions/cache@v2
        with:
          path: ~/.cache/coursier
          key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
          restore-keys: |
            benchmark-coursier-${{ github.event.inputs.jdk }}
      - name: Install Java ${{ github.event.inputs.jdk }}
        uses: actions/setup-java@v1
        with:
          java-version: ${{ github.event.inputs.jdk }}
      - name: Run benchmarks
        # User-supplied inputs reach the script through environment variables
        # instead of being expanded into the script text, so their content can
        # never be interpreted as shell syntax.
        env:
          BENCHMARK_CLASS: ${{ github.event.inputs.class }}
          BENCHMARK_JDK: ${{ github.event.inputs.jdk }}
        run: |
          ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl test:package
          # Make less noisy
          cp conf/log4j.properties.template conf/log4j.properties
          sed -i 's/log4j.rootCategory=INFO, console/log4j.rootCategory=WARN, console/g' conf/log4j.properties
          # In benchmark, we use local as master so set driver memory only. Note that GitHub Actions has 7 GB memory limit.
          bin/spark-submit \
            --driver-memory 6g --class org.apache.spark.benchmark.Benchmarks \
            --jars "$(find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -)" \
            "$(find . -name 'spark-core*-SNAPSHOT-tests.jar')" \
            "$BENCHMARK_CLASS"
          # To keep the directory structure and file permissions, tar them
          # See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
          echo "Preparing the benchmark results:"
          # Collect modified tracked files plus new untracked files as a
          # NUL-delimited list kept outside the work tree. Reading the list with
          # --files-from avoids word splitting on file names and lets tar
          # succeed (with an empty archive) when no file changed.
          result_files="$(mktemp)"
          git diff --name-only -z > "$result_files"
          git ls-files --others --exclude-standard -z >> "$result_files"
          tar -cvf "benchmark-results-${BENCHMARK_JDK}.tar" --null --files-from="$result_files"
      - name: Upload benchmark results
        uses: actions/upload-artifact@v2
        with:
          name: benchmark-results-${{ github.event.inputs.jdk }}-${{ matrix.split }}
          path: benchmark-results-${{ github.event.inputs.jdk }}.tar
|
|
|