963f434ed5
### What changes were proposed in this pull request?
There is an issue when syncing to the Apache master branch, see also https://github.com/apache/spark/pull/32168:
```
From https://github.com/yaooqinn/spark
* branch SPARK-35044 -> FETCH_HEAD
fatal: Not possible to fast-forward, aborting.
Error: Process completed with exit code 128.
```
This is because we use the `--ff-only` option, which assumes that the fork is always based on the latest master branch.
We should make the sync less strict.
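For context, the old sync step required the PR branch to fast-forward from the checked-out apache/spark master. A reconstructed sketch of that failing approach (not the verbatim old step):
```bash
# Old behaviour (reconstructed): --ff-only aborts unless the PR branch
# already contains the latest apache/spark master.
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
git merge --progress --ff-only FETCH_HEAD
```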
This PR proposes to use the same command that we use when merging PRs, see `dev/merge_spark_pr.py` at c8f56eb7bb (L127).
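Concretely, the "Sync the current branch with the latest in Apache Spark" step in the workflow below now squash-merges the PR branch on top of the latest master instead of fast-forwarding:
```bash
# Fetch the PR branch from the fork and squash-merge it onto the checked-out master.
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
```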
### Why are the changes needed?
To unblock PR testing, which is currently broken.
### Does this PR introduce _any_ user-facing change?
No, dev-only.
### How was this patch tested?
Locally tested
Co-authored-by: Kent Yao <yaooqinn@hotmail.com>
Closes #32168
Closes #32182 from Yikun/SPARK-rm-fast-forward.
Lead-authored-by: Yikun Jiang <yikunkero@gmail.com>
Co-authored-by: HyukjinKwon <gurwls223@apache.org>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>

The GitHub Actions workflow file as of this commit (YAML, 546 lines, 20 KiB):
```yaml
name: Build and test

on:
  push:
    branches:
    - '**'
    - '!branch-*.*'

jobs:
  # Build: build Spark and run the tests for specified modules.
  build:
    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
    # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        java:
          - 8
        hadoop:
          - hadoop3.2
        hive:
          - hive2.3
        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
        # Kinesis tests depends on external Amazon kinesis service.
        # Note that the modules below are from sparktestsupport/modules.py.
        modules:
          - >-
            core, unsafe, kvstore, avro,
            network-common, network-shuffle, repl, launcher,
            examples, sketch, graphx
          - >-
            catalyst, hive-thriftserver
          - >-
            streaming, sql-kafka-0-10, streaming-kafka-0-10,
            mllib-local, mllib,
            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
        # Here, we split Hive and SQL tests into some of slow ones and the rest of them.
        included-tags: [""]
        excluded-tags: [""]
        comment: [""]
        include:
          # Hive tests
          - modules: hive
            java: 8
            hadoop: hadoop3.2
            hive: hive2.3
            included-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- slow tests"
          - modules: hive
            java: 8
            hadoop: hadoop3.2
            hive: hive2.3
            excluded-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- other tests"
          # SQL tests
          - modules: sql
            java: 8
            hadoop: hadoop3.2
            hive: hive2.3
            included-tags: org.apache.spark.tags.ExtendedSQLTest
            comment: "- slow tests"
          - modules: sql
            java: 8
            hadoop: hadoop3.2
            hive: hive2.3
            excluded-tags: org.apache.spark.tags.ExtendedSQLTest
            comment: "- other tests"
    env:
      MODULES_TO_TEST: ${{ matrix.modules }}
      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
      INCLUDED_TAGS: ${{ matrix.included-tags }}
      HADOOP_PROFILE: ${{ matrix.hadoop }}
      HIVE_PROFILE: ${{ matrix.hive }}
      # GitHub Actions' default miniconda to use in pip packaging test.
      CONDA_PREFIX: /usr/share/miniconda
      GITHUB_PREV_SHA: ${{ github.event.before }}
      SPARK_LOCAL_IP: localhost
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
      # In order to fetch changed files
      with:
        fetch-depth: 0
        repository: apache/spark
        ref: master
    - name: Sync the current branch with the latest in Apache Spark
      if: github.repository != 'apache/spark'
      id: sync-branch
      run: |
        apache_spark_ref=`git rev-parse HEAD`
        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-
    - name: Install Java ${{ matrix.java }}
      uses: actions/setup-java@v1
      with:
        java-version: ${{ matrix.java }}
    - name: Install Python 3.8
      uses: actions/setup-python@v2
      # We should install one Python that is higher then 3+ for SQL and Yarn because:
      # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
      # - Yarn has a Python specific test too, for example, YarnClusterSuite.
      if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      with:
        python-version: 3.8
        architecture: x64
    - name: Install Python packages (Python 3.8)
      if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      run: |
        python3.8 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
        python3.8 -m pip list
    # Run the tests.
    - name: Run tests
      run: |
        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
        # Hive and SQL tests become flaky when running in parallel as it's too intensive.
        if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
        path: "**/target/test-reports/*.xml"
    - name: Upload unit tests log files
      if: failure()
      uses: actions/upload-artifact@v2
      with:
        name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
        path: "**/target/unit-tests.log"

  pyspark:
    name: "Build modules: ${{ matrix.modules }}"
    runs-on: ubuntu-20.04
    container:
      image: dongjoon/apache-spark-github-action-image:20201025
    strategy:
      fail-fast: false
      matrix:
        modules:
          - >-
            pyspark-sql, pyspark-mllib, pyspark-resource
          - >-
            pyspark-core, pyspark-streaming, pyspark-ml
          - >-
            pyspark-pandas
    env:
      MODULES_TO_TEST: ${{ matrix.modules }}
      HADOOP_PROFILE: hadoop3.2
      HIVE_PROFILE: hive2.3
      # GitHub Actions' default miniconda to use in pip packaging test.
      CONDA_PREFIX: /usr/share/miniconda
      GITHUB_PREV_SHA: ${{ github.event.before }}
      SPARK_LOCAL_IP: localhost
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
      # In order to fetch changed files
      with:
        fetch-depth: 0
        repository: apache/spark
        ref: master
    - name: Sync the current branch with the latest in Apache Spark
      if: github.repository != 'apache/spark'
      id: sync-branch
      run: |
        apache_spark_ref=`git rev-parse HEAD`
        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          pyspark-coursier-
    - name: Install Python 3.6
      uses: actions/setup-python@v2
      with:
        python-version: 3.6
        architecture: x64
    # This step takes much less time (~30s) than other Python versions so it is not included
    # in the Docker image being used. There is also a technical issue to install Python 3.6 on
    # Ubuntu 20.04. See also SPARK-33162.
    - name: Install Python packages (Python 3.6)
      run: |
        python3.6 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
        python3.6 -m pip list
    # Run the tests.
    - name: Run tests
      run: |
        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-${{ matrix.modules }}--8-hadoop3.2-hive2.3
        path: "**/target/test-reports/*.xml"
    - name: Upload unit tests log files
      if: failure()
      uses: actions/upload-artifact@v2
      with:
        name: unit-tests-log-${{ matrix.modules }}--8-hadoop3.2-hive2.3
        path: "**/target/unit-tests.log"

  sparkr:
    name: "Build modules: sparkr"
    runs-on: ubuntu-20.04
    container:
      image: dongjoon/apache-spark-github-action-image:20201025
    env:
      HADOOP_PROFILE: hadoop3.2
      HIVE_PROFILE: hive2.3
      GITHUB_PREV_SHA: ${{ github.event.before }}
      SPARK_LOCAL_IP: localhost
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
      # In order to fetch changed files
      with:
        fetch-depth: 0
        repository: apache/spark
        ref: master
    - name: Sync the current branch with the latest in Apache Spark
      if: github.repository != 'apache/spark'
      id: sync-branch
      run: |
        apache_spark_ref=`git rev-parse HEAD`
        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          sparkr-coursier-
    - name: Run tests
      run: |
        # The followings are also used by `r-lib/actions/setup-r` to avoid
        # R issues at docker environment
        export TZ=UTC
        export _R_CHECK_SYSTEM_CLOCK_=FALSE
        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
        ./dev/run-tests --parallelism 2 --modules sparkr
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-sparkr--8-hadoop3.2-hive2.3
        path: "**/target/test-reports/*.xml"

  # Static analysis, and documentation build
  lint:
    name: Linters, licenses, dependencies and documentation generation
    runs-on: ubuntu-20.04
    env:
      LC_ALL: C.UTF-8
      LANG: C.UTF-8
    container:
      image: dongjoon/apache-spark-github-action-image:20201025
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          docs-coursier-
    - name: Cache Maven local repository
      uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: docs-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          docs-maven-
    - name: Install Python 3.6
      uses: actions/setup-python@v2
      with:
        python-version: 3.6
        architecture: x64
    - name: Install Python linter dependencies
      run: |
        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
        python3.6 -m pip install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy numpydoc
    - name: Install R linter dependencies and SparkR
      run: |
        apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev
        Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
        Rscript -e "devtools::install_github('jimhester/lintr@v2.0.1')"
        ./R/install-dev.sh
    - name: Install dependencies for documentation generation
      run: |
        # pandoc is required to generate PySpark APIs as well in nbsphinx.
        apt-get install -y libcurl4-openssl-dev pandoc
        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
        python3.6 -m pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc
        apt-get update -y
        apt-get install -y ruby ruby-dev
        Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
        gem install bundler
        cd docs
        bundle install
    - name: Scala linter
      run: ./dev/lint-scala
    - name: Java linter
      run: ./dev/lint-java
    - name: Python linter
      run: ./dev/lint-python
    - name: R linter
      run: ./dev/lint-r
    - name: License test
      run: ./dev/check-license
    - name: Dependencies test
      run: ./dev/test-dependencies.sh
    - name: Run documentation build
      run: |
        cd docs
        bundle exec jekyll build

  java-11:
    name: Java 11 build with Maven
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Maven local repository
      uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: java11-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          java11-maven-
    - name: Install Java 11
      uses: actions/setup-java@v1
      with:
        java-version: 11
    - name: Build with Maven
      run: |
        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
        export MAVEN_CLI_OPTS="--no-transfer-progress"
        # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
        rm -rf ~/.m2/repository/org/apache/spark

  scala-213:
    name: Scala 2.13 build with SBT
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          scala-213-coursier-
    - name: Install Java 8
      uses: actions/setup-java@v1
      with:
        java-version: 8
    - name: Build with SBT
      run: |
        ./dev/change-scala-version.sh 2.13
        ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile

  hadoop-2:
    name: Hadoop 2 build with SBT
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          hadoop-2-coursier-
    - name: Install Java 8
      uses: actions/setup-java@v1
      with:
        java-version: 8
    - name: Build with SBT
      run: |
        ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile

  tpcds-1g:
    name: Run TPC-DS queries with SF=1
    runs-on: ubuntu-20.04
    env:
      SPARK_LOCAL_IP: localhost
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache TPC-DS generated data
      id: cache-tpcds-sf-1
      uses: actions/cache@v2
      with:
        path: ./tpcds-sf-1
        key: tpcds-556111e35d400f56cb0625dc16e9063d54628320
    - name: Checkout TPC-DS (SF=1) generated data repository
      if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
      uses: actions/checkout@v2
      with:
        repository: maropu/spark-tpcds-sf-1
        ref: 556111e35d400f56cb0625dc16e9063d54628320
        path: ./tpcds-sf-1
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          tpcds-coursier-
    - name: Install Java 8
      uses: actions/setup-java@v1
      with:
        java-version: 8
    - name: Run TPC-DS queries
      run: |
        SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-tpcds--8-hadoop3.2-hive2.3
        path: "**/target/test-reports/*.xml"
    - name: Upload unit tests log files
      if: failure()
      uses: actions/upload-artifact@v2
      with:
        name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
        path: "**/target/unit-tests.log"
```