dac6f175a6
### What changes were proposed in this pull request?

After merging https://github.com/apache/spark/pull/32439, the GitHub Actions job "Java 11 build with Maven" fails flakily with:

```
Error: ## Exception when compiling 473 sources to /home/runner/work/spark/spark/sql/catalyst/target/scala-2.12/classes
java.lang.StackOverflowError
scala.reflect.internal.Trees.itransform(Trees.scala:1376)
scala.reflect.internal.Trees.itransform$(Trees.scala:1374)
scala.reflect.internal.SymbolTable.itransform(SymbolTable.scala:28)
scala.reflect.internal.SymbolTable.itransform(SymbolTable.scala:28)
scala.reflect.api.Trees$Transformer.transform(Trees.scala:2563)
scala.tools.nsc.transform.TypingTransformers$TypingTransformer.transform(TypingTransformers.scala:51)
```

We can resolve it by increasing the JVM thread stack size to 256M. The container for GitHub Actions jobs has 7G of memory, so this should be fine.

### Why are the changes needed?

Fix a flaky test failure in the Java 11 build.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

GitHub Actions test.

Closes #32521 from gengliangwang/increaseStackSize.

Authored-by: Gengliang Wang <ltnwgl@gmail.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
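The change itself is the `-Xss256m` flag in the "Build with Maven" step of the `java-11` job in the workflow below. A minimal excerpt, with an explanatory comment added (the default JVM thread stack size, typically around 1M, is too small for the deeply recursive `itransform` calls visible in the stack trace above):

```yaml
- name: Build with Maven
  run: |
    # -Xss256m raises the per-thread JVM stack size so the Scala compiler's
    # recursive tree transforms no longer overflow the stack.
    export MAVEN_OPTS="-Xss256m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
```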
561 lines · 21 KiB · YAML
name: Build and test

on:
  push:
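    # Trigger on pushes to any branch; the '!branch-*.*' pattern below excludes
    # maintenance branches such as branch-3.1.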
    branches:
    - '**'
    - '!branch-*.*'

jobs:
  # Build: build Spark and run the tests for specified modules.
  build:
    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
    # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        java:
          - 8
        hadoop:
          - hadoop3.2
        hive:
          - hive2.3
        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
        # Kinesis tests depend on the external Amazon Kinesis service.
        # Note that the modules below are from sparktestsupport/modules.py.
        modules:
          - >-
            core, unsafe, kvstore, avro,
            network-common, network-shuffle, repl, launcher,
            examples, sketch, graphx
          - >-
            catalyst, hive-thriftserver
          - >-
            streaming, sql-kafka-0-10, streaming-kafka-0-10,
            mllib-local, mllib,
            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
        # Here, we split the Hive and SQL tests into the slow ones and the rest.
        included-tags: [""]
        excluded-tags: [""]
        comment: [""]
        include:
          # Hive tests
          - modules: hive
            java: 8
            hadoop: hadoop3.2
            hive: hive2.3
            included-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- slow tests"
          - modules: hive
            java: 8
            hadoop: hadoop3.2
            hive: hive2.3
            excluded-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- other tests"
          # SQL tests
          - modules: sql
            java: 8
            hadoop: hadoop3.2
            hive: hive2.3
            included-tags: org.apache.spark.tags.ExtendedSQLTest
            comment: "- slow tests"
          - modules: sql
            java: 8
            hadoop: hadoop3.2
            hive: hive2.3
            excluded-tags: org.apache.spark.tags.ExtendedSQLTest
            comment: "- other tests"
    env:
      MODULES_TO_TEST: ${{ matrix.modules }}
      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
      INCLUDED_TAGS: ${{ matrix.included-tags }}
      HADOOP_PROFILE: ${{ matrix.hadoop }}
      HIVE_PROFILE: ${{ matrix.hive }}
      # GitHub Actions' default miniconda to use in the pip packaging test.
      CONDA_PREFIX: /usr/share/miniconda
      GITHUB_PREV_SHA: ${{ github.event.before }}
      SPARK_LOCAL_IP: localhost
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
      # In order to fetch changed files
      with:
        fetch-depth: 0
        repository: apache/spark
        ref: master
    - name: Sync the current branch with the latest in Apache Spark
      if: github.repository != 'apache/spark'
      id: sync-branch
      run: |
        apache_spark_ref=`git rev-parse HEAD`
        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
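        # If the exact key above misses, actions/cache restores the most recently
        # saved cache whose key matches this prefix; the same applies to the other
        # cache steps in this file.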
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-
    - name: Install Java ${{ matrix.java }}
      uses: actions/setup-java@v1
      with:
        java-version: ${{ matrix.java }}
    - name: Install Python 3.8
      uses: actions/setup-python@v2
      # We should install one Python that is higher than 3 for SQL and Yarn because:
      # - the SQL component also has Python-related tests, for example, IntegratedUDFTestUtils.
      # - Yarn has a Python-specific test too, for example, YarnClusterSuite.
      if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      with:
        python-version: 3.8
        architecture: x64
    - name: Install Python packages (Python 3.8)
      if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      run: |
        python3.8 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
        python3.8 -m pip list
    # Run the tests.
    - name: Run tests
      run: |
        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
        # Hive and SQL tests become flaky when run in parallel because they are too resource-intensive.
        if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
        path: "**/target/test-reports/*.xml"
    - name: Upload unit tests log files
      if: failure()
      uses: actions/upload-artifact@v2
      with:
        name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
        path: "**/target/unit-tests.log"

  pyspark:
    name: "Build modules: ${{ matrix.modules }}"
    runs-on: ubuntu-20.04
    container:
      image: dongjoon/apache-spark-github-action-image:20201025
    strategy:
      fail-fast: false
      matrix:
        modules:
          - >-
            pyspark-sql, pyspark-mllib, pyspark-resource
          - >-
            pyspark-core, pyspark-streaming, pyspark-ml
          - >-
            pyspark-pandas
    env:
      MODULES_TO_TEST: ${{ matrix.modules }}
      HADOOP_PROFILE: hadoop3.2
      HIVE_PROFILE: hive2.3
      # GitHub Actions' default miniconda to use in the pip packaging test.
      CONDA_PREFIX: /usr/share/miniconda
      GITHUB_PREV_SHA: ${{ github.event.before }}
      SPARK_LOCAL_IP: localhost
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
      # In order to fetch changed files
      with:
        fetch-depth: 0
        repository: apache/spark
        ref: master
    - name: Sync the current branch with the latest in Apache Spark
      if: github.repository != 'apache/spark'
      id: sync-branch
      run: |
        apache_spark_ref=`git rev-parse HEAD`
        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          pyspark-coursier-
    - name: Install Python 3.6
      uses: actions/setup-python@v2
      with:
        python-version: 3.6
        architecture: x64
    # The step above takes much less time (~30s) than with other Python versions, so
    # Python 3.6 is not included in the Docker image being used. There is also a
    # technical issue with installing Python 3.6 on Ubuntu 20.04. See also SPARK-33162.
    - name: Install Python packages (Python 3.6)
      run: |
        python3.6 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
        python3.6 -m pip list
    # Run the tests.
    - name: Run tests
      run: |
        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-${{ matrix.modules }}--8-hadoop3.2-hive2.3
        path: "**/target/test-reports/*.xml"
    - name: Upload unit tests log files
      if: failure()
      uses: actions/upload-artifact@v2
      with:
        name: unit-tests-log-${{ matrix.modules }}--8-hadoop3.2-hive2.3
        path: "**/target/unit-tests.log"

  sparkr:
    name: "Build modules: sparkr"
    runs-on: ubuntu-20.04
    container:
      image: dongjoon/apache-spark-github-action-image:20201025
    env:
      HADOOP_PROFILE: hadoop3.2
      HIVE_PROFILE: hive2.3
      GITHUB_PREV_SHA: ${{ github.event.before }}
      SPARK_LOCAL_IP: localhost
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
      # In order to fetch changed files
      with:
        fetch-depth: 0
        repository: apache/spark
        ref: master
    - name: Sync the current branch with the latest in Apache Spark
      if: github.repository != 'apache/spark'
      id: sync-branch
      run: |
        apache_spark_ref=`git rev-parse HEAD`
        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          sparkr-coursier-
    - name: Run tests
      run: |
        # The following environment variables are also used by `r-lib/actions/setup-r`
        # to avoid R issues in the Docker environment.
        export TZ=UTC
        export _R_CHECK_SYSTEM_CLOCK_=FALSE
        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
        ./dev/run-tests --parallelism 2 --modules sparkr
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-sparkr--8-hadoop3.2-hive2.3
        path: "**/target/test-reports/*.xml"

  # Static analysis, and documentation build
  lint:
    name: Linters, licenses, dependencies and documentation generation
    runs-on: ubuntu-20.04
    env:
      LC_ALL: C.UTF-8
      LANG: C.UTF-8
    container:
      image: dongjoon/apache-spark-github-action-image:20201025
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          docs-coursier-
    - name: Cache Maven local repository
      uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: docs-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          docs-maven-
    - name: Install Python 3.6
      uses: actions/setup-python@v2
      with:
        python-version: 3.6
        architecture: x64
    - name: Install Python linter dependencies
      run: |
        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
        # Jinja2 3.0.0+ causes an error when building with Sphinx.
        # See also https://issues.apache.org/jira/browse/SPARK-35375.
        python3.6 -m pip install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy numpydoc 'jinja2<3.0.0'
    - name: Install R linter dependencies and SparkR
      run: |
        apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev
        Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
        Rscript -e "devtools::install_github('jimhester/lintr@v2.0.1')"
        ./R/install-dev.sh
    - name: Install JavaScript linter dependencies
      run: |
        apt update
        apt-get install -y nodejs npm
    - name: Install dependencies for documentation generation
      run: |
        # pandoc is required to generate PySpark APIs as well in nbsphinx.
        apt-get install -y libcurl4-openssl-dev pandoc
        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
        # Jinja2 3.0.0+ causes an error when building with Sphinx.
        # See also https://issues.apache.org/jira/browse/SPARK-35375.
        python3.6 -m pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0'
        apt-get update -y
        apt-get install -y ruby ruby-dev
        Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
        gem install bundler
        cd docs
        bundle install
    - name: Scala linter
      run: ./dev/lint-scala
    - name: Java linter
      run: ./dev/lint-java
    - name: Python linter
      run: ./dev/lint-python
    - name: R linter
      run: ./dev/lint-r
    - name: JS linter
      run: ./dev/lint-js
    - name: License test
      run: ./dev/check-license
    - name: Dependencies test
      run: ./dev/test-dependencies.sh
    - name: Run documentation build
      run: |
        cd docs
        bundle exec jekyll build

  java-11:
    name: Java 11 build with Maven
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Maven local repository
      uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: java11-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          java11-maven-
    - name: Install Java 11
      uses: actions/setup-java@v1
      with:
        java-version: 11
    - name: Build with Maven
      run: |
        export MAVEN_OPTS="-Xss256m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
        export MAVEN_CLI_OPTS="--no-transfer-progress"
        # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
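        # Remove the Spark artifacts installed above, presumably so freshly built
        # snapshots are not persisted into the cached ~/.m2 repository.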
        rm -rf ~/.m2/repository/org/apache/spark

  scala-213:
    name: Scala 2.13 build with SBT
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          scala-213-coursier-
    - name: Install Java 8
      uses: actions/setup-java@v1
      with:
        java-version: 8
    - name: Build with SBT
      run: |
        ./dev/change-scala-version.sh 2.13
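        # 'compile test:compile' builds the main and test sources only; this job
        # verifies that Spark compiles against Scala 2.13 without running tests.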
        ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile

  hadoop-2:
    name: Hadoop 2 build with SBT
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          hadoop-2-coursier-
    - name: Install Java 8
      uses: actions/setup-java@v1
      with:
        java-version: 8
    - name: Build with SBT
      run: |
        ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile

  tpcds-1g:
    name: Run TPC-DS queries with SF=1
    runs-on: ubuntu-20.04
    env:
      SPARK_LOCAL_IP: localhost
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          tpcds-coursier-
    - name: Install Java 8
      uses: actions/setup-java@v1
      with:
        java-version: 8
    - name: Cache TPC-DS generated data
      id: cache-tpcds-sf-1
      uses: actions/cache@v2
      with:
        path: ./tpcds-sf-1
        key: tpcds-${{ hashFiles('.github/workflows/build_and_test.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
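        # The key hashes this workflow file and TPCDSSchema.scala, so the cached
        # data is regenerated whenever either of them changes.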
    - name: Checkout tpcds-kit repository
      if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
      uses: actions/checkout@v2
      with:
        repository: databricks/tpcds-kit
        path: ./tpcds-kit
    - name: Build tpcds-kit
      if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
      run: cd tpcds-kit/tools && make OS=LINUX
    - name: Generate TPC-DS (SF=1) table data
      if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
      run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
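    # The suite below is pointed at the generated data via the SPARK_TPCDS_DATA
    # environment variable.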
    - name: Run TPC-DS queries
      run: |
        SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-tpcds--8-hadoop3.2-hive2.3
        path: "**/target/test-reports/*.xml"
    - name: Upload unit tests log files
      if: failure()
      uses: actions/upload-artifact@v2
      with:
        name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
        path: "**/target/unit-tests.log"