513b6f5af2
### What changes were proposed in this pull request?

SPARK-32926 added a Scala 2.13 build test to GitHub Actions, but only with Maven. As SPARK-32873 reported, some compilation errors happen only with SBT, so we need an SBT build test in GitHub Actions as well. Unfortunately, resources for GitHub Actions are not abundant, so instead of simply adding a new SBT job, this replaces the existing Maven job with an SBT job for Scala 2.13.

### Why are the changes needed?

To ensure the build passes with SBT for Scala 2.13 as well.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

GitHub Actions' job.

Closes #29958 from sarutak/add-sbt-job-for-scala-2.13.

Authored-by: Kousuke Saruta <sarutak@oss.nttdata.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
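For reference, the new `scala-213` job boils down to the two commands below. Running them locally from the repository root, with Java 11 on the `PATH`, should reproduce the CI build (a sketch of the job, not an official build recipe):

```sh
# Switch every module's POM to Scala 2.13, then compile main and test sources with SBT.
./dev/change-scala-version.sh 2.13
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver \
  -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile
```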
name: Build and test

on:
  push:
    branches:
    - master
  pull_request:
    branches:
    - master
  workflow_dispatch:
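    # Allows a run to be triggered manually against an arbitrary branch; the chosen
    # branch is merged in by the "Merge dispatched input branch" step below.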
    inputs:
      target:
        description: 'Target branch to run'
        required: true

jobs:
  # Build: build Spark and run the tests for specified modules.
  build:
    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
    # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        java:
          - 1.8
        hadoop:
          - hadoop3.2
        hive:
          - hive2.3
        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
        # Kinesis tests depend on the external Amazon Kinesis service.
        # Note that the modules below are from sparktestsupport/modules.py.
        modules:
          - >-
            core, unsafe, kvstore, avro,
            network-common, network-shuffle, repl, launcher,
            examples, sketch, graphx
          - >-
            catalyst, hive-thriftserver
          - >-
            streaming, sql-kafka-0-10, streaming-kafka-0-10,
            mllib-local, mllib,
            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
          - >-
            pyspark-sql, pyspark-mllib, pyspark-resource
          - >-
            pyspark-core, pyspark-streaming, pyspark-ml
          - >-
            sparkr
        # Here, we split the Hive and SQL tests into the slow ones and the rest.
        included-tags: [""]
        excluded-tags: [""]
        comment: [""]
        include:
          # Hive tests
          - modules: hive
            java: 1.8
            hadoop: hadoop3.2
            hive: hive2.3
            included-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- slow tests"
          - modules: hive
            java: 1.8
            hadoop: hadoop3.2
            hive: hive2.3
            excluded-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- other tests"
          # SQL tests
          - modules: sql
            java: 1.8
            hadoop: hadoop3.2
            hive: hive2.3
            included-tags: org.apache.spark.tags.ExtendedSQLTest
            comment: "- slow tests"
          - modules: sql
            java: 1.8
            hadoop: hadoop3.2
            hive: hive2.3
            excluded-tags: org.apache.spark.tags.ExtendedSQLTest
            comment: "- other tests"
    env:
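      # MODULES_TO_TEST, INCLUDED_TAGS and EXCLUDED_TAGS are passed to ./dev/run-tests
      # in the "Run tests" step below.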
      MODULES_TO_TEST: ${{ matrix.modules }}
      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
      INCLUDED_TAGS: ${{ matrix.included-tags }}
      HADOOP_PROFILE: ${{ matrix.hadoop }}
      HIVE_PROFILE: ${{ matrix.hive }}
      # GitHub Actions' default miniconda to use in pip packaging test.
      CONDA_PREFIX: /usr/share/miniconda
      GITHUB_PREV_SHA: ${{ github.event.before }}
      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
      # In order to fetch changed files
      with:
        fetch-depth: 0
    - name: Merge dispatched input branch
      if: ${{ github.event.inputs.target != '' }}
      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
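    # Each cache key embeds a hash of the relevant build files; restore-keys lets a run
    # fall back to the most recent cache whose key matches the given prefix.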
    - name: Cache Scala, SBT, Maven and Zinc
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/zinc-*
          build/scala-*
          build/*.jar
        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
        restore-keys: |
          build-
    - name: Cache Maven local repository
      uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
    - name: Cache Ivy local repository
      uses: actions/cache@v2
      with:
        path: ~/.ivy2/cache
        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
    - name: Install JDK ${{ matrix.java }}
      uses: actions/setup-java@v1
      with:
        java-version: ${{ matrix.java }}
    # PySpark
    - name: Install PyPy3
      # Note that the order of Python installations here matters because the default
      # python3 is overridden by pypy3.
      uses: actions/setup-python@v2
      if: contains(matrix.modules, 'pyspark')
      with:
        python-version: pypy3
        architecture: x64
    - name: Install Python 3.6
      uses: actions/setup-python@v2
      if: contains(matrix.modules, 'pyspark')
      with:
        python-version: 3.6
        architecture: x64
    - name: Install Python 3.8
      uses: actions/setup-python@v2
      # We should install one Python that is higher than 3 for SQL and Yarn because:
      # - The SQL component also has Python-related tests, for example, IntegratedUDFTestUtils.
      # - Yarn has a Python-specific test too, for example, YarnClusterSuite.
      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      with:
        python-version: 3.8
        architecture: x64
    - name: Install Python packages (Python 3.6 and PyPy3)
      if: contains(matrix.modules, 'pyspark')
      # PyArrow is not supported in PyPy yet, see ARROW-2651.
      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
      run: |
        python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner
        python3.6 -m pip list
        # PyPy does not have xmlrunner
        pypy3 -m pip install numpy pandas
        pypy3 -m pip list
    - name: Install Python packages (Python 3.8)
      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      run: |
        python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner
        python3.8 -m pip list
    # SparkR
    - name: Install R 4.0
      uses: r-lib/actions/setup-r@v1
      if: contains(matrix.modules, 'sparkr')
      with:
        r-version: 4.0
    - name: Install R packages
      if: contains(matrix.modules, 'sparkr')
      run: |
        # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497.
        sudo apt-get install -y libcurl4-openssl-dev qpdf
        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
        # Show installed packages in R.
        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
    # Run the tests.
    - name: Run tests
      run: |
        # Hive tests become flaky when running in parallel as it's too intensive.
        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
        mkdir -p ~/.m2
        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
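        # Clean up any Spark artifacts installed into the local Maven repo during the run
        # so they are not persisted into the Maven cache.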
        rm -rf ~/.m2/repository/org/apache/spark
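    # Test reports are uploaded on every run via 'always()'; the full unit-test logs
    # only when a job fails.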
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
        path: "**/target/test-reports/*.xml"
    - name: Upload unit tests log files
      if: failure()
      uses: actions/upload-artifact@v2
      with:
        name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
        path: "**/target/unit-tests.log"

  # Static analysis, and documentation build
  lint:
    name: Linters, licenses, dependencies and documentation generation
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Maven local repository
      uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          docs-maven-
    - name: Install JDK 1.8
      uses: actions/setup-java@v1
      with:
        java-version: 1.8
    - name: Install Python 3.6
      uses: actions/setup-python@v2
      with:
        python-version: 3.6
        architecture: x64
    - name: Install Python linter dependencies
      run: |
        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
        pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx
    - name: Install R 4.0
      uses: r-lib/actions/setup-r@v1
      with:
        r-version: 4.0
    - name: Install R linter dependencies and SparkR
      run: |
        sudo apt-get install -y libcurl4-openssl-dev
        sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
        sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
        ./R/install-dev.sh
    - name: Install Ruby 2.7 for documentation generation
      uses: actions/setup-ruby@v1
      with:
        ruby-version: 2.7
    - name: Install dependencies for documentation generation
      run: |
        # pandoc is required to generate PySpark APIs as well in nbsphinx.
        sudo apt-get install -y libcurl4-openssl-dev pandoc
        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
        pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx
        gem install jekyll jekyll-redirect-from rouge
        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
    - name: Scala linter
      run: ./dev/lint-scala
    - name: Java linter
      run: ./dev/lint-java
    - name: Python linter
      run: ./dev/lint-python
    - name: R linter
      run: ./dev/lint-r
    - name: License test
      run: ./dev/check-license
    - name: Dependencies test
      run: ./dev/test-dependencies.sh
    - name: Run documentation build
      run: |
        cd docs
        jekyll build

  java11:
    name: Java 11 build
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Maven local repository
      uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: java11-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          java11-maven-
    - name: Install Java 11
      uses: actions/setup-java@v1
      with:
        java-version: 11
    - name: Build with Maven
      run: |
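        # A full Spark build needs more heap and JIT code-cache space than the JVM defaults.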
        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
        export MAVEN_CLI_OPTS="--no-transfer-progress"
        mkdir -p ~/.m2
        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
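        # Remove Spark's own artifacts from the local Maven repo so they do not end up in the cache.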
        rm -rf ~/.m2/repository/org/apache/spark

  scala-213:
    name: Scala 2.13 build
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Ivy local repository
      uses: actions/cache@v2
      with:
        path: ~/.ivy2/cache
        key: scala-213-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          scala-213-ivy-
    - name: Install Java 11
      uses: actions/setup-java@v1
      with:
        java-version: 11
    - name: Build with SBT
      run: |
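        # dev/change-scala-version.sh rewrites the Scala version properties in every POM in place.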
        ./dev/change-scala-version.sh 2.13
        ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile