diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 881e11d5ea..009ebe90dd 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -164,7 +164,8 @@ jobs: - name: Install R packages if: contains(matrix.modules, 'sparkr') run: | - sudo apt-get install -y libcurl4-openssl-dev + # qpdf is required to reduce the size of PDFs so that the CRAN check passes. See SPARK-32497. + sudo apt-get install -y libcurl4-openssl-dev qpdf sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')" # Show installed packages in R. sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]' diff --git a/dev/run-tests.py b/dev/run-tests.py index 5d8b7a0758..6aae3bdaef 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -610,8 +610,14 @@ def main(): # Install SparkR should_only_test_modules = opts.modules is not None - if not should_only_test_modules: + test_modules = [] + if should_only_test_modules: + str_test_modules = [m.strip() for m in opts.modules.split(",")] + test_modules = [m for m in modules.all_modules if m.name in str_test_modules] + + if not should_only_test_modules or modules.sparkr in test_modules: # If tests modules are specified, we will not run R linter. + # Testing the SparkR module still requires installing SparkR manually. 
if which("R"): run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")]) else: @@ -642,15 +648,11 @@ def main(): "and Hive profile", hive_version, "under environment", test_env) extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version) - changed_modules = None - test_modules = None - changed_files = None + changed_modules = [] + changed_files = [] included_tags = [] excluded_tags = [] if should_only_test_modules: - str_test_modules = [m.strip() for m in opts.modules.split(",")] - test_modules = [m for m in modules.all_modules if m.name in str_test_modules] - # If we're running the tests in Github Actions, attempt to detect and test # only the affected modules. if test_env == "github_actions":