diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md
index bea8f9fbe4..d6084c7a7c 100644
--- a/R/CRAN_RELEASE.md
+++ b/R/CRAN_RELEASE.md
@@ -7,7 +7,7 @@ To release SparkR as a package to CRAN, we would use the `devtools` package. Ple
 
 First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control.
 
-Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release.
+Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it does so with `--no-manual --no-vignettes`, which skips a few vignette and PDF checks - therefore it is preferable to run `R CMD check` on the manually built source package before uploading a release. Also note that for the CRAN check of PDF vignettes to succeed, the `qpdf` tool must be installed (to install it, e.g. `yum -q -y install qpdf`).
 
 To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script, along with comments on the status of any `WARNING` (there should not be any) or `NOTE`. As a part of `check-cran.sh` and the release process, the vignettes are built - make sure `SPARK_HOME` is set and Spark jars are accessible.
 
diff --git a/R/check-cran.sh b/R/check-cran.sh
index c5f042848c..1288e7fc9f 100755
--- a/R/check-cran.sh
+++ b/R/check-cran.sh
@@ -34,8 +34,9 @@ if [ ! -z "$R_HOME" ]
   fi
   R_SCRIPT_PATH="$(dirname $(which R))"
 fi
-echo "USING R_HOME = $R_HOME"
+echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}"
 
+# Install the package (this is required for code in the vignettes to run when they are built later)
 # Build the latest docs, but not vignettes, which are built with the package next
 $FWDIR/create-docs.sh
 
@@ -82,4 +83,20 @@ else
   # This will run tests and/or build vignettes, and require SPARK_HOME
   SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
 fi
+
+# Install the source package so that it generates the vignette rds files, etc.
+if [ -n "$CLEAN_INSTALL" ]
+then
+  echo "Removing lib path and installing from source package"
+  LIB_DIR="$FWDIR/lib"
+  rm -rf $LIB_DIR
+  mkdir -p $LIB_DIR
+  "$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library=$LIB_DIR
+
+  # Zip the SparkR package so that it can be distributed to worker nodes on YARN
+  pushd $LIB_DIR > /dev/null
+  jar cfM "$LIB_DIR/sparkr.zip" SparkR
+  popd > /dev/null
+fi
+
 popd > /dev/null
diff --git a/R/install-dev.sh b/R/install-dev.sh
index ada6303a72..0f881208bc 100755
--- a/R/install-dev.sh
+++ b/R/install-dev.sh
@@ -46,7 +46,7 @@ if [ ! -z "$R_HOME" ]
-z "$R_HOME" ] fi R_SCRIPT_PATH="$(dirname $(which R))" fi -echo "USING R_HOME = $R_HOME" +echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}" # Generate Rd files if devtools is installed "$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }' diff --git a/R/pkg/.Rbuildignore b/R/pkg/.Rbuildignore index 544d203a6d..f12f8c275a 100644 --- a/R/pkg/.Rbuildignore +++ b/R/pkg/.Rbuildignore @@ -1,5 +1,8 @@ ^.*\.Rproj$ ^\.Rproj\.user$ ^\.lintr$ +^cran-comments\.md$ +^NEWS\.md$ +^README\.Rmd$ ^src-native$ ^html$ diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 981ae12464..0cb3a80a6e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,8 +1,8 @@ Package: SparkR Type: Package -Title: R Frontend for Apache Spark Version: 2.1.0 -Date: 2016-11-06 +Title: R Frontend for Apache Spark +Description: The SparkR package provides an R Frontend for Apache Spark. Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "shivaram@cs.berkeley.edu"), person("Xiangrui", "Meng", role = "aut", @@ -10,19 +10,18 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), person("Felix", "Cheung", role = "aut", email = "felixcheung@apache.org"), person(family = "The Apache Software Foundation", role = c("aut", "cph"))) +License: Apache License (== 2.0) URL: http://www.apache.org/ http://spark.apache.org/ BugReports: http://spark.apache.org/contributing.html Depends: R (>= 3.0), methods Suggests: + knitr, + rmarkdown, testthat, e1071, - survival, - knitr, - rmarkdown -Description: The SparkR package provides an R frontend for Apache Spark. -License: Apache License (== 2.0) + survival Collate: 'schema.R' 'generics.R' diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index daee09de88..377f9429ae 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -3,7 +3,7 @@ importFrom("methods", "setGeneric", "setMethod", "setOldClass") importFrom("methods", "is", "new", "signature", "show") importFrom("stats", "gaussian", "setNames") -importFrom("utils", "download.file", "object.size", "packageVersion", "untar") +importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "untar") # Disable native libraries till we figure out how to package it # See SPARKR-7839 diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index aa42750f26..8863ee6cd7 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -150,7 +150,7 @@ if [[ "$1" == "package" ]]; then NAME=$1 FLAGS=$2 ZINC_PORT=$3 - BUILD_PIP_PACKAGE=$4 + BUILD_PACKAGE=$4 cp -r spark spark-$SPARK_VERSION-bin-$NAME cd spark-$SPARK_VERSION-bin-$NAME @@ -172,11 +172,30 @@ if [[ "$1" == "package" ]]; then MVN_HOME=`$MVN -version 2>&1 | grep 'Maven home' | awk '{print $NF}'` - if [ -z "$BUILD_PIP_PACKAGE" ]; then - echo "Creating distribution without PIP package" + if [ -z "$BUILD_PACKAGE" ]; then + echo "Creating distribution without PIP/R package" ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz $FLAGS \ -DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log cd .. + elif [[ "$BUILD_PACKAGE" == "withr" ]]; then + echo "Creating distribution with R package" + ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz --r $FLAGS \ + -DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log + cd .. 
+
+      echo "Copying and signing R source package"
+      R_DIST_NAME=SparkR_$SPARK_VERSION.tar.gz
+      cp spark-$SPARK_VERSION-bin-$NAME/R/$R_DIST_NAME .
+
+      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \
+        --output $R_DIST_NAME.asc \
+        --detach-sig $R_DIST_NAME
+      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
+        MD5 $R_DIST_NAME > \
+        $R_DIST_NAME.md5
+      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
+        SHA512 $R_DIST_NAME > \
+        $R_DIST_NAME.sha
     else
       echo "Creating distribution with PIP package"
       ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz --pip $FLAGS \
@@ -222,7 +241,7 @@ if [[ "$1" == "package" ]]; then
   make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" &
   make_binary_release "hadoop2.7" "-Phadoop-2.7 $FLAGS" "3036" "withpip" &
   make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn -Pmesos" "3037" &
-  make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" &
+  make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" "withr" &
   wait
   rm -rf spark-$SPARK_VERSION-bin-*/
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index 49b46fbc3f..fe281bbaa2 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -34,6 +34,7 @@ DISTDIR="$SPARK_HOME/dist"
 
 MAKE_TGZ=false
 MAKE_PIP=false
+MAKE_R=false
 NAME=none
 MVN="$SPARK_HOME/build/mvn"
 
@@ -41,7 +42,7 @@ function exit_with_usage {
   echo "make-distribution.sh - tool for making binary distributions of Spark"
   echo ""
   echo "usage:"
-  cl_options="[--name] [--tgz] [--pip] [--mvn <mvn-command>]"
+  cl_options="[--name] [--tgz] [--pip] [--r] [--mvn <mvn-command>]"
   echo "make-distribution.sh $cl_options <maven build options>"
   echo "See Spark's \"Building Spark\" doc for correct Maven options."
   echo ""
@@ -71,6 +72,9 @@ while (( "$#" )); do
     --pip)
       MAKE_PIP=true
       ;;
+    --r)
+      MAKE_R=true
+      ;;
     --mvn)
       MVN="$2"
       shift
@@ -208,11 +212,24 @@ cp -r "$SPARK_HOME/data" "$DISTDIR"
 # Make pip package
 if [ "$MAKE_PIP" == "true" ]; then
   echo "Building python distribution package"
-  cd $SPARK_HOME/python
+  pushd "$SPARK_HOME/python" > /dev/null
   python setup.py sdist
-  cd ..
+  popd > /dev/null
 else
-  echo "Skipping creating pip installable PySpark"
+  echo "Skipping building python distribution package"
+fi
+
+# Make R source package - used both for the CRAN release and for packing the R layout into the distribution
+if [ "$MAKE_R" == "true" ]; then
+  echo "Building R source package"
+  pushd "$SPARK_HOME/R" > /dev/null
+  # Build the source package and run full checks
+  # Install the source package so that it generates vignettes, etc.
+  # Do not source check-cran.sh - it must run from its own location in order to set SPARK_HOME
+  NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh
+  popd > /dev/null
+else
+  echo "Skipping building R source package"
 fi
 
 # Copy other things
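
Usage sketch - a minimal, hypothetical invocation of the pieces this patch adds, assuming a Spark source checkout with the usual Maven build prerequisites in place; the `--name` value and the Maven profile flags below are illustrative, not required values:

    # Build a binary distribution that also produces the SparkR source package;
    # the new --r flag routes through R/check-cran.sh with NO_TESTS=1 CLEAN_INSTALL=1 set.
    ./dev/make-distribution.sh --name custom --tgz --r -Psparkr -Pyarn -Phadoop-2.7

    # Or run the CRAN check directly from the checkout root: NO_TESTS=1 skips the
    # test run, and CLEAN_INSTALL=1 reinstalls SparkR from the built tarball into
    # R/lib and zips it as sparkr.zip for distribution to YARN worker nodes.
    NO_TESTS=1 CLEAN_INSTALL=1 ./R/check-cran.sh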