diff --git a/.gitignore b/.gitignore index 60a12e3d7b..198fdee39b 100644 --- a/.gitignore +++ b/.gitignore @@ -18,8 +18,6 @@ .idea_modules/ .project .pydevproject -.python-version -.ruby-version .scala_dependencies .settings /lib/ diff --git a/dev/create-release/do-release-docker.sh b/dev/create-release/do-release-docker.sh index cda21ebdcd..694a87bf78 100755 --- a/dev/create-release/do-release-docker.sh +++ b/dev/create-release/do-release-docker.sh @@ -96,7 +96,7 @@ fcreate_secure "$GPG_KEY_FILE" $GPG --export-secret-key --armor "$GPG_KEY" > "$GPG_KEY_FILE" run_silent "Building spark-rm image with tag $IMGTAG..." "docker-build.log" \ - docker build --no-cache -t "spark-rm:$IMGTAG" --build-arg UID=$UID "$SELF/spark-rm" + docker build -t "spark-rm:$IMGTAG" --build-arg UID=$UID "$SELF/spark-rm" # Write the release information to a file with environment variables to be used when running the # image. diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index d310aaf988..63451687ee 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -20,9 +20,9 @@ # Includes: # * Java 8 # * Ivy -# * Python 3.7 -# * Ruby 2.7 +# * Python (2.7.15/3.6.7) # * R-base/R-base-dev (3.6.1) +# * Ruby 2.3 build utilities FROM ubuntu:18.04 @@ -33,11 +33,15 @@ ENV DEBCONF_NONINTERACTIVE_SEEN true # These arguments are just for reuse and not really meant to be customized. ARG APT_INSTALL="apt-get install --no-install-recommends -y" -ARG PIP_PKGS="sphinx==2.3.1 mkdocs==1.0.4 numpy==1.18.1" -ARG GEM_PKGS="jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0" +ARG BASE_PIP_PKGS="setuptools wheel" +ARG PIP_PKGS="pyopenssl numpy sphinx" # Install extra needed repos and refresh. 
# - CRAN repo +# - Ruby repo (for doc generation) +# +# This is all in a single "RUN" command so that if anything changes, "apt update" is run to fetch +# the most current package versions (instead of potentially using old versions cached by docker). RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' >> /etc/apt/sources.list && \ gpg --keyserver keyserver.ubuntu.com --recv-key E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ @@ -46,43 +50,36 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ rm -rf /var/lib/apt/lists/* && \ apt-get clean && \ apt-get update && \ + $APT_INSTALL software-properties-common && \ + apt-add-repository -y ppa:brightbox/ruby-ng && \ + apt-get update && \ # Install openjdk 8. $APT_INSTALL openjdk-8-jdk && \ update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java && \ # Install build / source control tools $APT_INSTALL curl wget git maven ivy subversion make gcc lsof libffi-dev \ - pandoc pandoc-citeproc libssl-dev libcurl4-openssl-dev libxml2-dev - -ENV PATH "$PATH:/root/.pyenv/bin:/root/.pyenv/shims" -RUN curl -L https://github.com/pyenv/pyenv-installer/raw/dd3f7d0914c5b4a416ca71ffabdf2954f2021596/bin/pyenv-installer | bash -RUN $APT_INSTALL libbz2-dev libreadline-dev libsqlite3-dev -RUN pyenv install 3.7.6 -RUN pyenv global 3.7.6 -RUN python --version -RUN pip install --upgrade pip -RUN pip --version -RUN pip install $PIP_PKGS - -ENV PATH "$PATH:/root/.rbenv/bin:/root/.rbenv/shims" -RUN curl -fsSL https://github.com/rbenv/rbenv-installer/raw/108c12307621a0aa06f19799641848dde1987deb/bin/rbenv-installer | bash -RUN rbenv install 2.7.0 -RUN rbenv global 2.7.0 -RUN ruby --version -RUN $APT_INSTALL g++ -RUN gem --version -RUN gem install --no-document $GEM_PKGS - -RUN \ + pandoc pandoc-citeproc libssl-dev libcurl4-openssl-dev libxml2-dev && \ curl -sL https://deb.nodesource.com/setup_11.x | bash 
&& \ - $APT_INSTALL nodejs - -# Install R packages and dependencies used when building. -# R depends on pandoc*, libssl (which are installed above). -RUN \ + $APT_INSTALL nodejs && \ + # Install needed python packages. Use pip for installing packages (for consistency). + $APT_INSTALL libpython3-dev python3-pip && \ + # Change default python version to python3. + update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ + update-alternatives --set python /usr/bin/python3.6 && \ + pip3 install $BASE_PIP_PKGS && \ + pip3 install $PIP_PKGS && \ + # Install R packages and dependencies used when building. + # R depends on pandoc*, libssl (which are installed above). $APT_INSTALL r-base r-base-dev && \ $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf && \ Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ - Rscript -e "devtools::install_github('jimhester/lintr')" + Rscript -e "devtools::install_github('jimhester/lintr')" && \ + # Install tools needed to build the documentation. + $APT_INSTALL ruby2.3 ruby2.3-dev mkdocs && \ + gem install jekyll --no-rdoc --no-ri -v 3.8.6 && \ + gem install jekyll-redirect-from -v 0.15.0 && \ + gem install rouge WORKDIR /opt/spark-rm/output diff --git a/docs/README.md b/docs/README.md index c16f67c2c8..22039871cf 100644 --- a/docs/README.md +++ b/docs/README.md @@ -31,49 +31,19 @@ whichever version of Spark you currently have checked out of revision control. The Spark documentation build uses a number of tools to build HTML docs and API docs in Scala, Java, Python, R and SQL. -You need to have Ruby 2 (preferably Ruby 2.6+) and Python 3 (preferably Python 3.7+) installed. 
- -You'll also need to install the following libraries: +You need to have [Ruby](https://www.ruby-lang.org/en/documentation/installation/) and +[Python](https://docs.python.org/2/using/unix.html#getting-and-installing-the-latest-version-of-python) +installed. Also install the following libraries: ```sh -gem install jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0 +$ sudo gem install jekyll jekyll-redirect-from rouge ``` -### Using rbenv and pyenv - -A handy way to install and manage various versions of Ruby and Python is with [`rbenv`] and [`pyenv`]. - -[`rbenv`]: https://github.com/rbenv/rbenv -[`pyenv`]: https://github.com/pyenv/pyenv - -On macOS you can install them with Homebrew: - -```sh -brew install rbenv pyenv -``` - -To activate them, you'll need to run these commands or add them to the end of your `.bash_profile`: - -```sh -eval "$(rbenv init -)" -eval "$(pyenv init -)" -``` - -You can now use them to install specific versions of Ruby and Python and associate them with -the Spark home directory. Whenever you navigate to this directory or any of its subdirectories, these versions of Ruby and Python will be automatically activated. - -```sh -rbenv install 2.7.0 -pyenv install 3.7.6 - -cd /path/to/spark/root -rbenv local 2.7.0 -pyenv local 3.7.6 -``` +Note: If you are on a system with both Ruby 1.9 and Ruby 2.0, you may need to replace `gem` with `gem2.0`. ### R Documentation -If you'd like to generate R documentation, you'll need to install R, [install Pandoc](https://pandoc.org/installing.html), +If you'd like to generate R documentation, you'll need to [install Pandoc](https://pandoc.org/installing.html) and install these libraries: ```sh @@ -88,7 +58,7 @@ Note: Other versions of roxygen2 might work in SparkR documentation generation b To generate API docs for any language, you'll need to install these libraries: ```sh -pip install sphinx==2.3.1 mkdocs==1.0.4 numpy==1.18.1 +$ sudo pip install sphinx mkdocs numpy ``` ## Generating the Documentation HTML