diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 2011104a19..4657404ace 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -342,8 +342,10 @@ jobs: python3.6 -m pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc apt-get update -y apt-get install -y ruby ruby-dev - gem install jekyll jekyll-redirect-from rouge Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" + gem install bundler + cd docs + bundle install - name: Scala linter run: ./dev/lint-scala - name: Java linter @@ -361,7 +363,7 @@ jobs: cd docs export LC_ALL=C.UTF-8 export LANG=C.UTF-8 - jekyll build + bundle exec jekyll build java-11: name: Java 11 build with Maven diff --git a/.gitignore b/.gitignore index 9c145fba1b..917eac1e6c 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,7 @@ dev/pr-deps/ dist/ docs/_site/ docs/api +docs/.local_ruby_bundle sql/docs sql/site lib_managed/ diff --git a/dev/create-release/do-release-docker.sh b/dev/create-release/do-release-docker.sh index 8f53f4a4e1..19a5345e65 100755 --- a/dev/create-release/do-release-docker.sh +++ b/dev/create-release/do-release-docker.sh @@ -91,6 +91,11 @@ for f in "$SELF"/*; do fi done +# Add the fallback version of Gemfile, Gemfile.lock and .bundle/config to the local directory. 
+cp "$SELF/../../docs/Gemfile" "$WORKDIR" +cp "$SELF/../../docs/Gemfile.lock" "$WORKDIR" +cp -r "$SELF/../../docs/.bundle" "$WORKDIR" + GPG_KEY_FILE="$WORKDIR/gpg.key" fcreate_secure "$GPG_KEY_FILE" $GPG --export-secret-key --armor --pinentry-mode loopback --passphrase "$GPG_PASSPHRASE" "$GPG_KEY" > "$GPG_KEY_FILE" diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index f657c0fb16..a39ea6e82b 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -333,7 +333,13 @@ if [[ "$1" == "docs" ]]; then echo "Building Spark docs" cd docs # TODO: Make configurable to add this: PRODUCTION=1 - PRODUCTION=1 RELEASE_VERSION="$SPARK_VERSION" jekyll build + if [ ! -f "Gemfile" ]; then + cp "$SELF/Gemfile" . + cp "$SELF/Gemfile.lock" . + cp -r "$SELF/.bundle" . + fi + bundle install + PRODUCTION=1 RELEASE_VERSION="$SPARK_VERSION" bundle exec jekyll build cd .. cd .. diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 8735d1fd23..2751f3a180 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -41,7 +41,7 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # See also https://github.com/sphinx-doc/sphinx/issues/7551. # We should use the latest Sphinx version once this is fixed. ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0" -ARG GEM_PKGS="jekyll:4.2.0 jekyll-redirect-from:0.16.0 rouge:3.26.0" +ARG GEM_PKGS="bundler:2.2.9" # Install extra needed repos and refresh. # - CRAN repo diff --git a/dev/lint-python b/dev/lint-python index 9de4c2816b..24923c1714 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -274,7 +274,8 @@ SPARK_ROOT_DIR="$(dirname "${SCRIPT_DIR}")" pushd "$SPARK_ROOT_DIR" &> /dev/null -PYTHON_SOURCE="$(find . -name "*.py")" +# skipping local ruby bundle directory from the search +PYTHON_SOURCE="$(find . 
-path ./docs/.local_ruby_bundle -prune -false -o -name "*.py")" compile_python_test "$PYTHON_SOURCE" pycodestyle_test "$PYTHON_SOURCE" diff --git a/dev/run-tests.py b/dev/run-tests.py index d9d1ac85d5..e54e098551 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -240,18 +240,19 @@ def run_sparkr_style_checks(): def build_spark_documentation(): set_title_and_block("Building Spark Documentation", "BLOCK_DOCUMENTATION") - os.environ["PRODUCTION"] = "1 jekyll build" + os.environ["PRODUCTION"] = "1" os.chdir(os.path.join(SPARK_HOME, "docs")) - jekyll_bin = which("jekyll") + bundle_bin = which("bundle") - if not jekyll_bin: - print("[error] Cannot find a version of `jekyll` on the system; please", - " install one and retry to build documentation.") + if not bundle_bin: + print("[error] Cannot find a version of `bundle` on the system; please", + " install one with `gem install bundler` and retry to build documentation.") sys.exit(int(os.environ.get("CURRENT_BLOCK", 255))) else: - run_cmd([jekyll_bin, "build"]) + run_cmd([bundle_bin, "install"]) + run_cmd([bundle_bin, "exec", "jekyll", "build"]) os.chdir(SPARK_HOME) @@ -754,7 +755,7 @@ def main(): run_sparkr_style_checks() # determine if docs were changed and if we're inside the amplab environment - # note - the below commented out until *all* Jenkins workers can get `jekyll` installed + # note - the below commented out until *all* Jenkins workers can get the Bundler gem installed # if "DOCS" in changed_modules and test_env == "amplab_jenkins": # build_spark_documentation() diff --git a/docs/.bundle/config b/docs/.bundle/config new file mode 100644 index 0000000000..b13821f801 --- /dev/null +++ b/docs/.bundle/config @@ -0,0 +1,2 @@ +--- +BUNDLE_PATH: ".local_ruby_bundle" diff --git a/docs/Gemfile b/docs/Gemfile new file mode 100644 index 0000000000..fa2f23d71c --- /dev/null +++ b/docs/Gemfile @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "https://rubygems.org" + +gem "jekyll", "4.2.0" +gem "rouge", "3.26.0" +gem "jekyll-redirect-from", "0.16.0" +gem "webrick", "1.7" diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock new file mode 100644 index 0000000000..54722d6351 --- /dev/null +++ b/docs/Gemfile.lock @@ -0,0 +1,73 @@ +GEM + remote: https://rubygems.org/ + specs: + addressable (2.7.0) + public_suffix (>= 2.0.2, < 5.0) + colorator (1.1.0) + concurrent-ruby (1.1.8) + em-websocket (0.5.2) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0.6.0) + eventmachine (1.2.7) + ffi (1.14.2) + forwardable-extended (2.6.0) + http_parser.rb (0.6.0) + i18n (1.8.9) + concurrent-ruby (~> 1.0) + jekyll (4.2.0) + addressable (~> 2.4) + colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (~> 1.0) + jekyll-sass-converter (~> 2.0) + jekyll-watch (~> 2.0) + kramdown (~> 2.3) + kramdown-parser-gfm (~> 1.0) + liquid (~> 4.0) + mercenary (~> 0.4.0) + pathutil (~> 0.9) + rouge (~> 3.0) + safe_yaml (~> 1.0) + terminal-table (~> 2.0) + jekyll-redirect-from (0.16.0) + jekyll (>= 3.3, < 5.0) + jekyll-sass-converter (2.1.0) + sassc (> 2.0.1, < 3.0) + jekyll-watch (2.2.1) + listen (~> 3.0) + kramdown (2.3.0) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.3) + listen (3.4.1) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 
0.9, >= 0.9.10) + mercenary (0.4.0) + pathutil (0.16.2) + forwardable-extended (~> 2.6) + public_suffix (4.0.6) + rb-fsevent (0.10.4) + rb-inotify (0.10.1) + ffi (~> 1.0) + rexml (3.2.4) + rouge (3.26.0) + safe_yaml (1.0.5) + sassc (2.4.0) + ffi (~> 1.9) + terminal-table (2.0.0) + unicode-display_width (~> 1.1, >= 1.1.1) + unicode-display_width (1.7.0) + webrick (1.7.0) + +PLATFORMS + ruby + +DEPENDENCIES + jekyll (= 4.2.0) + jekyll-redirect-from (= 0.16.0) + rouge (= 3.26.0) + webrick (= 1.7) + +BUNDLED WITH + 2.2.9 diff --git a/docs/README.md b/docs/README.md index dd3ee86efa..410a415b28 100644 --- a/docs/README.md +++ b/docs/README.md @@ -33,16 +33,17 @@ Python, R and SQL. You need to have [Ruby](https://www.ruby-lang.org/en/documentation/installation/) and [Python](https://docs.python.org/2/using/unix.html#getting-and-installing-the-latest-version-of-python) -installed. Also install the following libraries: +installed. Make sure the `bundle` command is available; if it is not, install the gem that provides it: ```sh -$ sudo gem install jekyll jekyll-redirect-from rouge +$ sudo gem install bundler ``` -If your ruby version is 3.0 or higher, you should also install `webrick`. +After this, all the required Ruby dependencies can be installed from the `docs/` directory with Bundler: ```sh -$ sudo gem install jekyll jekyll-redirect-from webrick +$ cd docs +$ bundle install ``` Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0. @@ -83,26 +84,26 @@ you have checked out or downloaded. In this directory you will find text files formatted using Markdown, with an ".md" suffix. You can read those text files directly if you want. Start with `index.md`. -Execute `jekyll build` from the `docs/` directory to compile the site. Compiling the site with +Execute `bundle exec jekyll build` from the `docs/` directory to compile the site. 
Compiling the site with Jekyll will create a directory called `_site` containing `index.html` as well as the rest of the compiled files. ```sh $ cd docs -$ jekyll build +$ bundle exec jekyll build ``` You can modify the default Jekyll build as follows: ```sh # Skip generating API docs (which takes a while) -$ SKIP_API=1 jekyll build +$ SKIP_API=1 bundle exec jekyll build # Serve content locally on port 4000 -$ jekyll serve --watch +$ bundle exec jekyll serve --watch # Build the site with extra features used on the live page -$ PRODUCTION=1 jekyll build +$ PRODUCTION=1 bundle exec jekyll build ``` ## API Docs (Scaladoc, Javadoc, Sphinx, roxygen2, MkDocs) @@ -115,7 +116,7 @@ public in `__init__.py`. The SparkR docs can be built by running `$SPARK_HOME/R/ the SQL docs can be built by running `$SPARK_HOME/sql/create-docs.sh` after [building Spark](https://github.com/apache/spark#building-spark) first. -When you run `jekyll build` in the `docs` directory, it will also copy over the scaladoc and javadoc for the various +When you run `bundle exec jekyll build` in the `docs` directory, it will also copy over the scaladoc and javadoc for the various Spark subprojects into the `docs` directory (and then also into the `_site` directory). We use a jekyll plugin to run `./build/sbt unidoc` before building the site so if you haven't run it (recently) it may take some time as it generates all of the scaladoc and javadoc using [Unidoc](https://github.com/sbt/sbt-unidoc). @@ -124,12 +125,12 @@ using [roxygen2](https://cran.r-project.org/web/packages/roxygen2/index.html) an using [MkDocs](https://www.mkdocs.org/). NOTE: To skip the step of building and copying over the Scala, Java, Python, R and SQL API docs, run `SKIP_API=1 -jekyll build`. In addition, `SKIP_SCALADOC=1`, `SKIP_PYTHONDOC=1`, `SKIP_RDOC=1` and `SKIP_SQLDOC=1` can be used +bundle exec jekyll build`. 
In addition, `SKIP_SCALADOC=1`, `SKIP_PYTHONDOC=1`, `SKIP_RDOC=1` and `SKIP_SQLDOC=1` can be used to skip a single step of the corresponding language. `SKIP_SCALADOC` indicates skipping both the Scala and Java docs. ### Automatically Rebuilding API Docs -`jekyll serve --watch` will only watch what's in `docs/`, and it won't follow symlinks. That means it won't monitor your API docs under `python/docs` or elsewhere. +`bundle exec jekyll serve --watch` will only watch what's in `docs/`, and it won't follow symlinks. That means it won't monitor your API docs under `python/docs` or elsewhere. To work around this limitation for Python, install [`entr`](http://eradman.com/entrproject/) and run the following in a separate shell: diff --git a/python/docs/source/development/contributing.rst b/python/docs/source/development/contributing.rst index a41b8a1a1d..8100bcbafb 100644 --- a/python/docs/source/development/contributing.rst +++ b/python/docs/source/development/contributing.rst @@ -53,7 +53,7 @@ under the `docs `_ directory: .. code-block:: bash - SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll serve --watch + SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 bundle exec jekyll serve --watch PySpark uses Sphinx to generate its release PySpark documentation. Therefore, if you want to build only PySpark documentation alone, you can build under `python/docs `_ directory by: