From 2c94fbc71e0811f0023a7776c82ff3d69d6cfad4 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Jun 2021 10:05:27 -0700 Subject: [PATCH] initial commit for skeleton ansible for jenkins worker config ### What changes were proposed in this pull request? this is the skeleton of the ansible used to configure jenkins workers in the riselab/apache spark build system ### Why are the changes needed? they are not needed, but will help the community understand how to build systems to test multiple versions of spark, as well as propose changes that i can integrate in to the "production" riselab repo. since we're sunsetting jenkins by EOY 2021, this will potentially be useful for migrating the build system. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? ansible-lint and much wailing and gnashing of teeth. Closes #32178 from shaneknapp/initial-ansible-commit. Lead-authored-by: shane knapp Co-authored-by: shane Signed-off-by: shane knapp --- dev/.rat-excludes | 1 + dev/ansible-for-test-node/README.md | 25 +++ .../deploy-jenkins-worker.yml | 8 + .../roles/common/README.md | 4 + .../roles/common/tasks/main.yml | 4 + .../common/tasks/setup_local_userspace.yml | 8 + .../roles/common/tasks/system_packages.yml | 73 +++++++ .../roles/jenkins-worker/README.md | 15 ++ .../roles/jenkins-worker/defaults/main.yml | 30 +++ .../python_environments/base-py3-pip.txt | 3 + .../python_environments/base-py3-spec.txt | 21 ++ .../files/python_environments/py36.txt | 49 +++++ .../python_environments/spark-py2-pip.txt | 8 + .../python_environments/spark-py36-spec.txt | 61 ++++++ .../python_environments/spark-py3k-spec.txt | 42 ++++ .../files/scripts/jenkins-gitcache-cron | 7 + .../files/util_scripts/kill_zinc_nailgun.py | 60 ++++++ .../util_scripts/post_github_pr_comment.py | 81 ++++++++ .../util_scripts/session_lock_resource.py | 152 +++++++++++++++ .../jenkins-worker/files/worker-limits.conf | 5 + .../roles/jenkins-worker/tasks/cleanup.yml | 12 ++ 
.../jenkins-worker/tasks/install_anaconda.yml | 79 ++++++++ .../tasks/install_build_packages.yml | 21 ++ .../jenkins-worker/tasks/install_docker.yml | 33 ++++ .../jenkins-worker/tasks/install_minikube.yml | 16 ++ .../tasks/install_spark_build_packages.yml | 183 ++++++++++++++++++ .../tasks/jenkins_userspace.yml | 119 ++++++++++++ .../roles/jenkins-worker/tasks/main.yml | 22 +++ .../roles/jenkins-worker/vars/main.yml | 9 + dev/tox.ini | 4 +- 30 files changed, 1153 insertions(+), 2 deletions(-) create mode 100644 dev/ansible-for-test-node/README.md create mode 100644 dev/ansible-for-test-node/deploy-jenkins-worker.yml create mode 100644 dev/ansible-for-test-node/roles/common/README.md create mode 100644 dev/ansible-for-test-node/roles/common/tasks/main.yml create mode 100644 dev/ansible-for-test-node/roles/common/tasks/setup_local_userspace.yml create mode 100644 dev/ansible-for-test-node/roles/common/tasks/system_packages.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/README.md create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/defaults/main.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-pip.txt create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-spec.txt create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/py36.txt create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py2-pip.txt create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py36-spec.txt create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py3k-spec.txt create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/files/scripts/jenkins-gitcache-cron create mode 100755 dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py create mode 100755 
dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py create mode 100755 dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/session_lock_resource.py create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/files/worker-limits.conf create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/tasks/cleanup.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_anaconda.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_build_packages.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_docker.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_minikube.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_spark_build_packages.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/tasks/jenkins_userspace.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/tasks/main.yml create mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/vars/main.yml diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 6c809f4341..a35d4ce976 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -134,4 +134,5 @@ flights_tiny.txt.1 over1k over10k exported_table/* +ansible-for-test-node/* node_modules \ No newline at end of file diff --git a/dev/ansible-for-test-node/README.md b/dev/ansible-for-test-node/README.md new file mode 100644 index 0000000000..846bea514a --- /dev/null +++ b/dev/ansible-for-test-node/README.md @@ -0,0 +1,25 @@ +# jenkins-infra + +This is a rough skeleton of the ansible used to deploy RISELab/Apache Spark Jenkins build workers on Ubuntu 20LTS. + +WARNING: this will not work "directly out of the box" and will need to be tweaked to work on any ubuntu servers you might want to try this on. + +### deploy a new worker node +#### TL;DR: +all of the configs for the workers live in roles/common/... 
and roles/jenkins-worker... + +#### prereqs: +* fresh install of ubuntu 20 +* a service account w/sudo +* python 3, ansible, ansible-playbook installed locally +* add hostname(s) to the `hosts` file +* add this to your `~/.ansible.cfg`: +```[defaults] host_key_checking = False``` + +#### fire ansible cannon! +`ansible-playbook -u USERNAME deploy-jenkins-worker.yml -i HOSTS_FILE -k -b -K` + +tips: +* if you are installing more than a few workers, it's best to run the playbook on smaller (2-3) batches at a time. this way it's easier to track down errors, as ansible is very noisy. +* when you encounter an error, you should comment out any previously-run plays and tasks. this saves time when debugging, and lets you easily track where you are in the process. +* `apt-get remove PACKAGE` and `apt-get purge PACKAGE` are your friends diff --git a/dev/ansible-for-test-node/deploy-jenkins-worker.yml b/dev/ansible-for-test-node/deploy-jenkins-worker.yml new file mode 100644 index 0000000000..7ab5f54349 --- /dev/null +++ b/dev/ansible-for-test-node/deploy-jenkins-worker.yml @@ -0,0 +1,8 @@ +--- +# top-level setup for jenkins workers +- name: set up ubuntu jenkins workers + hosts: ubuntu-build-node + roles: + - common + - jenkins-worker + become: yes diff --git a/dev/ansible-for-test-node/roles/common/README.md b/dev/ansible-for-test-node/roles/common/README.md new file mode 100644 index 0000000000..bff1359cd7 --- /dev/null +++ b/dev/ansible-for-test-node/roles/common/README.md @@ -0,0 +1,4 @@ +Role Name +========= + +common -- just some common things to set up for any jenkins worker node diff --git a/dev/ansible-for-test-node/roles/common/tasks/main.yml b/dev/ansible-for-test-node/roles/common/tasks/main.yml new file mode 100644 index 0000000000..607f5f2875 --- /dev/null +++ b/dev/ansible-for-test-node/roles/common/tasks/main.yml @@ -0,0 +1,4 @@ +--- +# tasks file for common +- include: system_packages.yml +- include: setup_local_userspace.yml diff --git 
a/dev/ansible-for-test-node/roles/common/tasks/setup_local_userspace.yml b/dev/ansible-for-test-node/roles/common/tasks/setup_local_userspace.yml new file mode 100644 index 0000000000..669700a34c --- /dev/null +++ b/dev/ansible-for-test-node/roles/common/tasks/setup_local_userspace.yml @@ -0,0 +1,8 @@ +--- +# set up userspace +- name: create local groups for external contributors + group: + name: "{{ item }}" + state: present + loop: + - jenkins diff --git a/dev/ansible-for-test-node/roles/common/tasks/system_packages.yml b/dev/ansible-for-test-node/roles/common/tasks/system_packages.yml new file mode 100644 index 0000000000..e6628987a8 --- /dev/null +++ b/dev/ansible-for-test-node/roles/common/tasks/system_packages.yml @@ -0,0 +1,73 @@ +--- +# base system packages for jenkins master and workers +- name: get software for apt repository management. + apt: + pkg: + - python3-apt + - python3-pycurl + state: present + +- name: install java11 + apt: + pkg: openjdk-11-jdk + state: present + update_cache: yes + force: yes + +- name: install java8 + apt: + pkg: openjdk-8-jdk + state: present + update_cache: yes + force: yes + +- name: make easy to remember dir for java symlinks + file: + path: /usr/java + state: directory + +- name: link java8 to /usr/java/latest + file: + src: /usr/lib/jvm/java-8-openjdk-amd64/ + dest: /usr/java/latest + state: link + +- name: link java8 to /usr/java/java8 + file: + src: /usr/lib/jvm/java-8-openjdk-amd64/ + dest: /usr/java/java8 + state: link + +- name: link java11 to /usr/java/java11 + file: + src: /usr/lib/jvm/java-11-openjdk-amd64/ + dest: /usr/java/java11 + state: link + +- name: set java8 as default + alternatives: + name: "{{ item.command }}" + path: "{{ item.path }}" + loop: + - { command: 'java', path: '/usr/java/latest/bin/java' } + - { command: 'javah', path: '/usr/java/latest/bin/javah' } + - { command: 'javac', path: '/usr/java/latest/bin/javac' } + - { command: 'jar', path: '/usr/java/latest/bin/jar' } + +- name: install base 
system packages + apt: + pkg: + - apt-transport-https + - gnupg-agent + - ipmitool + - git + - ntp + - htop + - tree + - gzip + - bzip2 + - curl + - wget + - unzip + state: present + update_cache: yes diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/README.md b/dev/ansible-for-test-node/roles/jenkins-worker/README.md new file mode 100644 index 0000000000..71b0a218c7 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/README.md @@ -0,0 +1,15 @@ +Role Name +========= + +jenkins-worker -- set up the craziness of a jenkins worker to build and test Apache Spark + +Requirements +------------ + +Oh jeez. This is just a framework to help others get started. If you try and deploy this locally, you'll need a service account, auth set up, etc etc. + +Role Variables +-------------- + +vars/main.yml: git caches, and url construction for minikube downloads +defaults/main.yml: urls, versions, install targets, etc diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/defaults/main.yml b/dev/ansible-for-test-node/roles/jenkins-worker/defaults/main.yml new file mode 100644 index 0000000000..98092229ca --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/defaults/main.yml @@ -0,0 +1,30 @@ +--- +# defaults file for jenkins-worker +anaconda_installer: https://repo.continuum.io/archive/{{ anaconda_version }} +anaconda_version: Anaconda2-2019.10-Linux-x86_64.sh +anaconda_home: /home/jenkins/anaconda2 +anaconda_py3_pip_requirements: base-py3-pip.txt + +spark_py36_environment: spark-py36-spec.txt +spark_py3k_environment: spark-py3k-spec.txt +spark_py2_pip_requirements: spark-py2-pip.txt + +jenkins_home: /home/jenkins + +minikube_version: 1.18.1 +minikube_checksum: sha256:1a7960b845301107cb6a0c29001c8df310d7bce586cf88ceacfc78f22b622ba5 +minikube_mirror: https://github.com/kubernetes/minikube/releases/download +minikube_target: "minikube_{{ minikube_version }}-0_amd64.deb" + +k8s_version: 1.17.3 + +kubectl_version: v1.17.3 +kubectl_mirror: 
https://dl.k8s.io/release +kubectl_target: 'kubectl' +kubectl_install_dir: /usr/local/bin + +pypy_pip_module: get-pip.py +pypy_pip_mirror: https://bootstrap.pypa.io + +r_cran_repo: "deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/" +r_cran_repo_key: E298A3A825C0D65DFD57CBB651716619E084DAB9 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-pip.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-pip.txt new file mode 100644 index 0000000000..af3cbd04e5 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-pip.txt @@ -0,0 +1,3 @@ +awscli==1.16.46 +pytest-benchmark==3.1.1 +pytest-html==1.19.0 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-spec.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-spec.txt new file mode 100644 index 0000000000..91c988017a --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-spec.txt @@ -0,0 +1,21 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2018.03.07-0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.1-hf484d3e_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.0.2p-h14c3975_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.4-h14c3975_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-ha838bed_2.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20170329-h6b74fdf_2.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/readline-7.0-h7b6447c_5.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.8-hbc83047_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.24.0-h84994c4_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.6.6-hc3d631a_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/certifi-2018.8.24-py36_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-40.2.0-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.31.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pip-10.0.1-py36_0.tar.bz2 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/py36.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/py36.txt new file mode 100644 index 0000000000..50b72f41fe --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/py36.txt @@ -0,0 +1,49 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/ca-certificates-2020.1.1-0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/intel-openmp-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.3.0-hdf63c60_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-8.2.0-hdf63c60_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.6-h14c3975_1002.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/icu-58.2-hf484d3e_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.1-he6710b0_1.tar.bz2 
+https://conda.anaconda.org/anaconda/linux-64/openssl-1.1.1-h7b6447c_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.4-h14c3975_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/boost-cpp-1.68.0-h11c811c_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20181209-hc058e9b_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-3.6.1-hdbcaa40_1001.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/readline-7.0-h7b6447c_5.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.8-hbc83047_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.27.2-h7b6447c_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/thrift-cpp-0.12.0-h0a07b25_1002.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.6.8-h0371630_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/certifi-2019.11.28-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/coverage-4.5.2-py36h7b6447c_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/entrypoints-0.3-py36_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/mccabe-0.6.1-py36_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.16.2-py36hde5b4d6_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/pycodestyle-2.5.0-py36_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/pyflakes-2.1.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pytz-2018.9-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/six-1.12.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/noarch/xmlrunner-1.7.7-py_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl_random-1.0.2-py36hd81dba3_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-dateutil-2.8.0-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-40.8.0-py36_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/flake8-3.7.9-py36_0.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.33.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pip-19.0.3-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/arrow-cpp-0.12.1-py36h0e61e49_0.tar.bz2 +https://conda.anaconda.org/conda-forge/noarch/parquet-cpp-1.5.1-4.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/pandas-0.24.2-py36hf484d3e_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-0.12.1-py36hbbcf98d_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl_fft-1.0.10-py36ha843d7b_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.16.2-py36h7e9f1db_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.2.1-py36h7c811a0_0.tar.bz2 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py2-pip.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py2-pip.txt new file mode 100644 index 0000000000..74ec2eefd7 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py2-pip.txt @@ -0,0 +1,8 @@ +coverage==4.5.2 +dask +flake8==3.6.0 +numpy +pandas +pyarrow==0.8.0 +pycodestyle==2.4.0 +pyflakes==2.0.0 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py36-spec.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py36-spec.txt new file mode 100644 index 0000000000..029c535831 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py36-spec.txt @@ -0,0 +1,61 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda +https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2019.11.28-hecc5488_0.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/intel-openmp-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.3.0-hdf63c60_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-8.2.0-hdf63c60_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.7-he1b5a44_1000.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h516909a_2.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.15.0-h516909a_1001.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.1.5-he1b5a44_2.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-he1b5a44_1002.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/glog-0.4.0-he1b5a44_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/icu-58.2-hf484d3e_1000.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.8.3-he1b5a44_1001.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.1-he6710b0_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1d-h516909a_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/re2-2020.03.03-he1b5a44_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.1.8-he1b5a44_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/uriparser-0.9.3-he1b5a44_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.4-h14c3975_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/boost-cpp-1.68.0-h11c811c_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20181209-hc058e9b_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.10-h72c5cf5_0.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/libprotobuf-3.11.2-hd408876_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/readline-7.0-h7b6447c_5.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.8-hbc83047_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.3.7-h0b5b093_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/grpc-cpp-1.26.0-hf8bcb03_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libboost-1.71.0-h97c9712_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.27.2-h7b6447c_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/thrift-cpp-0.11.0-h02b749d_3.conda +https://repo.anaconda.com/pkgs/main/linux-64/python-3.6.8-h0371630_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/certifi-2019.11.28-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/coverage-4.5.2-py36h7b6447c_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mccabe-0.6.1-py36_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.16.2-py36hde5b4d6_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pycodestyle-2.4.0-py36_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/pyflakes-2.0.0-py36_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/pytz-2018.9-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/six-1.12.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/noarch/xmlrunner-1.7.7-py_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl_random-1.0.2-py36hd81dba3_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-dateutil-2.8.0-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-40.8.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/flake8-3.6.0-py36_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.33.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pip-19.0.3-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/pandas-0.24.2-py36hf484d3e_0.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/arrow-cpp-0.15.1-py36h7cd5009_5.conda +https://repo.anaconda.com/pkgs/main/linux-64/mkl_fft-1.0.10-py36ha843d7b_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.16.2-py36h7e9f1db_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pyarrow-0.15.1-py36h0573a6f_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.2.1-py36h7c811a0_0.tar.bz2 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py3k-spec.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py3k-spec.txt new file mode 100644 index 0000000000..3f6770b70c --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py3k-spec.txt @@ -0,0 +1,42 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2019.3.9-hecc5488_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/intel-openmp-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.3.0-hdf63c60_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-8.2.0-hdf63c60_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.6-h14c3975_1002.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/icu-58.2-hf484d3e_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.1-he6710b0_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1b-h14c3975_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.4-h14c3975_4.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/boost-cpp-1.68.0-h11c811c_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20181209-hc058e9b_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-3.6.1-hdbcaa40_1001.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/readline-7.0-h7b6447c_5.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.8-hbc83047_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.27.2-h7b6447c_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/thrift-cpp-0.12.0-h0a07b25_1002.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.6.8-h0371630_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/certifi-2019.3.9-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/coverage-4.5.2-py36h7b6447c_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.11.3-py36h3dfced4_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pytz-2018.9-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/six-1.12.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/noarch/xmlrunner-1.7.7-py_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/arrow-cpp-0.8.0-py36_4.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/mkl_fft-1.0.11-py36h14c3975_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/mkl_random-1.0.2-py36h637b7d7_2.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-dateutil-2.8.0-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-40.8.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/pandas-0.19.2-np111py36_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/parquet-cpp-1.4.0.pre-2.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.33.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pip-19.0.3-py36_0.tar.bz2 
+https://conda.anaconda.org/conda-forge/linux-64/pyarrow-0.8.0-py36_0.tar.bz2 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/scripts/jenkins-gitcache-cron b/dev/ansible-for-test-node/roles/jenkins-worker/files/scripts/jenkins-gitcache-cron new file mode 100644 index 0000000000..482c86c930 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/scripts/jenkins-gitcache-cron @@ -0,0 +1,7 @@ +# this must be run as the jenkins user! +SHELL=/bin/bash +PATH=/home/jenkins/git2/bin:/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin + +*/5 * * * * cd /home/jenkins/gitcaches/adam.reference && git remote update >/dev/null 2>&1 +*/5 * * * * cd /home/jenkins/gitcaches/alluxio.reference && git remote update >/dev/null 2>&1 +*/5 * * * * cd /home/jenkins/gitcaches/spark.reference && git remote update >/dev/null 2>&1 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py new file mode 100755 index 0000000000..40887e8977 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +"""Kill a Zinc process that is listening on a given port""" +import argparse +import os +import re +import signal +import subprocess +import sys + + +def _parse_args(): + zinc_port_var = "ZINC_PORT" + zinc_port_option = "--zinc-port" + parser = argparse.ArgumentParser() + parser.add_argument(zinc_port_option, + type=int, + default=int(os.environ.get(zinc_port_var, "0")), + help="Specify zinc port") + args = parser.parse_args() + if not args.zinc_port: + parser.error("Specify either environment variable {0} or option {1}".format( + zinc_port_var, zinc_port_option)) + return args + + +def _kill_processes_listening_on_port(port): + killed = set() + for pid in _yield_processes_listening_on_port(port): + if not pid in killed: + killed.add(pid) + 
os.kill(pid, signal.SIGTERM) + + +def _yield_processes_listening_on_port(port): + pattern = re.compile(r":{0} \(LISTEN\)".format(port)) + innocuous_errors = re.compile( + r"^\s*Output information may be incomplete.\s*$" + r"|^lsof: WARNING: can't stat\(\) (?:tracefs|nsfs|overlay|tmpfs|aufs|zfs) file system .*$" + r"|^\s*$") + lsof_process = subprocess.Popen(["lsof", "-P"], stdout=subprocess.PIPE, + stderr=subprocess.PIPE, universal_newlines=True) + stdout, stderr = lsof_process.communicate() + if lsof_process.returncode != 0: + raise OSError("Can't run lsof -P, stderr:\n{}".format(stderr)) + for line in stderr.split("\n"): + if not innocuous_errors.match(line): + sys.stderr.write(line + "\n") + for line in stdout.split("\n"): + if pattern.search(line): + yield int(line.split()[1]) + + +def _main(): + args = _parse_args() + _kill_processes_listening_on_port(args.zinc_port) + return 0 + + +if __name__ == "__main__": + sys.exit(_main()) diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py new file mode 100755 index 0000000000..68e31d4528 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +"""Utility program to post a comment to a github PR""" +import argparse +import json +import os +import sys +import urllib.parse +from urllib.error import HTTPError, URLError +from urllib.request import urlopen, Request + + +def _parse_args(): + pr_link_var = "ghprbPullLink" + pr_link_option = "--pr-link" + github_oauth_key_var = "GITHUB_OAUTH_KEY" + github_oauth_key_option = "--github-oauth-key" + parser = argparse.ArgumentParser() + parser.add_argument("-pr", pr_link_option, + default=os.environ.get(pr_link_var, ""), + help="Specify pull request link") + parser.add_argument(github_oauth_key_option, + 
default=os.environ.get(github_oauth_key_var, ""), + help="Specify github oauth key") + args = parser.parse_args() + if not args.pr_link: + parser.error("Specify either environment variable {} or option {}".format( + pr_link_var, pr_link_option)) + if not args.github_oauth_key: + parser.error("Specify either environment variable {} or option {}".format( + github_oauth_key_var, github_oauth_key_option)) + return args + + +def post_message_to_github(msg, github_oauth_key, pr_link): + print("Attempting to post to Github...") + + ghprb_pull_id = os.environ["ghprbPullId"] + api_url = os.getenv("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark") + url = api_url + "/issues/" + ghprb_pull_id + "/comments" + + posted_message = json.dumps({"body": msg}) + request = Request(url, + headers={ + "Authorization": "token {}".format(github_oauth_key), + "Content-Type": "application/json" + }, + data=posted_message.encode('utf-8')) + try: + response = urlopen(request) + + if response.getcode() == 201: + print(" > Post successful.") + else: + print_err("Surprising post response.") + print_err(" > http_code: {}".format(response.getcode())) + print_err(" > api_response: {}".format(response.read())) + print_err(" > data: {}".format(posted_message)) + except HTTPError as http_e: + print_err("Failed to post message to Github.") + print_err(" > http_code: {}".format(http_e.code)) + print_err(" > api_response: {}".format(http_e.read())) + print_err(" > data: {}".format(posted_message)) + except URLError as url_e: + print_err("Failed to post message to Github.") + print_err(" > urllib_status: {}".format(url_e.reason[1])) + print_err(" > data: {}".format(posted_message)) + + +def print_err(msg): + print(msg, file=sys.stderr) + + +def _main(): + args = _parse_args() + msg = sys.stdin.read() + post_message_to_github(msg, args.github_oauth_key, args.pr_link) + return 0 + + +if __name__ == "__main__": + sys.exit(_main()) diff --git 
a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/session_lock_resource.py b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/session_lock_resource.py new file mode 100755 index 0000000000..f5153d5161 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/session_lock_resource.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python2 +"""Utility program to hold resources for a given session. + +A session is defined as the lifetime of some process (externally specified, +or the original forking process of this process) + +This program will daemonize and return 0 if the lock succeeds. If the lock +fails, it will return a non-zero exit code. + +After the parent program (or other specified pid) exits, it will also exit, +unlocking the file +""" +import argparse +import errno +import fcntl +import os +import sys +import time + + +_LOCK_DIR = "/tmp/session_locked_resources" + + +def _parse_args(): + """Parse command line arguments""" + parser = argparse.ArgumentParser() + parser.add_argument("-t", "--timeout-secs", type=int, + help="How long to wait for lock acquisition, in seconds") + parser.add_argument("-p", "--pid", type=int, + help="PID to wait for exit (defaults to parent pid)") + parser.add_argument("resource", help="Resource to lock") + return parser.parse_args() + + +def _acquire_lock(filename, timeout_secs, message): + """Acquire a lock file, retrying once per second. + + Returns the locked file object; raises IOError once timeout_secs tries fail + """ + f = open(filename, "a+") + time_attempted = 0 + while True: + try: + fcntl.lockf(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) + # Lock acquired + f.truncate(0) + f.write(message) + f.flush() + return f + except IOError: + # Locking failed + time_attempted += 1 + if timeout_secs and time_attempted >= timeout_secs: + # Timeout exceeded + raise IOError("Can't get child lock") + time.sleep(1) + + +def _daemonize(child_body): + """Daemonize. Returns whether the child was successful.
+ + Child body is a function to call in the child. It should take one + argument, which is a function that is called with a boolean + that indicates that the child succeeded/failed in its initialization + """ + CHILD_FAIL = b'\2' # bytes literals: os.read and os.write exchange bytes + CHILD_SUCCESS = b'\0' + r_fd, w_fd = os.pipe() + if os.fork() != 0: + # We are the original script. Read success/fail from the final + # child and log an error message if needed. + child_code = os.read(r_fd, 1) # one status byte written by the daemon + return child_code == CHILD_SUCCESS + # First child + os.setsid() + if os.fork() != 0: + # Still in first child + _close_std_streams() + os._exit(0) + # Second child (daemon process) + + def _write_to_parent(success): + parent_message = CHILD_SUCCESS if success else CHILD_FAIL + os.write(w_fd, parent_message) + child_body(_write_to_parent) + os._exit(0) + + +def _close_std_streams(): + """Close all our stdin/stdout/stderr streams.""" + sys.stdin.close() + sys.stdout.close() + sys.stderr.close() + os.close(0) + os.close(1) + os.close(2) + + +def _wait_for_pid_exit(pid): + while _is_pid_running(pid): + time.sleep(1) + + +def _is_pid_running(pid): + """Return False only when os.kill reports ESRCH (no such process). + + From Stack Overflow: https://stackoverflow.com/questions/7653178 + """ + try: + os.kill(pid, 0) + except OSError as err: + if err.errno == errno.ESRCH: + return False + return True + + +def _lock_and_wait(lock_success_callback, resource, timeout_secs, + controlling_pid): + """Attempt to lock the file then wait. + + lock_success_callback is called with True if the lock was taken, else False.
+ """ + lock_filename = os.path.join(_LOCK_DIR, resource) + lock_message = ("Session lock on " + resource + ", controlling pid " + str(controlling_pid) + "\n") + try: + f = _acquire_lock(lock_filename, timeout_secs, lock_message) + except IOError: + lock_success_callback(False) + return + lock_success_callback(True) + _wait_for_pid_exit(controlling_pid) # f stays open, so the lock is held while we wait + + +def main(): + """Main program""" + args = _parse_args() + if not os.path.exists(_LOCK_DIR): + os.mkdir(_LOCK_DIR) + controlling_pid = args.pid or os.getppid() + child_body_func = lambda success_callback: _lock_and_wait( + success_callback, args.resource, args.timeout_secs, + controlling_pid) + if _daemonize(child_body_func): + return 0 + else: + print("Could not acquire lock") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/worker-limits.conf b/dev/ansible-for-test-node/roles/jenkins-worker/files/worker-limits.conf new file mode 100644 index 0000000000..1a9901a636 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/worker-limits.conf @@ -0,0 +1,5 @@ +* soft nproc 1024 +root soft nproc unlimited +jenkins soft nofile 100000 +jenkins hard nofile 200000 +jenkins soft nproc 1600000 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/cleanup.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/cleanup.yml new file mode 100644 index 0000000000..ec700ce091 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/cleanup.yml @@ -0,0 +1,12 @@ +- name: purge unneeded dependencies + apt: + purge: yes + autoremove: yes + +- name: clean up unneeded cached packages + apt: + autoclean: yes + +- name: reset apt to a useful state + apt: + update_cache: yes diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_anaconda.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_anaconda.yml new file mode 100644 index 0000000000..7a90431cc6 --- /dev/null +++
b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_anaconda.yml @@ -0,0 +1,79 @@ +--- +- name: Check for Anaconda + stat: + path: "{{ anaconda_home }}" + register: anaconda_dir + +- block: + - name: Download Anaconda + get_url: + url: "{{ anaconda_installer }}" + dest: /tmp/{{ anaconda_version }} + + - name: Install Anaconda + command: "bash /tmp/{{ anaconda_version }} -b -p {{ anaconda_home }}" + become: yes + become_user: jenkins + + - name: add anaconda bin to jenkins PATH in bashrc + lineinfile: + name: /home/jenkins/.bashrc + state: present + insertafter: EOF + line: "export PATH={{ anaconda_home }}/bin:$PATH" # same prefix the installer was given via -p + + - name: delete Anaconda download + file: + path: "/tmp/{{ anaconda_version }}" + state: absent + when: not anaconda_dir.stat.exists # install only when the anaconda_home path is missing + +- name: check for py3 env + command: "conda env list" + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + changed_when: False + check_mode: no + register: py3_check + +- block: + - name: Create Python Env + command: "{{ anaconda_home }}/bin/conda create -y --name py3 python=3.6" + become: yes + become_user: jenkins + when: "py3_check.stdout is not search('/envs/py3$', multiline=True)" # a bare 'py3' substring also matches py3k and py36 + +- name: update anaconda pip for py2.7 + command: "{{ anaconda_home }}/bin/pip install --upgrade pip" + become: yes + become_user: jenkins + tags: + - skip_ansible_lint + +- name: update anaconda pip for py3 + command: "{{ anaconda_home }}/envs/py3/bin/pip install --upgrade pip" + become: yes + become_user: jenkins + tags: + - skip_ansible_lint + +- name: copy pip requirments for py3 + copy: + src: "python_environments/{{ anaconda_py3_pip_requirements }}" + dest: "/tmp/{{ anaconda_py3_pip_requirements }}" + owner: jenkins + group: jenkins + mode: 0660 + +- name: install py3 pip packages + pip: + requirements: "/tmp/{{ anaconda_py3_pip_requirements }}" + environment: + PATH: "{{ anaconda_home }}/envs/py3/bin:{{ ansible_env.PATH }}" + become: yes + become_user: jenkins + +- name: delete py3 pip requirements file + file: +
path: "/tmp/{{ anaconda_py3_pip_requirements }}" + state: absent diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_build_packages.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_build_packages.yml new file mode 100644 index 0000000000..e55ab06549 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_build_packages.yml @@ -0,0 +1,21 @@ +--- +# set up the required system packages for a host +- name: install base build/compilation tools + apt: + pkg: + - autoconf + - maven + - gcc + - make + - cmake + - g++ + - libssl-dev + - libev-dev + - libevent-dev + - libffi-dev + - curl + - python-dev + - clang-format + - pkg-config + state: present + update_cache: yes diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_docker.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_docker.yml new file mode 100644 index 0000000000..eacc56e253 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_docker.yml @@ -0,0 +1,33 @@ +--- +# set up and install docker +- name: docker repo signing key + apt_key: + url: "https://download.docker.com/linux/ubuntu/gpg" + state: present + +- name: docker repo + apt_repository: + repo: 'deb [arch=amd64] https://download.docker.com/linux/ubuntu bionic stable' + state: present + update_cache: yes + filename: docker + +- name: install docker-ce and docker-compose + apt: + pkg: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-compose + state: present + update_cache: yes + +- name: enable docker service + service: + name: docker + enabled: yes + +- name: put jenkins in docker group + user: + name: jenkins + groups: docker diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_minikube.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_minikube.yml new file mode 100644 index 0000000000..7173df46b5 --- /dev/null +++
b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_minikube.yml @@ -0,0 +1,16 @@ +--- +# install minikube +- name: download kubectl binary + get_url: + url: '{{ kubectl_url }}' + dest: '{{ kubectl_install_dir }}/{{ kubectl_target }}' + mode: 0755 + +- name: install minikube + apt: + deb: '{{ minikube_url }}' + +- name: set k8s version + command: "/usr/bin/minikube config set kubernetes-version {{ k8s_version }}" + become: yes + become_user: jenkins diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_spark_build_packages.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_spark_build_packages.yml new file mode 100644 index 0000000000..663954fde7 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_spark_build_packages.yml @@ -0,0 +1,183 @@ +--- +# install spark build packages +- name: install pypy3 + apt: + pkg: + - pypy3 + state: present + update_cache: yes + +- name: download pip module for pypy + get_url: + url: '{{ pypy_pip_mirror }}/{{ pypy_pip_module }}' + dest: '/tmp/{{ pypy_pip_module }}' + mode: 0644 + +- name: install pip module for pypy + command: 'pypy3 /tmp/{{ pypy_pip_module }}' + changed_when: False + +- name: remove temporary pip module for pypy + file: + path: '/tmp/{{ pypy_pip_module }}' + state: absent + +- name: install test coverage package for pypy + command: 'pypy3 -m pip install coverage==4.5.2' + changed_when: False + +- name: check for spark py3k env + command: "conda env list" + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + changed_when: False + check_mode: no + register: py3k_check + +- block: + - name: copy python 3.6 conda environment for spark testing (2.4) + copy: + src: "python_environments/{{ spark_py3k_environment }}" + dest: "/tmp/{{ spark_py3k_environment }}" + owner: jenkins + group: jenkins + mode: 0660 + + - name: create py3k conda environment for spark + command: "conda create -y --name py3k --file /tmp/{{
spark_py3k_environment }}" + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + become: yes + become_user: jenkins + + - name: delete py3k conda environment file + file: + path: "/tmp/{{ spark_py3k_environment }}" + state: absent + when: "'py3k' not in py3k_check.stdout" + +- block: + - name: copy python 3.6 conda environment for spark testing (master, 3.x+) + copy: + src: "python_environments/{{ spark_py36_environment }}" + dest: "/tmp/{{ spark_py36_environment }}" + owner: jenkins + group: jenkins + mode: 0660 + + - name: create py36 conda environment for spark + command: "conda create -y --name py36 --file /tmp/{{ spark_py36_environment }}" + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + become: yes + become_user: jenkins + + - name: delete py36 conda environment file + file: + path: "/tmp/{{ spark_py36_environment }}" + state: absent + when: "'py36' not in py3k_check.stdout" # reuses the env listing registered by the py3k check + + +- name: create symlink for anaconda homedir + file: + src: "{{ anaconda_home }}" + dest: "/home/anaconda" + state: link + +- name: copy python2 pip requirements file + copy: + src: "python_environments/{{ spark_py2_pip_requirements }}" + dest: "/tmp/{{ spark_py2_pip_requirements }}" + owner: jenkins + group: jenkins + mode: 0660 + +- name: install python2 doc and test packages for spark + pip: + requirements: "/tmp/{{ spark_py2_pip_requirements }}" + extra_args: --ignore-installed + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + become: yes + become_user: jenkins + +- name: delete python2 pip requirements file + file: + path: "/tmp/{{ spark_py2_pip_requirements }}" + state: absent + +- name: add cran apt repo key + apt_key: + keyserver: keyserver.ubuntu.com + id: "{{ r_cran_repo_key }}" + state: present + +- name: add cran apt repo + apt_repository: + repo: "{{ r_cran_repo }}" + update_cache: yes + +- name: install sparkR system deps + apt: + pkg: + - default-jre + - default-jre-headless # both for r-cran-rjava + -
libcurl4-openssl-dev # devtools package dep + - libxml2-dev # lintr dep + - qpdf + - libssh2-1-dev # devtools dep + - libssl-dev # git2r dep + - libodbc1 + state: present + +- name: install base R packages + apt: + pkg: + - r-base-core + - r-base-dev + state: present + +- name: ensure R java environment is properly set up + command: "/usr/bin/R CMD javareconf" + environment: + JAVA_HOME: "/usr/java/latest" + register: r_result + changed_when: False + failed_when: "r_result.rc != 0 or 'had non-zero exit status' in r_result.stderr" + +- name: install rJava + apt: + name: r-cran-rjava + state: present + +- name: install next set java R packages + apt: + pkg: + - r-cran-rodbc + - r-mathlib + +- name: install required R packages via Rscript (default version) + command: /usr/bin/Rscript --slave --no-save --no-restore-history -e "if (! ('{{ item }}' %in% installed.packages()[,'Package'])) { install.packages(pkgs='{{ item }}'); print('Added'); } else { print('Already installed'); }" + register: r_result + failed_when: "r_result.rc != 0 or 'had non-zero exit status' in r_result.stderr" + changed_when: "'Added' in r_result.stdout" + loop: + - digest + - knitr + - devtools + - plyr + - roxygen2 + - rmarkdown + - e1071 + - testthat + +- name: get list of installed packages + command: /usr/bin/Rscript -e "installed.packages()[,'Package']" + changed_when: False + check_mode: no + register: r_check + +- name: install lintr v2.0.0 + command: /usr/bin/Rscript --slave --no-save --no-restore-history -e "devtools::install_github('jimhester/lintr@v2.0.0')" + when: "'lintr' not in r_check.stdout" diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/jenkins_userspace.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/jenkins_userspace.yml new file mode 100644 index 0000000000..04438e1963 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/jenkins_userspace.yml @@ -0,0 +1,119 @@ +--- +# set up the jenkins worker userspace +- name: ensure groups
jenkins will need are present + group: + name: "{{ item }}" + state: present + loop: + - jenkins + - docker + +- name: create jenkins login + user: + name: jenkins + group: jenkins + groups: docker + generate_ssh_key: yes + ssh_key_bits: 2048 + ssh_key_file: ".ssh/id_rsa" + shell: /bin/bash + +- name: create the jenkins workspace directory + file: + path: "{{ jenkins_home }}/workspace" + state: directory + mode: 0755 + owner: jenkins + group: jenkins + +- name: create the jenkins worker bin directory + file: + path: "{{ jenkins_home }}/bin" + state: directory + mode: 0755 + owner: jenkins + group: jenkins + +- name: copy utility scripts to jenkins bin dir + copy: + src: "util_scripts/{{ item }}" + dest: "{{ jenkins_home }}/bin/" + mode: 0755 + owner: jenkins + group: jenkins + loop: + - session_lock_resource.py + - kill_zinc_nailgun.py + - post_github_pr_comment.py + +- name: update the jenkins proc and open file limits + copy: + src: worker-limits.conf + dest: /etc/security/limits.conf + owner: root + group: root + mode: 0644 + +- name: create per-executor ivy caches for spark builds + file: + path: "{{ jenkins_home }}/sparkivy" + state: directory + mode: 0755 + owner: jenkins + group: jenkins + +- name: create the jenkins git cache directory + file: + path: "{{ jenkins_home }}/gitcaches" + state: directory + mode: 0755 + owner: jenkins + group: jenkins + +- name: create specific local gitcaches for some projects to speed clone time + git: + accept_hostkey: yes + bare: yes + dest: "{{ item.value.dir }}" + repo: "{{ item.value.repo }}" + update: no + become: yes + become_user: jenkins + loop: "{{ query('dict', git_caches) }}" # query always yields a list, even when git_caches has one entry + tags: + - skip_ansible_lint + +- name: configure git cache repos + git_config: + name: gc.pruneexpire + value: never + repo: "{{ item.value.dir }}" + scope: local + become: yes + become_user: jenkins + loop: "{{ query('dict', git_caches) }}" # query always yields a list, even when git_caches has one entry + tags: + - skip_ansible_lint + +- name: perform initial fetch + shell: + "cd {{ item.value.dir }}
&& /usr/bin/git fetch --all" + become: yes + become_user: jenkins + loop: "{{ query('dict', git_caches) }}" # query always yields a list, even when git_caches has one entry + changed_when: False + tags: + - skip_ansible_lint + +- name: set up cron job as jenkins to update gitcaches + cron: + name: "{{ item.name }}" + minute: "*/5" + job: "{{ item.cmd }}" + user: jenkins + loop: # the command below takes its directory from git_caches.spark.dir + - { + name: "Spark reference update", + cmd: "cd {{ git_caches.spark.dir }} && git remote update >/dev/null 2>&1" + } + diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/main.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/main.yml new file mode 100644 index 0000000000..a673bdd626 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/main.yml @@ -0,0 +1,22 @@ +--- +# set up the jenkins user first, before anything is installed and set up +- name: set up the jenkins environment + include_tasks: jenkins_userspace.yml # include_tasks replaces the deprecated bare include + +- name: install system packages for software builds + include_tasks: install_build_packages.yml + +- name: install docker + include_tasks: install_docker.yml + +- name: install minikube + include_tasks: install_minikube.yml + +- name: install anaconda and associated packages + include_tasks: install_anaconda.yml + +- name: install spark build packages + include_tasks: install_spark_build_packages.yml + +- name: clean up apt + include_tasks: cleanup.yml diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/vars/main.yml b/dev/ansible-for-test-node/roles/jenkins-worker/vars/main.yml new file mode 100644 index 0000000000..7aa3d25066 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/vars/main.yml @@ -0,0 +1,9 @@ +--- +minikube_url: '{{ minikube_mirror }}/v{{ minikube_version }}/{{ minikube_target }}' + +kubectl_url: '{{ kubectl_mirror }}/{{ kubectl_version }}/bin/linux/amd64/{{ kubectl_target }}' + +git_caches: + spark: + dir: '{{ jenkins_home }}/gitcaches/spark.reference' + repo: 'https://github.com/apache/spark.git' diff --git a/dev/tox.ini b/dev/tox.ini index a0f04ef0cb..e1a4cf5ce8 100644 ---
a/dev/tox.ini +++ b/dev/tox.ini @@ -16,11 +16,11 @@ [pycodestyle] ignore=E203,E226,E241,E305,E402,E722,E731,E741,W503,W504 max-line-length=100 -exclude=*/target/*,python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* +exclude=*/target/*,python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,dev/ansible-for-test-node/* [flake8] select = E901,E999,F821,F822,F823,F401,F405,B006 # Ignore F821 for plot documents in pandas API on Spark. ignore = F821 -exclude = python/docs/build/html/*,*/target/*,python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi +exclude = python/docs/build/html/*,*/target/*,python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi,dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/*.py max-line-length = 100