diff --git a/dev/.rat-excludes b/dev/.rat-excludes
index 6c809f4341..a35d4ce976 100644
--- a/dev/.rat-excludes
+++ b/dev/.rat-excludes
@@ -134,4 +134,5 @@ flights_tiny.txt.1
 over1k
 over10k
 exported_table/*
+ansible-for-test-node/*
 node_modules
\ No newline at end of file
diff --git a/dev/ansible-for-test-node/README.md b/dev/ansible-for-test-node/README.md
new file mode 100644
index 0000000000..846bea514a
--- /dev/null
+++ b/dev/ansible-for-test-node/README.md
@@ -0,0 +1,25 @@
+# jenkins-infra
+
+This is a rough skeleton of the ansible used to deploy RISELab/Apache Spark Jenkins build workers on Ubuntu 20LTS.
+
+WARNING: this will not work "directly out of the box" and will need to be tweaked to work on any ubuntu servers you might want to try this on.
+
+### deploy a new worker node
+#### TL;DR:
+all of the configs for the workers live in roles/common/... and roles/jenkins-worker...
+
+#### prereqs:
+* fresh install of ubuntu 20
+* a service account w/sudo
+* python 3, ansible, ansible-playbook installed locally
+* add hostname(s) to the `hosts` file
+* add this to your `~/.ansible.cfg`:
+```[defaults] host_key_checking = False```
+
+#### fire ansible cannon!
+`ansible-playbook -u <username> deploy-jenkins-worker.yml -i <hosts-file> -k -b -K`
+
+tips:
+* if you are installing more than a few workers, it's best to run the playbook on smaller (2-3) batches at a time. this way it's easier to track down errors, as ansible is very noisy.
+* when you encounter an error, you should comment out any previously-run plays and tasks. this saves time when debugging, and lets you easily track where you are in the process.
+* `apt-get remove <package>` and `apt-get purge <package>` are your friends
diff --git a/dev/ansible-for-test-node/deploy-jenkins-worker.yml b/dev/ansible-for-test-node/deploy-jenkins-worker.yml
new file mode 100644
index 0000000000..7ab5f54349
--- /dev/null
+++ b/dev/ansible-for-test-node/deploy-jenkins-worker.yml
@@ -0,0 +1,8 @@
+---
+# top-level setup for jenkins workers
+- name: set up ubuntu jenkins workers
+  hosts: ubuntu-build-node
+  roles:
+    - common
+    - jenkins-worker
+  become: yes
diff --git a/dev/ansible-for-test-node/roles/common/README.md b/dev/ansible-for-test-node/roles/common/README.md
new file mode 100644
index 0000000000..bff1359cd7
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/common/README.md
@@ -0,0 +1,4 @@
+Role Name
+=========
+
+common -- just some common things to set up for any jenkins worker node
diff --git a/dev/ansible-for-test-node/roles/common/tasks/main.yml b/dev/ansible-for-test-node/roles/common/tasks/main.yml
new file mode 100644
index 0000000000..607f5f2875
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/common/tasks/main.yml
@@ -0,0 +1,4 @@
+---
+# tasks file for common
+- import_tasks: system_packages.yml
+- import_tasks: setup_local_userspace.yml
diff --git a/dev/ansible-for-test-node/roles/common/tasks/setup_local_userspace.yml b/dev/ansible-for-test-node/roles/common/tasks/setup_local_userspace.yml
new file mode 100644
index 0000000000..669700a34c
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/common/tasks/setup_local_userspace.yml
@@ -0,0 +1,8 @@
+---
+# set up userspace
+- name: create local groups for external contributors
+  group:
+    name: "{{ item }}"
+    state: present
+  loop:
+    - jenkins
diff --git a/dev/ansible-for-test-node/roles/common/tasks/system_packages.yml b/dev/ansible-for-test-node/roles/common/tasks/system_packages.yml
new file mode 100644
index 0000000000..e6628987a8
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/common/tasks/system_packages.yml
@@ -0,0 +1,73 @@
+---
+# base system packages for jenkins master and workers
+- name: get software for apt repository management.
+  apt:
+    pkg:
+      - python3-apt
+      - python3-pycurl
+    state: present
+
+- name: install java11
+  apt:
+    pkg: openjdk-11-jdk
+    state: present
+    update_cache: yes
+    force: yes
+
+- name: install java8
+  apt:
+    pkg: openjdk-8-jdk
+    state: present
+    update_cache: yes
+    force: yes
+
+- name: make easy to remember dir for java symlinks
+  file:
+    path: /usr/java
+    state: directory
+
+- name: link java8 to /usr/java/latest
+  file:
+    src: /usr/lib/jvm/java-8-openjdk-amd64/
+    dest: /usr/java/latest
+    state: link
+
+- name: link java8 to /usr/java/java8
+  file:
+    src: /usr/lib/jvm/java-8-openjdk-amd64/
+    dest: /usr/java/java8
+    state: link
+
+- name: link java11 to /usr/java/java11
+  file:
+    src: /usr/lib/jvm/java-11-openjdk-amd64/
+    dest: /usr/java/java11
+    state: link
+
+- name: set java8 as default
+  alternatives:
+    name: "{{ item.command }}"
+    path: "{{ item.path }}"
+  loop:
+    - { command: 'java', path: '/usr/java/latest/bin/java' }
+    - { command: 'javah', path: '/usr/java/latest/bin/javah' }
+    - { command: 'javac', path: '/usr/java/latest/bin/javac' }
+    - { command: 'jar', path: '/usr/java/latest/bin/jar' }
+
+- name: install base system packages
+  apt:
+    pkg:
+      - apt-transport-https
+      - gnupg-agent
+      - ipmitool
+      - git
+      - ntp
+      - htop
+      - tree
+      - gzip
+      - bzip2
+      - curl
+      - wget
+      - unzip
+    state: present
+    update_cache: yes
diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/README.md b/dev/ansible-for-test-node/roles/jenkins-worker/README.md
new file mode 100644
index 0000000000..71b0a218c7
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/jenkins-worker/README.md
@@ -0,0 +1,15 @@
+Role Name
+=========
+
+jenkins-worker -- set up the craziness of a jenkins worker to build and test Apache Spark
+
+Requirements
+------------
+
+Oh jeez. This is just a framework to help others get started. If you try and deploy this locally, you'll need a service account, auth set up, etc etc.
+ +Role Variables +-------------- + +vars/main.yml: git caches, and url construction for minikube downloads +defaults/main.yml: urls, versions, install targets, etc diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/defaults/main.yml b/dev/ansible-for-test-node/roles/jenkins-worker/defaults/main.yml new file mode 100644 index 0000000000..98092229ca --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/defaults/main.yml @@ -0,0 +1,30 @@ +--- +# defaults file for jenkins-worker +anaconda_installer: https://repo.continuum.io/archive/{{ anaconda_version }} +anaconda_version: Anaconda2-2019.10-Linux-x86_64.sh +anaconda_home: /home/jenkins/anaconda2 +anaconda_py3_pip_requirements: base-py3-pip.txt + +spark_py36_environment: spark-py36-spec.txt +spark_py3k_environment: spark-py3k-spec.txt +spark_py2_pip_requirements: spark-py2-pip.txt + +jenkins_home: /home/jenkins + +minikube_version: 1.18.1 +minikube_checksum: sha256:1a7960b845301107cb6a0c29001c8df310d7bce586cf88ceacfc78f22b622ba5 +minikube_mirror: https://github.com/kubernetes/minikube/releases/download +minikube_target: "minikube_{{ minikube_version }}-0_amd64.deb" + +k8s_version: 1.17.3 + +kubectl_version: v1.17.3 +kubectl_mirror: https://dl.k8s.io/release +kubectl_target: 'kubectl' +kubectl_install_dir: /usr/local/bin + +pypy_pip_module: get-pip.py +pypy_pip_mirror: https://bootstrap.pypa.io + +r_cran_repo: "deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/" +r_cran_repo_key: E298A3A825C0D65DFD57CBB651716619E084DAB9 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-pip.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-pip.txt new file mode 100644 index 0000000000..af3cbd04e5 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-pip.txt @@ -0,0 +1,3 @@ +awscli==1.16.46 +pytest-benchmark==3.1.1 +pytest-html==1.19.0 diff --git 
a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-spec.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-spec.txt new file mode 100644 index 0000000000..91c988017a --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/base-py3-spec.txt @@ -0,0 +1,21 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2018.03.07-0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.1-hf484d3e_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.0.2p-h14c3975_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.4-h14c3975_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-ha838bed_2.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20170329-h6b74fdf_2.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/readline-7.0-h7b6447c_5.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.8-hbc83047_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.24.0-h84994c4_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.6.6-hc3d631a_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/certifi-2018.8.24-py36_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-40.2.0-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.31.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pip-10.0.1-py36_0.tar.bz2 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/py36.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/py36.txt 
new file mode 100644 index 0000000000..50b72f41fe --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/py36.txt @@ -0,0 +1,49 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/ca-certificates-2020.1.1-0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/intel-openmp-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.3.0-hdf63c60_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-8.2.0-hdf63c60_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.6-h14c3975_1002.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/icu-58.2-hf484d3e_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.1-he6710b0_1.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/openssl-1.1.1-h7b6447c_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.4-h14c3975_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/boost-cpp-1.68.0-h11c811c_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20181209-hc058e9b_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-3.6.1-hdbcaa40_1001.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/readline-7.0-h7b6447c_5.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.8-hbc83047_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.27.2-h7b6447c_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/thrift-cpp-0.12.0-h0a07b25_1002.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/python-3.6.8-h0371630_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/certifi-2019.11.28-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/coverage-4.5.2-py36h7b6447c_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/entrypoints-0.3-py36_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/mccabe-0.6.1-py36_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.16.2-py36hde5b4d6_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/pycodestyle-2.5.0-py36_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/pyflakes-2.1.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pytz-2018.9-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/six-1.12.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/noarch/xmlrunner-1.7.7-py_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl_random-1.0.2-py36hd81dba3_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-dateutil-2.8.0-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-40.8.0-py36_0.tar.bz2 +https://conda.anaconda.org/anaconda/linux-64/flake8-3.7.9-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.33.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pip-19.0.3-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/arrow-cpp-0.12.1-py36h0e61e49_0.tar.bz2 +https://conda.anaconda.org/conda-forge/noarch/parquet-cpp-1.5.1-4.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/pandas-0.24.2-py36hf484d3e_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-0.12.1-py36hbbcf98d_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl_fft-1.0.10-py36ha843d7b_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.16.2-py36h7e9f1db_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.2.1-py36h7c811a0_0.tar.bz2 diff --git 
a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py2-pip.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py2-pip.txt new file mode 100644 index 0000000000..74ec2eefd7 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py2-pip.txt @@ -0,0 +1,8 @@ +coverage==4.5.2 +dask +flake8==3.6.0 +numpy +pandas +pyarrow==0.8.0 +pycodestyle==2.4.0 +pyflakes==2.0.0 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py36-spec.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py36-spec.txt new file mode 100644 index 0000000000..029c535831 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py36-spec.txt @@ -0,0 +1,61 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda +https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2019.11.28-hecc5488_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/intel-openmp-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.3.0-hdf63c60_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-8.2.0-hdf63c60_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.7-he1b5a44_1000.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h516909a_2.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.15.0-h516909a_1001.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.1.5-he1b5a44_2.tar.bz2 
+https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-he1b5a44_1002.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/glog-0.4.0-he1b5a44_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/icu-58.2-hf484d3e_1000.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.8.3-he1b5a44_1001.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.1-he6710b0_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1d-h516909a_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/re2-2020.03.03-he1b5a44_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.1.8-he1b5a44_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/uriparser-0.9.3-he1b5a44_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.4-h14c3975_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/boost-cpp-1.68.0-h11c811c_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20181209-hc058e9b_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.10-h72c5cf5_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libprotobuf-3.11.2-hd408876_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/readline-7.0-h7b6447c_5.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.8-hbc83047_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.3.7-h0b5b093_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/grpc-cpp-1.26.0-hf8bcb03_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libboost-1.71.0-h97c9712_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.27.2-h7b6447c_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/thrift-cpp-0.11.0-h02b749d_3.conda +https://repo.anaconda.com/pkgs/main/linux-64/python-3.6.8-h0371630_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/certifi-2019.11.28-py36_0.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/coverage-4.5.2-py36h7b6447c_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mccabe-0.6.1-py36_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.16.2-py36hde5b4d6_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pycodestyle-2.4.0-py36_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/pyflakes-2.0.0-py36_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/pytz-2018.9-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/six-1.12.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/noarch/xmlrunner-1.7.7-py_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl_random-1.0.2-py36hd81dba3_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-dateutil-2.8.0-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-40.8.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/flake8-3.6.0-py36_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.33.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pip-19.0.3-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/pandas-0.24.2-py36hf484d3e_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/arrow-cpp-0.15.1-py36h7cd5009_5.conda +https://repo.anaconda.com/pkgs/main/linux-64/mkl_fft-1.0.10-py36ha843d7b_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.16.2-py36h7e9f1db_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pyarrow-0.15.1-py36h0573a6f_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.2.1-py36h7c811a0_0.tar.bz2 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py3k-spec.txt b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py3k-spec.txt new file mode 100644 index 0000000000..3f6770b70c --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/spark-py3k-spec.txt @@ -0,0 +1,42 @@ +# This file may be used to 
create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2019.3.9-hecc5488_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/intel-openmp-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-8.2.0-hdf63c60_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.3.0-hdf63c60_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-8.2.0-hdf63c60_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.6-h14c3975_1002.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/icu-58.2-hf484d3e_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/mkl-2019.3-199.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.1-he6710b0_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1b-h14c3975_1.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.4-h14c3975_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/boost-cpp-1.68.0-h11c811c_1000.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20181209-hc058e9b_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-3.6.1-hdbcaa40_1001.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/readline-7.0-h7b6447c_5.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.8-hbc83047_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.27.2-h7b6447c_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/thrift-cpp-0.12.0-h0a07b25_1002.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.6.8-h0371630_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/certifi-2019.3.9-py36_0.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/coverage-4.5.2-py36h7b6447c_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.11.3-py36h3dfced4_4.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pytz-2018.9-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/six-1.12.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/noarch/xmlrunner-1.7.7-py_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/arrow-cpp-0.8.0-py36_4.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/mkl_fft-1.0.11-py36h14c3975_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/mkl_random-1.0.2-py36h637b7d7_2.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/python-dateutil-2.8.0-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-40.8.0-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/pandas-0.19.2-np111py36_1.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/parquet-cpp-1.4.0.pre-2.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.33.1-py36_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/pip-19.0.3-py36_0.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-0.8.0-py36_0.tar.bz2 diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/scripts/jenkins-gitcache-cron b/dev/ansible-for-test-node/roles/jenkins-worker/files/scripts/jenkins-gitcache-cron new file mode 100644 index 0000000000..482c86c930 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/scripts/jenkins-gitcache-cron @@ -0,0 +1,7 @@ +# this must be run as the jenkins user! 
+SHELL=/bin/bash
+PATH=/home/jenkins/git2/bin:/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin
+
+*/5 * * * * cd /home/jenkins/gitcaches/adam.reference && git remote update >/dev/null 2>&1
+*/5 * * * * cd /home/jenkins/gitcaches/alluxio.reference && git remote update >/dev/null 2>&1
+*/5 * * * * cd /home/jenkins/gitcaches/spark.reference && git remote update >/dev/null 2>&1
diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py
new file mode 100755
index 0000000000..40887e8977
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""Kill a Zinc process that is listening on a given port"""
+import argparse
+import os
+import re
+import signal
+import subprocess
+import sys
+
+
+def _parse_args():
+    zinc_port_var = "ZINC_PORT"
+    zinc_port_option = "--zinc-port"
+    parser = argparse.ArgumentParser()
+    parser.add_argument(zinc_port_option,
+                        type=int,
+                        default=int(os.environ.get(zinc_port_var, "0")),
+                        help="Specify zinc port")
+    args = parser.parse_args()
+    if not args.zinc_port:
+        parser.error("Specify either environment variable {0} or option {1}".format(
+            zinc_port_var, zinc_port_option))
+    return args
+
+
+def _kill_processes_listening_on_port(port):
+    """Send SIGTERM once to each distinct pid that lsof reports listening on `port`."""
+    killed = set()
+    for pid in _yield_processes_listening_on_port(port):
+        if pid not in killed:
+            killed.add(pid)
+            try:
+                os.kill(pid, signal.SIGTERM)
+            except ProcessLookupError:
+                # The process exited between the lsof snapshot and the kill;
+                # it is already gone, which is the outcome we wanted.
+                pass
+
+
+def _yield_processes_listening_on_port(port):
+    """Yield the pid (second lsof column) of every row matching ":<port> (LISTEN)".
+
+    Rows are not de-duplicated here; a pid can be yielded more than once.
+    """
+    pattern = re.compile(r":{0} \(LISTEN\)".format(port))
+    innocuous_errors = re.compile(
+        r"^\s*Output information may be incomplete.\s*$"
+        r"|^lsof: WARNING: can't stat\(\) (?:tracefs|nsfs|overlay|tmpfs|aufs|zfs) file system .*$"
+        r"|^\s*$")
+    lsof_process = subprocess.Popen(["lsof", "-P"], stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE, universal_newlines=True)
+    stdout, stderr = lsof_process.communicate()
+    if lsof_process.returncode != 0:
+        raise OSError("Can't run lsof -P, stderr:\n{}".format(stderr))
+    for line in stderr.split("\n"):
+        if not innocuous_errors.match(line):
+            sys.stderr.write(line + "\n")
+    for line in stdout.split("\n"):
+        if pattern.search(line):
+            yield int(line.split()[1])
+
+
+def _main():
+    args = _parse_args()
+    _kill_processes_listening_on_port(args.zinc_port)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(_main())
diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py
new file mode 100755
index 0000000000..68e31d4528
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+"""Utility program to post a comment to a github PR"""
+import argparse
+import json
+import os
+import sys
+import urllib.parse
+from urllib.error import HTTPError, URLError
+from urllib.request import urlopen, Request
+
+
+def _parse_args():
+    pr_link_var = "ghprbPullLink"
+    pr_link_option = "--pr-link"
+    github_oauth_key_var = "GITHUB_OAUTH_KEY"
+    github_oauth_key_option = "--github-oauth-key"
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-pr", pr_link_option,
+                        default=os.environ.get(pr_link_var, ""),
+                        help="Specify pull request link")
+    parser.add_argument(github_oauth_key_option,
+                        default=os.environ.get(github_oauth_key_var, ""),
+                        help="Specify github oauth key")
+    args = parser.parse_args()
+    if not args.pr_link:
+        parser.error("Specify either environment variable {} or option {}".format(
+            pr_link_var, pr_link_option))
+    if not args.github_oauth_key:
+        parser.error("Specify either environment variable {} or option {}".format(
+            github_oauth_key_var, github_oauth_key_option))
+    return args
+
+
+def post_message_to_github(msg, github_oauth_key, pr_link):
+    print("Attempting to
post to Github...")
+
+    ghprb_pull_id = os.environ["ghprbPullId"]
+    api_url = os.getenv("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark")
+    url = api_url + "/issues/" + ghprb_pull_id + "/comments"
+
+    posted_message = json.dumps({"body": msg})
+    request = Request(url,
+                      headers={
+                          "Authorization": "token {}".format(github_oauth_key),
+                          "Content-Type": "application/json"
+                      },
+                      data=posted_message.encode('utf-8'))
+    try:
+        response = urlopen(request)
+
+        if response.getcode() == 201:
+            print(" > Post successful.")
+        else:
+            print_err("Surprising post response.")
+            print_err(" > http_code: {}".format(response.getcode()))
+            print_err(" > api_response: {}".format(response.read()))
+            print_err(" > data: {}".format(posted_message))
+    except HTTPError as http_e:
+        print_err("Failed to post message to Github.")
+        print_err(" > http_code: {}".format(http_e.code))
+        print_err(" > api_response: {}".format(http_e.read()))
+        print_err(" > data: {}".format(posted_message))
+    except URLError as url_e:
+        print_err("Failed to post message to Github.")
+        print_err(" > urllib_status: {}".format(url_e.reason))  # not indexable in py3
+        print_err(" > data: {}".format(posted_message))
+
+
+def print_err(msg):
+    print(msg, file=sys.stderr)
+
+
+def _main():
+    args = _parse_args()
+    msg = sys.stdin.read()
+    post_message_to_github(msg, args.github_oauth_key, args.pr_link)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(_main())
diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/session_lock_resource.py b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/session_lock_resource.py
new file mode 100755
index 0000000000..f5153d5161
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/session_lock_resource.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python2
+"""Utility program to hold resources for a given session.
+
+A session is defined as the lifetime of some process (externally specified,
+or the original forking process of this process)
+
+This program will daemonize and return 0 if the lock succeeds. If the lock
+fails, it will return a non-zero exit code.
+
+After the parent program (or other specified pid) exits, it will also exit,
+unlocking the file
+"""
+import argparse
+import errno
+import fcntl
+import os
+import sys
+import time
+
+
+_LOCK_DIR = "/tmp/session_locked_resources"
+
+
+def _parse_args():
+    """Parse command line arguments"""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-t", "--timeout-secs", type=int,
+                        help="How long to wait for lock acquisition, in seconds")
+    parser.add_argument("-p", "--pid", type=int,
+                        help="PID to wait for exit (defaults to parent pid)")
+    parser.add_argument("resource", help="Resource to lock")
+    return parser.parse_args()
+
+
+def _acquire_lock(filename, timeout_secs, message):
+    """Acquire an exclusive lock on a file, retrying once per second.
+
+    On success the file is truncated, `message` is written to it, and the
+    open file object is returned; the caller must keep it open to keep
+    holding the lock. Raises IOError once `timeout_secs` (if non-zero)
+    attempts have failed.
+    """
+    f = open(filename, "a+")
+    time_attempted = 0
+    while True:
+        try:
+            fcntl.lockf(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+            # Lock acquired
+            f.truncate(0)
+            f.write(message)
+            f.flush()
+            return f
+        except IOError:
+            # Locking failed
+            time_attempted += 1
+            if timeout_secs and time_attempted >= timeout_secs:
+                # Timeout exceeded: release the descriptor before giving up
+                f.close()
+                raise IOError("Can't get child lock")
+            time.sleep(1)
+
+
+def _daemonize(child_body):
+    """Daemonize. Returns whether the child was successful.
+
+    Child body is a function to call in the child. It should take one
+    argument, which is a function that is called with a boolean
+    that indicates that the child succeeded/failed in its initialization
+    """
+    CHILD_FAIL = '\2'
+    CHILD_SUCCESS = '\0'
+    r_fd, w_fd = os.pipe()
+    if os.fork() != 0:
+        # We are the original script. Read success/fail from the final
+        # child and log an error message if needed.
+        # Close our copy of the write end first so that, if the daemon dies
+        # without reporting, the read sees EOF instead of blocking forever.
+        os.close(w_fd)
+        child_code = os.read(r_fd, 1)  # .decode('utf-8')
+        return child_code == CHILD_SUCCESS
+    # First child
+    os.setsid()
+    if os.fork() != 0:
+        # Still in first child
+        _close_std_streams()
+        os._exit(0)
+    # Second child (daemon process)
+
+    def _write_to_parent(success):
+        parent_message = CHILD_SUCCESS if success else CHILD_FAIL
+        os.write(w_fd, parent_message)
+    child_body(_write_to_parent)
+    os._exit(0)
+
+
+def _close_std_streams():
+    """Close all our stdin/stdout/stderr streams."""
+    sys.stdin.close()
+    sys.stdout.close()
+    sys.stderr.close()
+    os.close(0)
+    os.close(1)
+    os.close(2)
+
+
+def _wait_for_pid_exit(pid):
+    while _is_pid_running(pid):
+        time.sleep(1)
+
+
+def _is_pid_running(pid):
+    """Return True if a process with the given pid appears to exist.
+
+    Signal 0 performs the existence/permission check without delivering a
+    signal; only ESRCH (no such process) is treated as "not running".
+
+    From Stack Overflow: https://stackoverflow.com/questions/7653178
+    """
+    try:
+        os.kill(pid, 0)
+    except OSError as err:
+        if err.errno == errno.ESRCH:
+            return False
+    return True
+
+
+def _lock_and_wait(lock_success_callback, resource, timeout_secs,
+                   controlling_pid):
+    """Attempt to lock the file then wait.
+
+    lock_success_callback will be called if the locking worked.
+    """
+    lock_filename = os.path.join(_LOCK_DIR, resource)
+    lock_message = ("Session lock on " + resource
+                    + ", controlling pid " + str(controlling_pid) + "\n")
+    try:
+        # Keep a reference to the returned file for the rest of this function:
+        # closing it (or letting it be collected) would drop the lock.
+        f = _acquire_lock(lock_filename, timeout_secs, lock_message)
+    except IOError:
+        lock_success_callback(False)
+        return
+    lock_success_callback(True)
+    _wait_for_pid_exit(controlling_pid)
+
+
+def main():
+    """Main program"""
+    args = _parse_args()
+    try:
+        os.mkdir(_LOCK_DIR)
+    except OSError as err:
+        # Another session may have created the directory first; that is fine.
+        if err.errno != errno.EEXIST:
+            raise
+    controlling_pid = args.pid or os.getppid()
+    child_body_func = lambda success_callback: _lock_and_wait(
+        success_callback, args.resource, args.timeout_secs,
+        controlling_pid)
+    if _daemonize(child_body_func):
+        return 0
+    else:
+        print("Could not acquire lock")
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/files/worker-limits.conf b/dev/ansible-for-test-node/roles/jenkins-worker/files/worker-limits.conf
new file mode 100644
index 0000000000..1a9901a636
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/jenkins-worker/files/worker-limits.conf
@@ -0,0 +1,5 @@
+* soft nproc 1024
+root soft nproc unlimited
+jenkins soft nofile 100000
+jenkins hard nofile 200000
+jenkins soft nproc 1600000
diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/cleanup.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/cleanup.yml
new file mode 100644
index 0000000000..ec700ce091
--- /dev/null
+++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/cleanup.yml
@@ -0,0 +1,12 @@
+- name: purge unneeded dependencies
+  apt:
+    purge: yes
+    autoremove: yes
+
+- name: clean up unneeded cached packages
+  apt:
+    autoclean: yes
+
+- name: reset apt to a useful state
+  apt:
+    update_cache: yes
diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_anaconda.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_anaconda.yml
new file mode 100644
index 0000000000..7a90431cc6
--- /dev/null
+++
b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_anaconda.yml @@ -0,0 +1,79 @@ +--- +- name: Check for Anaconda + stat: + path: "{{ anaconda_home }}" + register: anaconda_dir + +- block: + - name: Download Anaconda + get_url: + url: "{{ anaconda_installer }}" + dest: /tmp/{{ anaconda_version }} + + - name: Install Anaconda + command: "bash /tmp/{{ anaconda_version }} -b -p {{ anaconda_home }}" + become: yes + become_user: jenkins + + - name: add anaconda bin to jenkins PATH in bashrc + lineinfile: + name: /home/jenkins/.bashrc + state: present + insertafter: EOF + line: "export PATH=/home/jenkins/anaconda2/bin:$PATH" + + - name: delete Anaconda download + file: + path: "/tmp/{{ anaconda_version }}" + state: absent + when: anaconda_dir.stat.islnk is not defined + +- name: check for py3 env + command: "conda env list" + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + changed_when: False + check_mode: no + register: py3_check + +- block: + - name: Create Python Env + command: "{{ anaconda_home }}/bin/conda create -y --name py3 python=3.6" + become: yes + become_user: jenkins + when: "'py3' not in py3_check.stdout" + +- name: update anaconda pip for py2.7 + command: "{{ anaconda_home }}/bin/pip install --upgrade pip" + become: yes + become_user: jenkins + tags: + - skip_ansible_lint + +- name: update anaconda pip for py3 + command: "{{ anaconda_home }}/envs/py3/bin/pip install --upgrade pip" + become: yes + become_user: jenkins + tags: + - skip_ansible_lint + +- name: copy pip requirements for py3 + copy: + src: "python_environments/{{ anaconda_py3_pip_requirements }}" + dest: "/tmp/{{ anaconda_py3_pip_requirements }}" + owner: jenkins + group: jenkins + mode: 0660 + +- name: install py3 pip packages + pip: + requirements: "/tmp/{{ anaconda_py3_pip_requirements }}" + environment: + PATH: "{{ anaconda_home }}/envs/py3/bin:{{ ansible_env.PATH }}" + become: yes + become_user: jenkins + +- name: delete py3 pip requirements file + file: 
+ path: "/tmp/{{ anaconda_py3_pip_requirements }}" + state: absent diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_build_packages.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_build_packages.yml new file mode 100644 index 0000000000..e55ab06549 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_build_packages.yml @@ -0,0 +1,21 @@ +--- +# set up the required system packages for a host +- name: install base build/compilation tools + apt: + pkg: + - autoconf + - maven + - gcc + - make + - cmake + - g++ + - libssl-dev + - libev-dev + - libevent-dev + - libffi-dev + - curl + - python-dev + - clang-format + - pkg-config + state: present + update_cache: yes diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_docker.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_docker.yml new file mode 100644 index 0000000000..eacc56e253 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_docker.yml @@ -0,0 +1,33 @@ +--- +# set up and install docker +- name: docker repo signing key + apt_key: + url: "https://download.docker.com/linux/ubuntu/gpg" + state: present + +- name: docker repo + apt_repository: + repo: 'deb [arch=amd64] https://download.docker.com/linux/ubuntu bionic stable' + state: present + update_cache: yes + filename: docker + +- name: install docker-ce and docker-compose + apt: + pkg: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-compose + state: present + update_cache: yes + +- name: enable docker service + service: + name: docker + enabled: yes + +- name: put jenkins in docker group + user: + name: jenkins + groups: docker diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_minikube.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_minikube.yml new file mode 100644 index 0000000000..7173df46b5 --- /dev/null +++ 
b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_minikube.yml @@ -0,0 +1,16 @@ +--- +# install minikube +- name: download kubectl binary + get_url: + url: '{{ kubectl_url }}' + dest: '{{ kubectl_install_dir }}/{{ kubectl_target }}' + mode: 0755 + +- name: install minikube + apt: + deb: '{{ minikube_url }}' + +- name: set k8s version + command: "/usr/bin/minikube config set kubernetes-version {{ k8s_version }}" + become: yes + become_user: jenkins diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_spark_build_packages.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_spark_build_packages.yml new file mode 100644 index 0000000000..663954fde7 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/install_spark_build_packages.yml @@ -0,0 +1,183 @@ +--- +# install spark build packages +- name: install pypy3 + apt: + pkg: + - pypy3 + state: present + update_cache: yes + +- name: download pip module for pypy + get_url: + url: '{{ pypy_pip_mirror }}/{{ pypy_pip_module }}' + dest: '/tmp/{{ pypy_pip_module }}' + mode: 0644 + +- name: install pip module for pypy + command: 'pypy3 /tmp/{{ pypy_pip_module }}' + changed_when: False + +- name: remove temporary pip module for pypy + file: + path: '/tmp/{{ pypy_pip_module }}' + state: absent + +- name: install test coverage package for pypy + command: 'pypy3 -m pip install coverage==4.5.2' + changed_when: False + +- name: check for spark py3k env + command: "conda env list" + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + changed_when: False + check_mode: no + register: py3k_check + +- block: + - name: copy python 3.6 conda environment for spark testing (2.4) + copy: + src: "python_environments/{{ spark_py3k_environment }}" + dest: "/tmp/{{ spark_py3k_environment }}" + owner: jenkins + group: jenkins + mode: 0660 + + - name: create py3k conda environment for spark + command: "conda create -y --name py3k --file /tmp/{{ 
spark_py3k_environment }}" + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + become: yes + become_user: jenkins + + - name: delete py3k conda environment file + file: + path: "/tmp/{{ spark_py3k_environment }}" + state: absent + when: "'py3k' not in py3k_check.stdout" + +- block: + - name: copy python 3.6 conda environment for spark testing (master, 3.x+) + copy: + src: "python_environments/{{ spark_py36_environment }}" + dest: "/tmp/{{ spark_py36_environment }}" + owner: jenkins + group: jenkins + mode: 0660 + + - name: create py36 conda environment for spark + command: "conda create -y --name py36 --file /tmp/{{ spark_py36_environment }}" + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + become: yes + become_user: jenkins + + - name: delete py36 conda environment file + file: + path: "/tmp/{{ spark_py36_environment }}" + state: absent + when: "'py36' not in py3k_check.stdout" + + +- name: create symlink for anaconda homedir + file: + src: "{{ anaconda_home }}" + dest: "/home/anaconda" + state: link + +- name: copy python2 pip requirements file + copy: + src: "python_environments/{{ spark_py2_pip_requirements }}" + dest: "/tmp/{{ spark_py2_pip_requirements }}" + owner: jenkins + group: jenkins + mode: 0660 + +- name: install python2 doc and test packages for spark + pip: + requirements: "/tmp/{{ spark_py2_pip_requirements }}" + extra_args: --ignore-installed + environment: + PATH: "{{ anaconda_home }}/bin:{{ ansible_env.PATH }}" + become: yes + become_user: jenkins + +- name: delete python2 pip requirements file + file: + path: "/tmp/{{ spark_py2_pip_requirements }}" + state: absent + +- name: add cran apt repo key + apt_key: + keyserver: keyserver.ubuntu.com + id: "{{ r_cran_repo_key }}" + state: present + +- name: add cran apt repo + apt_repository: + repo: "{{ r_cran_repo }}" + update_cache: yes + +- name: install sparkR system deps + apt: + pkg: + - default-jre + - default-jre-headless # both for r-cran-rjava + - 
libcurl4-openssl-dev # devtools package dep + - libxml2-dev # lintr dep + - qpdf + - libssh2-1-dev # devtools dep + - libssl-dev # git2r dep + - libodbc1 + state: present + +- name: install base R packages + apt: + pkg: + - r-base-core + - r-base-dev + state: present + +- name: ensure R java environment is properly set up + command: "/usr/bin/R CMD javareconf" + environment: + JAVA_HOME: "/usr/java/latest" + register: r_result + changed_when: False + failed_when: "r_result.rc != 0 or 'had non-zero exit status' in r_result.stderr" + +- name: install rJava + apt: + name: r-cran-rjava + state: present + +- name: install next set java R packages + apt: + pkg: + - r-cran-rodbc + - r-mathlib + +- name: install required R packages via Rscript (default version) + command: /usr/bin/Rscript --slave --no-save --no-restore-history -e "if (! ('{{ item }}' %in% installed.packages()[,'Package'])) { install.packages(pkgs='{{ item }}'); print('Added'); } else { print('Already installed'); }" + register: r_result + failed_when: "r_result.rc != 0 or 'had non-zero exit status' in r_result.stderr" + changed_when: "'Added' in r_result.stdout" + loop: + - digest + - knitr + - devtools + - plyr + - roxygen2 + - rmarkdown + - e1071 + - testthat + +- name: get list of installed packages + command: /usr/bin/Rscript -e "installed.packages()[,'Package']" + changed_when: False + check_mode: no + register: r_check + +- name: install lintr v2.0.0 + command: /usr/bin/Rscript --slave --no-save --no-restore-history -e "devtools::install_github('jimhester/lintr@v2.0.0')" + when: "'lintr' not in r_check.stdout" diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/jenkins_userspace.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/jenkins_userspace.yml new file mode 100644 index 0000000000..04438e1963 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/jenkins_userspace.yml @@ -0,0 +1,119 @@ +--- +# set up the jenkins worker userspace +- name: ensure groups 
jenkins will need are present + group: + name: "{{ item }}" + state: present + loop: + - jenkins + - docker + +- name: create jenkins login + user: + name: jenkins + group: jenkins + groups: docker + generate_ssh_key: yes + ssh_key_bits: 2048 + ssh_key_file: ".ssh/id_rsa" + shell: /bin/bash + +- name: create the jenkins workspace directory + file: + path: "{{ jenkins_home }}/workspace" + state: directory + mode: 0755 + owner: jenkins + group: jenkins + +- name: create the jenkins worker bin directory + file: + path: "{{ jenkins_home }}/bin" + state: directory + mode: 0755 + owner: jenkins + group: jenkins + +- name: copy utility scripts to jenkins bin dir + copy: + src: "util_scripts/{{ item }}" + dest: "{{ jenkins_home }}/bin/" + mode: 0755 + owner: jenkins + group: jenkins + loop: + - session_lock_resource.py + - kill_zinc_nailgun.py + - post_github_pr_comment.py + +- name: update the jenkins proc and open file limits + copy: + src: worker-limits.conf + dest: /etc/security/limits.conf + owner: root + group: root + mode: 0644 + +- name: create per-executor ivy caches for spark builds + file: + path: "{{ jenkins_home }}/sparkivy" + state: directory + mode: 0755 + owner: jenkins + group: jenkins + +- name: create the jenkins git cache directory + file: + path: "{{ jenkins_home }}/gitcaches" + state: directory + mode: 0755 + owner: jenkins + group: jenkins + +- name: create specific local gitcaches for some projects to speed clone time + git: + accept_hostkey: yes + bare: yes + dest: "{{ item.value.dir }}" + repo: "{{ item.value.repo }}" + update: no + become: yes + become_user: jenkins + loop: "{{ lookup('dict', git_caches) }}" + tags: + - skip_ansible_lint + +- name: configure git cache repos + git_config: + name: gc.pruneexpire + value: never + repo: "{{ item.value.dir }}" + scope: local + become: yes + become_user: jenkins + loop: "{{ lookup('dict', git_caches) }}" + tags: + - skip_ansible_lint + +- name: perform initial fetch + shell: + "cd {{ item.value.dir }} 
&& /usr/bin/git fetch --all" + become: yes + become_user: jenkins + loop: "{{ lookup('dict', git_caches) }}" + changed_when: False + tags: + - skip_ansible_lint + +- name: set up cron job as jenkins to update gitcaches + cron: + name: "{{ item.name }}" + minute: "*/5" + job: "{{ item.cmd }}" + user: jenkins + loop: + - { + name: "Spark reference update", + cmd: "cd /home/jenkins/gitcaches/spark.reference && git remote update >/dev/null 2>&1" + } + diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/tasks/main.yml b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/main.yml new file mode 100644 index 0000000000..a673bdd626 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/tasks/main.yml @@ -0,0 +1,22 @@ +--- +# set up the jenkins user first, before anything is installed and set up +- name: set up the jenkins environment + include: jenkins_userspace.yml + +- name: install system packages for software builds + include: install_build_packages.yml + +- name: install docker + include: install_docker.yml + +- name: install minikube + include: install_minikube.yml + +- name: install anaconda and associated packages + include: install_anaconda.yml + +- name: install spark build packages + include: install_spark_build_packages.yml + +- name: clean up apt + include: cleanup.yml diff --git a/dev/ansible-for-test-node/roles/jenkins-worker/vars/main.yml b/dev/ansible-for-test-node/roles/jenkins-worker/vars/main.yml new file mode 100644 index 0000000000..7aa3d25066 --- /dev/null +++ b/dev/ansible-for-test-node/roles/jenkins-worker/vars/main.yml @@ -0,0 +1,9 @@ +--- +minikube_url: '{{ minikube_mirror }}/v{{ minikube_version }}/{{ minikube_target }}' + +kubectl_url: '{{ kubectl_mirror }}/{{ kubectl_version }}/bin/linux/amd64/{{ kubectl_target }}' + +git_caches: + spark: + dir: '{{ jenkins_home }}/gitcaches/spark.reference' + repo: 'https://github.com/apache/spark.git' diff --git a/dev/tox.ini b/dev/tox.ini index a0f04ef0cb..e1a4cf5ce8 100644 --- 
a/dev/tox.ini +++ b/dev/tox.ini @@ -16,11 +16,11 @@ [pycodestyle] ignore=E203,E226,E241,E305,E402,E722,E731,E741,W503,W504 max-line-length=100 -exclude=*/target/*,python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* +exclude=*/target/*,python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,dev/ansible-for-test-node/* [flake8] select = E901,E999,F821,F822,F823,F401,F405,B006 # Ignore F821 for plot documents in pandas API on Spark. ignore = F821 -exclude = python/docs/build/html/*,*/target/*,python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi +exclude = python/docs/build/html/*,*/target/*,python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi,dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/*.py max-line-length = 100