Compare commits

...

10 Commits

Author SHA1 Message Date
Michael Brachmann 328a0b1447 python cell execution sandboxing docker image fixes for spark image hadoop version mismatch 2020-04-10 09:45:37 -04:00
Michael Brachmann 10350f6c3f move to openjdk, vizier-auth images 2020-02-24 14:49:33 -05:00
Michael Brachmann 71749ff516 update build files 2019-11-27 14:46:22 -05:00
Michael Brachmann 5794a79547 bug fixes and updates 2019-09-18 11:11:50 -04:00
Michael Brachmann af02ad4040 all images to master branch of vizier/mimir repos 2019-09-18 09:07:37 -04:00
Michael Brachmann 106564f2d3 proxy acme-client -> certbot 2019-09-18 09:04:03 -04:00
Michael Brachmann fdd52375d0 +x to build script 2019-09-05 15:08:24 -04:00
Michael Brachmann 92612ca58e missing quote 2019-09-05 13:02:13 -04:00
Michael Brachmann e2ceb6e7e0 ipv6 loopback bug for local k8s registry 2019-09-05 12:56:03 -04:00
Michael Brachmann 25d975e7b0 line continuation bug 2019-09-05 10:37:27 -04:00
35 changed files with 1569 additions and 87 deletions

View File

@ -0,0 +1,78 @@
FROM alpine:3.11
# Metadata
LABEL base.image="alpine:3.11"
LABEL version="0.4"
LABEL software="openjdk8"
LABEL software.version="0.2.20200202"
LABEL description="openjdk8 base image for vizier docker images"
LABEL website="http://vizierdb.info"
# UTF-8 locale for the glibc compatibility layer installed below
ENV LANG='en_US.UTF-8' LANGUAGE='en_US:en' LC_ALL='en_US.UTF-8'
# Install sgerrand's glibc compatibility packages (AdoptOpenJDK binaries are
# linked against glibc, which musl-based Alpine lacks) plus the gcc runtime
# libraries. Every downloaded artifact is verified against a pinned sha256.
RUN apk add --no-cache --virtual .build-deps zlib-dev curl binutils \
    && GLIBC_VER="2.29-r0" \
    && ALPINE_GLIBC_REPO="https://github.com/sgerrand/alpine-pkg-glibc/releases/download" \
    && GCC_LIBS_URL="https://archive.archlinux.org/packages/g/gcc-libs/gcc-libs-9.1.0-2-x86_64.pkg.tar.xz" \
    && GCC_LIBS_SHA256="91dba90f3c20d32fcf7f1dbe91523653018aa0b8d2230b00f822f6722804cf08" \
    && ZLIB_URL="https://archive.archlinux.org/packages/z/zlib/zlib-1%3A1.2.11-3-x86_64.pkg.tar.xz" \
    && ZLIB_SHA256="17aede0b9f8baa789c5aa3f358fbf8c68a5f1228c5e6cba1a5dd34102ef4d4e5" \
    # -k tolerates the signing host's TLS setup; integrity is enforced by the
    # sha256 check two lines below, not by the transport
    && curl -kLfsS https://alpine-pkgs.sgerrand.com/sgerrand.rsa.pub -o /etc/apk/keys/sgerrand.rsa.pub \
    && SGERRAND_RSA_SHA256="823b54589c93b02497f1ba4dc622eaef9c813e6b0f0ebbb2f771e32adf9f4ef2" \
    && echo "${SGERRAND_RSA_SHA256} */etc/apk/keys/sgerrand.rsa.pub" | sha256sum -c - \
    && curl -LfsS ${ALPINE_GLIBC_REPO}/${GLIBC_VER}/glibc-${GLIBC_VER}.apk > /tmp/glibc-${GLIBC_VER}.apk \
    && apk add --no-cache /tmp/glibc-${GLIBC_VER}.apk \
    && curl -LfsS ${ALPINE_GLIBC_REPO}/${GLIBC_VER}/glibc-bin-${GLIBC_VER}.apk > /tmp/glibc-bin-${GLIBC_VER}.apk \
    && apk add --no-cache /tmp/glibc-bin-${GLIBC_VER}.apk \
    # was `curl -Ls`: -fS added so a failed download aborts the build instead
    # of feeding an HTML error page to apk
    && curl -LfsS ${ALPINE_GLIBC_REPO}/${GLIBC_VER}/glibc-i18n-${GLIBC_VER}.apk > /tmp/glibc-i18n-${GLIBC_VER}.apk \
    && apk add --no-cache /tmp/glibc-i18n-${GLIBC_VER}.apk \
    # localedef may warn on the POSIX input; locale generation is best-effort
    && /usr/glibc-compat/bin/localedef --force --inputfile POSIX --charmap UTF-8 "$LANG" || true \
    && echo "export LANG=$LANG" > /etc/profile.d/locale.sh \
    && curl -LfsS ${GCC_LIBS_URL} -o /tmp/gcc-libs.tar.xz \
    && echo "${GCC_LIBS_SHA256} */tmp/gcc-libs.tar.xz" | sha256sum -c - \
    && mkdir /tmp/gcc \
    && tar -xf /tmp/gcc-libs.tar.xz -C /tmp/gcc \
    && mv /tmp/gcc/usr/lib/libgcc* /tmp/gcc/usr/lib/libstdc++* /usr/glibc-compat/lib \
    && strip /usr/glibc-compat/lib/libgcc_s.so.* /usr/glibc-compat/lib/libstdc++.so* \
    # zlib install is currently disabled; the mkdir that previously remained
    # active here is commented out with the rest of the sequence
    #&& curl -LfsS ${ZLIB_URL} -o /tmp/libz.tar.xz \
    #&& echo "${ZLIB_SHA256} */tmp/libz.tar.xz" | sha256sum -c - \
    #&& mkdir /tmp/libz \
    #&& tar -xf /tmp/libz.tar.xz -C /tmp/libz \
    #&& mv /tmp/libz/usr/lib/libz.so* /usr/glibc-compat/lib \
    # drop build-only packages and temp files in the same layer
    && apk del --purge .build-deps glibc-i18n \
    && rm -rf /tmp/*.apk /tmp/gcc /tmp/gcc-libs.tar.xz /tmp/libz /tmp/libz.tar.xz /var/cache/apk/*
# key=value form (the legacy space-separated ENV syntax is deprecated)
ENV JAVA_VERSION=jdk8u242-b08
# Fetch the AdoptOpenJDK 8 (HotSpot) tarball for the build architecture and
# verify it against the pinned per-arch sha256 before unpacking.
RUN set -eux; \
    apk add --no-cache --virtual .fetch-deps curl; \
    ARCH="$(apk --print-arch)"; \
    case "${ARCH}" in \
        ppc64el|ppc64le) \
            ESUM='72f42c2014cb29a2fcd71b1a678af3ed148358a8f9b7d741a5da88cb178d9c53'; \
            BINARY_URL='https://github.com/AdoptOpenJDK/openjdk8-binaries/releases/download/jdk8u242-b08/OpenJDK8U-jdk_ppc64le_linux_hotspot_8u242b08.tar.gz'; \
            ;; \
        s390x) \
            ESUM='98f7217dc4acc6bc8b474a2009922b51767d84c045f95a16ee0b9216467ba2de'; \
            BINARY_URL='https://github.com/AdoptOpenJDK/openjdk8-binaries/releases/download/jdk8u242-b08/OpenJDK8U-jdk_s390x_linux_hotspot_8u242b08.tar.gz'; \
            ;; \
        amd64|x86_64) \
            ESUM='f39b523c724d0e0047d238eb2bb17a9565a60574cf651206c867ee5fc000ab43'; \
            BINARY_URL='https://github.com/AdoptOpenJDK/openjdk8-binaries/releases/download/jdk8u242-b08/OpenJDK8U-jdk_x64_linux_hotspot_8u242b08.tar.gz'; \
            ;; \
        *) \
            echo "Unsupported arch: ${ARCH}"; \
            exit 1; \
            ;; \
    esac; \
    curl -LfsSo /tmp/openjdk.tar.gz ${BINARY_URL}; \
    echo "${ESUM} */tmp/openjdk.tar.gz" | sha256sum -c -; \
    mkdir -p /opt/java/openjdk; \
    cd /opt/java/openjdk; \
    tar -xf /tmp/openjdk.tar.gz --strip-components=1; \
    apk del --purge .fetch-deps; \
    rm -rf /var/cache/apk/*; \
    rm -rf /tmp/openjdk.tar.gz;
ENV JAVA_HOME=/opt/java/openjdk \
    PATH="/opt/java/openjdk/bin:$PATH"

View File

@ -1,10 +1,12 @@
FROM docker.mimirdb.info/alpine_oraclejdk8
#vizier web-api-async
#FROM docker.mimirdb.info/alpine_oraclejdk8
FROM docker.mimirdb.info/alpine_openjdk8
# Metadata
LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8"
LABEL version="0.3"
LABEL version="0.4"
LABEL software="Vizier"
LABEL software.version="0.2.20190905
LABEL software.version="0.2.20200202"
LABEL description="an open source, provenance aware, iterative data cleaning tool"
LABEL website="http://vizierdb.info"
LABEL sourcecode="https://github.com/VizierDB"

View File

@ -5,7 +5,7 @@ GIT_PASS=$2
#mimir-async
cd ./mimir
sudo docker build -t docker.mimirdb.info/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=pdf-datasource
sudo docker build -t docker.mimirdb.info/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=master
#api-async
cd ../api-async

9
build-images-auth.sh Normal file
View File

@ -0,0 +1,9 @@
#build the auth-enabled vizier images (vizier-auth, spark, proxy)
#NOTE(review): all three builds below run from ./ — the sibling build scripts
#cd into a per-image directory between builds; verify these images really
#share one build context. TODO confirm
sudo docker build -t docker.mimirdb.info/vizier-auth ./ --build-arg UI_BRANCH=master --build-arg API_BRANCH=master --build-arg MIMIR_BRANCH=scala-2.12
#previous spark image (scala 2.11) kept for reference
#sudo docker build -t docker.mimirdb.info/spark-hadoop --build-arg SPARK_VERSION="spark-2.4.0-bin-without-hadoop" ./
sudo docker build -t docker.mimirdb.info/spark-hadoop-scala-2.12 --build-arg SPARK_VERSION="spark-2.4.4-bin-without-hadoop-scala-2.12" ./
sudo docker build -t docker.mimirdb.info/vizier-proxy --build-arg VIZIER_CONFIG="vizier_auth.conf" ./

12
kubernetes/build-images-microk8s.sh Normal file → Executable file
View File

@ -4,25 +4,25 @@ GIT_PASS=$2
#mimir-async
cd ./mimir
sudo docker build -t localhost:32000/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=pdf-datasource
sudo docker build -t docker.mimirdb.info/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=pdf-datasource
#api-async
cd ../api-async
sudo docker build -t localhost:32000/vizier-api-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg API_BRANCH=master
sudo docker build -t docker.mimirdb.info/vizier-api-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg API_BRANCH=master
#ui-async
cd ../ui-nginx
sudo docker build -t localhost:32000/vizier-ui-async ./ --build-arg UI_BRANCH=master
sudo docker build -t docker.mimirdb.info/vizier-ui-async ./ --build-arg UI_BRANCH=master
#analytics
cd ../analytics-nginx
#sudo docker build -t localhost:32000/vizier-analytics ./
sudo docker build -t docker.mimirdb.info/vizier-analytics ./
#spark-master and spark-worker
cd ../spark-docker
#sudo docker build -t localhost:32000/spark-hadoop ./
#sudo docker build -t docker.mimirdb.info/spark-hadoop ./
#proxy
cd ../vizier-nginx-proxy
sudo docker build -t localhost:32000/vizier-proxy ./
sudo docker build -t docker.mimirdb.info/vizier-proxy ./

View File

@ -1,12 +1,13 @@
#run the containers
#spark-master
#kubectl run namenode --image=localhost:32000/spark-hadoop --replicas=1 --port=22 --port=6066 --port=7077 --port=8020 --port=8080 --port=50070 --env="MASTER=spark://namenode:7077" --env="SPARK_CONF_DIR=/conf" --env="SPARK_PUBLIC_DNS=127.0.0.1" --env="LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" --env="SPARK_EXECUTOR_MEMORY=8g" --env="SPARK_DAEMON_MEMORY=8g" --env="SPARK_DRIVER_MEMORY=8g" --env="SPARK_WORKER_MEMORY=8g" --env="HDFS_CONF_dfs_client_use_datanode_hostname=true" --env="AWS_ECS=false" --command /usr/local/spark-2.2.0-bin-without-hadoop/master.sh
#kubectl run namenode --image=docker.mimirdb.info/spark-hadoop --replicas=1 --port=22 --port=6066 --port=7077 --port=8020 --port=8080 --port=50070 --env="MASTER=spark://namenode:7077" --env="SPARK_CONF_DIR=/conf" --env="SPARK_PUBLIC_DNS=127.0.0.1" --env="LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" --env="SPARK_EXECUTOR_MEMORY=8g" --env="SPARK_DAEMON_MEMORY=8g" --env="SPARK_DRIVER_MEMORY=8g" --env="SPARK_WORKER_MEMORY=8g" --env="HDFS_CONF_dfs_client_use_datanode_hostname=true" --env="AWS_ECS=false" --command /usr/local/spark-2.2.0-bin-without-hadoop/master.sh
kind: PersistentVolume
apiVersion: v1
metadata:
name: nn-pv-1
labels:
type: namenode
namespace: vizier
spec:
capacity:
storage: 5Gi
@ -20,6 +21,7 @@ kind: PersistentVolume
apiVersion: v1
metadata:
name: dn-pv-1
namespace: vizier
spec:
capacity:
storage: 3Gi
@ -33,6 +35,7 @@ kind: PersistentVolume
apiVersion: v1
metadata:
name: dn-pv-2
namespace: vizier
spec:
capacity:
storage: 1Gi
@ -46,6 +49,7 @@ kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: hdfs-name
namespace: vizier
spec:
selector:
matchLabels:
@ -61,6 +65,7 @@ kind: Deployment
apiVersion: extensions/v1beta1
metadata:
name: namenode
namespace: vizier
spec:
replicas: 1
selector:
@ -70,6 +75,7 @@ spec:
metadata:
labels:
component: namenode
namespace: vizier
spec:
volumes:
- name: hdfs-name
@ -132,7 +138,7 @@ metadata:
app: namenode
hasuraService: custom
name: namenode
namespace: default
namespace: vizier
spec:
ports:
- name: ssh
@ -164,7 +170,7 @@ spec:
type: ClusterIP
---
#spark-worker
#kubectl run $HOSTNAME --image=localhost:32000/spark-hadoop --replicas=2 --port=$WORKER_PORT --port=$DATANODE_PORT --env="SPARK_CONF_DIR=/conf" --env="SPARK_PUBLIC_DNS=127.0.0.1" --env="SPARK_WORKER_CORES=4" --env="SPARK_WORKER_PORT=$WORKER_PORT" --env="SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" --env="LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" --env="HDFS_DATA_HOST=$HOSTNAME" --env="HDFS_HOST=spark-master" --env="HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" --env="SPARK_EXECUTOR_MEMORY=8g" --env="SPARK_DAEMON_MEMORY=8g" --env="SPARK_DRIVER_MEMORY=8g" --env="SPARK_WORKER_MEMORY=8g" --env="HDFS_CONF_dfs_client_use_datanode_hostname=true" --env="AWS_ECS=false" --command /usr/local/spark-2.2.0-bin-without-hadoop/worker.sh
#kubectl run $HOSTNAME --image=docker.mimirdb.info/spark-hadoop --replicas=2 --port=$WORKER_PORT --port=$DATANODE_PORT --env="SPARK_CONF_DIR=/conf" --env="SPARK_PUBLIC_DNS=127.0.0.1" --env="SPARK_WORKER_CORES=4" --env="SPARK_WORKER_PORT=$WORKER_PORT" --env="SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" --env="LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" --env="HDFS_DATA_HOST=$HOSTNAME" --env="HDFS_HOST=spark-master" --env="HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" --env="SPARK_EXECUTOR_MEMORY=8g" --env="SPARK_DAEMON_MEMORY=8g" --env="SPARK_DRIVER_MEMORY=8g" --env="SPARK_WORKER_MEMORY=8g" --env="HDFS_CONF_dfs_client_use_datanode_hostname=true" --env="AWS_ECS=false" --command /usr/local/spark-2.2.0-bin-without-hadoop/worker.sh
kind: Service
apiVersion: v1
metadata:
@ -173,7 +179,7 @@ metadata:
app: datanode
hasuraService: custom
name: datanode
namespace: default
namespace: vizier
spec:
ports:
- name: sparkblock
@ -221,7 +227,7 @@ metadata:
app: datanode
hasuraService: custom
name: datanode
namespace: default
namespace: vizier
spec:
serviceName: "datanode"
replicas: 1
@ -230,6 +236,7 @@ spec:
creationTimestamp: null
labels:
app: datanode
namespace: vizier
spec:
containers:
- name: datanode
@ -287,6 +294,7 @@ spec:
volumeClaimTemplates:
- metadata:
name: hdfs-data
namespace: vizier
spec:
storageClassName: persist
accessModes:
@ -296,13 +304,14 @@ spec:
storage: 1Gi
---
#mimir
#kubectl run vizier-mimir --image=localhost:32000/vizier-mimir-spark --replicas=1 --port=9001 --port=33388 --expose --env="RESTORE_BACKUP=false" --env="PULL_MIMIR=false" --env="AWS_ACCESS_KEY_ID=AKIAJ7MLFSPYLYG47ARQ" --env="AWS_SECRET_ACCESS_KEY=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" --env="S3_BUCKET_NAME=vizier-data-test"
#kubectl run vizier-mimir --image=docker.mimirdb.info/vizier-mimir-spark --replicas=1 --port=9001 --port=33388 --expose --env="RESTORE_BACKUP=false" --env="PULL_MIMIR=false" --env="AWS_ACCESS_KEY_ID=AKIAJ7MLFSPYLYG47ARQ" --env="AWS_SECRET_ACCESS_KEY=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" --env="S3_BUCKET_NAME=vizier-data-test"
kind: PersistentVolume
apiVersion: v1
metadata:
name: vizier-data-volume
labels:
type: local
namespace: vizier
spec:
storageClassName: persist
capacity:
@ -316,6 +325,7 @@ kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: vizier-data-pv-claim
namespace: vizier
spec:
storageClassName: persist
accessModes:
@ -328,6 +338,7 @@ apiVersion: v1
kind: Secret
metadata:
name: s3-credentials
namespace: vizier
data:
access-key-id: QUtJQUo3TUxGU1BZTFlHNDdBUlEK
access-key-secret: ZEw3OXFKR3lMa1VGeVl2bW1nM2hFbjhiSWtsU2FUa3JmRzBJWHVraQo=
@ -336,6 +347,7 @@ kind: Deployment
apiVersion: extensions/v1beta1
metadata:
name: vizier-mimir
namespace: vizier
spec:
replicas: 1
selector:
@ -345,6 +357,7 @@ spec:
metadata:
labels:
component: vizier-mimir
namespace: vizier
spec:
volumes:
- name: vizier-data-pv-storage
@ -352,7 +365,7 @@ spec:
claimName: vizier-data-pv-claim
containers:
- name: vizier-mimir
image: localhost:32000/vizier-mimir-async-spark
image: docker.mimirdb.info/vizier-mimir-async-spark
ports:
- containerPort: 9001
- containerPort: 4041
@ -399,6 +412,7 @@ kind: Service
apiVersion: v1
metadata:
name: vizier-mimir
namespace: vizier
spec:
ports:
- name: mimirapi
@ -420,11 +434,12 @@ spec:
component: vizier-mimir
---
#api
#kubectl run vizier-api --image=localhost:32000/vizier-api-spark --replicas=1 --port=9001 --port=80 --port=443 --expose --env="APP_PATH=" --env="API_SERVER=localhost" --env="API_LOCAL_PORT=443" --env="API_PORT=443" --env="API_SCHEME=http" --env="AWS_ACCESS_KEY_ID=AKIAJ7MLFSPYLYG47ARQ" --env="AWS_SECRET_ACCESS_KEY=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" --env="S3_BUCKET_NAME=vizier-data-test"
#kubectl run vizier-api --image=docker.mimirdb.info/vizier-api-spark --replicas=1 --port=9001 --port=80 --port=443 --expose --env="APP_PATH=" --env="API_SERVER=localhost" --env="API_LOCAL_PORT=443" --env="API_PORT=443" --env="API_SCHEME=http" --env="AWS_ACCESS_KEY_ID=AKIAJ7MLFSPYLYG47ARQ" --env="AWS_SECRET_ACCESS_KEY=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" --env="S3_BUCKET_NAME=vizier-data-test"
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
name: vizier-api
namespace: vizier
spec:
replicas: 1
selector:
@ -441,7 +456,7 @@ spec:
claimName: vizier-data-pv-claim
containers:
- name: vizier-api
image: localhost:32000/vizier-api-async-spark
image: docker.mimirdb.info/vizier-api-async-spark
ports:
- containerPort: 80
- containerPort: 9001
@ -485,6 +500,7 @@ kind: Service
apiVersion: v1
metadata:
name: vizier-api
namespace: vizier
spec:
ports:
- name: api
@ -497,11 +513,12 @@ spec:
component: vizier-api
---
#ui
#kubectl run vizier-ui --image=localhost:32000/vizier-ui --replicas=1 --port=9001 --port=80 --port=443 --expose
#kubectl run vizier-ui --image=docker.mimirdb.info/vizier-ui --replicas=1 --port=9001 --port=80 --port=443 --expose
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
name: vizier-ui
namespace: vizier
spec:
replicas: 1
selector:
@ -511,11 +528,12 @@ spec:
metadata:
labels:
component: vizier-ui
namespace: vizier
spec:
hostname: vizier-ui
containers:
- name: vizier-ui
image: localhost:32000/vizier-ui-async
image: docker.mimirdb.info/vizier-ui-async
ports:
- containerPort: 80
- containerPort: 443
@ -537,6 +555,7 @@ kind: Service
apiVersion: v1
metadata:
name: vizier-ui
namespace: vizier
spec:
ports:
- name: ui
@ -552,11 +571,12 @@ spec:
component: vizier-ui
---
#analytics
#kubectl run vizier-analytics --image=localhost:32000/vizier-analytics --replicas=1 --port=9001 --port=80 --expose
#kubectl run vizier-analytics --image=docker.mimirdb.info/vizier-analytics --replicas=1 --port=9001 --port=80 --expose
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
name: vizier-analytics
namespace: vizier
spec:
replicas: 1
selector:
@ -566,10 +586,11 @@ spec:
metadata:
labels:
component: vizier-analytics
namespace: vizier
spec:
containers:
- name: vizier-analytics
image: localhost:32000/vizier-analytics
image: docker.mimirdb.info/vizier-analytics
ports:
- containerPort: 80
- containerPort: 443
@ -582,6 +603,7 @@ kind: Service
apiVersion: v1
metadata:
name: vizier-analytics
namespace: vizier
spec:
ports:
- name: analytics
@ -602,6 +624,7 @@ kind: Deployment
apiVersion: extensions/v1beta1
metadata:
name: vizier-proxy
namespace: vizier
spec:
replicas: 1
selector:
@ -611,6 +634,7 @@ spec:
metadata:
labels:
component: vizier-proxy
namespace: vizier
spec:
containers:
- name: vizier-proxy
@ -633,6 +657,7 @@ kind: Service
apiVersion: v1
metadata:
name: vizier-proxy
namespace: vizier
spec:
ports:
- name: ssh

View File

@ -1,10 +1,12 @@
FROM docker.mimirdb.info/alpine_oraclejdk8_nginx
#vizier mimir
#FROM docker.mimirdb.info/alpine_oraclejdk8_nginx
FROM docker.mimirdb.info/alpine_openjdk8
# Metadata
LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8_nginx"
LABEL version="0.3"
LABEL version="0.4"
LABEL software="Vizier"
LABEL software.version="0.2.20190905"
LABEL software.version="0.2.20200202"
LABEL description="an open source, provenance aware, iterative data cleaning tool"
LABEL website="http://vizierdb.info"
LABEL sourcecode="https://github.com/VizierDB"
@ -27,6 +29,20 @@ ENV DATA_STAGING_TYPE=s3
EXPOSE 33388
EXPOSE 22
ENV IAM_ROLE=none
ENV AWS_ACCESS_KEY_ID=value
ENV AWS_SECRET_ACCESS_KEY=value
ENV USE_S3_VOLUME=true
ENV S3_ENDPOINT=
ENV S3A_ENDPOINT="https://s3.vizier.app/"
ENV S3_BUCKET_ACL=private
ENV S3_BUCKET_NAME=none
ENV MOUNT_POINT=/usr/local/source/web-api/vizier/.vizierdb
ENV MIMIR_HOST="vizier-mimir.local"
ENV MIMIR_DATA_DIR="/usr/local/source/web-api/vizier/.vizierdb/mimir"
ARG S3FS_VERSION=v1.82
RUN echo "@testing http://dl-4.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories \
&& apk add --update \
build-base alpine-sdk \
@ -36,27 +52,16 @@ RUN echo "@testing http://dl-4.alpinelinux.org/alpine/edge/testing" >> /etc/apk/
curl-dev libxml2-dev \
ca-certificates \
bash \
R \
git \
curl \
sed \
openssh \
supervisor \
&& rm /var/cache/apk/* \
&& mkdir -p /usr/local/source/web-api/.vizierdb
&& mkdir -p $MOUNT_POINT
ENV IAM_ROLE=none
ENV AWS_ACCESS_KEY_ID=value
ENV AWS_SECRET_ACCESS_KEY=value
ENV USE_S3_VOLUME=true
ENV S3_ENDPOINT=
ENV S3A_ENDPOINT="https://s3.vizier.app/"
ENV S3_BUCKET_ACL=private
ENV S3_BUCKET_NAME=none
ENV MOUNT_POINT=/usr/local/source/web-api/.vizierdb
ENV MIMIR_HOST="vizier-mimir.local"
ENV MIMIR_DATA_DIR="/usr/local/source/web-api/.vizierdb/mimir"
ARG S3FS_VERSION=v1.82
RUN git clone https://github.com/s3fs-fuse/s3fs-fuse.git && \
cd s3fs-fuse \

View File

@ -31,8 +31,8 @@ if [ "$USE_S3_VOLUME" == "true" ]; then
fi
#mk mimir data dir if it doesn't exist
mkdir -p /usr/local/source/web-api/.vizierdb/mimir
mkdir -p /usr/local/source/web-api/.vizierdb/logs
mkdir -p $MIMIR_DATA_DIR
mkdir -p "$MOUNT_POINT/logs"
#update mimir from repo
if [ $PULL_MIMIR == "true" ]

View File

@ -0,0 +1,54 @@
FROM docker.mimirdb.info/alpine_openjdk8
LABEL software="vizier-python-executor"
LABEL software.version="0.2.20200202"
LABEL version="0.7"
ARG VIZIERSERVER_PYTHON_EXECUTOR_PORT=5005
ARG API_BRANCH=master
ARG CONDA_VERSION="latest"
ENV PYEXECUTOR_DEBUG=False
ENV MIMIR_URL=http://vizier-auth:8089/api/v2/
ENV VIZIERSERVER_PYTHON_EXECUTOR_PORT=$VIZIERSERVER_PYTHON_EXECUTOR_PORT
ENV WSGI_LOG_LEVEL=debug
RUN apk add --update --no-cache curl bash git supervisor ca-certificates
#RUN curl -OsL "https://repo.continuum.io/archive/Anaconda3-$CONDA_VERSION-Linux-x86_64.sh" \
# && /bin/bash Anaconda3-$CONDA_VERSION-Linux-x86_64.sh -b -p /opt/conda \
# && rm Anaconda3-$CONDA_VERSION-Linux-x86_64.sh \
# && echo 'export PATH=/opt/conda/bin:$PATH' >> /etc/profile.d/conda.sh
RUN curl -OsL "https://repo.anaconda.com/miniconda/Miniconda3-$CONDA_VERSION-Linux-x86_64.sh" \
&& /bin/bash Miniconda3-$CONDA_VERSION-Linux-x86_64.sh -b -p /opt/conda \
&& rm Miniconda3-$CONDA_VERSION-Linux-x86_64.sh \
&& echo 'export PATH=/opt/conda/bin:$PATH' >> /etc/profile.d/conda.sh
#setup web-api
RUN mkdir -p /usr/local/source/ \
&& cd /usr/local/source/ \
&& /opt/conda/bin/conda create --name vizierasync python=3.8 pip \
&& source /opt/conda/bin/activate vizierasync \
&& git clone https://github.com/VizierDB/web-api-async.git web-api\
&& cd /usr/local/source/web-api \
&& git checkout -b local_$API_BRANCH origin/$API_BRANCH \
&& pip install -r requirements.txt \
&& pip install gunicorn \
&& pip install futures \
&& pip install matplotlib \
&& pip install bokeh \
&& pip install geopandas \
&& pip install pandas \
&& pip install numpy \
&& pip install shapely \
&& pip install -e .
COPY main.py /usr/local/source/web-api/vizier/main.py
COPY run_executor.sh /usr/local/source/run_executor.sh
COPY supervisord.conf /etc/supervisord.conf
RUN chmod +x /usr/local/source/run_executor.sh
WORKDIR /usr/local/source
EXPOSE $VIZIERSERVER_PYTHON_EXECUTOR_PORT 9001
ENTRYPOINT /usr/bin/supervisord

90
python-executor/main.py Normal file
View File

@ -0,0 +1,90 @@
import sys
import os
from flask import Flask
from flask import request
from vizier.engine.packages.stream import OutputStream
from vizier.engine.packages.pycell.client.base import VizierDBClient
from vizier.engine.packages.pycell.plugins import python_cell_preload
from vizier.datastore.mimir.store import MimirDatastore
from vizier.datastore.fs.base import FileSystemDatastore
from vizier.engine.packages.pycell.processor import VARS_DBCLIENT
from multiprocessing import Process, Pipe
app = Flask(__name__)
application = app
VIZIERSERVER_PYTHON_EXECUTOR_PORT = os.environ.get('VIZIERSERVER_PYTHON_EXECUTOR_PORT', 5005)
def set2list(obj):
    """Return ``obj`` unchanged unless it is a set, in which case return a
    list copy (sets are not JSON-serializable)."""
    return list(obj) if isinstance(obj, set) else obj
def execute_python(conn, obj):
    """Run a vizier python cell in this (child) process and send the result
    dict back over ``conn`` (a multiprocessing Pipe endpoint).

    Keys read from ``obj``:
      'datastore'   - name of a datastore class resolvable in this module's
                      globals (e.g. 'MimirDatastore', 'FileSystemDatastore')
      'basepath'    - constructor argument for that datastore class
      'datasets', 'source', 'dataobjects' - forwarded to VizierDBClient;
                      'source' is also the cell code passed to exec()
    """
    # Remember the real output streams so they can be restored afterwards.
    out = sys.stdout
    err = sys.stderr
    stream = list()
    # Resolve the datastore class by name; obj['datastore'] must match one of
    # the classes imported at module level.
    dsklass = globals()[obj['datastore']]
    datastore = dsklass(obj['basepath'])
    client = VizierDBClient(
        datastore=datastore,
        datasets=obj['datasets'],
        source=obj['source'],
        dataobjects=obj['dataobjects']
    )
    variables = {VARS_DBCLIENT: client}
    # Redirect stdout/stderr so everything the cell prints is captured as
    # (tag, text) pairs in `stream`.
    sys.stdout = OutputStream(tag='out', stream=stream)
    sys.stderr = OutputStream(tag='err', stream=stream)
    # Keep track of exception that is thrown by the code
    exception = None
    python_cell_preload(variables)
    # Run the Python code
    try:
        exec(obj['source'], variables)
    except Exception as ex:
        exception = ex
    finally:
        # Make sure to reverse redirection of output streams
        sys.stdout = out
        sys.stderr = err
    # Set module outputs
    print(str(exception))  # prints "None" on success — debug output
    stdout = []
    stderr = []
    is_success = (exception is None)
    for tag, text in stream:
        text = ''.join(text).strip()
        if tag == 'out':
            stdout.append(text)
        else:
            # Any stderr output from the cell marks the execution as failed.
            stderr.append(text)
            is_success = False
    if not is_success:
        stderr.append(str(exception))
    # Ship the result to the parent; provenance sets become lists so the
    # payload is JSON-serializable.
    conn.send({'success':is_success,
        'stdout':stdout,
        'stderr':stderr,
        'provenance':
            {'read':set2list(client.read),
            'write':set2list(client.write),
            'delete':set2list(client.delete)},
        'datasets':client.datasets,
        'dataobjects':client.dataobjects})
    conn.close()
@app.route("/", methods=['POST'])
def home():
    """Accept a JSON cell-execution request, run it in a separate process via
    execute_python, and return the result dict produced there."""
    if not request.json:
        raise ValueError("not json")
    payload = request.json
    print(str(payload))
    # Run the cell in a child process so misbehaving user code cannot take
    # down the web server; the result comes back over a Pipe.
    recv_end, send_end = Pipe()
    worker = Process(target=execute_python, args=(send_end, payload))
    worker.start()
    result = recv_end.recv()
    worker.join()
    print(result)
    return result
if __name__ == "__main__":
    # VIZIERSERVER_PYTHON_EXECUTOR_PORT is a *string* when it comes from the
    # environment (the 5005 default is an int); cast so Flask/werkzeug always
    # receives an integer port.
    app.run(debug=True, port=int(VIZIERSERVER_PYTHON_EXECUTOR_PORT))

View File

@ -0,0 +1,12 @@
#!/bin/bash
# Launch the python-cell executor inside the "vizierasync" conda environment,
# either directly (debug) or under gunicorn (production).
echo 'activating virtualenv...'
cd /usr/local/source/web-api/
source /opt/conda/bin/activate vizierasync
cd vizier
echo 'running wsgi server...'
# Quote the expansion: the unquoted original fails with "unary operator
# expected" whenever PYEXECUTOR_DEBUG is unset or empty.
if [ "$PYEXECUTOR_DEBUG" == "True" ]
then
    python3 main.py
else
    gunicorn -w 1 --access-logfile - --error-logfile - --log-level "$WSGI_LOG_LEVEL" --threads 8 --bind "0.0.0.0:$VIZIERSERVER_PYTHON_EXECUTOR_PORT" main
fi

View File

@ -0,0 +1,22 @@
; Supervisord runs in the foreground as the container's entrypoint process.
[supervisord]
nodaemon=true
; The python cell executor is the single supervised program; its output is
; forwarded to the container's stdout/stderr so `docker logs` shows it.
[program:pyexec]
command=/usr/local/source/run_executor.sh
stdout_events_enabled=true
stderr_events_enabled=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
; NOTE(review): the HTTP control interface binds all interfaces with a
; plaintext password committed to the repo (and the run script maps it to the
; host as 9003:9001) — confirm this exposure is intended; prefer 127.0.0.1
; and credentials injected at runtime.
[inet_http_server]
port=0.0.0.0:9001
username=root
password=odinlab
; supervisorctl talks to the HTTP server above over loopback.
[supervisorctl]
serverurl=http://127.0.0.1:9001
username=root
password=odinlab

View File

@ -42,3 +42,11 @@ sudo docker rm vizier-ui
#proxy
sudo docker stop vizier-proxy
sudo docker rm vizier-proxy
#auth
sudo docker stop vizier-auth
sudo docker rm vizier-auth
#auth
sudo docker stop python-executor
sudo docker rm python-executor

1
remove-volumes.sh Executable file
View File

@ -0,0 +1 @@
sudo docker volume rm hdfs-data hdfs-data-0 hdfs-data-1 spark-data spark-data-0 spark-data-1 spark-scratch-0 spark-scratch-1 vizier-data

View File

@ -2,8 +2,10 @@
#run the containers
#spark-master
SPARK_VERSION="spark-2.4.0-bin-without-hadoop"
SPARK_CONTAINER="spark-hadoop"
MASTER_HOSTNAME="namenode"
MASTER_CONTAINER=`sudo docker run -d -v hdfs-data:/hadoop/dfs/name -v spark-data:/tmp --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 -p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 --expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/master.sh`
MASTER_CONTAINER=`sudo docker run -d -v hdfs-data:/hadoop/dfs/name -v spark-data:/tmp --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 -p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 --expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/master.sh`
echo "master container id: $MASTER_CONTAINER"
#wait for master to be ready
sleep 5
@ -21,7 +23,7 @@ while [ $i -lt $SPARK_WORKERS_COUNT ]
do
WORKER_WEBUI_PORT=$[$WORKER_WEBUI_PORT+$i]
DATANODE_HOSTNAME="datanode$i"
sudo docker run -d -v hdfs-data-$i:/hadoop/dfs/data -v spark-scratch-$i:/usr/local/spark-2.4.0-bin-without-hadoop/work -v spark-data-$i:/tmp -h $DATANODE_HOSTNAME --name $DATANODE_HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$DATANODE_HOSTNAME" -e "HDFS_HOST=$MASTER_HOSTNAME" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
sudo docker run -d -v hdfs-data-$i:/hadoop/dfs/data -v spark-scratch-$i:/usr/local/$SPARK_VERSION/work -v spark-data-$i:/tmp -h $DATANODE_HOSTNAME --name $DATANODE_HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$DATANODE_HOSTNAME" -e "HDFS_HOST=$MASTER_HOSTNAME" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/worker.sh
i=$[$i+1]
done

44
run-containers-auth.sh Executable file
View File

@ -0,0 +1,44 @@
#!/bin/sh
#run the containers
#spark-master
SPARK_VERSION="spark-2.4.4-bin-without-hadoop-scala-2.12"
SPARK_CONTAINER="spark-hadoop-scala-2.12"
MASTER_HOSTNAME="namenode"
MASTER_CONTAINER=`sudo docker run -d -v hdfs-data:/hadoop/dfs/name -v spark-data:/tmp --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 -p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 --expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/master.sh`
echo "master container id: $MASTER_CONTAINER"
#wait for master to be ready
sleep 5
#spark-workers
START_PORT=7001
END_PORT=7006
WORKER_PORT=8882
WORKER_WEBUI_PORT=8082
DATANODE_PORT=50010
#for additional spark workers increment the count below
SPARK_WORKERS_COUNT=2
i="0"
while [ $i -lt $SPARK_WORKERS_COUNT ]
do
    #POSIX arithmetic: the original $[...] form is a bash-only relic and fails
    #under dash/ash even though the shebang is /bin/sh
    WORKER_WEBUI_PORT=$((WORKER_WEBUI_PORT+i))
    DATANODE_HOSTNAME="datanode$i"
    sudo docker run -d -v hdfs-data-$i:/hadoop/dfs/data -v spark-scratch-$i:/usr/local/$SPARK_VERSION/work -v spark-data-$i:/tmp -h $DATANODE_HOSTNAME --name $DATANODE_HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$DATANODE_HOSTNAME" -e "HDFS_HOST=$MASTER_HOSTNAME" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/worker.sh
    i=$((i+1))
done
VIZIER_DOMAIN="vizier.devel"
#WARNING(security): live-looking AWS credentials and OAuth secrets are
#hardcoded below and committed to source control — rotate them and inject
#them at runtime (env file / secret store) instead
S3_AWS_ACCESS_KEY_ID="AKIAJ7MLFSPYLYG47ARQ"
S3_AWS_SECRET_ACCESS_KEY="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"
S3_BUCKET_NAME="vizier-data-test"
VIZIER_DATA_VOLUME="vizier-data"
#python-executor for sandboxing python cell code execution
sudo docker run -d -h python-executor --name python-executor --network spark-net -p 5005:5005 -p 9003:9001 -v $VIZIER_DATA_VOLUME:/usr/local/source/vizier-api-auth/vizier-data -e MIMIR_URL=http://vizier-auth:8089/api/v2/ docker.mimirdb.info/python-executor
#vizier-auth
sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/vizier-api-auth/vizier-data -p 5000:5000 -p 9002:9001 --expose 9000 --expose 4041 --expose 8089 --network spark-net -h vizier-auth --name vizier-auth -e DATA_STAGING_TYPE="hdfs" -e MIMIR_DATA_DIR="/usr/local/source/web-api/vizier/.vizierdb/mimir" -e REMOTE_SPARK=true -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-auth" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e S3_AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e S3_AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e API_SERVER="demo.$VIZIER_DOMAIN" -e VIZIERSERVER_SERVER_PORT=443 -e VIZIERSERVER_BASE_URL="https://demo.$VIZIER_DOMAIN" -e AUTHSERVER_AUTH_CLIENTS="GenericOAuth20Client" -e VIZIERAUTH_OAUTH_ID=62dbc5e3ce67547a8ed874e5907f1798956f9a4403af6d20b33be2a8e460219a -e VIZIERAUTH_OAUTH_SECRET=b1005dfa188919c0ce56406fef1203c70daae50759973c5c59d826dc41c069b0 docker.mimirdb.info/vizier-auth
#proxy
sudo docker run -d -p 80:80 -p 443:443 -p 9001:9001 -h vizier-proxy --name vizier-proxy --network spark-net -e VIZIER_CONFIG="vizier_auth.conf" -e VIZIER_API_APP_PATH="/vizier-db/api/v1/" -e VIZIER_DOMAIN="$VIZIER_DOMAIN" docker.mimirdb.info/vizier-proxy

View File

@ -0,0 +1,42 @@
#!/bin/sh
# Launch the production Vizier cluster on the local docker host:
#   * one Spark master / HDFS namenode container,
#   * four Spark worker / HDFS datanode containers,
#   * the python-executor sandbox and the vizier-auth service.
# All containers join the pre-existing `spark-net` docker network and are
# restarted automatically by the docker daemon (--restart always).
SPARK_VERSION="spark-2.4.4-bin-without-hadoop-scala-2.12"
SPARK_CONTAINER="spark-hadoop-scala-2.12"
MASTER_HOSTNAME="namenode"
# Master: publishes the Spark/HDFS control ports and records the container
# id so the workers can --link against it.
MASTER_CONTAINER=`sudo docker run --restart always -d -v data-auth:/tmp/data --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 -p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 --expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/master.sh`
echo "master container id: $MASTER_CONTAINER"
# NOTE(review): START_PORT/END_PORT are not referenced below; kept for
# parity with the sibling launch scripts.
START_PORT=7001
END_PORT=7006
WORKER_PORT=8882
DATANODE_PORT=50010
# start_worker HOSTNAME WEBUI_PORT
# Launch one Spark worker / HDFS datanode container.  $1 doubles as the
# container name, hostname and HDFS datanode hostname; $2 is the host port
# mapped onto the worker web UI (container port 8082).  Replaces four
# copy-pasted `docker run` invocations that differed only in these two
# values.
start_worker() {
	sudo docker run --restart always -d -v data-auth:/tmp/data -h "$1" --name "$1" --network spark-net --link $MASTER_CONTAINER -p "$2":8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$2" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$1" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/worker.sh
}
start_worker "datanode" 8082
start_worker "datanode2" 8083
start_worker "datanode3" 8084
start_worker "datanode4" 8085
VIZIER_DOMAIN="vizierdb.info"
# SECURITY(review): AWS credentials are committed to the repository in
# plain text — rotate these keys and inject them via the environment or a
# secrets store instead of hard-coding them here.
S3_AWS_ACCESS_KEY_ID="AKIAJ7MLFSPYLYG47ARQ"
S3_AWS_SECRET_ACCESS_KEY="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"
S3_BUCKET_NAME="vizier-data-ub"
#python-executor for sandboxing python cell code execution
sudo docker run --restart always -d -h python-executor --name python-executor --network spark-net -p 5005:5005 -p 9003:9001 --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-auth,target=/usr/local/source/vizier-api-auth/vizier-data -e MIMIR_URL=http://vizier-auth:8089/api/v2/ docker.mimirdb.info/python-executor
#vizier-auth
# FIX: the original passed -e AWS_ACCESS_KEY_ID / -e AWS_SECRET_ACCESS_KEY
# twice with identical values; the duplicates are removed (docker keeps the
# last occurrence, so behaviour is unchanged).  NOTE(review): the sibling
# test script additionally exports S3_AWS_ACCESS_KEY_ID /
# S3_AWS_SECRET_ACCESS_KEY into the container — confirm whether that pair
# is needed here too.
sudo docker run --restart always -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-auth,target=/usr/local/source/vizier-api-auth/vizier-data -p 5000:5000 -p 9002:9001 --expose 9000 --expose 4041 --expose 8089 --network spark-net -h vizier-auth --name vizier-auth -e DATA_STAGING_TYPE="hdfs" -e MIMIR_DATA_DIR="/usr/local/source/vizier-api-auth/vizier-data" -e REMOTE_SPARK=true -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-auth" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e API_SERVER="demo.$VIZIER_DOMAIN" -e VIZIERSERVER_SERVER_PORT=443 -e VIZIERSERVER_BASE_URL="https://demo.$VIZIER_DOMAIN" -e AUTHSERVER_AUTH_CLIENTS="GenericOAuth20Client" -e VIZIERAUTH_OAUTH_ID=e070fef69a20f246bcbc16ebc49c584dadde4753d88de0dac42eeea1cf2a2e48 -e VIZIERAUTH_OAUTH_SECRET=d5ed682921d6cb73d9a7b190173662403483bb2197f3960c9b0de325af624072 docker.mimirdb.info/vizier-auth
#runBackup --restore --sparkHost namenode --dataStagingType s3 --overwriteJars -X LOG LOGM remoteSpark

View File

@ -1,7 +1,9 @@
#!/bin/sh
SPARK_VERSION="spark-2.4.0-bin-without-hadoop"
SPARK_CONTAINER="spark-hadoop"
MASTER_HOSTNAME="namenode"
MASTER_CONTAINER=`sudo docker run -d -v data:/tmp/data --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 -p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 --expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/master.sh`
MASTER_CONTAINER=`sudo docker run -d -v data:/tmp/data --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 -p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 --expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/master.sh`
echo "master container id: $MASTER_CONTAINER"
START_PORT=7001
@ -10,19 +12,19 @@ WORKER_PORT=8882
WORKER_WEBUI_PORT=8082
HOSTNAME="datanode"
DATANODE_PORT=50010
sudo docker run -d -v data:/tmp/data -h $HOSTNAME --name $HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$HOSTNAME" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
sudo docker run -d -v data:/tmp/data -h $HOSTNAME --name $HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$HOSTNAME" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/worker.sh
WORKER_WEBUI_PORT=8083
HOSTNAME="datanode2"
sudo docker run -d -v data:/tmp/data -h $HOSTNAME --name $HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$HOSTNAME" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
sudo docker run -d -v data:/tmp/data -h $HOSTNAME --name $HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$HOSTNAME" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/worker.sh
WORKER_WEBUI_PORT=8084
HOSTNAME="datanode3"
sudo docker run -d -v data:/tmp/data -h $HOSTNAME --name $HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$HOSTNAME" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
sudo docker run -d -v data:/tmp/data -h $HOSTNAME --name $HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$HOSTNAME" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/worker.sh
WORKER_WEBUI_PORT=8085
HOSTNAME="datanode4"
sudo docker run -d -v data:/tmp/data -h $HOSTNAME --name $HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$HOSTNAME" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
sudo docker run -d -v data:/tmp/data -h $HOSTNAME --name $HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$HOSTNAME" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/worker.sh
VIZIER_DOMAIN="vizierdb.info"

View File

@ -1,11 +1,11 @@
FROM docker.mimirdb.info/alpine_oraclejdk8
FROM docker.mimirdb.info/alpine_openjdk8
# Metadata
LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8"
LABEL version="0.1"
LABEL base.image="docker.mimirdb.info/alpine_openjdk8"
LABEL version="0.4"
LABEL software="Spark"
LABEL software.version="0.1.201801"
LABEL software.version="0.1.202004"
LABEL description="Spark image"
RUN apk add --update curl bash sed perl grep openssh
@ -61,13 +61,14 @@ VOLUME /hadoop/dfs/data
#ARG SPARK_ARCHIVE=http://supergsego.com/apache/spark/spark-2.4.0/spark-2.4.0-bin-hadoop2.7.tgz
#RUN curl -sL $SPARK_ARCHIVE | gunzip | tar -x -C /usr/local/
#or copy it
COPY spark-2.4.0-bin-without-hadoop.tgz /
RUN gunzip -c /spark-2.4.0-bin-without-hadoop.tgz | tar -x -C /usr/local/ && rm /spark-2.4.0-bin-without-hadoop.tgz
COPY hadoop-aws-2.8.2.jar aws-java-sdk-1.11.234.jar aws-java-sdk-core-1.11.234.jar aws-java-sdk-kms-1.11.234.jar \
aws-java-sdk-s3-1.11.234.jar hadoop-aws-2.8.2.jar httpclient-4.5.3.jar joda-time-2.9.9.jar /usr/local/spark-2.4.0-bin-without-hadoop/jars/
ARG SPARK_VERSION="spark-2.4.0-bin-without-hadoop"
ENV SPARK_VERSION=$SPARK_VERSION
ENV SPARK_HOME /usr/local/spark-2.4.0-bin-without-hadoop
#or copy it
COPY $SPARK_VERSION.tgz /
RUN gunzip -c /$SPARK_VERSION.tgz | tar -x -C /usr/local/ && rm /$SPARK_VERSION.tgz
ENV SPARK_HOME /usr/local/$SPARK_VERSION
ENV PATH $PATH:$SPARK_HOME/bin
ENV INSTANCE_TYPE worker

View File

@ -1,3 +1,3 @@
sudo docker start namenode datanode0 datanode1
sleep 20
sudo docker start vizier-mimir vizier-api vizier-ui vizier-proxy
sudo docker start vizier-auth vizier-mimir vizier-api vizier-ui vizier-proxy

View File

@ -1 +1 @@
sudo docker stop vizier-proxy vizier-ui vizier-api vizier-mimir datanode1 datanode0 namenode
sudo docker stop vizier-proxy vizier-ui vizier-api vizier-mimir vizier-auth datanode1 datanode0 namenode

View File

@ -1,6 +1,7 @@
FROM nginx:alpine
#vizier web-ui
FROM nginx:alpine
LABEL software.version="0.2.20190905"
LABEL software.version="0.2.20200202"
EXPOSE 80
EXPOSE 22

247
vizier-auth/Dockerfile Normal file
View File

@ -0,0 +1,247 @@
#vizier-auth
# Image for the vizier-auth service: Mimir (Scala), the async web-api
# (Python) and the web-ui/admin-ui front ends, run together under
# supervisord (see entrypoint.sh / supervisord.conf copied below).
FROM docker.mimirdb.info/alpine_openjdk8
# Metadata
LABEL base.image="docker.mimirdb.info/alpine_openjdk8"
LABEL version="0.5.8"
LABEL software="Vizier Auth"
LABEL software.version="0.2.20200202"
LABEL description="an open source, provenance aware, iterative data cleaning tool"
LABEL website="http://vizierdb.info"
LABEL sourcecode="https://github.com/VizierDB"
LABEL documentation="https://github.com/VizierDB/web-api/wiki"
LABEL tags="CSV,Data Cleaning,Databases,Provenance,Workflow,Machine Learning"
# Branches checked out for each component at build time.
ARG MIMIR_BRANCH=master
ARG API_BRANCH=master
ARG UI_BRANCH=master
ARG VIZIER_DATA_PATH="/usr/local/source/vizier-api-auth/vizier-data/"
# --- Public API endpoint advertised to clients -----------------------------
ENV API_SCHEME=https
ENV API_SERVER=api.vizier.app
ENV API_PORT=443
ENV APP_PATH="/vizier-db/api/v1"
ENV ANALYTICS_URL=""
ENV ANALYTICS_SITE_ID=12a12e629ffb388167c2c3e560bbc8e1
ENV API_BASIC_AUTH=false
# --- Container start-up toggles (read by the init/run scripts) -------------
ENV RUN_SSH=true
ENV PULL_MIMIR=true
ENV RESTORE_BACKUP=true
ENV SPARK_HOST=namenode
ENV NEW_MIMIR_DB_FROM_S3=true
ENV NEW_HIVE_METASTORE_FROM_S3=true
ENV HDFS_CONF_dfs_client_use_datanode_hostname=false
ENV DATA_STAGING_TYPE=s3
EXPOSE 33388
EXPOSE 22
# --- S3 data volume configuration ------------------------------------------
# SECURITY(review): AWS_* defaults here are placeholders, but any real
# values supplied via ENV/`docker run -e` remain visible in image history
# and `docker inspect`; prefer a secrets mechanism.
ENV IAM_ROLE=none
ENV AWS_ACCESS_KEY_ID=value
ENV AWS_SECRET_ACCESS_KEY=value
ENV USE_S3_VOLUME=false
ENV S3_ENDPOINT=
ENV S3A_ENDPOINT="https://s3.vizier.app/"
ENV S3_BUCKET_ACL=private
ENV S3_BUCKET_NAME=none
ENV MOUNT_POINT=$VIZIER_DATA_PATH
# NOTE(review): MIMIR_HOST is re-set to "127.0.0.1" further down; the later
# ENV wins, so this value is effectively overridden.
ENV MIMIR_HOST="vizier-auth"
ENV MIMIR_DATA_DIR=$VIZIER_DATA_PATH
ENV WEB_API_SOURCE_RELATIVE_PATH="/../web-api/"
ENV REMOTE_SPARK=false
#gram
# SECURITY(review): OAuth client id/secret are baked into the image and its
# history — rotate them and supply them at deploy time instead.
ENV VIZIERAUTH_OAUTH_ID=e554e37483640ccc73324b5620376601843aadfa37d972f094ea13d02df90a0f
ENV VIZIERAUTH_OAUTH_SECRET=f385531e40fb5268397d222c6c26a611cdd906510d3ee4ed8ad013d33b2c4102
#shibboleth and gitlab
ENV AUTHSERVER_AUTH_CLIENTS="SAML2Client,GenericOAuth20Client"
# Route python cell execution to the sandboxed python-executor container.
ENV SANDBOX_PYTHON_EXECUTION=True
ENV SANDBOX_PYTHON_URL=http://python-executor:5005/
ENV GITLAB_OAUTH_HOST="gitlab.odin.cse.buffalo.edu"
#have vizier-auth scala code run web-api process (true)
ENV RUN_WEB_API=false
# --- vizier web-api server / engine / worker configuration -----------------
ENV VIZIERSERVER_NAME="vizier"
ENV VIZIERSERVER_LOG_DIR="$VIZIER_DATA_PATH.vizierdb/logs"
ENV VIZIERSERVER_DEBUG="False"
ENV VIZIERSERVER_BASE_URL="http://demo.vizier.devel"
ENV VIZIERSERVER_SERVER_PORT="5000"
ENV VIZIERSERVER_SERVER_LOCAL_PORT="5000"
ENV VIZIERSERVER_APP_PATH="/vizier-db/api/v1"
ENV VIZIERSERVER_ROW_LIMIT="25"
ENV VIZIERSERVER_MAX_ROW_LIMIT="-1"
ENV VIZIERSERVER_MAX_UPLOAD_SIZE="16777216"
ENV VIZIERSERVER_ENGINE="MIMIR"
ENV VIZIERSERVER_PACKAGE_PATH="/usr/local/source/web-api/resources/packages/common:/usr/local/source/web-api/resources/packages/mimir"
ENV VIZIERSERVER_PROCESSOR_PATH="/usr/local/source/web-api/resources/processors/common:/usr/local/source/web-api/resources/processors/mimir"
ENV VIZIERENGINE_BACKEND="MULTIPROCESS"
ENV VIZIERENGINE_SYNCHRONOUS=""
ENV VIZIERENGINE_USE_SHORT_IDENTIFIER="True"
ENV VIZIERENGINE_DATA_DIR="$VIZIER_DATA_PATH.vizierdb"
ENV VIZIERENGINE_CELERY_ROUTES=""
ENV CELERY_BROKER_URL="amqp://guest@localhost//"
ENV VIZIERWORKER_ENV="MIMIR"
ENV VIZIERWORKER_PROCESSOR_PATH="/usr/local/source/web-api/resources/processors/common:/usr/local/source/web-api/resources/processors/mimir"
ENV VIZIERWORKER_LOG_DIR="$VIZIER_DATA_PATH.vizierdb/logs/worker"
ENV VIZIERWORKER_CONTROLLER_URL="http://demo.vizier.devel:5000/vizier-db/api/v1"
# Overrides the earlier MIMIR_HOST="vizier-auth" (last ENV wins).
ENV MIMIR_HOST="127.0.0.1"
ENV MIMIR_URL="http://127.0.0.1:8089/api/v2/"
ENV FLASK_PORT="9000"
ARG S3FS_VERSION=v1.82
# Build/runtime packages: toolchain + fuse headers for the s3fs build, plus
# bash, R, python2/3, yarn, sshd and supervisord for the services this
# image hosts.  The apk cache is removed in the same layer to keep it out
# of the image.
RUN echo "@testing http://dl-4.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories \
&& apk add --update \
build-base alpine-sdk \
fuse fuse-dev \
automake autoconf git \
libressl-dev \
curl-dev libxml2-dev \
ca-certificates \
bash \
R \
git \
curl \
sed \
openssh \
supervisor \
python3 \
yarn \
openssl ncurses coreutils python2 make gcc g++ libgcc linux-headers grep util-linux binutils findutils \
&& pip3 install --no-cache-dir --upgrade pip \
&& rm /var/cache/apk/* \
&& mkdir -p /usr/local/source
# Build s3fs-fuse from source at the pinned tag (used at container start to
# mount the S3 data bucket).
# BUGFIX: `cd s3fs-fuse` was continued without `&&`, so the joined shell
# line read `cd s3fs-fuse git checkout tags/...` — `cd` received spurious
# arguments and the version-pinning checkout never ran.  The missing `&&`
# is restored.
RUN git clone https://github.com/s3fs-fuse/s3fs-fuse.git && \
    cd s3fs-fuse && \
    git checkout tags/${S3FS_VERSION} && \
    ./autogen.sh && \
    ./configure --prefix=/usr && \
    make && \
    make install
# Sanity check: fails the build early if s3fs did not install correctly.
RUN s3fs --version
#setup ssh
# SECURITY(review): a fixed root password is baked into the image and
# PermitRootLogin is enabled below — anyone who can reach the container's
# sshd can log in as root.  Replace with key-based auth.
RUN echo 'root:odinlab' |chpasswd
# Enable root ssh logins, disable PAM, and pre-generate all host key types
# so sshd can start without first-boot key generation.
RUN sed -ri 's/^#?PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config \
&& sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config \
&& ssh-keygen -f /etc/ssh/ssh_host_rsa_key -N '' -t rsa \
&& ssh-keygen -f /etc/ssh/ssh_host_dsa_key -N '' -t dsa \
&& ssh-keygen -f /etc/ssh/ssh_host_ecdsa_key -N '' -t ecdsa \
&& ssh-keygen -f /etc/ssh/ssh_host_ed25519_key -N '' -t ed25519
#install nvm
# NOTE(review): curl | bash of the nvm installer; the version is pinned
# (v0.35.2) but no checksum is verified.
RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.35.2/install.sh | bash
#RUN mkdir ~/.ssh/
#COPY gitlab.idk ~/.ssh/id_rsa
#COPY gitlab.idk.pub ~/.ssh/id_rsa.pub
#RUN touch ~/.ssh/known_hosts \
# && ssh-keyscan -p 222 gitlab.vizier.io >> ~/.ssh/known_hosts
#setup mimir
# Fetch sbt, clone Mimir plus the two private vizier repos, then publish
# Mimir to the local ivy cache so dependent builds can resolve it.
# SECURITY(review): git credentials are embedded in the clone URLs and are
# retained in the image layers/history — move them to a BuildKit secret
# mount (RUN --mount=type=secret) and rotate the password.
# (Alternative ssh clone forms kept for reference:
#   git clone ssh://docker@gitlab.vizier.io:222/~mike/vizier-api-auth.git
#   git clone ssh://docker@gitlab.vizier.io:222/~mike/vizier-admin-ui.git)
RUN curl -sL "https://github.com/sbt/sbt/releases/download/v1.3.8/sbt-1.3.8.tgz" | gunzip | tar -x -C /usr/local/source/ \
    && chmod 0755 /usr/local/source/sbt/bin/sbt \
    && git clone https://github.com/UBOdin/mimir.git /usr/local/source/mimir \
    && git clone https://docker:cleanTheData02@gitlab.vizier.io/mike/vizier-api-auth.git /usr/local/source/vizier-api-auth \
    && git clone https://docker:cleanTheData02@gitlab.vizier.io/mike/vizier-admin-ui.git /usr/local/source/vizier-admin-ui \
    && cd /usr/local/source/mimir \
    && git checkout -b local_$MIMIR_BRANCH origin/$MIMIR_BRANCH \
    && ../sbt/bin/sbt publish
# NVM_DIR is needed both at build time (ARG) and at runtime (ENV).
ENV NVM_DIR="/root/.nvm"
ARG NVM_DIR="/root/.nvm"
#setup vizier-admin-ui
# BUGFIX: the original line read `\. "$NVM_DIR/nvm.sh" # This loads nvm \`
# — a trailing shell comment before the line continuation.  After docker
# joined the continued lines, everything from the `#` onward (including
# `&& nvm install … && ../sbt/bin/sbt build`) became comment text, so the
# admin UI was never actually built.  The inline comment is removed.
RUN cd /usr/local/source/vizier-admin-ui \
    && [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" \
    && nvm install node \
    && nvm use node \
    && ../sbt/bin/sbt build
#RUN cd /usr/local/source/mimir \
# && ../sbt/bin/sbt "runMimirVizier -X LOG LOGM remoteSpark NO-VISTRAILS"
# PULL_CODE is a manual cache-buster: bump the value to force the RUN below
# to re-execute `git pull` on Mimir instead of reusing the cached layer.
ENV PULL_CODE=15
RUN cd /usr/local/source/mimir \
    && git pull \
    && rm -r /root/.m2/repository/info/mimirdb \
    && ../sbt/bin/sbt publish
#&& ../sbt/bin/sbt bootstrap
#install miniconda
# Install Miniconda into /opt/conda.  NOTE(review): "latest" is unpinned,
# so rebuilds may pick up a different conda version.
RUN curl -OsL "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \
&& /bin/bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
&& rm Miniconda3-latest-Linux-x86_64.sh \
&& echo 'export PATH=/opt/conda/bin:$PATH' >> /etc/profile.d/conda.sh
#setup web-api
# Create the "vizierasync" conda env (python 3.7) and install the async
# web-api plus its python dependencies into it.
# NOTE(review): `source` is a non-POSIX builtin — this relies on the
# image's default RUN shell accepting it; confirm if the base image ever
# changes.  Also note `web-api\` has no space before the continuation
# backslash; docker joins it as `web-api&& cd …`, which the shell still
# parses as two commands.
RUN cd /usr/local/source/ \
&& /opt/conda/bin/conda create --name vizierasync python=3.7 pip \
&& source /opt/conda/bin/activate vizierasync \
&& git clone https://github.com/VizierDB/web-api-async.git web-api\
&& cd /usr/local/source/web-api \
&& git checkout -b local_$API_BRANCH origin/$API_BRANCH \
&& pip install -r requirements.txt \
&& pip install gunicorn \
&& pip install futures \
&& pip install matplotlib \
&& pip install bokeh \
&& pip install flask \
&& pip install geopandas \
&& pip install pandas \
&& pip install numpy \
&& pip install shapely \
# && pip install git+https://github.com/matplotlib/basemap \
&& pip install -e . \
&& mkdir -p /usr/local/source/web-api/.vizierdb
# Cache-buster: bump the value to force the web-ui clone/build layers below
# to re-run.
LABEL pullui="1"
#setup production web-ui branch
# Clone and build the production web-ui; the sed edits point the UI at the
# configured API endpoint and strip the localhost homepage/path rewrites.
RUN mkdir -p /usr/local/source/ \
&& cd /usr/local/source/ \
&& git clone https://github.com/VizierDB/web-ui.git \
&& cd /usr/local/source/web-ui \
&& git checkout -b local_$UI_BRANCH origin/$UI_BRANCH \
&& sed -i "s#'http://localhost:5000/.*'#'$API_SCHEME://$API_SERVER:$API_PORT$APP_PATH'#g" /usr/local/source/web-ui/public/env.js \
&& sed -i "s/\"homepage\": \"http:\/\/localhost:5000\/vizier-db\/api\/v1\/web-ui\/\",//" /usr/local/source/web-ui/package.json \
&& sed -i "s#href = href + '/vizier-db';#//href = href + '/vizier-db';#" /usr/local/source/web-ui/src/core/util/App.js \
&& yarn install
#hack where I used https://babeljs.io/repl to compile the two js
# files with e6 code to e5 for compliance with yarn build
COPY query-string-index.js /usr/local/source/web-ui/node_modules/query-string/index.js
COPY strict-uri-encode-index.js /usr/local/source/web-ui/node_modules/strict-uri-encode/index.js
RUN cd /usr/local/source/web-ui \
&& yarn build
# Service scripts: run_init performs first-boot setup, the run_* scripts
# are started by supervisord (see supervisord.conf).
COPY run_init.sh /usr/local/source/run_init.sh
COPY run_vizier_auth.sh /usr/local/source/run_vizier_auth.sh
COPY run_web_api.sh /usr/local/source/run_web_api.sh
COPY entrypoint.sh /usr/local/source/entrypoint.sh
COPY supervisord.conf /etc/supervisord.conf
EXPOSE 9000
EXPOSE 5000
EXPOSE 9001
EXPOSE 8089
RUN chmod +x /usr/local/source/run_init.sh \
/usr/local/source/run_vizier_auth.sh \
/usr/local/source/run_web_api.sh \
/usr/local/source/entrypoint.sh \
&& mkdir -p $MOUNT_POINT
# VOLUME is declared after the mount point is created above.
VOLUME $VIZIER_DATA_PATH
# NOTE(review): the "\/" JSON escapes are valid but unnecessary — plain
# "/bin/bash" is equivalent.  The entrypoint script should `exec` its final
# process so supervisord runs as PID 1 and receives docker's SIGTERM.
ENTRYPOINT ["\/bin\/bash", "-c", "/usr/local/source/entrypoint.sh"]

View File

@ -0,0 +1,5 @@
#!/bin/sh
# Container entrypoint: optionally resolve the advertised API host to this
# machine's public IP, run one-time initialisation, then hand control to
# supervisord.
# API_SERVER_USE_PUB_IP=true replaces API_SERVER with the public IP
# reported by ipinfo.io (requires outbound network access).
# `=` (portable) replaces `==`, and the variable is quoted so an unset
# value compares false instead of producing a test syntax error.
if [ "$API_SERVER_USE_PUB_IP" = "true" ]; then
	export API_SERVER=$(curl ipinfo.io/ip)
fi
/usr/local/source/run_init.sh
# exec so supervisord replaces this shell as PID 1 and receives signals
# (SIGTERM from `docker stop`) directly.
exec /usr/bin/supervisord

View File

@ -0,0 +1,259 @@
'use strict';
// Babel-generated ES5 helpers (this file is the ES6 query-string source
// run through the Babel REPL so it passes the project's ES5-only build):
//   _slicedToArray — array destructuring over arrays and iterables.
//   _typeof        — `typeof` that reports Symbol values as "symbol".
var _slicedToArray = function () { function sliceIterator(arr, i) { var _arr = []; var _n = true; var _d = false; var _e = undefined; try { for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"]) _i["return"](); } finally { if (_d) throw _e; } } return _arr; } return function (arr, i) { if (Array.isArray(arr)) { return arr; } else if (Symbol.iterator in Object(arr)) { return sliceIterator(arr, i); } else { throw new TypeError("Invalid attempt to destructure non-iterable instance"); } }; }();
var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; };
// Vendored sibling packages (installed alongside this file in node_modules).
var strictUriEncode = require('strict-uri-encode');
var decodeComponent = require('decode-uri-component');
// Return a (key, value[, index]) -> string encoder for the configured
// array serialisation format: 'index' produces key[0]=v, 'bracket'
// produces key[]=v, and the default produces plain key=v.  A null value
// serialises the key (and any index brackets) without an '=' part.
function encoderForArrayFormat(options) {
	if (options.arrayFormat === 'index') {
		return function (key, value, index) {
			if (value === null) {
				return encode(key, options) + '[' + index + ']';
			}
			return encode(key, options) + '[' + encode(index, options) + ']=' + encode(value, options);
		};
	}
	if (options.arrayFormat === 'bracket') {
		return function (key, value) {
			if (value === null) {
				return encode(key, options) + '[]';
			}
			return encode(key, options) + '[]=' + encode(value, options);
		};
	}
	return function (key, value) {
		if (value === null) {
			return encode(key, options);
		}
		return encode(key, options) + '=' + encode(value, options);
	};
}
function parserForArrayFormat(options) {
var result = void 0;
switch (options.arrayFormat) {
case 'index':
return function (key, value, accumulator) {
result = /\[(\d*)\]$/.exec(key);
key = key.replace(/\[\d*\]$/, '');
if (!result) {
accumulator[key] = value;
return;
}
if (accumulator[key] === undefined) {
accumulator[key] = {};
}
accumulator[key][result[1]] = value;
};
case 'bracket':
return function (key, value, accumulator) {
result = /(\[\])$/.exec(key);
key = key.replace(/\[\]$/, '');
if (!result) {
accumulator[key] = value;
return;
}
if (accumulator[key] === undefined) {
accumulator[key] = [value];
return;
}
accumulator[key] = [].concat(accumulator[key], value);
};
default:
return function (key, value, accumulator) {
if (accumulator[key] === undefined) {
accumulator[key] = value;
return;
}
accumulator[key] = [].concat(accumulator[key], value);
};
}
}
function encode(value, options) {
if (options.encode) {
return options.strict ? strictUriEncode(value) : encodeURIComponent(value);
}
return value;
}
function decode(value, options) {
if (options.decode) {
return decodeComponent(value);
}
return value;
}
function keysSorter(input) {
if (Array.isArray(input)) {
return input.sort();
}
if ((typeof input === 'undefined' ? 'undefined' : _typeof(input)) === 'object') {
return keysSorter(Object.keys(input)).sort(function (a, b) {
return Number(a) - Number(b);
}).map(function (key) {
return input[key];
});
}
return input;
}
function extract(input) {
var queryStart = input.indexOf('?');
if (queryStart === -1) {
return '';
}
return input.slice(queryStart + 1);
}
// Parse a query string into a plain (null-prototype) object.
// options.decode (default true) URI-decodes keys and values;
// options.arrayFormat selects how repeated/indexed keys fold into arrays
// (see parserForArrayFormat).  Result keys are sorted, and numeric
// index-maps are converted to arrays via keysSorter.  Non-string input
// yields an empty object.
function parse(input, options) {
	options = Object.assign({ decode: true, arrayFormat: 'none' }, options);
	var formatter = parserForArrayFormat(options);
	// Create an object with no prototype
	var ret = Object.create(null);
	if (typeof input !== 'string') {
		return ret;
	}
	// Strip one leading '?', '#' or '&' so whole query/hash strings parse.
	input = input.trim().replace(/^[?#&]/, '');
	if (!input) {
		return ret;
	}
	// Babel-transpiled for…of over the '&'-separated parameters.
	var _iteratorNormalCompletion = true;
	var _didIteratorError = false;
	var _iteratorError = undefined;
	try {
		for (var _iterator = input.split('&')[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
			var param = _step.value;
			// '+' is treated as a space before splitting on the first '='.
			var _param$replace$split = param.replace(/\+/g, ' ').split('='),
				_param$replace$split2 = _slicedToArray(_param$replace$split, 2),
				key = _param$replace$split2[0],
				value = _param$replace$split2[1];
			// Missing `=` should be `null`:
			// http://w3.org/TR/2012/WD-url-20120524/#collect-url-parameters
			value = value === undefined ? null : decode(value, options);
			formatter(decode(key, options), value, ret);
		}
	} catch (err) {
		_didIteratorError = true;
		_iteratorError = err;
	} finally {
		// Transpiled finally block: close the iterator, then re-throw any
		// error captured during iteration.
		try {
			if (!_iteratorNormalCompletion && _iterator.return) {
				_iterator.return();
			}
		} finally {
			if (_didIteratorError) {
				throw _iteratorError;
			}
		}
	}
	// Rebuild the result with sorted keys; index-map objects become arrays.
	return Object.keys(ret).sort().reduce(function (result, key) {
		var value = ret[key];
		if (Boolean(value) && (typeof value === 'undefined' ? 'undefined' : _typeof(value)) === 'object' && !Array.isArray(value)) {
			// Sort object keys, not values
			result[key] = keysSorter(value);
		} else {
			result[key] = value;
		}
		return result;
	}, Object.create(null));
}
exports.extract = extract;
exports.parse = parse;
// Serialise `obj` into a query string.  options.encode/strict control
// percent-encoding (strict RFC 3986 by default); options.arrayFormat
// selects the array key style (see encoderForArrayFormat); options.sort
// may be false (keep key order) or a comparator.  undefined values are
// skipped entirely; null values emit the key with no '='.
exports.stringify = function (obj, options) {
	var defaults = {
		encode: true,
		strict: true,
		arrayFormat: 'none'
	};
	options = Object.assign(defaults, options);
	// sort === false is implemented as a no-op comparator for Array#sort.
	if (options.sort === false) {
		options.sort = function () {};
	}
	var formatter = encoderForArrayFormat(options);
	return obj ? Object.keys(obj).sort(options.sort).map(function (key) {
		var value = obj[key];
		if (value === undefined) {
			return '';
		}
		if (value === null) {
			return encode(key, options);
		}
		if (Array.isArray(value)) {
			var result = [];
			// Babel-transpiled for…of over a copy of the array; undefined
			// elements are skipped, others go through the array formatter.
			var _iteratorNormalCompletion2 = true;
			var _didIteratorError2 = false;
			var _iteratorError2 = undefined;
			try {
				for (var _iterator2 = value.slice()[Symbol.iterator](), _step2; !(_iteratorNormalCompletion2 = (_step2 = _iterator2.next()).done); _iteratorNormalCompletion2 = true) {
					var value2 = _step2.value;
					if (value2 === undefined) {
						continue;
					}
					result.push(formatter(key, value2, result.length));
				}
			} catch (err) {
				_didIteratorError2 = true;
				_iteratorError2 = err;
			} finally {
				// Transpiled finally: close the iterator, re-throw any error.
				try {
					if (!_iteratorNormalCompletion2 && _iterator2.return) {
						_iterator2.return();
					}
				} finally {
					if (_didIteratorError2) {
						throw _iteratorError2;
					}
				}
			}
			return result.join('&');
		}
		return encode(key, options) + '=' + encode(value, options);
	}).filter(function (x) {
		// Drop the empty strings produced by undefined values.
		return x.length > 0;
	}).join('&') : '';
};
exports.parseUrl = function (input, options) {
return {
url: input.split('?')[0] || '',
query: parse(extract(input), options)
};
};

71
vizier-auth/run_init.sh Executable file
View File

@ -0,0 +1,71 @@
#!/bin/bash
#mount s3
# Container init helper: optionally mounts an S3 bucket with s3fs, rewrites
# the compiled web-ui runtime config (env.js) from environment variables, and
# restores Mimir / Hive metastore state from the S3-backed volume on demand.
# NOTE(review): with `set -u` below, every env var referenced here
# (USE_S3_VOLUME, MOUNT_POINT, PULL_MIMIR, ...) must be set by the image or
# orchestrator or this script aborts -- confirm defaults are provided.
set -euo pipefail
set -o errexit # redundant with -e in the line above; kept for emphasis
set -o errtrace
IFS=$'\n\t'
# Default the bucket ACL to private unless the caller overrides it.
export S3_ACL=${S3_ACL:-private}
if [ "$USE_S3_VOLUME" == "true" ]; then
# `test` aborts (via set -e) if MOUNT_POINT is empty; then recreate it empty.
test $MOUNT_POINT
rm -rf ${MOUNT_POINT}
mkdir -p ${MOUNT_POINT}
if [ "$S3_ENDPOINT" != "" ]; then
# Custom (non-AWS) endpoint: write an s3fs credentials file and use
# path-style requests against the given URL.
echo "$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY" > /usr/local/s3cred
chmod 400 /usr/local/s3cred
/usr/bin/s3fs ${S3_BUCKET_NAME} ${MOUNT_POINT} -o passwd_file=/usr/local/s3cred,use_path_request_style,url=${S3_ENDPOINT}
else
if [ "$IAM_ROLE" == "none" ]; then
# s3fs reads AWSACCESSKEYID/AWSSECRETACCESSKEY from the environment.
export AWSACCESSKEYID=${AWSACCESSKEYID:-$AWS_ACCESS_KEY_ID}
export AWSSECRETACCESSKEY=${AWSSECRETACCESSKEY:-$AWS_SECRET_ACCESS_KEY}
echo 'IAM_ROLE is not set - mounting S3 with credentials from ENV'
/usr/bin/s3fs ${S3_BUCKET_NAME} ${MOUNT_POINT} -o nosuid,nonempty,nodev,allow_other,default_acl=${S3_ACL},retries=5
else
echo 'IAM_ROLE is set - using it to mount S3'
/usr/bin/s3fs ${S3_BUCKET_NAME} ${MOUNT_POINT} -o iam_role=${IAM_ROLE},nosuid,nonempty,nodev,allow_other,default_acl=${S3_ACL},retries=5
fi
fi
fi
#init web-ui config:
# Rewrite the baked-in API URL and analytics settings inside the compiled
# web-ui bundle so it matches this deployment's environment.
sed -ri "s#'https?://[a-zA-Z0-9.-]*[:0-9]*/?.*'#'$API_SCHEME://$API_SERVER:$API_PORT$APP_PATH'#g" /usr/local/source/web-ui/build/env.js
sed -ri "s/ANALYTICS_URL: '.*'/ANALYTICS_URL: '${ANALYTICS_URL}'/g" /usr/local/source/web-ui/build/env.js
sed -ri "s/ANALYTICS_SITE_ID: '.*'/ANALYTICS_SITE_ID: '${ANALYTICS_SITE_ID}'/g" /usr/local/source/web-ui/build/env.js
sed -ri "s/API_BASIC_AUTH: .*,/API_BASIC_AUTH: ${API_BASIC_AUTH},/g" /usr/local/source/web-ui/build/env.js
#mk mimir data dir if it doesn't exist
mkdir -p $MIMIR_DATA_DIR
mkdir -p "$MOUNT_POINT/logs"
#update mimir from repo
# Optionally refresh the Mimir checkout, drop its cached artifacts, and
# republish -- runs in a subshell so the cwd is unchanged afterwards.
if [ $PULL_MIMIR == "true" ]
then
(cd /usr/local/source/mimir; git pull; rm -r /root/.m2/repository/info/mimirdb/; /usr/local/source/sbt/bin/sbt publish;)
fi
#restore data from some other s3 backup
# One-shot restore guarded by a marker file so container restarts don't repeat it.
if [ $RESTORE_BACKUP == "true" ] && [ ! -f /usr/local/source/.restoreComplete ]
then
cd /usr/local/source/mimir
/usr/local/source/sbt/bin/sbt "runBackup --restore --dataStagingType s3 --overwriteJars -X LOG LOGM remoteSpark"
touch /usr/local/source/.restoreComplete
fi
# Seed Mimir's local db from the S3-backed volume, only on first run
# (skips if the target already exists or the source is missing).
if [ $NEW_MIMIR_DB_FROM_S3 == "true" ] && [ ! -f /usr/local/source/mimir/debug.db ] && [ -f /usr/local/source/web-api/.vizierdb/mimir/debug.db ]
then
#cp /usr/local/source/mimir/debug.db /usr/local/source/web-api/.vizierdb/mimir/
cp /usr/local/source/web-api/.vizierdb/mimir/debug.db /usr/local/source/mimir/debug.db
fi
# Seed the Hive metastore from a tarball on the S3-backed volume, only on
# first run; the tarball is extracted in place and removed.
if [ $NEW_HIVE_METASTORE_FROM_S3 == "true" ] && [ ! -d /usr/local/source/mimir/metastore_db ] && [ -f /usr/local/source/web-api/.vizierdb/mimir/metastore_db.tar ]
then
#tar -C /usr/local/source/mimir -cf metastore_db.tar /usr/local/source/mimir/metastore_db
#cp metastore_db.tar /usr/local/source/web-api/.vizierdb/mimir/
cp /usr/local/source/web-api/.vizierdb/mimir/metastore_db.tar /usr/local/source/mimir/metastore_db.tar
cd /usr/local/source/mimir
tar -xf metastore_db.tar
rm metastore_db.tar
fi

3
vizier-auth/run_vizier_auth.sh Executable file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Launch the Vizier auth API via sbt from its source checkout.
# Fail fast if the checkout is missing instead of silently running
# `sbt run` from whatever directory the shell happened to be in.
cd /usr/local/source/vizier-api-auth || exit 1
/usr/local/source/sbt/bin/sbt run

10
vizier-auth/run_web_api.sh Executable file
View File

@ -0,0 +1,10 @@
#!/bin/bash
# Start the Vizier web API under gunicorn inside the `vizierasync` conda env.
echo 'activating virtualenv...'
# Fail fast if the source tree is missing rather than launching the server
# from the wrong working directory.
cd /usr/local/source/web-api/ || exit 1
source /opt/conda/bin/activate vizierasync
cd vizier || exit 1
echo 'running wsgi server...'
#uwsgi --plugins-dir /usr/lib/uwsgi/ --need-plugin python --plugins-list --uid nginx --gid nginx --socket /tmp/uwsgi.sock --wsgi-file server.py --chown-socket nginx:nginx --callable app --processes 4 --threads 2 --stats 127.0.0.1:9191 --virtualenv /opt/conda/envs/vizier
#gunicorn -w 4 --bind unix:vizier.sock -m 777 server:app
#gunicorn -c ./wsgi_profiler_conf.py -w 1 --threads 8 --bind 0.0.0.0:$FLASK_PORT wsgi:app
# Single worker, 8 threads; FLASK_PORT quoted so an unset/odd value fails
# loudly in gunicorn rather than via shell word splitting.
gunicorn -w 1 --threads 8 --bind "0.0.0.0:$FLASK_PORT" wsgi:app

View File

@ -0,0 +1,7 @@
'use strict';
module.exports = function (str) {
return encodeURIComponent(str).replace(/[!'()*]/g, function (x) {
return '%' + x.charCodeAt(0).toString(16).toUpperCase();
});
};

View File

@ -0,0 +1,33 @@
; Supervisor config for the vizier-auth container: runs the Scala auth/Mimir
; service, the Python web API, and sshd under one foreground supervisord.
[supervisord]
nodaemon=true
; NOTE(review): program is named "mimir" but launches run_vizier_auth.sh --
; presumably the auth service replaced an older mimir entry; confirm the
; stale name is intentional.
[program:mimir]
command=/usr/local/source/run_vizier_auth.sh
; Forward child stdout/stderr straight to the container's streams, unrotated.
stdout_events_enabled=true
stderr_events_enabled=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
[program:api]
command=/usr/local/source/run_web_api.sh
stdout_events_enabled=true
stderr_events_enabled=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
; sshd kept in the foreground (-D) so supervisord can manage it.
[program:sshd]
command=/usr/sbin/sshd -D
; NOTE(review): HTTP control interface bound on ALL interfaces with a
; hard-coded password -- ensure port 9001 is only reachable through the
; authenticating TLS proxy, and consider moving the password to an env var.
[inet_http_server]
port=0.0.0.0:9001
username=root
password=odinlab
[supervisorctl]
serverurl=http://127.0.0.1:9001
username=root
password=odinlab

View File

@ -2,15 +2,17 @@
# Metadata
LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8_nginx"
LABEL version="0.3"
LABEL version="0.4"
LABEL software="Vizier"
LABEL software.version="0.2.20190905"
LABEL software.version="0.2.20191001"
ENV VIZIER_CONFIG="vizier_ecs.conf"
ARG VIZIER_CONFIG="vizier_ecs.conf"
ENV VIZIER_CONFIG=$VIZIER_CONFIG
ENV VIZIER_DOMAIN="vizier.app"
ENV VIZIER_API_APP_PATH="/vizier-db/api/v1/"
ENV VIZIER_API_PROXY_PATH=""
ENV ACME_HOSTS="demo.vizier.app api.vizier.app vizier.vizier.app mimir.vizier.app spark.vizier.app hdfs.vizier.app proxy.vizier.app analytics.vizier.app"
ARG ACME_HOSTS="demo.vizier.app api.vizier.app vizier.vizier.app mimir.vizier.app spark.vizier.app hdfs.vizier.app proxy.vizier.app analytics.vizier.app"
ENV ACME_HOSTS=$ACME_HOSTS
ENV API_BASIC_AUTH=false
EXPOSE 80
@ -25,9 +27,9 @@ COPY acme-client /etc/periodic/weekly/acme-client
RUN wget https://dl.eff.org/certbot-auto \
&& mv certbot-auto /usr/local/bin/certbot-auto \
&& chown root /usr/local/bin/certbot-auto \
&& chmod 0755 /usr/local/bin/certbot-auto \
&& chmod 0755 /usr/local/bin/certbot-auto
RUN apk add nginx nginx-mod-http-headers-more libressl openssh yarn supervisor git \
RUN apk add nginx nginx-mod-http-headers-more libressl openssh yarn supervisor git bash \
&& chmod +x /etc/periodic/weekly/acme-client \
&& rm /etc/nginx/conf.d/default.conf
@ -40,25 +42,14 @@ RUN sed -ri 's/^#?PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_confi
&& ssh-keygen -f /etc/ssh/ssh_host_ecdsa_key -N '' -t ecdsa \
&& ssh-keygen -f /etc/ssh/ssh_host_ed25519_key -N '' -t ed25519
COPY fullchain-ui.pem /etc/ssl/acme/demo.vizier.app/fullchain.pem
COPY ui.pem /etc/ssl/acme/private/demo.vizier.app/privkey.pem
COPY fullchain-api.pem /etc/ssl/acme/api.vizier.app/fullchain.pem
COPY api.pem /etc/ssl/acme/private/api.vizier.app/privkey.pem
COPY fullchain-ui.pem /etc/ssl/acme/vizier.vizier.app/fullchain.pem
COPY ui.pem /etc/ssl/acme/private/vizier.vizier.app/privkey.pem
COPY fullchain-ui.pem /etc/ssl/acme/mimir.vizier.app/fullchain.pem
COPY ui.pem /etc/ssl/acme/private/mimir.vizier.app/privkey.pem
COPY fullchain-ui.pem /etc/ssl/acme/spark.vizier.app/fullchain.pem
COPY ui.pem /etc/ssl/acme/private/spark.vizier.app/privkey.pem
COPY fullchain-ui.pem /etc/ssl/acme/hdfs.vizier.app/fullchain.pem
COPY ui.pem /etc/ssl/acme/private/hdfs.vizier.app/privkey.pem
COPY fullchain-ui.pem /etc/ssl/acme/analytics.vizier.app/fullchain.pem
COPY ui.pem /etc/ssl/acme/private/analytics.vizier.app/privkey.pem
COPY fullchain-ui.pem /etc/ssl/acme/proxy.vizier.app/fullchain.pem
COPY ui.pem /etc/ssl/acme/private/proxy.vizier.app/privkey.pem
COPY fullchain-ui.pem /usr/local/fullchain.pem
COPY ui.pem /usr/local/privkey.pem
COPY copy-certs.sh /usr/local/copy-certs.sh
RUN chmod +x /usr/local/copy-certs.sh \
&& /bin/bash -c /usr/local/copy-certs.sh
COPY vizier_ecs.conf /usr/local/vizier_ecs.conf
COPY vizier_k8s.conf /usr/local/vizier_k8s.conf
COPY $VIZIER_CONFIG /usr/local/$VIZIER_CONFIG
COPY entrypoint.sh /usr/local/entrypoint.sh
COPY supervisord.conf /etc/supervisord.conf
COPY .htpasswd /etc/nginx/.htpasswd

View File

@ -3,7 +3,7 @@
hosts=$ACME_HOSTS
for host in $hosts; do
acme-client -a https://letsencrypt.org/documents/LE-SA-v1.2-November-15-2017.pdf -Nnmv $host && renew=1
sudo /usr/local/bin/certbot-auto renew
done
[ "$renew" = 1 ] && rc-service nginx reload

View File

@ -0,0 +1,12 @@
#!/bin/bash
# Fan out a single shared TLS cert/key pair into the per-domain directories
# nginx expects (/etc/ssl/acme/<domain>/fullchain.pem and
# /etc/ssl/acme/private/<domain>/privkey.pem), then delete the staged copies.
#IFS=' ' read -a domainnames <<< $ACME_HOSTS
# ACME_HOSTS is a space-separated list of domains; the unquoted expansion
# below performs the intentional word splitting into an array.
domainnames=($ACME_HOSTS)
for i in "${domainnames[@]}"
do
echo "copying certs for $i"
# Quote every path expansion so an unexpected value in ACME_HOSTS cannot
# split into multiple arguments or glob.
mkdir -p "/etc/ssl/acme/$i/"
mkdir -p "/etc/ssl/acme/private/$i/"
cp /usr/local/fullchain.pem "/etc/ssl/acme/$i/"
cp /usr/local/privkey.pem "/etc/ssl/acme/private/$i/"
done
rm /usr/local/fullchain.pem /usr/local/privkey.pem

View File

@ -0,0 +1,446 @@
# Upstream pools for the TLS-terminating reverse proxy. Hostnames
# (vizier-auth, namenode) resolve via Docker/K8s service discovery at
# config-load time.
# supervisord web UI inside the vizier-auth container
upstream vizier-supervisor {
server vizier-auth:9001;
}
# Spark driver UI for Mimir
upstream mimir-driver {
server vizier-auth:4041;
}
# Mimir HTTP API
upstream mimir-api {
server vizier-auth:8089;
}
# this proxy container's own supervisord UI
upstream proxy-supervisor {
server 127.0.0.1:9001;
}
# NOTE(review): `vizier` and `vizierapi` point at the same backend/port --
# presumably kept separate so they can diverge later; confirm.
upstream vizier {
server vizier-auth:5000;
}
upstream vizierapi {
server vizier-auth:5000;
}
# Spark master web UI
upstream sparkmaster {
server namenode:8080;
}
# HDFS namenode web UI
upstream sparkhdfs {
server namenode:50070;
}
# vizier.vizier.app over HTTP: serve ACME challenges, redirect everything else.
server {
    listen 80;
    server_name vizier.vizier.app;
    # For Lets Encrypt, this needs to be served via HTTP
    location ^~ /.well-known/acme-challenge {
        allow all;
        alias /var/www/acme;
    }
    location / {
        return 301 https://vizier.vizier.app$request_uri;
    }
}
# vizier.vizier.app over HTTPS: proxy to the supervisord UI on vizier-auth:9001.
server {
    listen 443 ssl;
    server_name vizier.vizier.app;
    server_tokens off;
    # `ssl on;` removed: deprecated and redundant with `listen 443 ssl`
    # (rejected outright by nginx >= 1.25).
    ssl_certificate /etc/ssl/acme/vizier.vizier.app/fullchain.pem;
    ssl_certificate_key /etc/ssl/acme/private/vizier.vizier.app/privkey.pem;
    #auth_basic "Vizier Demo";
    #auth_basic_user_file /etc/nginx/.htpasswd;
    location / {
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://vizier-supervisor;
        # WebSocket upgrade. NOTE(review): $connection_upgrade needs a
        # `map $http_upgrade $connection_upgrade` in the http{} context -- confirm.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
}
# mimir.vizier.app over HTTP: serve ACME challenges, redirect everything else.
server {
    listen 80;
    server_name mimir.vizier.app;
    # For Lets Encrypt, this needs to be served via HTTP
    location ^~ /.well-known/acme-challenge {
        allow all;
        alias /var/www/acme;
    }
    location / {
        return 301 https://mimir.vizier.app$request_uri;
    }
}
# mimir.vizier.app over HTTPS: proxy to the Mimir API (vizier-auth:8089).
server {
    listen 443 ssl;
    server_name mimir.vizier.app;
    server_tokens off;
    # `ssl on;` removed: deprecated and redundant with `listen 443 ssl`.
    ssl_certificate /etc/ssl/acme/mimir.vizier.app/fullchain.pem;
    ssl_certificate_key /etc/ssl/acme/private/mimir.vizier.app/privkey.pem;
    #auth_basic "Vizier Demo";
    #auth_basic_user_file /etc/nginx/.htpasswd;
    location / {
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://mimir-api;
        # WebSocket upgrade; $connection_upgrade must be mapped in http{}.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
}
# driver.vizier.app over HTTP: serve ACME challenges, redirect everything else.
server {
    listen 80;
    server_name driver.vizier.app;
    # For Lets Encrypt, this needs to be served via HTTP
    location ^~ /.well-known/acme-challenge {
        allow all;
        alias /var/www/acme;
    }
    location / {
        return 301 https://driver.vizier.app$request_uri;
    }
}
# driver.vizier.app over HTTPS: proxy to the Spark driver UI (vizier-auth:4041).
server {
    listen 443 ssl;
    server_name driver.vizier.app;
    server_tokens off;
    # `ssl on;` removed: deprecated and redundant with `listen 443 ssl`.
    # NOTE(review): this host reuses the mimir.vizier.app certificate --
    # browsers will warn unless the cert covers driver.vizier.app; confirm.
    ssl_certificate /etc/ssl/acme/mimir.vizier.app/fullchain.pem;
    ssl_certificate_key /etc/ssl/acme/private/mimir.vizier.app/privkey.pem;
    #auth_basic "Vizier Demo";
    #auth_basic_user_file /etc/nginx/.htpasswd;
    location / {
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://mimir-driver;
        # WebSocket upgrade; $connection_upgrade must be mapped in http{}.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
}
# proxy.vizier.app over HTTP: serve ACME challenges, redirect everything else.
server {
    listen 80;
    server_name proxy.vizier.app;
    # For Lets Encrypt, this needs to be served via HTTP
    location ^~ /.well-known/acme-challenge {
        allow all;
        alias /var/www/acme;
    }
    location / {
        return 301 https://proxy.vizier.app$request_uri;
    }
}
# proxy.vizier.app over HTTPS: proxy to this container's supervisord UI (9001).
server {
    listen 443 ssl;
    server_name proxy.vizier.app;
    server_tokens off;
    # `ssl on;` removed: deprecated and redundant with `listen 443 ssl`.
    ssl_certificate /etc/ssl/acme/proxy.vizier.app/fullchain.pem;
    ssl_certificate_key /etc/ssl/acme/private/proxy.vizier.app/privkey.pem;
    #auth_basic "Vizier Demo";
    #auth_basic_user_file /etc/nginx/.htpasswd;
    location / {
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://proxy-supervisor;
        # WebSocket upgrade; $connection_upgrade must be mapped in http{}.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
}
# demo.vizier.app over HTTP: serve ACME challenges, redirect everything else.
server {
    listen 80;
    server_name demo.vizier.app;
    # For Lets Encrypt, this needs to be served via HTTP
    location ^~ /.well-known/acme-challenge {
        allow all;
        alias /var/www/acme;
    }
    location / {
        return 301 https://demo.vizier.app$request_uri;
    }
}
# demo.vizier.app over HTTPS: web UI at /, API under /vizier-db/api/v1/.
server {
    listen 443 ssl;
    server_name demo.vizier.app;
    server_tokens off;
    # `ssl on;` removed: deprecated and redundant with `listen 443 ssl`.
    ssl_certificate /etc/ssl/acme/demo.vizier.app/fullchain.pem;
    ssl_certificate_key /etc/ssl/acme/private/demo.vizier.app/privkey.pem;
    #auth_basic "Vizier Demo";
    #auth_basic_user_file /etc/nginx/.htpasswd;
    location / {
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://vizier;
        # WebSocket upgrade; $connection_upgrade must be mapped in http{}.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
    location /vizier-db/api/v1/ {
        #auth_basic "Vizier API";
        #auth_basic_user_file /etc/nginx/.htpasswd_api;
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://vizierapi;
        # WebSocket upgrade; $connection_upgrade must be mapped in http{}.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
}
# api.vizier.app over HTTP: serve ACME challenges, redirect everything else.
server {
    listen 80;
    server_name api.vizier.app;
    # For Lets Encrypt, this needs to be served via HTTP
    location ^~ /.well-known/acme-challenge {
        allow all;
        alias /var/www/acme;
    }
    location / {
        return 301 https://api.vizier.app$request_uri;
    }
}
# api.vizier.app over HTTPS: proxy to the Vizier API (vizier-auth:5000).
server {
    listen 443 ssl;
    server_name api.vizier.app;
    server_tokens off;
    # `ssl on;` removed: deprecated and redundant with `listen 443 ssl`.
    ssl_certificate /etc/ssl/acme/api.vizier.app/fullchain.pem;
    ssl_certificate_key /etc/ssl/acme/private/api.vizier.app/privkey.pem;
    location / {
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://vizierapi;
        # WebSocket upgrade; $connection_upgrade must be mapped in http{}.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
}
# spark.vizier.app over HTTP: serve ACME challenges, redirect everything else.
server {
    listen 80;
    server_name spark.vizier.app;
    # For Lets Encrypt, this needs to be served via HTTP
    location ^~ /.well-known/acme-challenge {
        allow all;
        alias /var/www/acme;
    }
    location / {
        return 301 https://spark.vizier.app$request_uri;
    }
}
# spark.vizier.app over HTTPS: proxy to the Spark master UI (namenode:8080).
server {
    listen 443 ssl;
    server_name spark.vizier.app;
    server_tokens off;
    # `ssl on;` removed: deprecated and redundant with `listen 443 ssl`.
    ssl_certificate /etc/ssl/acme/spark.vizier.app/fullchain.pem;
    ssl_certificate_key /etc/ssl/acme/private/spark.vizier.app/privkey.pem;
    #auth_basic "Vizier Demo";
    #auth_basic_user_file /etc/nginx/.htpasswd;
    location / {
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://sparkmaster;
        # WebSocket upgrade; $connection_upgrade must be mapped in http{}.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
}
# hdfs.vizier.app over HTTP: serve ACME challenges, redirect everything else.
server {
    listen 80;
    server_name hdfs.vizier.app;
    # For Lets Encrypt, this needs to be served via HTTP
    location ^~ /.well-known/acme-challenge {
        allow all;
        alias /var/www/acme;
    }
    location / {
        return 301 https://hdfs.vizier.app$request_uri;
    }
}
# hdfs.vizier.app over HTTPS: proxy to the HDFS namenode UI (namenode:50070).
server {
    listen 443 ssl;
    server_name hdfs.vizier.app;
    server_tokens off;
    # `ssl on;` removed: deprecated and redundant with `listen 443 ssl`.
    ssl_certificate /etc/ssl/acme/hdfs.vizier.app/fullchain.pem;
    ssl_certificate_key /etc/ssl/acme/private/hdfs.vizier.app/privkey.pem;
    #auth_basic "Vizier Demo";
    #auth_basic_user_file /etc/nginx/.htpasswd;
    location / {
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://sparkhdfs;
        # WebSocket upgrade; $connection_upgrade must be mapped in http{}.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
}
# analytics.vizier.app over HTTP: serve ACME challenges, redirect everything else.
server {
    listen 80;
    server_name analytics.vizier.app;
    # For Lets Encrypt, this needs to be served via HTTP
    location ^~ /.well-known/acme-challenge {
        allow all;
        alias /var/www/acme;
    }
    location / {
        return 301 https://analytics.vizier.app$request_uri;
    }
}
# analytics.vizier.app over HTTPS: proxy to the analytics container (port 80).
server {
    listen 443 ssl;
    server_name analytics.vizier.app;
    server_tokens off;
    # `ssl on;` removed: deprecated and redundant with `listen 443 ssl`.
    ssl_certificate /etc/ssl/acme/analytics.vizier.app/fullchain.pem;
    ssl_certificate_key /etc/ssl/acme/private/analytics.vizier.app/privkey.pem;
    #auth_basic "Vizier Demo";
    #auth_basic_user_file /etc/nginx/.htpasswd;
    location / {
        # Variable upstream defers DNS resolution so nginx starts even when
        # the analytics container is down. NOTE(review): this requires a
        # `resolver` directive in scope to re-resolve at runtime -- confirm.
        set $upstreamanalytics vizier-analytics:80;
        proxy_set_header Host $http_host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Frame-Options SAMEORIGIN;
        proxy_pass http://$upstreamanalytics;
        # WebSocket upgrade; $connection_upgrade must be mapped in http{}.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }
}