add docker compose file. add bokeh support to api

This commit is contained in:
Mike 2019-06-13 11:26:16 -04:00
parent faee005c31
commit c71d1da2c2
5 changed files with 527 additions and 6 deletions

View file

@ -4,7 +4,7 @@
LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8" LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8"
LABEL version="0.3" LABEL version="0.3"
LABEL software="Vizier" LABEL software="Vizier"
LABEL software.version="0.2.20190425" LABEL software.version="0.2.20190610"
LABEL description="an open source, provenance aware, iterative data cleaning tool" LABEL description="an open source, provenance aware, iterative data cleaning tool"
LABEL website="http://vizierdb.info" LABEL website="http://vizierdb.info"
LABEL sourcecode="https://github.com/VizierDB" LABEL sourcecode="https://github.com/VizierDB"
@ -173,8 +173,6 @@ RUN curl -OsL "https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_6
&& rm Miniconda2-latest-Linux-x86_64.sh \ && rm Miniconda2-latest-Linux-x86_64.sh \
&& echo 'export PATH=/opt/conda/bin:$PATH' >> /etc/profile.d/conda.sh && echo 'export PATH=/opt/conda/bin:$PATH' >> /etc/profile.d/conda.sh
ENV PULL_CODE=0
#setup web-api #setup web-api
#copy local archive instead of pulling from github #copy local archive instead of pulling from github
#COPY web-api.tgz /usr/local/source/ #COPY web-api.tgz /usr/local/source/
@ -189,10 +187,19 @@ RUN cd /usr/local/source/ \
&& pip install gunicorn \ && pip install gunicorn \
&& pip install futures \ && pip install futures \
&& pip install matplotlib \ && pip install matplotlib \
&& pip install bokeh \
&& pip install geopandas \
&& pip install pandas \
&& pip install numpy \
&& pip install shapely \
&& pip install https://github.com/matplotlib/basemap/archive/v1.1.0.tar.gz \ && pip install https://github.com/matplotlib/basemap/archive/v1.1.0.tar.gz \
&& pip install -e . \ && pip install -e . \
&& mkdir -p /usr/local/source/web-api/.vizierdb && mkdir -p /usr/local/source/web-api/.vizierdb
ENV PULL_CODE=3
RUN cd /usr/local/source/web-api \
&& git pull
COPY run_init.sh /usr/local/source/run_init.sh COPY run_init.sh /usr/local/source/run_init.sh
COPY run_web_api.sh /usr/local/source/run_web_api.sh COPY run_web_api.sh /usr/local/source/run_web_api.sh

434
docker-compose.yml Normal file
View file

@ -0,0 +1,434 @@
# docker-compose.yml — Vizier demo stack: a Spark master / HDFS namenode, two
# Spark worker / HDFS datanodes, the Mimir backend, the Vizier web API, the
# web UI, and an nginx reverse proxy, all on the external `spark-net` network.
#
# NOTE(review): this file appears to have been generated from running
# containers (the network aliases are container IDs). The AWS access/secret
# keys embedded below are committed in plain text and must be treated as
# leaked — rotate them and inject via env files or secrets instead.
version: "3"
services:
  namenode:
    command:
      - /usr/local/spark-2.4.0-bin-without-hadoop/master.sh
    container_name: namenode
    environment:
      - AWS_ECS=false
      - SPARK_PUBLIC_DNS=127.0.0.1
      - SPARK_DAEMON_MEMORY=8g
      - SPARK_DRIVER_MEMORY=8g
      - HDFS_CONF_dfs_client_use_datanode_hostname=true
      - SPARK_WORKER_MEMORY=8g
      - MASTER=spark://namenode:7077
      - SPARK_CONF_DIR=/conf
      - LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/
      - SPARK_EXECUTOR_MEMORY=8g
      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark-2.4.0-bin-without-hadoop/bin
      - LANG=C.UTF-8
      - JAVA_VERSION=8
      - JAVA_UPDATE=161
      - JAVA_BUILD=12
      - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
      - JAVA_HOME=/usr/lib/jvm/default-jvm
      - HADOOP_PREFIX=/usr/local/hadoop
      - HADOOP_COMMON_HOME=/usr/local/hadoop
      - HADOOP_HDFS_HOME=/usr/local/hadoop
      - HADOOP_MAPRED_HOME=/usr/local/hadoop
      - HADOOP_YARN_HOME=/usr/local/hadoop
      - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
      - YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop
      - HADOOP_HOME=/usr/local/hadoop
      - CLUSTER_NAME=test
      - MASTER_IP=0
      - HDFS_HOST=namenode
      - HDFS_DATA_HOST=datanode
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - CORE_CONF_hadoop_http_staticuser_user=root
      - CORE_CONF_hadoop_proxyuser_hue_hosts=*
      - CORE_CONF_hadoop_proxyuser_hue_groups=*
      - HDFS_CONF_dfs_webhdfs_enabled=true
      - HDFS_CONF_dfs_permissions_enabled=false
      - HDFS_CONF_dfs_datanode_use_datanode_hostname=true
      - HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
      - HDFS_CONF_dfs_datanode_address=0.0.0.0:50010
      - HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
      - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
      - SPARK_HOME=/usr/local/spark-2.4.0-bin-without-hadoop
    hostname: namenode
    image: docker.mimirdb.info/spark-hadoop
    ipc: shareable
    labels:
      base.image: docker.mimirdb.info/alpine_oraclejdk8
      description: 'Spark image'
      software: Spark
      software.version: 0.1.201801
      version: 0.1
    logging:
      driver: json-file
      options: {}
    networks:
      spark-net:
        aliases:
          - 3a27cab35ce9
    ports:
      - "222:22/tcp"
      - "4040:4040/tcp"
      - "50070:50070/tcp"
      - "6066:6066/tcp"
      - "7077:7077/tcp"
      - "8020:8020/tcp"
      - "8080:8080/tcp"
    volumes:
      - spark-data:/tmp
    working_dir: /usr/local/spark-2.4.0-bin-without-hadoop
  datanode0:
    command:
      - /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
    container_name: datanode0
    environment:
      - SPARK_PUBLIC_DNS=127.0.0.1
      - SPARK_DAEMON_MEMORY=8g
      - AWS_ECS=false
      - SPARK_CONF_DIR=/conf
      - HDFS_HOST=namenode
      - LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/
      - HDFS_DATA_HOST=datanode0
      - HDFS_CONF_dfs_datanode_address=0.0.0.0:50010
      - SPARK_DRIVER_MEMORY=8g
      - SPARK_WORKER_MEMORY=8g
      - HDFS_CONF_dfs_client_use_datanode_hostname=true
      - SPARK_WORKER_CORES=4
      - SPARK_WORKER_PORT=8882
      # NOTE(review): web UI port 0 looks like a bug inherited from
      # docs-run-containers.sh (uninitialized WORKER_WEBUI_PORT) — confirm.
      - SPARK_WORKER_WEBUI_PORT=0
      - SPARK_EXECUTOR_MEMORY=8g
      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark-2.4.0-bin-without-hadoop/bin
      - LANG=C.UTF-8
      - JAVA_VERSION=8
      - JAVA_UPDATE=161
      - JAVA_BUILD=12
      - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
      - JAVA_HOME=/usr/lib/jvm/default-jvm
      - HADOOP_PREFIX=/usr/local/hadoop
      - HADOOP_COMMON_HOME=/usr/local/hadoop
      - HADOOP_HDFS_HOME=/usr/local/hadoop
      - HADOOP_MAPRED_HOME=/usr/local/hadoop
      - HADOOP_YARN_HOME=/usr/local/hadoop
      - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
      - YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop
      - HADOOP_HOME=/usr/local/hadoop
      - CLUSTER_NAME=test
      - MASTER_IP=0
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - CORE_CONF_hadoop_http_staticuser_user=root
      - CORE_CONF_hadoop_proxyuser_hue_hosts=*
      - CORE_CONF_hadoop_proxyuser_hue_groups=*
      - HDFS_CONF_dfs_webhdfs_enabled=true
      - HDFS_CONF_dfs_permissions_enabled=false
      - HDFS_CONF_dfs_datanode_use_datanode_hostname=true
      - HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
      - HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
      - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
      - SPARK_HOME=/usr/local/spark-2.4.0-bin-without-hadoop
      - INSTANCE_TYPE=worker
    hostname: datanode0
    image: docker.mimirdb.info/spark-hadoop
    ipc: shareable
    labels:
      base.image: docker.mimirdb.info/alpine_oraclejdk8
      description: 'Spark image'
      software: Spark
      software.version: 0.1.201801
      version: 0.1
    logging:
      driver: json-file
      options: {}
    networks:
      spark-net:
        aliases:
          - 914ed79df28c
    ports:
      # NOTE(review): host port 0 means "pick a random port" — almost
      # certainly unintended; see the script bug noted above.
      - "0:8082/tcp"
    volumes:
      - spark-data:/tmp
    working_dir: /usr/local/spark-2.4.0-bin-without-hadoop
    depends_on:
      - namenode
  datanode1:
    command:
      - /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
    container_name: datanode1
    environment:
      - LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/
      - HDFS_HOST=namenode
      - AWS_ECS=false
      - SPARK_WORKER_CORES=4
      # NOTE(review): web UI port 1 is a privileged port — same script bug.
      - SPARK_WORKER_WEBUI_PORT=1
      - SPARK_EXECUTOR_MEMORY=8g
      - HDFS_DATA_HOST=datanode1
      - HDFS_CONF_dfs_datanode_address=0.0.0.0:50010
      - SPARK_DAEMON_MEMORY=8g
      - SPARK_DRIVER_MEMORY=8g
      - SPARK_WORKER_MEMORY=8g
      - SPARK_CONF_DIR=/conf
      - SPARK_PUBLIC_DNS=127.0.0.1
      - SPARK_WORKER_PORT=8882
      - HDFS_CONF_dfs_client_use_datanode_hostname=true
      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark-2.4.0-bin-without-hadoop/bin
      - LANG=C.UTF-8
      - JAVA_VERSION=8
      - JAVA_UPDATE=161
      - JAVA_BUILD=12
      - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
      - JAVA_HOME=/usr/lib/jvm/default-jvm
      - HADOOP_PREFIX=/usr/local/hadoop
      - HADOOP_COMMON_HOME=/usr/local/hadoop
      - HADOOP_HDFS_HOME=/usr/local/hadoop
      - HADOOP_MAPRED_HOME=/usr/local/hadoop
      - HADOOP_YARN_HOME=/usr/local/hadoop
      - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
      - YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop
      - HADOOP_HOME=/usr/local/hadoop
      - CLUSTER_NAME=test
      - MASTER_IP=0
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - CORE_CONF_hadoop_http_staticuser_user=root
      - CORE_CONF_hadoop_proxyuser_hue_hosts=*
      - CORE_CONF_hadoop_proxyuser_hue_groups=*
      - HDFS_CONF_dfs_webhdfs_enabled=true
      - HDFS_CONF_dfs_permissions_enabled=false
      - HDFS_CONF_dfs_datanode_use_datanode_hostname=true
      - HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
      - HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
      - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
      - SPARK_HOME=/usr/local/spark-2.4.0-bin-without-hadoop
      - INSTANCE_TYPE=worker
    hostname: datanode1
    image: docker.mimirdb.info/spark-hadoop
    ipc: shareable
    labels:
      base.image: docker.mimirdb.info/alpine_oraclejdk8
      description: 'Spark image'
      software: Spark
      software.version: 0.1.201801
      version: 0.1
    logging:
      driver: json-file
      options: {}
    networks:
      spark-net:
        aliases:
          - bde4b468192c
    ports:
      - "1:8082/tcp"
    volumes:
      - spark-data:/tmp
    working_dir: /usr/local/spark-2.4.0-bin-without-hadoop
    depends_on:
      - namenode
  vizier-mimir:
    container_name: vizier-mimir
    entrypoint:
      - /bin/bash
      - -c
      - /usr/local/source/entrypoint.sh
    environment:
      - USE_S3_VOLUME=false
      - RESTORE_BACKUP=false
      - PULL_MIMIR=false
      - S3_BUCKET_NAME=vizier-data-test
      - MIMIR_HOST=vizier-mimir
      - SPARK_HOST=namenode
      # SECURITY(review): plaintext AWS credentials committed to the repo —
      # rotate these keys and move them to an env file or secret store.
      - AWS_ACCESS_KEY_ID=AKIAJ7MLFSPYLYG47ARQ
      - AWS_SECRET_ACCESS_KEY=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
      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
      - NGINX_VERSION=1.15.1
      - LANG=C.UTF-8
      - JAVA_VERSION=8
      - JAVA_UPDATE=191
      - JAVA_BUILD=12
      - JAVA_PATH=2787e4a523244c269598db4e85c51e0c
      - JAVA_HOME=/usr/lib/jvm/default-jvm
      - RUN_SSH=true
      - NEW_MIMIR_DB_FROM_S3=true
      - NEW_HIVE_METASTORE_FROM_S3=true
      - HDFS_CONF_dfs_client_use_datanode_hostname=false
      - DATA_STAGING_TYPE=s3
      - IAM_ROLE=none
      - S3_ENDPOINT=
      - S3A_ENDPOINT=https://s3.vizier.app/
      - S3_BUCKET_ACL=private
      # NOTE(review): MOUNT_POINT is web-api/.vizierdb but the volume below
      # mounts web-api/vizier/.vizierdb — confirm which path is correct.
      - MOUNT_POINT=/usr/local/source/web-api/.vizierdb
      - MIMIR_DATA_DIR=/usr/local/source/web-api/.vizierdb/mimir
      - PULL_CODE=2
    hostname: vizier-mimir
    image: docker.mimirdb.info/vizier-mimir-async-spark
    ipc: shareable
    labels:
      base.image: docker.mimirdb.info/alpine_oraclejdk8_nginx
      description: 'an open source, provenance aware, iterative data cleaning tool'
      documentation: https://github.com/VizierDB/web-api/wiki
      software: Vizier
      software.version: 0.2.20190610
      sourcecode: https://github.com/VizierDB
      tags: 'CSV,Data Cleaning,Databases,Provenance,Workflow,Machine Learning'
      version: 0.3
      website: http://vizierdb.info
    logging:
      driver: json-file
      options: {}
    networks:
      spark-net:
        aliases:
          - 137903388a11
    ports:
      - "9002:9001/tcp"
    volumes:
      - vizier-data:/usr/local/source/web-api/vizier/.vizierdb
    depends_on:
      - namenode
      - datanode1
  vizier-api:
    container_name: vizier-api
    entrypoint:
      - /bin/bash
      - -c
      - /usr/local/source/entrypoint.sh
    environment:
      - VIZIERSERVER_SERVER_LOCAL=80
      - VIZIERSERVER_SERVER_PORT=443
      - MIMIR_HOST=vizier-mimir
      - VIZIERSERVER_BASE_URL=https://demo.vizier.devel
      - VIZIERSERVER_APP_PATH=/vizier-db/api/v1
      # SECURITY(review): same leaked credentials as vizier-mimir — rotate.
      - AWS_ACCESS_KEY_ID=AKIAJ7MLFSPYLYG47ARQ
      - AWS_SECRET_ACCESS_KEY=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
      - S3_BUCKET_NAME=vizier-data-test
      - USE_S3_VOLUME=false
      - MIMIR_URL=http://vizier-mimir:8089/api/v2/
      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
      - LANG=C.UTF-8
      - JAVA_VERSION=8
      - JAVA_UPDATE=161
      - JAVA_BUILD=12
      - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
      - JAVA_HOME=/usr/lib/jvm/default-jvm
      - VIZIERSERVER_NAME=vizier
      - VIZIERSERVER_LOG_DIR=/usr/local/source/web-api/vizier/.vizierdb/logs
      - VIZIERSERVER_DEBUG=True
      - VIZIERSERVER_ROW_LIMIT=25
      - VIZIERSERVER_MAX_ROW_LIMIT=-1
      - VIZIERSERVER_MAX_UPLOAD_SIZE=16777216
      - VIZIERSERVER_ENGINE=MIMIR
      - VIZIERSERVER_PACKAGE_PATH=/usr/local/source/web-api/resources/packages/common:/usr/local/source/web-api/resources/packages/mimir
      - VIZIERSERVER_PROCESSOR_PATH=/usr/local/source/web-api/resources/processors/common:/usr/local/source/web-api/resources/processors/mimir
      - VIZIERENGINE_BACKEND=MULTIPROCESS
      - VIZIERENGINE_SYNCHRONOUS=
      - VIZIERENGINE_USE_SHORT_IDENTIFIER=True
      - VIZIERENGINE_DATA_DIR=/usr/local/source/web-api/vizier/.vizierdb
      - VIZIERENGINE_CELERY_ROUTES=
      - CELERY_BROKER_URL=amqp://guest@localhost//
      - VIZIERWORKER_ENV=MIMIR
      - VIZIERWORKER_PROCESSOR_PATH=/usr/local/source/web-api/resources/processors/common:/usr/local/source/web-api/resources/processors/mimir
      - VIZIERWORKER_LOG_DIR=/usr/local/source/web-api/vizier/.vizierdb/logs/worker
      - VIZIERWORKER_CONTROLLER_URL=http://localhost:5000/vizier-db/api/v1
      - VIZIERENGINE_CONTAINER_PORTS=20171-20271
      - VIZIERENGINE_CONTAINER_IMAGE=heikomueller/vizierapi:container
      - PROFILER=0
      - RUN_SSH=true
      - ACME_HOSTS=api.vizier.app
      - GLIBC_VERSION=2.27-r0
      - IAM_ROLE=none
      - S3_ENDPOINT=
      - S3_BUCKET_ACL=private
      - MOUNT_POINT=/usr/local/source/web-api/.vizierdb
      - PULL_CODE=2
    hostname: vizier-api
    image: docker.mimirdb.info/vizier-api-async-spark
    ipc: shareable
    labels:
      base.image: docker.mimirdb.info/alpine_oraclejdk8
      description: 'an open source, provenance aware, iterative data cleaning tool'
      documentation: https://github.com/VizierDB/web-api/wiki
      software: Vizier
      software.version: 0.2.20190610
      sourcecode: https://github.com/VizierDB
      tags: 'CSV,Data Cleaning,Databases,Provenance,Workflow,Machine Learning'
      version: 0.3
      website: http://vizierdb.info
    logging:
      driver: json-file
      options: {}
    networks:
      spark-net:
        aliases:
          - 785709de8b21
    ports:
      - "9003:9001/tcp"
    volumes:
      - vizier-data:/usr/local/source/web-api/vizier/.vizierdb
  vizier-ui:
    container_name: vizier-ui
    entrypoint:
      - /bin/sh
      - -c
      - /usr/local/entrypoint.sh
    environment:
      - API_SCHEME=https
      - API_SERVER=demo.vizier.devel
      - APP_PATH=/vizier-db/api/v1
      - API_PORT=443
      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
      - NGINX_VERSION=1.15.1
      - ANALYTICS_URL=https:\/\/analytics.vizier.app\/
      - ANALYTICS_SITE_ID=12a12e629ffb388167c2c3e560bbc8e1
      - API_BASIC_AUTH=false
    hostname: vizier-ui
    image: docker.mimirdb.info/vizier-ui-async
    ipc: shareable
    labels:
      software.version: 0.2.20190611
    logging:
      driver: json-file
      options: {}
    networks:
      spark-net:
        aliases:
          - a1920bf53960
    ports:
      - "9004:9001/tcp"
  vizier-proxy:
    container_name: vizier-proxy
    entrypoint:
      - /bin/sh
      - -c
      - /usr/local/entrypoint.sh
    environment:
      - VIZIER_CONFIG=vizier_k8s.conf
      - VIZIER_API_APP_PATH=/vizier-db/api/v1/
      - VIZIER_DOMAIN=vizier.devel
      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
      - NGINX_VERSION=1.15.1
      - VIZIER_API_PROXY_PATH=
      - 'ACME_HOSTS=demo.vizier.app api.vizier.app vizier.vizier.app mimir.vizier.app spark.vizier.app hdfs.vizier.app proxy.vizier.app analytics.vizier.app'
      - API_BASIC_AUTH=false
    hostname: vizier-proxy
    image: docker.mimirdb.info/vizier-proxy
    ipc: shareable
    labels:
      base.image: docker.mimirdb.info/alpine_oraclejdk8_nginx
      software: Vizier
      software.version: 0.2.20190509
      version: 0.3
    logging:
      driver: json-file
      options: {}
    networks:
      spark-net:
        aliases:
          - 550a44e26989
    ports:
      - "443:443/tcp"
      - "80:80/tcp"
      - "9001:9001/tcp"
    depends_on:
      - vizier-mimir
      - vizier-api
      - vizier-ui
networks:
  spark-net:
    external: true
volumes:
  vizier-data:
  spark-data:

70
docs-run-containers.sh Executable file
View file

@ -0,0 +1,70 @@
# docs-run-containers.sh — stand up the Vizier demo stack (Spark master/HDFS
# namenode, N Spark worker/datanodes, Mimir, the web API, the UI, and the
# nginx proxy) as individual containers on a shared `spark-net` network.
#
# SECURITY(review): the AWS access/secret keys below are committed in plain
# text; treat them as leaked — rotate the credentials and pass them in from
# the environment instead of hard-coding them here.
#create network
sudo docker network create spark-net
#run the containers
#spark-master
MASTER_HOSTNAME="namenode"
MASTER_CONTAINER=$(sudo docker run -d -v spark-data:/tmp --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 \
-p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 \
--expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" \
-e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" \
-e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/master.sh)
echo "master container id: $MASTER_CONTAINER"
#wait for master to be ready
sleep 5
#spark-workers
WORKER_PORT=8882
DATANODE_PORT=50010
#host port for the first worker's web UI; worker i publishes BASE+i
WORKER_WEBUI_BASE_PORT=8081
#for additional spark workers increment the count below
SPARK_WORKERS_COUNT=2
i=0
while [ $i -lt $SPARK_WORKERS_COUNT ]
do
  #BUG FIX: this was `WORKER_WEBUI_PORT=$[$WORKER_WEBUI_PORT+$i]`, which read
  #an uninitialized variable — worker 0 published host port 0 (kernel picks a
  #random port) and worker 1 published privileged port 1. Compute from a base
  #port instead, using POSIX $(( )) rather than the deprecated $[ ] form.
  WORKER_WEBUI_PORT=$((WORKER_WEBUI_BASE_PORT + i))
  DATANODE_HOSTNAME="datanode$i"
  sudo docker run -d -v spark-data:/tmp -h $DATANODE_HOSTNAME --name $DATANODE_HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 \
  --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" \
  -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$DATANODE_HOSTNAME" -e "HDFS_HOST=$MASTER_HOSTNAME" \
  -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" \
  -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
  i=$((i + 1))
done
VIZIER_DOMAIN="vizier.devel"
#SECURITY(review): leaked credentials — rotate and load from the environment.
S3_AWS_ACCESS_KEY_ID="AKIAJ7MLFSPYLYG47ARQ"
S3_AWS_SECRET_ACCESS_KEY="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"
S3_BUCKET_NAME="vizier-data-test"
VIZIER_DATA_VOLUME="vizier-data"
#mimir
#to use an s3 bucket as the data directory for mimir instead of a volume use this:
#sudo docker run -d -v mimir-data:/tmp/data/mimir -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" \
#-e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY \
#-e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e MIMIR_DATA_DIR="/tmp/data/mimir" --privileged --device /dev/fuse docker.mimirdb.info/vizier-mimir-spark
#to use a local bind mount for the data directory instead of an s3 bucket use the following for mimir instead of the above:
sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/.vizierdb -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir \
-e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-spark
#api
#to use an s3 bucket as the data directory for the api instead of a volume use this:
#sudo docker run -d -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api -e MIMIR_HOST="vizier-mimir" -e APP_PATH="" -e API_SERVER=api.$VIZIER_DOMAIN \
#-e API_LOCAL_PORT=80 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY \
#-e S3_BUCKET_NAME="$S3_BUCKET_NAME" --privileged --device /dev/fuse docker.mimirdb.info/vizier-api-spark
#to use a local volume for the data directory instead of an s3 bucket use the following for api instead of the above:
sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/.vizierdb -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api \
-e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e MIMIR_URL="http://vizier-mimir:8089/api/v2/" -e APP_PATH="/api" -e API_SERVER=demo.$VIZIER_DOMAIN \
-e API_LOCAL_PORT=80 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY \
-e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-api-spark
#ui
sudo docker run -d -e API_SERVER=demo.$VIZIER_DOMAIN -e APP_PATH="/api" -e API_PORT=443 -e API_SCHEME=https \
--expose 80 --expose 443 -p 9004:9001 -h vizier-ui --name vizier-ui --network spark-net docker.mimirdb.info/vizier-ui
#proxy
sudo docker run -d -p 80:80 -p 443:443 -p 9001:9001 -h vizier-proxy --name vizier-proxy --network spark-net -e VIZIER_CONFIG="vizier_k8s.conf" \
-e VIZIER_API_APP_PATH="/api/" -e VIZIER_DOMAIN="$VIZIER_DOMAIN" -e VIZIER_API_PROXY_PATH="/" docker.mimirdb.info/vizier-proxy

View file

@ -4,7 +4,7 @@ FROM docker.mimirdb.info/alpine_oraclejdk8_nginx
LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8_nginx" LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8_nginx"
LABEL version="0.3" LABEL version="0.3"
LABEL software="Vizier" LABEL software="Vizier"
LABEL software.version="0.2.20190425" LABEL software.version="0.2.20190610"
LABEL description="an open source, provenance aware, iterative data cleaning tool" LABEL description="an open source, provenance aware, iterative data cleaning tool"
LABEL website="http://vizierdb.info" LABEL website="http://vizierdb.info"
LABEL sourcecode="https://github.com/VizierDB" LABEL sourcecode="https://github.com/VizierDB"
@ -85,6 +85,16 @@ RUN curl -sL "https://github.com/sbt/sbt/releases/download/v0.13.15/sbt-0.13.15.
&& git checkout -b local_$MIMIR_BRANCH origin/$MIMIR_BRANCH \ && git checkout -b local_$MIMIR_BRANCH origin/$MIMIR_BRANCH \
&& ../sbt/bin/sbt publish && ../sbt/bin/sbt publish
RUN cd /usr/local/source/mimir \
&& ../sbt/bin/sbt "runMimirVizier -X LOG LOGM remoteSpark NO-VISTRAILS"
ENV PULL_CODE=3
RUN cd /usr/local/source/mimir \
&& git pull \
&& rm -r /root/.m2/repository/info/mimirdb/mimir-core_2.11/0.2 \
&& ../sbt/bin/sbt publish
COPY run_init.sh /usr/local/source/run_init.sh COPY run_init.sh /usr/local/source/run_init.sh
COPY run_mimir.sh /usr/local/source/run_mimir.sh COPY run_mimir.sh /usr/local/source/run_mimir.sh
COPY entrypoint.sh /usr/local/source/entrypoint.sh COPY entrypoint.sh /usr/local/source/entrypoint.sh

View file

@ -1,6 +1,6 @@
FROM nginx:alpine FROM nginx:alpine
LABEL software.version="0.2.20190306" LABEL software.version="0.2.20190611"
EXPOSE 80 EXPOSE 80
EXPOSE 22 EXPOSE 22
@ -33,7 +33,7 @@ ENV API_BASIC_AUTH=false
#setup production web-ui branch #setup production web-ui branch
RUN mkdir /usr/local/source/ \ RUN mkdir /usr/local/source/ \
&& cd /usr/local/source/ \ && cd /usr/local/source/ \
&& echo 'test2' \ && echo 'test4' \
&& git clone https://github.com/VizierDB/web-ui.git \ && git clone https://github.com/VizierDB/web-ui.git \
&& cd /usr/local/source/web-ui \ && cd /usr/local/source/web-ui \
&& git checkout -b local_$UI_BRANCH origin/$UI_BRANCH \ && git checkout -b local_$UI_BRANCH origin/$UI_BRANCH \