From c71d1da2c2181050235e32cd6878099d304d64f2 Mon Sep 17 00:00:00 2001
From: Mike
Date: Thu, 13 Jun 2019 11:26:16 -0400
Subject: [PATCH] add docker compose file. add bokeh support to api

---
 api-async/Dockerfile   |  13 +-
 docker-compose.yml     | 434 +++++++++++++++++++++++++++++++++++++++++
 docs-run-containers.sh |  70 +++++++
 mimir/Dockerfile       |  12 +-
 ui-nginx/Dockerfile    |   4 +-
 5 files changed, 527 insertions(+), 6 deletions(-)
 create mode 100644 docker-compose.yml
 create mode 100755 docs-run-containers.sh

diff --git a/api-async/Dockerfile b/api-async/Dockerfile
index ab40c54..c1a410d 100644
--- a/api-async/Dockerfile
+++ b/api-async/Dockerfile
@@ -4,7 +4,7 @@
 LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8"
 LABEL version="0.3"
 LABEL software="Vizier"
-LABEL software.version="0.2.20190425"
+LABEL software.version="0.2.20190610"
 LABEL description="an open source, provenance aware, iterative data cleaning tool"
 LABEL website="http://vizierdb.info"
 LABEL sourcecode="https://github.com/VizierDB"
@@ -173,8 +173,6 @@ RUN curl -OsL "https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_6
     && rm Miniconda2-latest-Linux-x86_64.sh \
     && echo 'export PATH=/opt/conda/bin:$PATH' >> /etc/profile.d/conda.sh
 
-ENV PULL_CODE=0
-
 #setup web-api
 #copy local archive instead of pulling from github
 #COPY web-api.tgz /usr/local/source/
@@ -189,10 +187,19 @@ RUN cd /usr/local/source/ \
     && pip install gunicorn \
     && pip install futures \
     && pip install matplotlib \
+    && pip install bokeh \
+    && pip install geopandas \
+    && pip install pandas \
+    && pip install numpy \
+    && pip install shapely \
     && pip install https://github.com/matplotlib/basemap/archive/v1.1.0.tar.gz \
     && pip install -e . \
     && mkdir -p /usr/local/source/web-api/.vizierdb
 
+ENV PULL_CODE=3
+
+RUN cd /usr/local/source/web-api \
+    && git pull
 
 COPY run_init.sh /usr/local/source/run_init.sh
 COPY run_web_api.sh /usr/local/source/run_web_api.sh
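An illustrative sanity check for the new Python dependencies added above, assuming the image is rebuilt under the tag docker.mimirdb.info/vizier-api-async-spark that docker-compose.yml below refers to, and that the Miniconda interpreter sits at /opt/conda/bin/python as installed earlier in this Dockerfile:

#rebuild the api image and confirm the new libraries import cleanly
sudo docker build -t docker.mimirdb.info/vizier-api-async-spark api-async/
sudo docker run --rm --entrypoint /opt/conda/bin/python docker.mimirdb.info/vizier-api-async-spark \
    -c "import bokeh, geopandas, pandas, numpy, shapely; print(bokeh.__version__)"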
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..05454e5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,434 @@
+version: "3"
+services:
+  namenode:
+    command:
+      - /usr/local/spark-2.4.0-bin-without-hadoop/master.sh
+    container_name: namenode
+    environment:
+      - AWS_ECS=false
+      - SPARK_PUBLIC_DNS=127.0.0.1
+      - SPARK_DAEMON_MEMORY=8g
+      - SPARK_DRIVER_MEMORY=8g
+      - HDFS_CONF_dfs_client_use_datanode_hostname=true
+      - SPARK_WORKER_MEMORY=8g
+      - MASTER=spark://namenode:7077
+      - SPARK_CONF_DIR=/conf
+      - LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/
+      - SPARK_EXECUTOR_MEMORY=8g
+      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark-2.4.0-bin-without-hadoop/bin
+      - LANG=C.UTF-8
+      - JAVA_VERSION=8
+      - JAVA_UPDATE=161
+      - JAVA_BUILD=12
+      - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
+      - JAVA_HOME=/usr/lib/jvm/default-jvm
+      - HADOOP_PREFIX=/usr/local/hadoop
+      - HADOOP_COMMON_HOME=/usr/local/hadoop
+      - HADOOP_HDFS_HOME=/usr/local/hadoop
+      - HADOOP_MAPRED_HOME=/usr/local/hadoop
+      - HADOOP_YARN_HOME=/usr/local/hadoop
+      - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
+      - YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop
+      - HADOOP_HOME=/usr/local/hadoop
+      - CLUSTER_NAME=test
+      - MASTER_IP=0
+      - HDFS_HOST=namenode
+      - HDFS_DATA_HOST=datanode
+      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
+      - CORE_CONF_hadoop_http_staticuser_user=root
+      - CORE_CONF_hadoop_proxyuser_hue_hosts=*
+      - CORE_CONF_hadoop_proxyuser_hue_groups=*
+      - HDFS_CONF_dfs_webhdfs_enabled=true
+      - HDFS_CONF_dfs_permissions_enabled=false
+      - HDFS_CONF_dfs_datanode_use_datanode_hostname=true
+      - HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
+      - HDFS_CONF_dfs_datanode_address=0.0.0.0:50010
+      - HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
+      - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
+      - SPARK_HOME=/usr/local/spark-2.4.0-bin-without-hadoop
+    hostname: namenode
+    image: docker.mimirdb.info/spark-hadoop
+    ipc: shareable
+    labels:
+      base.image: docker.mimirdb.info/alpine_oraclejdk8
+      description: 'Spark image'
+      software: Spark
+      software.version: 0.1.201801
+      version: 0.1
+    logging:
+      driver: json-file
+      options: {}
+    networks:
+      spark-net:
+        aliases:
+          - 3a27cab35ce9
+    ports:
+      - 222:22/tcp
+      - 4040:4040/tcp
+      - 50070:50070/tcp
+      - 6066:6066/tcp
+      - 7077:7077/tcp
+      - 8020:8020/tcp
+      - 8080:8080/tcp
+    volumes:
+      - spark-data:/tmp
+    working_dir: /usr/local/spark-2.4.0-bin-without-hadoop
+  datanode0:
+    command:
+      - /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
+    container_name: datanode0
+    environment:
+      - SPARK_PUBLIC_DNS=127.0.0.1
+      - SPARK_DAEMON_MEMORY=8g
+      - AWS_ECS=false
+      - SPARK_CONF_DIR=/conf
+      - HDFS_HOST=namenode
+      - LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/
+      - HDFS_DATA_HOST=datanode0
+      - HDFS_CONF_dfs_datanode_address=0.0.0.0:50010
+      - SPARK_DRIVER_MEMORY=8g
+      - SPARK_WORKER_MEMORY=8g
+      - HDFS_CONF_dfs_client_use_datanode_hostname=true
+      - SPARK_WORKER_CORES=4
+      - SPARK_WORKER_PORT=8882
+      - SPARK_WORKER_WEBUI_PORT=0
+      - SPARK_EXECUTOR_MEMORY=8g
+      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark-2.4.0-bin-without-hadoop/bin
+      - LANG=C.UTF-8
+      - JAVA_VERSION=8
+      - JAVA_UPDATE=161
+      - JAVA_BUILD=12
+      - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
+      - JAVA_HOME=/usr/lib/jvm/default-jvm
+      - HADOOP_PREFIX=/usr/local/hadoop
+      - HADOOP_COMMON_HOME=/usr/local/hadoop
+      - HADOOP_HDFS_HOME=/usr/local/hadoop
+      - HADOOP_MAPRED_HOME=/usr/local/hadoop
+      - HADOOP_YARN_HOME=/usr/local/hadoop
+      - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
+      - YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop
+      - HADOOP_HOME=/usr/local/hadoop
+      - CLUSTER_NAME=test
+      - MASTER_IP=0
+      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
+      - CORE_CONF_hadoop_http_staticuser_user=root
+      - CORE_CONF_hadoop_proxyuser_hue_hosts=*
+      - CORE_CONF_hadoop_proxyuser_hue_groups=*
+      - HDFS_CONF_dfs_webhdfs_enabled=true
+      - HDFS_CONF_dfs_permissions_enabled=false
+      - HDFS_CONF_dfs_datanode_use_datanode_hostname=true
+      - HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
+      - HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
+      - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
+      - SPARK_HOME=/usr/local/spark-2.4.0-bin-without-hadoop
+      - INSTANCE_TYPE=worker
+    hostname: datanode0
+    image: docker.mimirdb.info/spark-hadoop
+    ipc: shareable
+    labels:
+      base.image: docker.mimirdb.info/alpine_oraclejdk8
+      description: 'Spark image'
+      software: Spark
+      software.version: 0.1.201801
+      version: 0.1
+    logging:
+      driver: json-file
+      options: {}
+    networks:
+      spark-net:
+        aliases:
+          - 914ed79df28c
+    ports:
+      - 0:8082/tcp
+    volumes:
+      - spark-data:/tmp
+    working_dir: /usr/local/spark-2.4.0-bin-without-hadoop
+    depends_on:
+      - namenode
+  datanode1:
+    command:
+      - /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
+    container_name: datanode1
+    environment:
+      - LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/
+      - HDFS_HOST=namenode
+      - AWS_ECS=false
+      - SPARK_WORKER_CORES=4
+      - SPARK_WORKER_WEBUI_PORT=1
+      - SPARK_EXECUTOR_MEMORY=8g
+      - HDFS_DATA_HOST=datanode1
+      - HDFS_CONF_dfs_datanode_address=0.0.0.0:50010
+      - SPARK_DAEMON_MEMORY=8g
+      - SPARK_DRIVER_MEMORY=8g
+      - SPARK_WORKER_MEMORY=8g
+      - SPARK_CONF_DIR=/conf
+      - SPARK_PUBLIC_DNS=127.0.0.1
+      - SPARK_WORKER_PORT=8882
+      - HDFS_CONF_dfs_client_use_datanode_hostname=true
+      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark-2.4.0-bin-without-hadoop/bin
+      - LANG=C.UTF-8
+      - JAVA_VERSION=8
+      - JAVA_UPDATE=161
+      - JAVA_BUILD=12
+      - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
+      - JAVA_HOME=/usr/lib/jvm/default-jvm
+      - HADOOP_PREFIX=/usr/local/hadoop
+      - HADOOP_COMMON_HOME=/usr/local/hadoop
+      - HADOOP_HDFS_HOME=/usr/local/hadoop
+      - HADOOP_MAPRED_HOME=/usr/local/hadoop
+      - HADOOP_YARN_HOME=/usr/local/hadoop
+      - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
+      - YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop
+      - HADOOP_HOME=/usr/local/hadoop
+      - CLUSTER_NAME=test
+      - MASTER_IP=0
+      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
+      - CORE_CONF_hadoop_http_staticuser_user=root
+      - CORE_CONF_hadoop_proxyuser_hue_hosts=*
+      - CORE_CONF_hadoop_proxyuser_hue_groups=*
+      - HDFS_CONF_dfs_webhdfs_enabled=true
+      - HDFS_CONF_dfs_permissions_enabled=false
+      - HDFS_CONF_dfs_datanode_use_datanode_hostname=true
+      - HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
+      - HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
+      - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
+      - SPARK_HOME=/usr/local/spark-2.4.0-bin-without-hadoop
+      - INSTANCE_TYPE=worker
+    hostname: datanode1
+    image: docker.mimirdb.info/spark-hadoop
+    ipc: shareable
+    labels:
+      base.image: docker.mimirdb.info/alpine_oraclejdk8
+      description: 'Spark image'
+      software: Spark
+      software.version: 0.1.201801
+      version: 0.1
+    logging:
+      driver: json-file
+      options: {}
+    networks:
+      spark-net:
+        aliases:
+          - bde4b468192c
+    ports:
+      - 1:8082/tcp
+    volumes:
+      - spark-data:/tmp
+    working_dir: /usr/local/spark-2.4.0-bin-without-hadoop
+    depends_on:
+      - namenode
+  vizier-mimir:
+    container_name: vizier-mimir
+    entrypoint:
+      - /bin/bash
+      - -c
+      - /usr/local/source/entrypoint.sh
+    environment:
+      - USE_S3_VOLUME=false
+      - RESTORE_BACKUP=false
+      - PULL_MIMIR=false
+      - S3_BUCKET_NAME=vizier-data-test
+      - MIMIR_HOST=vizier-mimir
+      - SPARK_HOST=namenode
+      - AWS_ACCESS_KEY_ID=AKIAJ7MLFSPYLYG47ARQ
+      - AWS_SECRET_ACCESS_KEY=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
+      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+      - NGINX_VERSION=1.15.1
+      - LANG=C.UTF-8
+      - JAVA_VERSION=8
+      - JAVA_UPDATE=191
+      - JAVA_BUILD=12
+      - JAVA_PATH=2787e4a523244c269598db4e85c51e0c
+      - JAVA_HOME=/usr/lib/jvm/default-jvm
+      - RUN_SSH=true
+      - NEW_MIMIR_DB_FROM_S3=true
+      - NEW_HIVE_METASTORE_FROM_S3=true
+      - HDFS_CONF_dfs_client_use_datanode_hostname=false
+      - DATA_STAGING_TYPE=s3
+      - IAM_ROLE=none
+      - S3_ENDPOINT=
+      - S3A_ENDPOINT=https://s3.vizier.app/
+      - S3_BUCKET_ACL=private
+      - MOUNT_POINT=/usr/local/source/web-api/.vizierdb
+      - MIMIR_DATA_DIR=/usr/local/source/web-api/.vizierdb/mimir
+      - PULL_CODE=2
+    hostname: vizier-mimir
+    image: docker.mimirdb.info/vizier-mimir-async-spark
+    ipc: shareable
+    labels:
+      base.image: docker.mimirdb.info/alpine_oraclejdk8_nginx
+      description: 'an open source, provenance aware, iterative data cleaning tool'
+      documentation: https://github.com/VizierDB/web-api/wiki
+      software: Vizier
+      software.version: 0.2.20190610
+      sourcecode: https://github.com/VizierDB
+      tags: 'CSV,Data Cleaning,Databases,Provenance,Workflow,Machine Learning'
+      version: 0.3
+      website: http://vizierdb.info
+    logging:
+      driver: json-file
+      options: {}
+    networks:
+      spark-net:
+        aliases:
+          - 137903388a11
+    ports:
+      - 9002:9001/tcp
+    volumes:
+      - vizier-data:/usr/local/source/web-api/vizier/.vizierdb
+    depends_on:
+      - namenode
+      - datanode1
+  vizier-api:
+    container_name: vizier-api
+    entrypoint:
+      - /bin/bash
+      - -c
+      - /usr/local/source/entrypoint.sh
+    environment:
+      - VIZIERSERVER_SERVER_LOCAL=80
+      - VIZIERSERVER_SERVER_PORT=443
+      - MIMIR_HOST=vizier-mimir
+      - VIZIERSERVER_BASE_URL=https://demo.vizier.devel
+      - VIZIERSERVER_APP_PATH=/vizier-db/api/v1
+      - AWS_ACCESS_KEY_ID=AKIAJ7MLFSPYLYG47ARQ
+      - AWS_SECRET_ACCESS_KEY=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
+      - S3_BUCKET_NAME=vizier-data-test
+      - USE_S3_VOLUME=false
+      - MIMIR_URL=http://vizier-mimir:8089/api/v2/
+      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+      - LANG=C.UTF-8
+      - JAVA_VERSION=8
+      - JAVA_UPDATE=161
+      - JAVA_BUILD=12
+      - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
+      - JAVA_HOME=/usr/lib/jvm/default-jvm
+      - VIZIERSERVER_NAME=vizier
+      - VIZIERSERVER_LOG_DIR=/usr/local/source/web-api/vizier/.vizierdb/logs
+      - VIZIERSERVER_DEBUG=True
+      - VIZIERSERVER_ROW_LIMIT=25
+      - VIZIERSERVER_MAX_ROW_LIMIT=-1
+      - VIZIERSERVER_MAX_UPLOAD_SIZE=16777216
+      - VIZIERSERVER_ENGINE=MIMIR
+      - VIZIERSERVER_PACKAGE_PATH=/usr/local/source/web-api/resources/packages/common:/usr/local/source/web-api/resources/packages/mimir
+      - VIZIERSERVER_PROCESSOR_PATH=/usr/local/source/web-api/resources/processors/common:/usr/local/source/web-api/resources/processors/mimir
+      - VIZIERENGINE_BACKEND=MULTIPROCESS
+      - VIZIERENGINE_SYNCHRONOUS=
+      - VIZIERENGINE_USE_SHORT_IDENTIFIER=True
+      - VIZIERENGINE_DATA_DIR=/usr/local/source/web-api/vizier/.vizierdb
+      - VIZIERENGINE_CELERY_ROUTES=
+      - CELERY_BROKER_URL=amqp://guest@localhost//
+      - VIZIERWORKER_ENV=MIMIR
+      - VIZIERWORKER_PROCESSOR_PATH=/usr/local/source/web-api/resources/processors/common:/usr/local/source/web-api/resources/processors/mimir
+      - VIZIERWORKER_LOG_DIR=/usr/local/source/web-api/vizier/.vizierdb/logs/worker
+      - VIZIERWORKER_CONTROLLER_URL=http://localhost:5000/vizier-db/api/v1
+      - VIZIERENGINE_CONTAINER_PORTS=20171-20271
+      - VIZIERENGINE_CONTAINER_IMAGE=heikomueller/vizierapi:container
+      - PROFILER=0
+      - RUN_SSH=true
+      - ACME_HOSTS=api.vizier.app
+      - GLIBC_VERSION=2.27-r0
+      - IAM_ROLE=none
+      - S3_ENDPOINT=
+      - S3_BUCKET_ACL=private
+      - MOUNT_POINT=/usr/local/source/web-api/.vizierdb
+      - PULL_CODE=2
+    hostname: vizier-api
+    image: docker.mimirdb.info/vizier-api-async-spark
+    ipc: shareable
+    labels:
+      base.image: docker.mimirdb.info/alpine_oraclejdk8
+      description: 'an open source, provenance aware, iterative data cleaning tool'
+      documentation: https://github.com/VizierDB/web-api/wiki
+      software: Vizier
+      software.version: 0.2.20190610
+      sourcecode: https://github.com/VizierDB
+      tags: 'CSV,Data Cleaning,Databases,Provenance,Workflow,Machine Learning'
+      version: 0.3
+      website: http://vizierdb.info
+    logging:
+      driver: json-file
+      options: {}
+    networks:
+      spark-net:
+        aliases:
+          - 785709de8b21
+    ports:
+      - 9003:9001/tcp
+    volumes:
+      - vizier-data:/usr/local/source/web-api/vizier/.vizierdb
+  vizier-ui:
+    container_name: vizier-ui
+    entrypoint:
+      - /bin/sh
+      - -c
+      - /usr/local/entrypoint.sh
+    environment:
+      - API_SCHEME=https
+      - API_SERVER=demo.vizier.devel
+      - APP_PATH=/vizier-db/api/v1
+      - API_PORT=443
+      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+      - NGINX_VERSION=1.15.1
+      - ANALYTICS_URL=https:\/\/analytics.vizier.app\/
+      - ANALYTICS_SITE_ID=12a12e629ffb388167c2c3e560bbc8e1
+      - API_BASIC_AUTH=false
+    hostname: vizier-ui
+    image: docker.mimirdb.info/vizier-ui-async
+    ipc: shareable
+    labels:
+      software.version: 0.2.20190611
+    logging:
+      driver: json-file
+      options: {}
+    networks:
+      spark-net:
+        aliases:
+          - a1920bf53960
+    ports:
+      - 9004:9001/tcp
+  vizier-proxy:
+    container_name: vizier-proxy
+    entrypoint:
+      - /bin/sh
+      - -c
+      - /usr/local/entrypoint.sh
+    environment:
+      - VIZIER_CONFIG=vizier_k8s.conf
+      - VIZIER_API_APP_PATH=/vizier-db/api/v1/
+      - VIZIER_DOMAIN=vizier.devel
+      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+      - NGINX_VERSION=1.15.1
+      - VIZIER_API_PROXY_PATH=
+      - 'ACME_HOSTS=demo.vizier.app api.vizier.app vizier.vizier.app mimir.vizier.app
+        spark.vizier.app hdfs.vizier.app proxy.vizier.app analytics.vizier.app'
+      - API_BASIC_AUTH=false
+    hostname: vizier-proxy
+    image: docker.mimirdb.info/vizier-proxy
+    ipc: shareable
+    labels:
+      base.image: docker.mimirdb.info/alpine_oraclejdk8_nginx
+      software: Vizier
+      software.version: 0.2.20190509
+      version: 0.3
+    logging:
+      driver: json-file
+      options: {}
+    networks:
+      spark-net:
+        aliases:
+          - 550a44e26989
+    ports:
+      - 443:443/tcp
+      - 80:80/tcp
+      - 9001:9001/tcp
+    depends_on:
+      - vizier-mimir
+      - vizier-api
+      - vizier-ui
+networks:
+  spark-net:
+    external: true
+volumes:
+  vizier-data:
+  spark-data:
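The compose file declares spark-net as an external network, so it has to exist before the stack is started. A typical bring-up, sketched with the service names and published ports defined above:

#create the external network once, then start and inspect the stack
sudo docker network create spark-net
sudo docker-compose up -d
sudo docker-compose ps
#the Spark master UI and HDFS namenode UI are published by the namenode service
curl -s http://localhost:8080 > /dev/null && echo "spark master ui reachable"
curl -s http://localhost:50070 > /dev/null && echo "hdfs namenode ui reachable"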
diff --git a/docs-run-containers.sh b/docs-run-containers.sh
new file mode 100755
index 0000000..677471c
--- /dev/null
+++ b/docs-run-containers.sh
@@ -0,0 +1,70 @@
+#create network
+sudo docker network create spark-net
+
+#run the containers
+#spark-master
+MASTER_HOSTNAME="namenode"
+MASTER_CONTAINER=`sudo docker run -d -v spark-data:/tmp --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 \
+-p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 \
+--expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" \
+-e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" \
+-e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/master.sh`
+echo "master container id: $MASTER_CONTAINER"
+#wait for master to be ready
+sleep 5
+
+#spark-workers
+START_PORT=7001
+END_PORT=7006
+WORKER_PORT=8882
+DATANODE_PORT=50010
+#for additional spark workers increment the count below
+SPARK_WORKERS_COUNT=2
+i="0"
+while [ $i -lt $SPARK_WORKERS_COUNT ]
+do
+  WORKER_WEBUI_PORT=$[$WORKER_WEBUI_PORT+$i]
+  DATANODE_HOSTNAME="datanode$i"
+  sudo docker run -d -v spark-data:/tmp -h $DATANODE_HOSTNAME --name $DATANODE_HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 \
+  --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" \
+  -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$DATANODE_HOSTNAME" -e "HDFS_HOST=$MASTER_HOSTNAME" \
+  -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" \
+  -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
+  i=$[$i+1]
+done
+
+VIZIER_DOMAIN="vizier.devel"
+
+S3_AWS_ACCESS_KEY_ID="AKIAJ7MLFSPYLYG47ARQ"
+S3_AWS_SECRET_ACCESS_KEY="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"
+S3_BUCKET_NAME="vizier-data-test"
+VIZIER_DATA_VOLUME="vizier-data"
+
+#mimir
+#to use an s3 bucket as the data directory for mimir instead of a volume use this:
+#sudo docker run -d -v mimir-data:/tmp/data/mimir -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" \
+#-e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY \
+#-e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e MIMIR_DATA_DIR="/tmp/data/mimir" --privileged --device /dev/fuse docker.mimirdb.info/vizier-mimir-spark
+#to use a local bind mount for the data directory instead of an s3 bucket use the following for mimir instead of the above:
+sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/.vizierdb -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir \
+-e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID \
+-e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-spark
+
+#api
+#to use an s3 bucket as the data directory for the api instead of a volume use this:
+#sudo docker run -d -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api -e MIMIR_HOST="vizier-mimir" -e APP_PATH="" -e API_SERVER=api.$VIZIER_DOMAIN \
+#-e API_LOCAL_PORT=80 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY \
+#-e S3_BUCKET_NAME="$S3_BUCKET_NAME" --privileged --device /dev/fuse docker.mimirdb.info/vizier-api-spark
+#to use a local volume for the data directory instead of an s3 bucket use the following for api instead of the above:
+sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/.vizierdb -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api \
+-e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e MIMIR_URL="http://vizier-mimir:8089/api/v2/" -e APP_PATH="/api" -e API_SERVER=demo.$VIZIER_DOMAIN \
+-e API_LOCAL_PORT=80 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY \
+-e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-api-spark
+
+#ui
+sudo docker run -d -e API_SERVER=demo.$VIZIER_DOMAIN -e APP_PATH="/api" -e API_PORT=443 -e API_SCHEME=https \
+--expose 80 --expose 443 -p 9004:9001 -h vizier-ui --name vizier-ui --network spark-net docker.mimirdb.info/vizier-ui
+
+#proxy
+sudo docker run -d -p 80:80 -p 443:443 -p 9001:9001 -h vizier-proxy --name vizier-proxy --network spark-net -e VIZIER_CONFIG="vizier_k8s.conf" \
+-e VIZIER_API_APP_PATH="/api/" -e VIZIER_DOMAIN="$VIZIER_DOMAIN" -e VIZIER_API_PROXY_PATH="/" docker.mimirdb.info/vizier-proxy
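docs-run-containers.sh is the manual docker-run equivalent of docker-compose.yml above; it creates the same container, network, and volume names. A matching teardown, sketched here using only the names the script creates:

#stop and remove the manually started containers, then the network and volumes
sudo docker rm -f vizier-proxy vizier-ui vizier-api vizier-mimir datanode1 datanode0 namenode
sudo docker network rm spark-net
sudo docker volume rm spark-data vizier-data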
diff --git a/mimir/Dockerfile b/mimir/Dockerfile
index ae0758b..632cbe3 100644
--- a/mimir/Dockerfile
+++ b/mimir/Dockerfile
@@ -4,7 +4,7 @@ FROM docker.mimirdb.info/alpine_oraclejdk8_nginx
 LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8_nginx"
 LABEL version="0.3"
 LABEL software="Vizier"
-LABEL software.version="0.2.20190425"
+LABEL software.version="0.2.20190610"
 LABEL description="an open source, provenance aware, iterative data cleaning tool"
 LABEL website="http://vizierdb.info"
 LABEL sourcecode="https://github.com/VizierDB"
@@ -85,6 +85,16 @@ RUN curl -sL "https://github.com/sbt/sbt/releases/download/v0.13.15/sbt-0.13.15.
     && git checkout -b local_$MIMIR_BRANCH origin/$MIMIR_BRANCH \
     && ../sbt/bin/sbt publish
 
+RUN cd /usr/local/source/mimir \
+    && ../sbt/bin/sbt "runMimirVizier -X LOG LOGM remoteSpark NO-VISTRAILS"
+
+ENV PULL_CODE=3
+
+RUN cd /usr/local/source/mimir \
+    && git pull \
+    && rm -r /root/.m2/repository/info/mimirdb/mimir-core_2.11/0.2 \
+    && ../sbt/bin/sbt publish
+
 COPY run_init.sh /usr/local/source/run_init.sh
 COPY run_mimir.sh /usr/local/source/run_mimir.sh
 COPY entrypoint.sh /usr/local/source/entrypoint.sh
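Bumping ENV PULL_CODE in the hunk above invalidates the Docker build cache from that instruction onward, so the git pull and sbt publish that follow always run against fresh Mimir source even when earlier layers are cached. An illustrative rebuild, assuming the image is tagged as docker.mimirdb.info/vizier-mimir-async-spark, the tag the vizier-mimir service in docker-compose.yml points at:

#rebuild the mimir image; layers after the changed PULL_CODE value are rebuilt
sudo docker build -t docker.mimirdb.info/vizier-mimir-async-spark mimir/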
diff --git a/ui-nginx/Dockerfile b/ui-nginx/Dockerfile
index bce0be5..e0aab09 100644
--- a/ui-nginx/Dockerfile
+++ b/ui-nginx/Dockerfile
@@ -1,6 +1,6 @@
 FROM nginx:alpine
 
-LABEL software.version="0.2.20190306"
+LABEL software.version="0.2.20190611"
 
 EXPOSE 80
 EXPOSE 22
@@ -33,7 +33,7 @@ ENV API_BASIC_AUTH=false
 #setup production web-ui branch
 RUN mkdir /usr/local/source/ \
     && cd /usr/local/source/ \
-    && echo 'test2' \
+    && echo 'test4' \
     && git clone https://github.com/VizierDB/web-ui.git \
     && cd /usr/local/source/web-ui \
     && git checkout -b local_$UI_BRANCH origin/$UI_BRANCH \