From 3d9254324ee8798486e3e82427c5abbefbaef08a Mon Sep 17 00:00:00 2001
From: Mike
Date: Wed, 8 May 2019 14:46:06 -0400
Subject: [PATCH] fixes for non async. add async build and run scripts.

---
Review notes (free-form text in this position is not part of the commit
message and is not applied as a diff):

 * run-containers-async.sh declares #!/bin/sh, but used the bash-only,
   deprecated `$[...]` arithmetic form. It now uses POSIX `$((...))`.
 * WORKER_WEBUI_PORT was read before ever being assigned (only WORKER_PORT
   is defined), so the published host ports evaluated to 0 and 1. It now
   falls back to 8082, the container-side port of the `-p` mapping.
   NOTE(review): confirm 8082 is the intended base port.
 * The S3 access key pair is no longer hard-coded in the script; both
   values are taken from the caller's environment (empty when unset).
   The key pair that was committed previously should be treated as
   compromised and rotated.
 * Every change replaces one added line with one added line, so the hunk
   headers and the diffstat below are unchanged. The abbreviated blob id
   on the `index` line of run-containers-async.sh no longer matches the
   edited content; regenerate the patch if exact ids matter.
 * GIT_PASS is still forwarded with --build-arg in the build scripts.
   NOTE(review): consider a BuildKit secret mount instead.

 api-async/Dockerfile    |  2 +-
 build-images-async.sh   | 28 +++++++++++++++++++++
 build-images.sh         | 19 +++------------
 mimir/Dockerfile        |  2 +-
 run-containers-async.sh | 54 +++++++++++++++++++++++++++++++++++++++++
 run-containers.sh       | 27 +++------------------
 6 files changed, 92 insertions(+), 40 deletions(-)
 create mode 100755 build-images-async.sh
 create mode 100755 run-containers-async.sh

diff --git a/api-async/Dockerfile b/api-async/Dockerfile
index 8be82a3..ab40c54 100644
--- a/api-async/Dockerfile
+++ b/api-async/Dockerfile
@@ -4,7 +4,7 @@ LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8"
 
 LABEL version="0.3"
 LABEL software="Vizier"
-LABEL software.version="0.2.20190418"
+LABEL software.version="0.2.20190425"
 LABEL description="an open source, provenance aware, iterative data cleaning tool"
 LABEL website="http://vizierdb.info"
 LABEL sourcecode="https://github.com/VizierDB"
diff --git a/build-images-async.sh b/build-images-async.sh
new file mode 100755
index 0000000..26860aa
--- /dev/null
+++ b/build-images-async.sh
@@ -0,0 +1,28 @@
+
+#build the images
+GIT_USER=$1
+GIT_PASS=$2
+
+#mimir-async
+cd ./mimir
+sudo docker build -t docker.mimirdb.info/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=mimir-vizier-api
+
+#api-async
+cd ../api-async
+sudo docker build -t docker.mimirdb.info/vizier-api-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg API_BRANCH=mimir-vizier-api
+
+#ui-async
+cd ../ui-nginx
+sudo docker build -t docker.mimirdb.info/vizier-ui-async ./ --build-arg UI_BRANCH=async-backend
+
+#analytics
+cd ../analytics-nginx
+#sudo docker build -t docker.mimirdb.info/vizier-analytics ./
+
+#spark-master and spark-worker
+cd ../spark-docker
+#sudo docker build -t docker.mimirdb.info/spark-hadoop ./
+
+#proxy
+cd ../vizier-nginx-proxy
+sudo docker build -t docker.mimirdb.info/vizier-proxy ./
diff --git a/build-images.sh b/build-images.sh
index 1890545..c7d41d8 100755
--- a/build-images.sh
+++ b/build-images.sh
@@ -1,30 +1,19 @@
+
 #build the images
 GIT_USER=$1
 GIT_PASS=$2
 
 #mimir
 cd ./mimir
-#sudo docker build -t docker.mimirdb.info/vizier-mimir-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS
-
-#mimir-async
-cd ./mimir
-sudo docker build -t docker.mimirdb.info/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=mimir-vizier-api
+sudo docker build -t docker.mimirdb.info/vizier-mimir-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS
 
 #api
 cd ../api
-#sudo docker build -t docker.mimirdb.info/vizier-api-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg API_BRANCH=master
-
-#api-async
-cd ../api-async
-sudo docker build -t docker.mimirdb.info/vizier-api-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg API_BRANCH=mimir-vizier-api
+sudo docker build -t docker.mimirdb.info/vizier-api-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg API_BRANCH=master
 
 #ui
 cd ../ui-nginx
-#sudo docker build -t docker.mimirdb.info/vizier-ui ./ --build-arg UI_BRANCH=master
-
-#ui-async
-cd ../ui-nginx
-sudo docker build -t docker.mimirdb.info/vizier-ui-async ./ --build-arg UI_BRANCH=async-backend
+sudo docker build -t docker.mimirdb.info/vizier-ui ./ --build-arg UI_BRANCH=master
 
 #analytics
 cd ../analytics-nginx
diff --git a/mimir/Dockerfile b/mimir/Dockerfile
index ecbe1b1..ae0758b 100644
--- a/mimir/Dockerfile
+++ b/mimir/Dockerfile
@@ -4,7 +4,7 @@ FROM docker.mimirdb.info/alpine_oraclejdk8_nginx
 LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8_nginx"
 LABEL version="0.3"
 LABEL software="Vizier"
-LABEL software.version="0.2.20190418"
+LABEL software.version="0.2.20190425"
 LABEL description="an open source, provenance aware, iterative data cleaning tool"
 LABEL website="http://vizierdb.info"
 LABEL sourcecode="https://github.com/VizierDB"
diff --git a/run-containers-async.sh b/run-containers-async.sh
new file mode 100755
index 0000000..e9365d5
--- /dev/null
+++ b/run-containers-async.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+
+#run the containers
+#spark-master
+MASTER_HOSTNAME="namenode"
+MASTER_CONTAINER=`sudo docker run -d -v spark-data:/tmp --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 -p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 --expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/master.sh`
+echo "master container id: $MASTER_CONTAINER"
+#wait for master to be ready
+sleep 5
+
+#spark-workers
+START_PORT=7001
+END_PORT=7006
+WORKER_PORT=8882
+DATANODE_PORT=50010
+#for additional spark workers increment the count below
+SPARK_WORKERS_COUNT=2
+i="0"
+while [ $i -lt $SPARK_WORKERS_COUNT ]
+do
+    WORKER_WEBUI_PORT=$((${WORKER_WEBUI_PORT:-8082}+i))
+    DATANODE_HOSTNAME="datanode$i"
+    sudo docker run -d -v spark-data:/tmp -h $DATANODE_HOSTNAME --name $DATANODE_HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $WORKER_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$DATANODE_HOSTNAME" -e "HDFS_HOST=$MASTER_HOSTNAME" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/spark-hadoop /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
+    i=$((i+1))
+done
+
+VIZIER_DOMAIN="vizier.dev"
+
+S3_AWS_ACCESS_KEY_ID="${S3_AWS_ACCESS_KEY_ID:-}"
+S3_AWS_SECRET_ACCESS_KEY="${S3_AWS_SECRET_ACCESS_KEY:-}"
+S3_BUCKET_NAME="vizier-data-test"
+VIZIER_DATA_VOLUME="vizier-data"
+
+#mimir-async
+#to use an s3 bucket as the data directory for mimir instead of a volume use this:
+#sudo docker run -d -v mimir-data:/tmp/data/mimir -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e MIMIR_DATA_DIR="/tmp/data/mimir" --privileged --device /dev/fuse docker.mimirdb.info/vizier-mimir-spark
+#to use a local bind mount for the data directory instead of an s3 bucket use the following for mimir instead of the above:
+sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 4041 --expose 8089 --network spark-net -h vizier-mimir --name vizier-mimir -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark
+
+#api-async
+#to use an s3 bucket as the data directory for the api instead of a volume use this:
+#sudo docker run -d -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api -e MIMIR_HOST="vizier-mimir" -e APP_PATH="" -e API_SERVER=api.$VIZIER_DOMAIN -e API_LOCAL_PORT=80 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" --privileged --device /dev/fuse docker.mimirdb.info/vizier-api-spark
+#to use a local volume for the data directory instead of an s3 bucket use the following for api instead of the above:
+sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e MIMIR_URL="http://vizier-mimir:8089/api/v2/" -e VIZIERSERVER_APP_PATH="/vizier-db/api/v1" -e VIZIERSERVER_BASE_URL="https://demo.$VIZIER_DOMAIN" -e VIZIERSERVER_SERVER_LOCAL=80 -e VIZIERSERVER_SERVER_PORT=443 -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-api-async-spark
+
+#ui-async
+sudo docker run -d -e API_SERVER=demo.$VIZIER_DOMAIN -e APP_PATH="/vizier-db/api/v1" -e API_PORT=443 -e API_SCHEME=https --expose 80 --expose 443 -p 9004:9001 -h vizier-ui --name vizier-ui --network spark-net docker.mimirdb.info/vizier-ui-async
+
+#analytics
+#this is not required - it just tracks dom clicks and such like google analytics for the web ui
+#sudo docker run -d --expose 80 -p 9004:9001 -h vizier-analytics --network spark-net docker.mimirdb.info/vizier-analytics
+
+#proxy
+sudo docker run -d -p 80:80 -p 443:443 -p 9001:9001 -h vizier-proxy --name vizier-proxy --network spark-net -e VIZIER_CONFIG="vizier_k8s.conf" -e VIZIER_API_APP_PATH="/vizier-db/api/v1/" -e VIZIER_DOMAIN="$VIZIER_DOMAIN" docker.mimirdb.info/vizier-proxy
diff --git a/run-containers.sh b/run-containers.sh
index 468bce8..7d0dfef 100755
--- a/run-containers.sh
+++ b/run-containers.sh
@@ -35,39 +35,20 @@ VIZIER_DATA_VOLUME="vizier-data"
 #to use an s3 bucket as the data directory for mimir instead of a volume use this:
 #sudo docker run -d -v mimir-data:/tmp/data/mimir -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e MIMIR_DATA_DIR="/tmp/data/mimir" --privileged --device /dev/fuse docker.mimirdb.info/vizier-mimir-spark
 #to use a local bind mount for the data directory instead of an s3 bucket use the following for mimir instead of the above:
-#sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-spark
-
-#mimir-async
-#to use an s3 bucket as the data directory for mimir instead of a volume use this:
-#sudo docker run -d -v mimir-data:/tmp/data/mimir -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e MIMIR_DATA_DIR="/tmp/data/mimir" --privileged --device /dev/fuse docker.mimirdb.info/vizier-mimir-spark
-#to use a local bind mount for the data directory instead of an s3 bucket use the following for mimir instead of the above:
-sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 4041 --expose 8089 --network spark-net -h vizier-mimir --name vizier-mimir -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark
-
+sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/.vizierdb -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-spark
 
 #api
 #to use an s3 bucket as the data directory for the api instead of a volume use this:
 #sudo docker run -d -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api -e MIMIR_HOST="vizier-mimir" -e APP_PATH="" -e API_SERVER=api.$VIZIER_DOMAIN -e API_LOCAL_PORT=80 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" --privileged --device /dev/fuse docker.mimirdb.info/vizier-api-spark
 #to use a local volume for the data directory instead of an s3 bucket use the following for api instead of the above:
-#sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e MIMIR_URL="http://vizier-mimir:8089/api/v2/" -e VIZIERSERVER_APP_PATH="/api" -e API_SERVER=demo.$VIZIER_DOMAIN -e API_LOCAL_PORT=80 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-api-spark
-
-#api-async
-#to use an s3 bucket as the data directory for the api instead of a volume use this:
-#sudo docker run -d -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api -e MIMIR_HOST="vizier-mimir" -e APP_PATH="" -e API_SERVER=api.$VIZIER_DOMAIN -e API_LOCAL_PORT=80 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" --privileged --device /dev/fuse docker.mimirdb.info/vizier-api-spark
-#to use a local volume for the data directory instead of an s3 bucket use the following for api instead of the above:
-sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e MIMIR_URL="http://vizier-mimir:8089/api/v2/" -e VIZIERSERVER_APP_PATH="/vizier-db/api/v1" -e VIZIERSERVER_BASE_URL="https://demo.$VIZIER_DOMAIN" -e VIZIERSERVER_SERVER_LOCAL=80 -e VIZIERSERVER_SERVER_PORT=443 -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-api-async-spark
-
+sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/.vizierdb -p 9003:9001 --expose 80 --network spark-net -h vizier-api --name vizier-api -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e APP_PATH="/api" -e API_SERVER=demo.$VIZIER_DOMAIN -e API_LOCAL_PORT=80 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-api-spark
 
 #ui
-#sudo docker run -d -e API_SERVER=demo.$VIZIER_DOMAIN -e APP_PATH="/api" -e API_PORT=443 -e API_SCHEME=https --expose 80 --expose 443 -p 9004:9001 -h vizier-ui --name vizier-ui --network spark-net docker.mimirdb.info/vizier-ui
-
-
-#ui-async
-sudo docker run -d -e API_SERVER=demo.$VIZIER_DOMAIN -e APP_PATH="/vizier-db/api/v1" -e API_PORT=443 -e API_SCHEME=https --expose 80 --expose 443 -p 9004:9001 -h vizier-ui --name vizier-ui --network spark-net docker.mimirdb.info/vizier-ui-async
-
+sudo docker run -d -e API_SERVER=demo.$VIZIER_DOMAIN -e APP_PATH="/api" -e API_PORT=443 -e API_SCHEME=https --expose 80 --expose 443 -p 9004:9001 -h vizier-ui --name vizier-ui --network spark-net docker.mimirdb.info/vizier-ui
 
 #analytics
 #this is not required - it just tracks dom clicks and such like google analytics for the web ui
 #sudo docker run -d --expose 80 -p 9004:9001 -h vizier-analytics --network spark-net docker.mimirdb.info/vizier-analytics
 
 #proxy
-sudo docker run -d -p 80:80 -p 443:443 -p 9001:9001 -h vizier-proxy --name vizier-proxy --network spark-net -e VIZIER_CONFIG="vizier_k8s.conf" -e VIZIER_API_APP_PATH="/vizier-db/api/v1/" -e VIZIER_DOMAIN="$VIZIER_DOMAIN" docker.mimirdb.info/vizier-proxy
+sudo docker run -d -p 80:80 -p 443:443 -p 9001:9001 -h vizier-proxy --name vizier-proxy --network spark-net -e VIZIER_CONFIG="vizier_k8s.conf" -e VIZIER_API_APP_PATH="/api/" -e VIZIER_DOMAIN="$VIZIER_DOMAIN" docker.mimirdb.info/vizier-proxy
\ No newline at end of file