#!/bin/sh
#
# Run the containers for a local Spark/HDFS cluster and the Vizier services
# (mimir, api, ui, proxy) on the docker network "spark-net".
#
# Optional environment variables:
#   SPARK_WORKERS_COUNT       number of worker/datanode containers (default 2)
#   S3_AWS_ACCESS_KEY_ID      AWS access key id passed to the vizier containers
#   S3_AWS_SECRET_ACCESS_KEY  AWS secret access key passed to the vizier
#                             containers
#
# Credentials are intentionally NOT stored in this file; export them in the
# calling environment when they are needed.

set -u

#spark-master
MASTER_HOSTNAME="namenode"

# Start the master and capture the container id printed by `docker run -d`.
# The workers are linked against this id, so abort if the master fails.
MASTER_CONTAINER=$(sudo docker run -d \
  -v hdfs-data:/hadoop/dfs/name \
  -v spark-data:/tmp \
  --name "$MASTER_HOSTNAME" \
  -h "$MASTER_HOSTNAME" \
  --network spark-net \
  -p 222:22 \
  -p 4040:4040 \
  -p 6066:6066 \
  -p 7077:7077 \
  -p 8020:8020 \
  -p 8080:8080 \
  -p 50070:50070 \
  --expose 7001 \
  --expose 7002 \
  --expose 7003 \
  --expose 7004 \
  --expose 7005 \
  --expose 7006 \
  --expose 7077 \
  --expose 6066 \
  --expose 4040 \
  --expose 8020 \
  --expose 50070 \
  -e "MASTER=spark://$MASTER_HOSTNAME:7077" \
  -e "SPARK_CONF_DIR=/conf" \
  -e "SPARK_PUBLIC_DNS=127.0.0.1" \
  -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" \
  -e "SPARK_EXECUTOR_MEMORY=8g" \
  -e "SPARK_DAEMON_MEMORY=8g" \
  -e "SPARK_DRIVER_MEMORY=8g" \
  -e "SPARK_WORKER_MEMORY=8g" \
  -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" \
  -e "AWS_ECS=false" \
  docker.mimirdb.info/spark-hadoop \
  /usr/local/spark-2.4.0-bin-without-hadoop/master.sh) || {
  echo "failed to start master container $MASTER_HOSTNAME" >&2
  exit 1
}
echo "master container id: $MASTER_CONTAINER"

#wait for master to be ready
sleep 5

#spark-workers
WORKER_PORT=8882
WORKER_WEBUI_BASE_PORT=8082
DATANODE_PORT=50010

#for additional spark workers increment the count below
SPARK_WORKERS_COUNT="${SPARK_WORKERS_COUNT:-2}"

i=0
while [ "$i" -lt "$SPARK_WORKERS_COUNT" ]; do
  # Each worker gets its own web UI port: base port + worker index.
  WORKER_WEBUI_PORT=$((WORKER_WEBUI_BASE_PORT + i))
  DATANODE_HOSTNAME="datanode$i"

  # NOTE(review): the host port is published to container port 8082 while
  # SPARK_WORKER_WEBUI_PORT is set to the per-worker port; confirm which port
  # worker.sh actually binds inside the container.
  sudo docker run -d \
    -v "hdfs-data-$i:/hadoop/dfs/data" \
    -v "spark-scratch-$i:/usr/local/spark-2.4.0-bin-without-hadoop/work" \
    -v "spark-data-$i:/tmp" \
    -h "$DATANODE_HOSTNAME" \
    --name "$DATANODE_HOSTNAME" \
    --network spark-net \
    --link "$MASTER_CONTAINER" \
    -p "$WORKER_WEBUI_PORT:8082" \
    --expose "$WORKER_PORT" \
    --expose "$DATANODE_PORT" \
    -e "SPARK_CONF_DIR=/conf" \
    -e "SPARK_PUBLIC_DNS=127.0.0.1" \
    -e "SPARK_WORKER_CORES=4" \
    -e "SPARK_WORKER_PORT=$WORKER_PORT" \
    -e "SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" \
    -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" \
    -e "HDFS_DATA_HOST=$DATANODE_HOSTNAME" \
    -e "HDFS_HOST=$MASTER_HOSTNAME" \
    -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" \
    -e "SPARK_EXECUTOR_MEMORY=8g" \
    -e "SPARK_DAEMON_MEMORY=8g" \
    -e "SPARK_DRIVER_MEMORY=8g" \
    -e "SPARK_WORKER_MEMORY=8g" \
    -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" \
    -e "AWS_ECS=false" \
    docker.mimirdb.info/spark-hadoop \
    /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh

  i=$((i + 1))
done

VIZIER_DOMAIN="vizier.devel"
# AWS credentials come from the caller's environment (empty when unset) so
# that no secret is committed with this script.
S3_AWS_ACCESS_KEY_ID="${S3_AWS_ACCESS_KEY_ID:-}"
S3_AWS_SECRET_ACCESS_KEY="${S3_AWS_SECRET_ACCESS_KEY:-}"
S3_BUCKET_NAME="vizier-data-test"
VIZIER_DATA_VOLUME="vizier-data"

#mimir-async
#to use an s3 bucket as the data directory for mimir instead of a volume use this:
#sudo docker run -d \
#  -v mimir-data:/tmp/data/mimir \
#  -p 9002:9001 \
#  --expose 4041 \
#  --expose 33388 \
#  --network spark-net \
#  -h vizier-mimir \
#  --name vizier-mimir \
#  -e MIMIR_HOST="vizier-mimir" \
#  -e SPARK_HOST=$MASTER_HOSTNAME \
#  -e RESTORE_BACKUP=false \
#  -e PULL_MIMIR=false \
#  -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID \
#  -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY \
#  -e S3_BUCKET_NAME="$S3_BUCKET_NAME" \
#  -e MIMIR_DATA_DIR="/tmp/data/mimir" \
#  --privileged \
#  --device /dev/fuse \
#  docker.mimirdb.info/vizier-mimir-spark

#to use a local bind mount for the data directory instead of an s3 bucket use
#the following for mimir instead of the above:
sudo docker run -d \
  -v "$VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb" \
  -p 9002:9001 \
  --expose 4041 \
  --expose 8089 \
  --network spark-net \
  -h vizier-mimir \
  --name vizier-mimir \
  -e "DATA_STAGING_TYPE=hdfs" \
  -e "MIMIR_DATA_DIR=/usr/local/source/web-api/vizier/.vizierdb/mimir" \
  -e "USE_S3_VOLUME=false" \
  -e "MIMIR_HOST=vizier-mimir" \
  -e "SPARK_HOST=$MASTER_HOSTNAME" \
  -e "RESTORE_BACKUP=false" \
  -e "PULL_MIMIR=false" \
  -e "AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID" \
  -e "AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY" \
  -e "S3_BUCKET_NAME=$S3_BUCKET_NAME" \
  docker.mimirdb.info/vizier-mimir-async-spark

#api-async
#to use an s3 bucket as the data directory for the api instead of a volume use this:
#sudo docker run -d \
#  -p 9003:9001 \
#  --expose 80 \
#  --network spark-net \
#  -h vizier-api \
#  --name vizier-api \
#  -e MIMIR_HOST="vizier-mimir" \
#  -e APP_PATH="" \
#  -e API_SERVER=api.$VIZIER_DOMAIN \
#  -e API_LOCAL_PORT=80 \
#  -e API_PORT=443 \
#  -e API_SCHEME=https \
#  -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID \
#  -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY \
#  -e S3_BUCKET_NAME="$S3_BUCKET_NAME" \
#  --privileged \
#  --device /dev/fuse \
#  docker.mimirdb.info/vizier-api-spark

#to use a local volume for the data directory instead of an s3 bucket use the
#following for api instead of the above:
sudo docker run -d \
  -v "$VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb" \
  -p 9003:9001 \
  --expose 80 \
  --network spark-net \
  -h vizier-api \
  --name vizier-api \
  -e "USE_S3_VOLUME=false" \
  -e "MIMIR_HOST=vizier-mimir" \
  -e "MIMIR_URL=http://vizier-mimir:8089/api/v2/" \
  -e "VIZIERSERVER_APP_PATH=/vizier-db/api/v1" \
  -e "VIZIERSERVER_BASE_URL=https://demo.$VIZIER_DOMAIN" \
  -e "VIZIERSERVER_SERVER_LOCAL=80" \
  -e "VIZIERSERVER_SERVER_PORT=443" \
  -e "AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID" \
  -e "AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY" \
  -e "S3_BUCKET_NAME=$S3_BUCKET_NAME" \
  docker.mimirdb.info/vizier-api-async-spark

#ui-async
sudo docker run -d \
  -e "API_SERVER=demo.$VIZIER_DOMAIN" \
  -e "APP_PATH=/vizier-db/api/v1" \
  -e "API_PORT=443" \
  -e "API_SCHEME=https" \
  --expose 80 \
  --expose 443 \
  -p 9004:9001 \
  -h vizier-ui \
  --name vizier-ui \
  --network spark-net \
  docker.mimirdb.info/vizier-ui-async

#analytics
#this is not required - it just tracks dom clicks and such like google
#analytics for the web ui
#sudo docker run -d \
#  --expose 80 \
#  -p 9004:9001 \
#  -h vizier-analytics \
#  --network spark-net \
#  docker.mimirdb.info/vizier-analytics

#proxy
sudo docker run -d \
  -p 80:80 \
  -p 443:443 \
  -p 9001:9001 \
  -h vizier-proxy \
  --name vizier-proxy \
  --network spark-net \
  -e "VIZIER_CONFIG=vizier_k8s.conf" \
  -e "VIZIER_API_APP_PATH=/vizier-db/api/v1/" \
  -e "VIZIER_DOMAIN=$VIZIER_DOMAIN" \
  docker.mimirdb.info/vizier-proxy