# docker/docker-compose.yml
# (repository viewer metadata at time of capture: 435 lines, 15 KiB, YAML)

# Compose file format version; quoted so YAML reads it as a string, not a number.
version: "3"
# Container definitions for this stack follow under this key.
services:
# Service `namenode`: runs master.sh from the Spark 2.4.0 install directory.
# The environment below addresses it by hostname as spark://namenode:7077 and
# hdfs://namenode:8020; both of those ports are published to the host.
namenode:
  image: docker.mimirdb.info/spark-hadoop
  container_name: namenode
  hostname: namenode
  working_dir: /usr/local/spark-2.4.0-bin-without-hadoop
  command:
    - /usr/local/spark-2.4.0-bin-without-hadoop/master.sh
  ipc: shareable
  environment:
    # Cluster identity
    - AWS_ECS=false
    - CLUSTER_NAME=test
    - MASTER=spark://namenode:7077
    - MASTER_IP=0
    # Spark locations and memory sizing
    - SPARK_HOME=/usr/local/spark-2.4.0-bin-without-hadoop
    - SPARK_CONF_DIR=/conf
    - SPARK_PUBLIC_DNS=127.0.0.1
    - SPARK_DAEMON_MEMORY=8g
    - SPARK_DRIVER_MEMORY=8g
    - SPARK_EXECUTOR_MEMORY=8g
    - SPARK_WORKER_MEMORY=8g
    # Shell and Java runtime
    - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark-2.4.0-bin-without-hadoop/bin
    - LANG=C.UTF-8
    - LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/
    - JAVA_VERSION=8
    - JAVA_UPDATE=161
    - JAVA_BUILD=12
    - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
    - JAVA_HOME=/usr/lib/jvm/default-jvm
    # Hadoop install and config directories
    - HADOOP_PREFIX=/usr/local/hadoop
    - HADOOP_HOME=/usr/local/hadoop
    - HADOOP_COMMON_HOME=/usr/local/hadoop
    - HADOOP_HDFS_HOME=/usr/local/hadoop
    - HADOOP_MAPRED_HOME=/usr/local/hadoop
    - HADOOP_YARN_HOME=/usr/local/hadoop
    - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
    - YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop
    # HDFS host names
    - HDFS_HOST=namenode
    - HDFS_DATA_HOST=datanode
    # CORE_CONF_* entries
    - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    - CORE_CONF_hadoop_http_staticuser_user=root
    - CORE_CONF_hadoop_proxyuser_hue_hosts=*
    - CORE_CONF_hadoop_proxyuser_hue_groups=*
    # HDFS_CONF_* entries
    - HDFS_CONF_dfs_client_use_datanode_hostname=true
    - HDFS_CONF_dfs_webhdfs_enabled=true
    - HDFS_CONF_dfs_permissions_enabled=false
    - HDFS_CONF_dfs_datanode_use_datanode_hostname=true
    - HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
    - HDFS_CONF_dfs_datanode_address=0.0.0.0:50010
    - HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
    - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
  labels:
    base.image: docker.mimirdb.info/alpine_oraclejdk8
    description: 'Spark image'
    software: Spark
    software.version: 0.1.201801
    # Quoted so YAML keeps the label a string instead of parsing a float.
    version: '0.1'
  logging:
    driver: json-file
    options: {}
  networks:
    spark-net:
      aliases:
        - '3a27cab35ce9'
  # HOST:CONTAINER/protocol; quoted so the mappings are always read as strings.
  ports:
    - '222:22/tcp'
    - '4040:4040/tcp'
    - '50070:50070/tcp'
    - '6066:6066/tcp'
    - '7077:7077/tcp'
    - '8020:8020/tcp'
    - '8080:8080/tcp'
  volumes:
    # Named volume; the datanode services mount the same volume at /tmp.
    - spark-data:/tmp
# Service `datanode0`: runs worker.sh from the Spark 2.4.0 install directory
# (INSTANCE_TYPE=worker) and is started after `namenode`.
datanode0:
  image: docker.mimirdb.info/spark-hadoop
  container_name: datanode0
  hostname: datanode0
  working_dir: /usr/local/spark-2.4.0-bin-without-hadoop
  command:
    - /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
  depends_on:
    - namenode
  ipc: shareable
  environment:
    # Cluster identity
    - AWS_ECS=false
    - CLUSTER_NAME=test
    - INSTANCE_TYPE=worker
    - MASTER_IP=0
    # Spark locations, memory sizing and worker settings
    - SPARK_HOME=/usr/local/spark-2.4.0-bin-without-hadoop
    - SPARK_CONF_DIR=/conf
    - SPARK_PUBLIC_DNS=127.0.0.1
    - SPARK_DAEMON_MEMORY=8g
    - SPARK_DRIVER_MEMORY=8g
    - SPARK_EXECUTOR_MEMORY=8g
    - SPARK_WORKER_MEMORY=8g
    - SPARK_WORKER_CORES=4
    - SPARK_WORKER_PORT=8882
    # NOTE(review): web UI port is 0 here while the published container port
    # below is 8082 - confirm which value worker.sh actually uses.
    - SPARK_WORKER_WEBUI_PORT=0
    # Shell and Java runtime
    - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark-2.4.0-bin-without-hadoop/bin
    - LANG=C.UTF-8
    - LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/
    - JAVA_VERSION=8
    - JAVA_UPDATE=161
    - JAVA_BUILD=12
    - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
    - JAVA_HOME=/usr/lib/jvm/default-jvm
    # Hadoop install and config directories
    - HADOOP_PREFIX=/usr/local/hadoop
    - HADOOP_HOME=/usr/local/hadoop
    - HADOOP_COMMON_HOME=/usr/local/hadoop
    - HADOOP_HDFS_HOME=/usr/local/hadoop
    - HADOOP_MAPRED_HOME=/usr/local/hadoop
    - HADOOP_YARN_HOME=/usr/local/hadoop
    - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
    - YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop
    # HDFS host names
    - HDFS_HOST=namenode
    - HDFS_DATA_HOST=datanode0
    # CORE_CONF_* entries
    - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    - CORE_CONF_hadoop_http_staticuser_user=root
    - CORE_CONF_hadoop_proxyuser_hue_hosts=*
    - CORE_CONF_hadoop_proxyuser_hue_groups=*
    # HDFS_CONF_* entries
    - HDFS_CONF_dfs_client_use_datanode_hostname=true
    - HDFS_CONF_dfs_webhdfs_enabled=true
    - HDFS_CONF_dfs_permissions_enabled=false
    - HDFS_CONF_dfs_datanode_use_datanode_hostname=true
    - HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
    - HDFS_CONF_dfs_datanode_address=0.0.0.0:50010
    - HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
    - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
  labels:
    base.image: docker.mimirdb.info/alpine_oraclejdk8
    description: 'Spark image'
    software: Spark
    software.version: 0.1.201801
    # Quoted so YAML keeps the label a string instead of parsing a float.
    version: '0.1'
  logging:
    driver: json-file
    options: {}
  networks:
    spark-net:
      aliases:
        - '914ed79df28c'
  # HOST:CONTAINER/protocol, quoted so it is always read as a string.
  # NOTE(review): host port is 0 - confirm this is the intended host binding.
  ports:
    - '0:8082/tcp'
  volumes:
    # Named volume; also mounted at /tmp by namenode and datanode1.
    - spark-data:/tmp
# Service `datanode1`: runs worker.sh from the Spark 2.4.0 install directory
# (INSTANCE_TYPE=worker) and is started after `namenode`.
datanode1:
  image: docker.mimirdb.info/spark-hadoop
  container_name: datanode1
  hostname: datanode1
  working_dir: /usr/local/spark-2.4.0-bin-without-hadoop
  command:
    - /usr/local/spark-2.4.0-bin-without-hadoop/worker.sh
  depends_on:
    - namenode
  ipc: shareable
  environment:
    # Cluster identity
    - AWS_ECS=false
    - CLUSTER_NAME=test
    - INSTANCE_TYPE=worker
    - MASTER_IP=0
    # Spark locations, memory sizing and worker settings
    - SPARK_HOME=/usr/local/spark-2.4.0-bin-without-hadoop
    - SPARK_CONF_DIR=/conf
    - SPARK_PUBLIC_DNS=127.0.0.1
    - SPARK_DAEMON_MEMORY=8g
    - SPARK_DRIVER_MEMORY=8g
    - SPARK_EXECUTOR_MEMORY=8g
    - SPARK_WORKER_MEMORY=8g
    - SPARK_WORKER_CORES=4
    - SPARK_WORKER_PORT=8882
    # NOTE(review): web UI port is 1 here while the published container port
    # below is 8082 - confirm which value worker.sh actually uses.
    - SPARK_WORKER_WEBUI_PORT=1
    # Shell and Java runtime
    - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark-2.4.0-bin-without-hadoop/bin
    - LANG=C.UTF-8
    - LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/
    - JAVA_VERSION=8
    - JAVA_UPDATE=161
    - JAVA_BUILD=12
    - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
    - JAVA_HOME=/usr/lib/jvm/default-jvm
    # Hadoop install and config directories
    - HADOOP_PREFIX=/usr/local/hadoop
    - HADOOP_HOME=/usr/local/hadoop
    - HADOOP_COMMON_HOME=/usr/local/hadoop
    - HADOOP_HDFS_HOME=/usr/local/hadoop
    - HADOOP_MAPRED_HOME=/usr/local/hadoop
    - HADOOP_YARN_HOME=/usr/local/hadoop
    - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
    - YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop
    # HDFS host names
    - HDFS_HOST=namenode
    - HDFS_DATA_HOST=datanode1
    # CORE_CONF_* entries
    - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    - CORE_CONF_hadoop_http_staticuser_user=root
    - CORE_CONF_hadoop_proxyuser_hue_hosts=*
    - CORE_CONF_hadoop_proxyuser_hue_groups=*
    # HDFS_CONF_* entries
    - HDFS_CONF_dfs_client_use_datanode_hostname=true
    - HDFS_CONF_dfs_webhdfs_enabled=true
    - HDFS_CONF_dfs_permissions_enabled=false
    - HDFS_CONF_dfs_datanode_use_datanode_hostname=true
    - HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
    - HDFS_CONF_dfs_datanode_address=0.0.0.0:50010
    - HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
    - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
  labels:
    base.image: docker.mimirdb.info/alpine_oraclejdk8
    description: 'Spark image'
    software: Spark
    software.version: 0.1.201801
    # Quoted so YAML keeps the label a string instead of parsing a float.
    version: '0.1'
  logging:
    driver: json-file
    options: {}
  networks:
    spark-net:
      aliases:
        - 'bde4b468192c'
  # HOST:CONTAINER/protocol, quoted so it is always read as a string.
  # NOTE(review): host port is 1 - confirm this is the intended host binding.
  ports:
    - '1:8082/tcp'
  volumes:
    # Named volume; also mounted at /tmp by namenode and datanode0.
    - spark-data:/tmp
# Service `vizier-mimir`: runs /usr/local/source/entrypoint.sh through bash and
# is started after `namenode` and `datanode1`.
#
# SECURITY: the AWS access key pair is no longer stored in this file. Compose
# substitutes ${AWS_ACCESS_KEY_ID} and ${AWS_SECRET_ACCESS_KEY} from the
# environment of the shell that invokes it (or from a `.env` file next to this
# compose file). Any key pair that was previously committed here must be
# treated as leaked and rotated.
vizier-mimir:
  image: docker.mimirdb.info/vizier-mimir-async-spark
  container_name: vizier-mimir
  hostname: vizier-mimir
  entrypoint:
    - /bin/bash
    - -c
    - /usr/local/source/entrypoint.sh
  # NOTE(review): datanode0 is not listed here - confirm that is intentional.
  depends_on:
    - namenode
    - datanode1
  ipc: shareable
  environment:
    - USE_S3_VOLUME=false
    - RESTORE_BACKUP=false
    - PULL_MIMIR=false
    - S3_BUCKET_NAME=vizier-data-test
    - MIMIR_HOST=vizier-mimir
    - SPARK_HOST=namenode
    # Supplied at deploy time; never commit real values.
    - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
    - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
    - NGINX_VERSION=1.15.1
    - LANG=C.UTF-8
    - JAVA_VERSION=8
    - JAVA_UPDATE=191
    - JAVA_BUILD=12
    - JAVA_PATH=2787e4a523244c269598db4e85c51e0c
    - JAVA_HOME=/usr/lib/jvm/default-jvm
    - RUN_SSH=true
    - NEW_MIMIR_DB_FROM_S3=true
    - NEW_HIVE_METASTORE_FROM_S3=true
    - HDFS_CONF_dfs_client_use_datanode_hostname=false
    - DATA_STAGING_TYPE=s3
    - IAM_ROLE=none
    # Deliberately set to the empty string.
    - S3_ENDPOINT=
    - S3A_ENDPOINT=https://s3.vizier.app/
    - S3_BUCKET_ACL=private
    # NOTE(review): MOUNT_POINT and MIMIR_DATA_DIR are under
    # /usr/local/source/web-api/.vizierdb, while the volume below is mounted at
    # /usr/local/source/web-api/vizier/.vizierdb - confirm the paths are meant
    # to differ.
    - MOUNT_POINT=/usr/local/source/web-api/.vizierdb
    - MIMIR_DATA_DIR=/usr/local/source/web-api/.vizierdb/mimir
    - PULL_CODE=2
  labels:
    base.image: docker.mimirdb.info/alpine_oraclejdk8_nginx
    description: 'an open source, provenance aware, iterative data cleaning tool'
    documentation: https://github.com/VizierDB/web-api/wiki
    software: Vizier
    software.version: 0.2.20190610
    sourcecode: https://github.com/VizierDB
    tags: 'CSV,Data Cleaning,Databases,Provenance,Workflow,Machine Learning'
    # Quoted so YAML keeps the label a string instead of parsing a float.
    version: '0.3'
    website: http://vizierdb.info
  logging:
    driver: json-file
    options: {}
  networks:
    spark-net:
      aliases:
        - '137903388a11'
  # HOST:CONTAINER/protocol, quoted so it is always read as a string.
  ports:
    - '9002:9001/tcp'
  volumes:
    # Named volume; vizier-api mounts the same volume at the same path.
    - vizier-data:/usr/local/source/web-api/vizier/.vizierdb
# Service `vizier-api`: runs /usr/local/source/entrypoint.sh through bash.
# It is configured to reach Mimir at http://vizier-mimir:8089/api/v2/.
#
# SECURITY: the AWS access key pair is no longer stored in this file. Compose
# substitutes ${AWS_ACCESS_KEY_ID} and ${AWS_SECRET_ACCESS_KEY} from the
# environment of the shell that invokes it (or from a `.env` file next to this
# compose file). Any key pair that was previously committed here must be
# treated as leaked and rotated.
vizier-api:
  image: docker.mimirdb.info/vizier-api-async-spark
  container_name: vizier-api
  hostname: vizier-api
  entrypoint:
    - /bin/bash
    - -c
    - /usr/local/source/entrypoint.sh
  ipc: shareable
  environment:
    - VIZIERSERVER_SERVER_LOCAL=80
    - VIZIERSERVER_SERVER_PORT=443
    - MIMIR_HOST=vizier-mimir
    - VIZIERSERVER_BASE_URL=https://demo.vizier.devel
    - VIZIERSERVER_APP_PATH=/vizier-db/api/v1
    # Supplied at deploy time; never commit real values.
    - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
    - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    - S3_BUCKET_NAME=vizier-data-test
    - USE_S3_VOLUME=false
    - MIMIR_URL=http://vizier-mimir:8089/api/v2/
    - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
    - LANG=C.UTF-8
    - JAVA_VERSION=8
    - JAVA_UPDATE=161
    - JAVA_BUILD=12
    - JAVA_PATH=2f38c3b165be4555a1fa6e98c45e0808
    - JAVA_HOME=/usr/lib/jvm/default-jvm
    - VIZIERSERVER_NAME=vizier
    - VIZIERSERVER_LOG_DIR=/usr/local/source/web-api/vizier/.vizierdb/logs
    - VIZIERSERVER_DEBUG=True
    - VIZIERSERVER_ROW_LIMIT=25
    - VIZIERSERVER_MAX_ROW_LIMIT=-1
    - VIZIERSERVER_MAX_UPLOAD_SIZE=16777216
    - VIZIERSERVER_ENGINE=MIMIR
    - VIZIERSERVER_PACKAGE_PATH=/usr/local/source/web-api/resources/packages/common:/usr/local/source/web-api/resources/packages/mimir
    - VIZIERSERVER_PROCESSOR_PATH=/usr/local/source/web-api/resources/processors/common:/usr/local/source/web-api/resources/processors/mimir
    - VIZIERENGINE_BACKEND=MULTIPROCESS
    # Deliberately set to the empty string.
    - VIZIERENGINE_SYNCHRONOUS=
    - VIZIERENGINE_USE_SHORT_IDENTIFIER=True
    - VIZIERENGINE_DATA_DIR=/usr/local/source/web-api/vizier/.vizierdb
    # Deliberately set to the empty string.
    - VIZIERENGINE_CELERY_ROUTES=
    - CELERY_BROKER_URL=amqp://guest@localhost//
    - VIZIERWORKER_ENV=MIMIR
    - VIZIERWORKER_PROCESSOR_PATH=/usr/local/source/web-api/resources/processors/common:/usr/local/source/web-api/resources/processors/mimir
    - VIZIERWORKER_LOG_DIR=/usr/local/source/web-api/vizier/.vizierdb/logs/worker
    - VIZIERWORKER_CONTROLLER_URL=http://localhost:5000/vizier-db/api/v1
    - VIZIERENGINE_CONTAINER_PORTS=20171-20271
    - VIZIERENGINE_CONTAINER_IMAGE=heikomueller/vizierapi:container
    - PROFILER=0
    - RUN_SSH=true
    - ACME_HOSTS=api.vizier.app
    - GLIBC_VERSION=2.27-r0
    - IAM_ROLE=none
    # Deliberately set to the empty string.
    - S3_ENDPOINT=
    - S3_BUCKET_ACL=private
    - MOUNT_POINT=/usr/local/source/web-api/.vizierdb
    - PULL_CODE=2
  labels:
    base.image: docker.mimirdb.info/alpine_oraclejdk8
    description: 'an open source, provenance aware, iterative data cleaning tool'
    documentation: https://github.com/VizierDB/web-api/wiki
    software: Vizier
    software.version: 0.2.20190610
    sourcecode: https://github.com/VizierDB
    tags: 'CSV,Data Cleaning,Databases,Provenance,Workflow,Machine Learning'
    # Quoted so YAML keeps the label a string instead of parsing a float.
    version: '0.3'
    website: http://vizierdb.info
  logging:
    driver: json-file
    options: {}
  networks:
    spark-net:
      aliases:
        - '785709de8b21'
  # HOST:CONTAINER/protocol, quoted so it is always read as a string.
  ports:
    - '9003:9001/tcp'
  volumes:
    # Named volume mounted at the VIZIERENGINE_DATA_DIR path; vizier-mimir
    # mounts the same volume at the same path.
    - vizier-data:/usr/local/source/web-api/vizier/.vizierdb
# Service `vizier-ui`: runs /usr/local/entrypoint.sh through sh. Its
# environment points it at the API via API_SCHEME, API_SERVER, API_PORT and
# APP_PATH.
vizier-ui:
  image: docker.mimirdb.info/vizier-ui-async
  container_name: vizier-ui
  hostname: vizier-ui
  entrypoint:
    - /bin/sh
    - -c
    - /usr/local/entrypoint.sh
  ipc: shareable
  environment:
    # API endpoint settings
    - API_SCHEME=https
    - API_SERVER=demo.vizier.devel
    - API_PORT=443
    - APP_PATH=/vizier-db/api/v1
    - API_BASIC_AUTH=false
    # Analytics; single-quoted so the backslashes are visibly literal
    # characters of the value.
    - 'ANALYTICS_URL=https:\/\/analytics.vizier.app\/'
    - ANALYTICS_SITE_ID=12a12e629ffb388167c2c3e560bbc8e1
    # Image runtime
    - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
    - NGINX_VERSION=1.15.1
  labels:
    software.version: 0.2.20190611
  logging:
    driver: json-file
    options: {}
  networks:
    spark-net:
      aliases:
        - 'a1920bf53960'
  # HOST:CONTAINER/protocol, quoted so it is always read as a string.
  ports:
    - '9004:9001/tcp'
# Service `vizier-proxy`: runs /usr/local/entrypoint.sh through sh, publishes
# ports 80, 443 and 9001, and is started after vizier-mimir, vizier-api and
# vizier-ui.
vizier-proxy:
  image: docker.mimirdb.info/vizier-proxy
  container_name: vizier-proxy
  hostname: vizier-proxy
  entrypoint:
    - /bin/sh
    - -c
    - /usr/local/entrypoint.sh
  depends_on:
    - vizier-mimir
    - vizier-api
    - vizier-ui
  ipc: shareable
  environment:
    # Proxy configuration
    - VIZIER_CONFIG=vizier_k8s.conf
    - VIZIER_DOMAIN=vizier.devel
    - VIZIER_API_APP_PATH=/vizier-db/api/v1/
    # Deliberately set to the empty string.
    - VIZIER_API_PROXY_PATH=
    - API_BASIC_AUTH=false
    # Folded scalar: the two lines join with one space into a single
    # space-separated host list, with no trailing newline.
    - >-
      ACME_HOSTS=demo.vizier.app api.vizier.app vizier.vizier.app mimir.vizier.app
      spark.vizier.app hdfs.vizier.app proxy.vizier.app analytics.vizier.app
    # Image runtime
    - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
    - NGINX_VERSION=1.15.1
  labels:
    base.image: docker.mimirdb.info/alpine_oraclejdk8_nginx
    software: Vizier
    software.version: 0.2.20190509
    # Quoted so YAML keeps the label a string instead of parsing a float.
    version: '0.3'
  logging:
    driver: json-file
    options: {}
  networks:
    spark-net:
      aliases:
        - '550a44e26989'
  # HOST:CONTAINER/protocol; quoted so the mappings are always read as strings.
  ports:
    - '443:443/tcp'
    - '80:80/tcp'
    - '9001:9001/tcp'
# Network that every service above attaches to. `external: true` means Compose
# does not create it; it must already exist before the stack is started.
networks:
  spark-net:
    external: true

# Named volumes with default settings (empty definitions).
volumes:
  # Mounted by vizier-mimir and vizier-api.
  vizier-data:
  # Mounted at /tmp by namenode, datanode0 and datanode1.
  spark-data: