python cell execution sandboxing docker image fixes for spark image hadoop version mismatch
parent
10350f6c3f
commit
328a0b1447
|
@ -4,6 +4,6 @@ sudo docker build -t docker.mimirdb.info/vizier-auth ./ --build-arg UI_BRANCH=ma
|
||||||
|
|
||||||
#sudo docker build -t docker.mimirdb.info/spark-hadoop --build-arg SPARK_VERSION="spark-2.4.0-bin-without-hadoop" ./
|
#sudo docker build -t docker.mimirdb.info/spark-hadoop --build-arg SPARK_VERSION="spark-2.4.0-bin-without-hadoop" ./
|
||||||
|
|
||||||
sudo docker build -t docker.mimirdb.info/spark-hadoop --build-arg SPARK_VERSION="spark-2.4.4-bin-without-hadoop-scala-2.12" ./
|
sudo docker build -t docker.mimirdb.info/spark-hadoop-scala-2.12 --build-arg SPARK_VERSION="spark-2.4.4-bin-without-hadoop-scala-2.12" ./
|
||||||
|
|
||||||
sudo docker build -t docker.mimirdb.info/vizier-proxy --build-arg VIZIER_CONFIG="vizier_auth.conf" ./
|
sudo docker build -t docker.mimirdb.info/vizier-proxy --build-arg VIZIER_CONFIG="vizier_auth.conf" ./
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
# Python cell execution sandbox for Vizier: runs the web-api python cell
# executor as a web service under supervisord, inside a conda environment.
FROM docker.mimirdb.info/alpine_openjdk8

LABEL software="vizier-python-executor"
LABEL software.version="0.2.20200202"
LABEL version="0.7"

# Build-time knobs; the port is re-exported as ENV below so it is also
# available at runtime.
ARG VIZIERSERVER_PYTHON_EXECUTOR_PORT=5005
ARG API_BRANCH=master
# NOTE(review): "latest" is not reproducible -- pin a Miniconda release
# (e.g. "py38_4.8.2") for deterministic builds.
ARG CONDA_VERSION="latest"

ENV PYEXECUTOR_DEBUG=False
ENV MIMIR_URL=http://vizier-auth:8089/api/v2/
ENV VIZIERSERVER_PYTHON_EXECUTOR_PORT=$VIZIERSERVER_PYTHON_EXECUTOR_PORT
ENV WSGI_LOG_LEVEL=debug

# --no-cache already refreshes the index, so the separate --update flag is
# redundant; packages sorted for diffability.
RUN apk add --no-cache bash ca-certificates curl git supervisor

# Install Miniconda (the earlier Anaconda3 install was dropped in favor of
# the much smaller Miniconda distribution).
RUN curl -OsL "https://repo.anaconda.com/miniconda/Miniconda3-$CONDA_VERSION-Linux-x86_64.sh" \
 && /bin/bash Miniconda3-$CONDA_VERSION-Linux-x86_64.sh -b -p /opt/conda \
 && rm Miniconda3-$CONDA_VERSION-Linux-x86_64.sh \
 && echo 'export PATH=/opt/conda/bin:$PATH' >> /etc/profile.d/conda.sh

# Set up the web-api checkout inside a dedicated conda environment.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN mkdir -p /usr/local/source/ \
 && cd /usr/local/source/ \
 && /opt/conda/bin/conda create --name vizierasync python=3.8 pip \
 && source /opt/conda/bin/activate vizierasync \
 && git clone https://github.com/VizierDB/web-api-async.git web-api \
 && cd /usr/local/source/web-api \
 && git checkout -b local_$API_BRANCH origin/$API_BRANCH \
 && pip install --no-cache-dir -r requirements.txt \
 && pip install --no-cache-dir \
      bokeh \
      futures \
      geopandas \
      gunicorn \
      matplotlib \
      numpy \
      pandas \
      shapely \
 && pip install --no-cache-dir -e .

# main.py overrides the service entry point shipped with web-api.
COPY main.py /usr/local/source/web-api/vizier/main.py
COPY run_executor.sh /usr/local/source/run_executor.sh
COPY supervisord.conf /etc/supervisord.conf
RUN chmod +x /usr/local/source/run_executor.sh
WORKDIR /usr/local/source

# Executor HTTP port plus the supervisord control interface.
EXPOSE $VIZIERSERVER_PYTHON_EXECUTOR_PORT 9001

# Exec form so supervisord runs as PID 1 and receives stop signals directly
# (shell form would wrap it in /bin/sh -c).
ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisord.conf"]
|
|
@ -0,0 +1,90 @@
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from flask import Flask
|
||||||
|
from flask import request
|
||||||
|
from vizier.engine.packages.stream import OutputStream
|
||||||
|
from vizier.engine.packages.pycell.client.base import VizierDBClient
|
||||||
|
from vizier.engine.packages.pycell.plugins import python_cell_preload
|
||||||
|
from vizier.datastore.mimir.store import MimirDatastore
|
||||||
|
from vizier.datastore.fs.base import FileSystemDatastore
|
||||||
|
from vizier.engine.packages.pycell.processor import VARS_DBCLIENT
|
||||||
|
from multiprocessing import Process, Pipe
|
||||||
|
|
||||||
|
# Flask application object; ``application`` is the alias gunicorn's WSGI
# loader resolves (see run_executor.sh: "... main" -> main:application).
app = Flask(__name__)
application = app

# Port for the debug server.  os.environ values are always strings, so cast
# to int to keep the type consistent with the non-env default of 5005.
VIZIERSERVER_PYTHON_EXECUTOR_PORT = int(os.environ.get('VIZIERSERVER_PYTHON_EXECUTOR_PORT', 5005))
|
||||||
|
|
||||||
|
def set2list(obj):
    """Return ``obj`` unchanged unless it is a set, in which case return a
    list copy (sets are not JSON-serializable, and the result dict produced
    by execute_python is sent back as JSON)."""
    return list(obj) if isinstance(obj, set) else obj
|
||||||
|
|
||||||
|
def execute_python(conn, obj):
    """Execute a Python cell in this (child) process and send the result
    dict back over ``conn``.

    Parameters
    ----------
    conn: multiprocessing.connection.Connection
        Pipe endpoint the result dictionary is written to; closed on exit.
    obj: dict
        Request payload with keys 'datastore' (a class name resolved via
        this module's globals, e.g. MimirDatastore or FileSystemDatastore),
        'basepath', 'datasets', 'source' (the cell code), and 'dataobjects'.
    """
    # Save the real streams so they can be restored after execution.
    out = sys.stdout
    err = sys.stderr
    # Captured (tag, text) pairs written by the cell.
    stream = list()
    # Resolve the datastore class by name from the module globals.
    dsklass = globals()[obj['datastore']]
    datastore = dsklass(obj['basepath'])
    client = VizierDBClient(
        datastore=datastore,
        datasets=obj['datasets'],
        source=obj['source'],
        dataobjects=obj['dataobjects']
    )
    variables = {VARS_DBCLIENT: client}
    # Redirect output so everything the cell prints is captured in ``stream``.
    sys.stdout = OutputStream(tag='out', stream=stream)
    sys.stderr = OutputStream(tag='err', stream=stream)
    # Keep track of exception that is thrown by the code
    exception = None
    python_cell_preload(variables)
    # Run the Python code
    try:
        exec(obj['source'], variables)
    except Exception as ex:
        exception = ex
    finally:
        # Make sure to reverse redirection of output streams
        sys.stdout = out
        sys.stderr = err
    # Assemble module outputs.  (A stray debug print of the exception --
    # which wrote "None" to the server log on every success -- was removed.)
    stdout = []
    stderr = []
    is_success = (exception is None)
    for tag, text in stream:
        text = ''.join(text).strip()
        if tag == 'out':
            stdout.append(text)
        else:
            # Any write to stderr marks the cell as failed even without an
            # exception (preserved behavior from the original).
            stderr.append(text)
            is_success = False
    # Only append exception text when one was actually raised; previously
    # a stderr-only failure appended the literal string "None".
    if exception is not None:
        stderr.append(str(exception))
    conn.send({'success': is_success,
               'stdout': stdout,
               'stderr': stderr,
               'provenance':
                   {'read': set2list(client.read),
                    'write': set2list(client.write),
                    'delete': set2list(client.delete)},
               'datasets': client.datasets,
               'dataobjects': client.dataobjects})
    conn.close()
||||||
|
|
||||||
|
@app.route("/", methods=['POST'])
def home():
    """Accept a JSON execution request and run it in a fresh subprocess.

    Running each cell in its own process isolates crashes and global-state
    mutation from the web server.  Returns the result dict produced by
    execute_python (dict responses require Flask >= 1.1 -- TODO confirm the
    deployed version).  Raises ValueError (-> HTTP 500) on non-JSON input.
    """
    if not request.json:
        raise ValueError("not json")
    obj = request.json
    # Debug prints of the full payload/result were removed: they leaked
    # user-submitted cell code into the server log on every request.
    parent_conn, child_conn = Pipe()
    worker = Process(target=execute_python, args=(child_conn, obj))
    worker.start()
    # NOTE(review): recv() blocks forever if the child dies before sending
    # its result -- consider parent_conn.poll() with a timeout.
    return_val = parent_conn.recv()
    worker.join()
    return return_val
||||||
|
|
||||||
|
# Started directly only in debug mode (run_executor.sh runs `python3 main.py`
# when PYEXECUTOR_DEBUG=True); production serves the module-level
# ``application`` object through gunicorn instead.
if __name__ == "__main__":
    app.run(debug=True, port=VIZIERSERVER_PYTHON_EXECUTOR_PORT)
|
|
@ -0,0 +1,12 @@
|
||||||
|
#!/bin/bash
# Entry point for the python-executor container (launched by supervisord):
# activates the conda environment and starts the cell-execution web service.
echo 'activating virtualenv...'
cd /usr/local/source/web-api/
source /opt/conda/bin/activate vizierasync
cd vizier
echo 'running wsgi server...'
# Quote the variable and use POSIX '=' -- the original unquoted
# `[ $PYEXECUTOR_DEBUG == "True" ]` is a syntax error when the variable is
# unset or empty, and `==` is a bashism inside `[`.
if [ "$PYEXECUTOR_DEBUG" = "True" ]
then
    # Flask debug server (single process, auto-reload).
    exec python3 main.py
else
    # `exec` replaces the shell so gunicorn becomes the supervised process
    # and receives supervisord's stop signals directly.
    exec gunicorn -w 1 --access-logfile - --error-logfile - --log-level "$WSGI_LOG_LEVEL" --threads 8 --bind "0.0.0.0:$VIZIERSERVER_PYTHON_EXECUTOR_PORT" main
fi
|
|
@ -0,0 +1,22 @@
|
||||||
|
; Supervisor configuration for the python-executor container.
[supervisord]
; Run in the foreground -- supervisord is the container's main process.
nodaemon=true

; Cell-executor web service.  All output is forwarded to the container's
; stdout/stderr (logfile = /dev/std*, maxbytes = 0 disables rotation) so
; `docker logs` shows it.
[program:pyexec]
command=/usr/local/source/run_executor.sh
stdout_events_enabled=true
stderr_events_enabled=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0

; NOTE(review): the HTTP control interface listens on all interfaces with a
; hard-coded password baked into the image -- bind to 127.0.0.1 or move the
; credentials out of the image if port 9001 is ever published externally.
[inet_http_server]
port=0.0.0.0:9001
username=root
password=odinlab

[supervisorctl]
serverurl=http://127.0.0.1:9001
username=root
password=odinlab
|
@ -43,6 +43,10 @@ sudo docker rm vizier-ui
|
||||||
sudo docker stop vizier-proxy
|
sudo docker stop vizier-proxy
|
||||||
sudo docker rm vizier-proxy
|
sudo docker rm vizier-proxy
|
||||||
|
|
||||||
#proxy
|
#auth
|
||||||
sudo docker stop vizier-auth
|
sudo docker stop vizier-auth
|
||||||
sudo docker rm vizier-auth
|
sudo docker rm vizier-auth
|
||||||
|
|
||||||
|
#auth
|
||||||
|
sudo docker stop python-executor
|
||||||
|
sudo docker rm python-executor
|
||||||
|
|
|
@ -34,9 +34,11 @@ S3_AWS_SECRET_ACCESS_KEY="dL79qJGyLkUFyYvmmg3hEn8bIklSaTkrfG0IXuki"
|
||||||
S3_BUCKET_NAME="vizier-data-test"
|
S3_BUCKET_NAME="vizier-data-test"
|
||||||
VIZIER_DATA_VOLUME="vizier-data"
|
VIZIER_DATA_VOLUME="vizier-data"
|
||||||
|
|
||||||
|
#python-executor for sandboxing python cell code execution
|
||||||
|
sudo docker run -d -h python-executor --name python-executor --network spark-net -p 5005:5005 -p 9003:9001 -v $VIZIER_DATA_VOLUME:/usr/local/source/vizier-api-auth/vizier-data -e MIMIR_URL=http://vizier-auth:8089/api/v2/ docker.mimirdb.info/python-executor
|
||||||
|
|
||||||
#vizier-auth
|
#vizier-auth
|
||||||
sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/vizier-api-auth/vizier-data -p 5000:5000 -p 9002:9001 --expose 9000 --expose 4041 --expose 8089 --network spark-net -h vizier-auth --name vizier-auth -e DATA_STAGING_TYPE="hdfs" -e MIMIR_DATA_DIR="/usr/local/source/web-api/vizier/.vizierdb/mimir" -e REMOTE_SPARK=true -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-auth" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e API_SERVER="demo.$VIZIER_DOMAIN" -e VIZIERSERVER_SERVER_PORT=443 -e VIZIERSERVER_BASE_URL="https://demo.$VIZIER_DOMAIN" -e VIZIERAUTH_OAUTH_ID=e070fef69a20f246bcbc16ebc49c584dadde4753d88de0dac42eeea1cf2a2e48 -e VIZIERAUTH_OAUTH_SECRET=d5ed682921d6cb73d9a7b190173662403483bb2197f3960c9b0de325af624072 docker.mimirdb.info/vizier-auth
|
sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/vizier-api-auth/vizier-data -p 5000:5000 -p 9002:9001 --expose 9000 --expose 4041 --expose 8089 --network spark-net -h vizier-auth --name vizier-auth -e DATA_STAGING_TYPE="hdfs" -e MIMIR_DATA_DIR="/usr/local/source/web-api/vizier/.vizierdb/mimir" -e REMOTE_SPARK=true -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-auth" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e S3_AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e S3_AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e API_SERVER="demo.$VIZIER_DOMAIN" -e VIZIERSERVER_SERVER_PORT=443 -e VIZIERSERVER_BASE_URL="https://demo.$VIZIER_DOMAIN" -e AUTHSERVER_AUTH_CLIENTS="GenericOAuth20Client" -e VIZIERAUTH_OAUTH_ID=62dbc5e3ce67547a8ed874e5907f1798956f9a4403af6d20b33be2a8e460219a -e VIZIERAUTH_OAUTH_SECRET=b1005dfa188919c0ce56406fef1203c70daae50759973c5c59d826dc41c069b0 docker.mimirdb.info/vizier-auth
|
||||||
|
|
||||||
#proxy
|
#proxy
|
||||||
sudo docker run -d -p 80:80 -p 443:443 -p 9001:9001 -h vizier-proxy --name vizier-proxy --network spark-net -e VIZIER_CONFIG="vizier_auth.conf" -e VIZIER_API_APP_PATH="/vizier-db/api/v1/" -e VIZIER_DOMAIN="$VIZIER_DOMAIN" docker.mimirdb.info/vizier-proxy
|
sudo docker run -d -p 80:80 -p 443:443 -p 9001:9001 -h vizier-proxy --name vizier-proxy --network spark-net -e VIZIER_CONFIG="vizier_auth.conf" -e VIZIER_API_APP_PATH="/vizier-db/api/v1/" -e VIZIER_DOMAIN="$VIZIER_DOMAIN" docker.mimirdb.info/vizier-proxy
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
#!/bin/sh
# Launch a small Spark/HDFS cluster (one namenode + four datanode workers)
# plus the vizier python-executor and vizier-auth containers on spark-net.
#
# NOTE(review): AWS credentials and OAuth secrets are stored in this script
# in plain text -- rotate them and move them to an env file or secret store.

SPARK_VERSION="spark-2.4.4-bin-without-hadoop-scala-2.12"
SPARK_CONTAINER="spark-hadoop-scala-2.12"
MASTER_HOSTNAME="namenode"
MASTER_CONTAINER=`sudo docker run --restart always -d -v data-auth:/tmp/data --name $MASTER_HOSTNAME -h $MASTER_HOSTNAME --network spark-net -p 222:22 -p 4040:4040 -p 6066:6066 -p 7077:7077 -p 8020:8020 -p 8080:8080 -p 50070:50070 --expose 7001 --expose 7002 --expose 7003 --expose 7004 --expose 7005 --expose 7006 --expose 7077 --expose 6066 --expose 4040 --expose 8020 --expose 50070 -e "MASTER=spark://namenode:7077" -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/master.sh`
echo "master container id: $MASTER_CONTAINER"

START_PORT=7001
END_PORT=7006
WORKER_PORT=8882
DATANODE_PORT=50010

# start_worker <host-webui-port> <hostname>
# One datanode/worker container.  The worker web UI always listens on 8082
# inside the container; only the published host port differs.  (The original
# script also varied SPARK_WORKER_WEBUI_PORT per worker, which made the
# published container port 8082 a dead mapping on datanode2-4.)
start_worker() {
    HOST_WEBUI_PORT=$1
    WORKER_HOSTNAME=$2
    sudo docker run --restart always -d -v data-auth:/tmp/data -h $WORKER_HOSTNAME --name $WORKER_HOSTNAME --network spark-net --link $MASTER_CONTAINER -p $HOST_WEBUI_PORT:8082 --expose $WORKER_PORT --expose $DATANODE_PORT -e "SPARK_CONF_DIR=/conf" -e "SPARK_PUBLIC_DNS=127.0.0.1" -e "SPARK_WORKER_CORES=4" -e "SPARK_WORKER_PORT=$WORKER_PORT" -e "SPARK_WORKER_WEBUI_PORT=8082" -e "LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" -e "HDFS_DATA_HOST=$WORKER_HOSTNAME" -e "HDFS_HOST=namenode" -e "HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" -e "SPARK_EXECUTOR_MEMORY=8g" -e "SPARK_DAEMON_MEMORY=8g" -e "SPARK_DRIVER_MEMORY=8g" -e "SPARK_WORKER_MEMORY=8g" -e "HDFS_CONF_dfs_client_use_datanode_hostname=true" -e "AWS_ECS=false" docker.mimirdb.info/$SPARK_CONTAINER /usr/local/$SPARK_VERSION/worker.sh
}

start_worker 8082 datanode
start_worker 8083 datanode2
start_worker 8084 datanode3
start_worker 8085 datanode4

VIZIER_DOMAIN="vizierdb.info"

# NOTE(review): plaintext S3 credentials -- rotate and externalize.
S3_AWS_ACCESS_KEY_ID="AKIAJ7MLFSPYLYG47ARQ"
S3_AWS_SECRET_ACCESS_KEY="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"
S3_BUCKET_NAME="vizier-data-ub"

#python-executor for sandboxing python cell code execution
sudo docker run --restart always -d -h python-executor --name python-executor --network spark-net -p 5005:5005 -p 9003:9001 --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-auth,target=/usr/local/source/vizier-api-auth/vizier-data -e MIMIR_URL=http://vizier-auth:8089/api/v2/ docker.mimirdb.info/python-executor

#vizier-auth (the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY -e flags were
#passed twice in the original command; the duplicates are removed here)
sudo docker run --restart always -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-auth,target=/usr/local/source/vizier-api-auth/vizier-data -p 5000:5000 -p 9002:9001 --expose 9000 --expose 4041 --expose 8089 --network spark-net -h vizier-auth --name vizier-auth -e DATA_STAGING_TYPE="hdfs" -e MIMIR_DATA_DIR="/usr/local/source/vizier-api-auth/vizier-data" -e REMOTE_SPARK=true -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-auth" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e API_SERVER="demo.$VIZIER_DOMAIN" -e VIZIERSERVER_SERVER_PORT=443 -e VIZIERSERVER_BASE_URL="https://demo.$VIZIER_DOMAIN" -e AUTHSERVER_AUTH_CLIENTS="GenericOAuth20Client" -e VIZIERAUTH_OAUTH_ID=e070fef69a20f246bcbc16ebc49c584dadde4753d88de0dac42eeea1cf2a2e48 -e VIZIERAUTH_OAUTH_SECRET=d5ed682921d6cb73d9a7b190173662403483bb2197f3960c9b0de325af624072 docker.mimirdb.info/vizier-auth

#runBackup --restore --sparkHost namenode --dataStagingType s3 --overwriteJars -X LOG LOGM remoteSpark
|
|
@ -3,9 +3,9 @@ FROM docker.mimirdb.info/alpine_openjdk8
|
||||||
|
|
||||||
# Metadata
|
# Metadata
|
||||||
LABEL base.image="docker.mimirdb.info/alpine_openjdk8"
|
LABEL base.image="docker.mimirdb.info/alpine_openjdk8"
|
||||||
LABEL version="0.1"
|
LABEL version="0.4"
|
||||||
LABEL software="Spark"
|
LABEL software="Spark"
|
||||||
LABEL software.version="0.1.201801"
|
LABEL software.version="0.1.202004"
|
||||||
LABEL description="Spark image"
|
LABEL description="Spark image"
|
||||||
|
|
||||||
RUN apk add --update curl bash sed perl grep openssh
|
RUN apk add --update curl bash sed perl grep openssh
|
||||||
|
@ -67,8 +67,6 @@ ENV SPARK_VERSION=$SPARK_VERSION
|
||||||
#or copy it
|
#or copy it
|
||||||
COPY $SPARK_VERSION.tgz /
|
COPY $SPARK_VERSION.tgz /
|
||||||
RUN gunzip -c /$SPARK_VERSION.tgz | tar -x -C /usr/local/ && rm /$SPARK_VERSION.tgz
|
RUN gunzip -c /$SPARK_VERSION.tgz | tar -x -C /usr/local/ && rm /$SPARK_VERSION.tgz
|
||||||
COPY hadoop-aws-2.8.2.jar aws-java-sdk-1.11.234.jar aws-java-sdk-core-1.11.234.jar aws-java-sdk-kms-1.11.234.jar \
|
|
||||||
aws-java-sdk-s3-1.11.234.jar hadoop-aws-2.8.2.jar httpclient-4.5.3.jar joda-time-2.9.9.jar /usr/local/$SPARK_VERSION/jars/
|
|
||||||
|
|
||||||
ENV SPARK_HOME /usr/local/$SPARK_VERSION
|
ENV SPARK_HOME /usr/local/$SPARK_VERSION
|
||||||
ENV PATH $PATH:$SPARK_HOME/bin
|
ENV PATH $PATH:$SPARK_HOME/bin
|
||||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@ FROM docker.mimirdb.info/alpine_openjdk8
|
||||||
|
|
||||||
# Metadata
|
# Metadata
|
||||||
LABEL base.image="docker.mimirdb.info/alpine_openjdk8"
|
LABEL base.image="docker.mimirdb.info/alpine_openjdk8"
|
||||||
LABEL version="0.4"
|
LABEL version="0.5.8"
|
||||||
LABEL software="Vizier Auth"
|
LABEL software="Vizier Auth"
|
||||||
LABEL software.version="0.2.20200202"
|
LABEL software.version="0.2.20200202"
|
||||||
LABEL description="an open source, provenance aware, iterative data cleaning tool"
|
LABEL description="an open source, provenance aware, iterative data cleaning tool"
|
||||||
|
@ -54,6 +54,12 @@ ENV REMOTE_SPARK=false
|
||||||
#gram
|
#gram
|
||||||
ENV VIZIERAUTH_OAUTH_ID=e554e37483640ccc73324b5620376601843aadfa37d972f094ea13d02df90a0f
|
ENV VIZIERAUTH_OAUTH_ID=e554e37483640ccc73324b5620376601843aadfa37d972f094ea13d02df90a0f
|
||||||
ENV VIZIERAUTH_OAUTH_SECRET=f385531e40fb5268397d222c6c26a611cdd906510d3ee4ed8ad013d33b2c4102
|
ENV VIZIERAUTH_OAUTH_SECRET=f385531e40fb5268397d222c6c26a611cdd906510d3ee4ed8ad013d33b2c4102
|
||||||
|
#shibboleth and gitlab
|
||||||
|
ENV AUTHSERVER_AUTH_CLIENTS="SAML2Client,GenericOAuth20Client"
|
||||||
|
|
||||||
|
ENV SANDBOX_PYTHON_EXECUTION=True
|
||||||
|
ENV SANDBOX_PYTHON_URL=http://python-executor:5005/
|
||||||
|
ENV GITLAB_OAUTH_HOST="gitlab.odin.cse.buffalo.edu"
|
||||||
|
|
||||||
#have vizier-auth scala code run web-api process (true)
|
#have vizier-auth scala code run web-api process (true)
|
||||||
ENV RUN_WEB_API=false
|
ENV RUN_WEB_API=false
|
||||||
|
@ -198,7 +204,7 @@ RUN cd /usr/local/source/ \
|
||||||
&& pip install -e . \
|
&& pip install -e . \
|
||||||
&& mkdir -p /usr/local/source/web-api/.vizierdb
|
&& mkdir -p /usr/local/source/web-api/.vizierdb
|
||||||
|
|
||||||
|
LABEL pullui="1"
|
||||||
#setup production web-ui branch
|
#setup production web-ui branch
|
||||||
RUN mkdir -p /usr/local/source/ \
|
RUN mkdir -p /usr/local/source/ \
|
||||||
&& cd /usr/local/source/ \
|
&& cd /usr/local/source/ \
|
||||||
|
|
Loading…
Reference in New Issue