Use HDFS staging for non-AWS containers

This commit is contained in:
Mike 2019-06-20 08:11:29 -04:00
parent 3323099ef7
commit cba51856c3
8 changed files with 78 additions and 6 deletions

View file

@ -205,6 +205,7 @@ COPY run_init.sh /usr/local/source/run_init.sh
COPY run_web_api.sh /usr/local/source/run_web_api.sh COPY run_web_api.sh /usr/local/source/run_web_api.sh
COPY entrypoint.sh /usr/local/source/entrypoint.sh COPY entrypoint.sh /usr/local/source/entrypoint.sh
COPY supervisord.conf /etc/supervisord.conf COPY supervisord.conf /etc/supervisord.conf
COPY wsgi_profiler_conf.py /usr/local/source/web-api/vizier/wsgi_profiler_conf.py
RUN chmod +x /usr/local/source/run_init.sh \ RUN chmod +x /usr/local/source/run_init.sh \
/usr/local/source/run_web_api.sh \ /usr/local/source/run_web_api.sh \

View file

@ -6,5 +6,5 @@ cd vizier
echo 'running wsgi server...' echo 'running wsgi server...'
#uwsgi --plugins-dir /usr/lib/uwsgi/ --need-plugin python --plugins-list --uid nginx --gid nginx --socket /tmp/uwsgi.sock --wsgi-file server.py --chown-socket nginx:nginx --callable app --processes 4 --threads 2 --stats 127.0.0.1:9191 --virtualenv /opt/conda/envs/vizier #uwsgi --plugins-dir /usr/lib/uwsgi/ --need-plugin python --plugins-list --uid nginx --gid nginx --socket /tmp/uwsgi.sock --wsgi-file server.py --chown-socket nginx:nginx --callable app --processes 4 --threads 2 --stats 127.0.0.1:9191 --virtualenv /opt/conda/envs/vizier
#gunicorn -w 4 --bind unix:vizier.sock -m 777 server:app #gunicorn -w 4 --bind unix:vizier.sock -m 777 server:app
#gunicorn -c ./wsgi_profiler_conf.py -w 1 --threads 8 --bind 0.0.0.0:$VIZIERSERVER_SERVER_LOCAL server:app #gunicorn -c ./wsgi_profiler_conf.py -w 1 --threads 8 --bind 0.0.0.0:$VIZIERSERVER_SERVER_LOCAL wsgi:app
gunicorn -w 1 --threads 8 --bind 0.0.0.0:$VIZIERSERVER_SERVER_LOCAL wsgi:app gunicorn -w 1 --threads 8 --bind 0.0.0.0:$VIZIERSERVER_SERVER_LOCAL wsgi:app

View file

@ -0,0 +1,69 @@
# Copyright (C) 2018 New York University
# University at Buffalo,
# Illinois Institute of Technology.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cProfile
import pstats
import StringIO
import logging
import os
import time
# Maximum number of entries pstats prints for each profiled request; can be
# overridden through the PROFILE_LIMIT environment variable (default 30).
PROFILE_LIMIT = int(os.environ.get("PROFILE_LIMIT", 30))
# PROFILER=0 switches cProfile off so that only the timing measurements are
# taken; any other integer value (default 1) turns per-request profiling on.
PROFILER = bool(int(os.environ.get("PROFILER", 1)))
# File to which pre_request/post_request append their timing records.
LOG_FILE = "/usr/local/source/web-api/vizier/.vizierdb/logs/timing.log"
# Usage banner, printed when this file is executed. The call form with a
# single parenthesised string prints the same text as the former print
# statement on Python 2 and is also valid syntax on Python 3.
print("""
# ** USAGE:
$ PROFILE_LIMIT=100 gunicorn -c ./wsgi_profiler_conf.py wsgi
# ** TIME MEASUREMENTS ONLY:
$ PROFILER=0 gunicorn -c ./wsgi_profiler_conf.py wsgi
""")
def profiler_enable(worker, req):
    """Start a fresh cProfile session for the request about to be handled.

    The profiler object is stored on ``worker.profile`` so that it can be
    stopped and reported on once the request has finished.

    Args:
        worker: worker object; receives the ``profile`` attribute and
            supplies ``pid`` and ``log`` for the log line.
        req: request object; only ``req.uri`` is read.
    """
    worker.profile = cProfile.Profile()
    worker.profile.enable()
    # Hand the values to the logger instead of pre-formatting the message,
    # so the string is only built when the record is actually emitted.
    worker.log.info("PROFILING %d: %s", worker.pid, req.uri)
def profiler_summary(worker, req):
    """Stop the worker's profiler and log its statistics for this request.

    The profile stored on ``worker.profile`` is disabled, its statistics are
    sorted by the ``'time'`` and ``'cumulative'`` keys, and at most
    ``PROFILE_LIMIT`` entries are printed into an in-memory buffer. The
    request line and then the buffered report are emitted through
    ``logging.error``.

    Args:
        worker: worker object providing ``profile`` and ``pid``.
        req: request object providing ``method`` and ``uri``.
    """
    report = StringIO.StringIO()
    worker.profile.disable()
    stats = pstats.Stats(worker.profile, stream=report)
    stats.sort_stats('time', 'cumulative')
    stats.print_stats(PROFILE_LIMIT)
    request_line = "\n[%d] [INFO] [%s] URI %s" % (worker.pid, req.method, req.uri)
    logging.error(request_line)
    report_line = "[%d] [INFO] %s" % (worker.pid, unicode(report.getvalue()))
    logging.error(report_line)
def pre_request(worker, req):
    """Record the request start and, when enabled, begin profiling.

    Stores the current time on ``worker.start_time``, appends a ``start``
    record for the request to ``LOG_FILE``, and calls ``profiler_enable``
    when ``PROFILER`` is set.

    Args:
        worker: worker object; receives the ``start_time`` attribute.
        req: request object providing ``method``, ``path`` and ``query``.
    """
    worker.start_time = time.time()
    with open(LOG_FILE, "a") as timing_log:
        pieces = (
            'api, ',
            req.method,
            ':',
            str(req.path),
            ', ',
            str(req.query),
            ', start, ',
            str(worker.start_time),
            "\n",
        )
        timing_log.write("".join(pieces))
    # PROFILER is always a bool (see its definition), so a plain truth test
    # is equivalent to comparing against True.
    if PROFILER:
        profiler_enable(worker, req)
def post_request(worker, req, *args):
    """Record the request end and duration and, when enabled, report the profile.

    Appends an ``end`` record (timestamp) and a ``duration`` record (elapsed
    time multiplied by 1000) to ``LOG_FILE``, logs the load time in seconds
    through ``logging.error``, and calls ``profiler_summary`` when
    ``PROFILER`` is set.

    Args:
        worker: worker object providing ``start_time`` and ``pid``.
        req: request object providing ``method``, ``path`` and ``query``.
        *args: any further positional arguments are accepted and ignored.
    """
    finished = time.time()
    elapsed = finished - worker.start_time
    with open(LOG_FILE, "a") as timing_log:
        # Both records share the same leading columns.
        prefix = 'api, ' + req.method + ':' + str(req.path) + ', ' + str(req.query)
        timing_log.write(prefix + ', end, ' + str(finished) + "\n")
        timing_log.write(prefix + ', duration, ' + str(elapsed*1000) + "\n")
    load_message = "\n[%d] [INFO] [%s] Load Time: %.3fs\n" % (
        worker.pid, req.method, elapsed)
    logging.error(load_message)
    # PROFILER is always a bool (see its definition), so a plain truth test
    # is equivalent to comparing against True.
    if PROFILER:
        profiler_summary(worker, req)

View file

@ -5,7 +5,7 @@ GIT_PASS=$2
#mimir-async #mimir-async
cd ./mimir cd ./mimir
sudo docker build -t docker.mimirdb.info/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=mimir-vizier-api sudo docker build -t docker.mimirdb.info/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=master
#api-async #api-async
cd ../api-async cd ../api-async

View file

@ -389,9 +389,11 @@ spec:
- name: HDFS_CONF_dfs_client_use_datanode_hostname - name: HDFS_CONF_dfs_client_use_datanode_hostname
value: "true" value: "true"
- name: DATA_STAGING_TYPE - name: DATA_STAGING_TYPE
value: "s3" value: "hdfs"
- name: MIMIR_HOST - name: MIMIR_HOST
value: "vizier-mimir" value: "vizier-mimir"
- name: MIMIR_DATA_DIR
value: "/usr/local/source/web-api/vizier/.vizierdb/mimir"
--- ---
kind: Service kind: Service
apiVersion: v1 apiVersion: v1

View file

@ -88,7 +88,7 @@ RUN curl -sL "https://github.com/sbt/sbt/releases/download/v0.13.15/sbt-0.13.15.
RUN cd /usr/local/source/mimir \ RUN cd /usr/local/source/mimir \
&& ../sbt/bin/sbt "runMimirVizier -X LOG LOGM remoteSpark NO-VISTRAILS" && ../sbt/bin/sbt "runMimirVizier -X LOG LOGM remoteSpark NO-VISTRAILS"
ENV PULL_CODE=4 ENV PULL_CODE=5
RUN cd /usr/local/source/mimir \ RUN cd /usr/local/source/mimir \
&& git pull \ && git pull \

View file

@ -35,7 +35,7 @@ VIZIER_DATA_VOLUME="vizier-data"
#to use an s3 bucket as the data directory for mimir instead of a volume use this: #to use an s3 bucket as the data directory for mimir instead of a volume use this:
#sudo docker run -d -v mimir-data:/tmp/data/mimir -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e MIMIR_DATA_DIR="/tmp/data/mimir" --privileged --device /dev/fuse docker.mimirdb.info/vizier-mimir-spark #sudo docker run -d -v mimir-data:/tmp/data/mimir -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e MIMIR_DATA_DIR="/tmp/data/mimir" --privileged --device /dev/fuse docker.mimirdb.info/vizier-mimir-spark
#to use a local bind mount for the data directory instead of an s3 bucket use the following for mimir instead of the above: #to use a local bind mount for the data directory instead of an s3 bucket use the following for mimir instead of the above:
sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 4041 --expose 8089 --network spark-net -h vizier-mimir --name vizier-mimir -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 4041 --expose 8089 --network spark-net -h vizier-mimir --name vizier-mimir -e DATA_STAGING_TYPE="hdfs" -e MIMIR_DATA_DIR="/usr/local/source/web-api/vizier/.vizierdb/mimir" -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark
#api-async #api-async
#to use an s3 bucket as the data directory for the api instead of a volume use this: #to use an s3 bucket as the data directory for the api instead of a volume use this:

View file

@ -34,7 +34,7 @@ S3_BUCKET_NAME="vizier-data-ub"
#sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir,target=/usr/local/source/web-api/.vizierdb -p 9002:9001 --expose 33388 -p 4041:4041 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e RESTORE_BACKUP=false -e USE_S3_VOLUME=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-spark #sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir,target=/usr/local/source/web-api/.vizierdb -p 9002:9001 --expose 33388 -p 4041:4041 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e RESTORE_BACKUP=false -e USE_S3_VOLUME=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-spark
#mimir-async #mimir-async
sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir-async,target=/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 8089 -p 4041:4041 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e RESTORE_BACKUP=false -e USE_S3_VOLUME=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir-async,target=/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 8089 -p 4041:4041 --network spark-net -h vizier-mimir --name vizier-mimir -e DATA_STAGING_TYPE="hdfs" -e MIMIR_HOST="vizier-mimir" -e RESTORE_BACKUP=false -e USE_S3_VOLUME=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark
#api #api
#sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir,target=/usr/local/source/web-api/.vizierdb -p 9003:9001 -p 5000:443 --network spark-net -h vizier-api --name vizier-api -e MIMIR_HOST="vizier-mimir" -e USE_S3_VOLUME=false -e APP_PATH="" -e API_SERVER=api.$VIZIER_DOMAIN -e API_LOCAL_PORT=443 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-api-spark #sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir,target=/usr/local/source/web-api/.vizierdb -p 9003:9001 -p 5000:443 --network spark-net -h vizier-api --name vizier-api -e MIMIR_HOST="vizier-mimir" -e USE_S3_VOLUME=false -e APP_PATH="" -e API_SERVER=api.$VIZIER_DOMAIN -e API_LOCAL_PORT=443 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-api-spark