From cba51856c324c25e10d47acb3d139b4f6161f644 Mon Sep 17 00:00:00 2001 From: Mike Date: Thu, 20 Jun 2019 08:11:29 -0400 Subject: [PATCH] use hdfs staging for non aws containers --- api-async/Dockerfile | 1 + api-async/run_web_api.sh | 2 +- api-async/wsgi_profiler_conf.py | 69 +++++++++++++++++++ build-images-async.sh | 2 +- kubernetes/run-containers-async-microk8s.yaml | 4 +- mimir/Dockerfile | 2 +- run-containers-async.sh | 2 +- run_containers_norn.sh | 2 +- 8 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 api-async/wsgi_profiler_conf.py diff --git a/api-async/Dockerfile b/api-async/Dockerfile index 9634e92..a6db486 100644 --- a/api-async/Dockerfile +++ b/api-async/Dockerfile @@ -205,6 +205,7 @@ COPY run_init.sh /usr/local/source/run_init.sh COPY run_web_api.sh /usr/local/source/run_web_api.sh COPY entrypoint.sh /usr/local/source/entrypoint.sh COPY supervisord.conf /etc/supervisord.conf +COPY wsgi_profiler_conf.py /usr/local/source/web-api/vizier/wsgi_profiler_conf.py RUN chmod +x /usr/local/source/run_init.sh \ /usr/local/source/run_web_api.sh \ diff --git a/api-async/run_web_api.sh b/api-async/run_web_api.sh index ecc0a2a..ac86834 100755 --- a/api-async/run_web_api.sh +++ b/api-async/run_web_api.sh @@ -6,5 +6,5 @@ cd vizier echo 'running wsgi server...' 
#uwsgi --plugins-dir /usr/lib/uwsgi/ --need-plugin python --plugins-list --uid nginx --gid nginx --socket /tmp/uwsgi.sock --wsgi-file server.py --chown-socket nginx:nginx --callable app --processes 4 --threads 2 --stats 127.0.0.1:9191 --virtualenv /opt/conda/envs/vizier #gunicorn -w 4 --bind unix:vizier.sock -m 777 server:app -#gunicorn -c ./wsgi_profiler_conf.py -w 1 --threads 8 --bind 0.0.0.0:$VIZIERSERVER_SERVER_LOCAL server:app +#gunicorn -c ./wsgi_profiler_conf.py -w 1 --threads 8 --bind 0.0.0.0:$VIZIERSERVER_SERVER_LOCAL wsgi:app gunicorn -w 1 --threads 8 --bind 0.0.0.0:$VIZIERSERVER_SERVER_LOCAL wsgi:app
diff --git a/api-async/wsgi_profiler_conf.py b/api-async/wsgi_profiler_conf.py
new file mode 100644
index 0000000..1064205
--- /dev/null
+++ b/api-async/wsgi_profiler_conf.py
@@ -0,0 +1,107 @@
+# Copyright (C) 2018 New York University
+# University at Buffalo,
+# Illinois Institute of Technology.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Gunicorn config hooks that time each request and optionally profile it.
+
+Environment: PROFILER=0 turns cProfile off (timing only); PROFILE_LIMIT
+caps how many entries of the profile report are printed (default 30).
+"""
+from __future__ import print_function
+
+import cProfile
+import logging
+import os
+import pstats
+import threading
+import time
+
+try:
+    from StringIO import StringIO  # Python 2: pstats writes byte strings
+except ImportError:
+    from io import StringIO  # Python 3
+
+PROFILE_LIMIT = int(os.environ.get("PROFILE_LIMIT", 30))
+PROFILER = bool(int(os.environ.get("PROFILER", 1)))
+LOG_FILE = "/usr/local/source/web-api/vizier/.vizierdb/logs/timing.log"
+
+# Per-thread request state. run_web_api.sh starts gunicorn with `--threads 8`,
+# so one worker object handles several requests at once; keeping the start
+# time and profile on `worker` would let concurrent requests overwrite them.
+_STATE = threading.local()
+
+print("""
+# ** USAGE:
+$ PROFILE_LIMIT=100 gunicorn -c ./wsgi_profiler_conf.py wsgi
+# ** TIME MEASUREMENTS ONLY:
+$ PROFILER=0 gunicorn -c ./wsgi_profiler_conf.py wsgi
+""")
+
+
+def _log_timing(req, *fields):
+    """Append one `api, METHOD:path, query, label, value` row per field.
+
+    A failure to write LOG_FILE is logged and swallowed: timing output is
+    best-effort and must not turn into an error for the request itself.
+    """
+    prefix = "api, %s:%s, %s" % (req.method, req.path, req.query)
+    try:
+        with open(LOG_FILE, "a") as f:
+            f.writelines("%s, %s, %s\n" % (prefix, k, v) for k, v in fields)
+    except (IOError, OSError) as exc:
+        logging.error("cannot write %s: %s", LOG_FILE, exc)
+
+
+def profiler_enable(worker, req):
+    """Start a cProfile run for the request handled on the current thread."""
+    _STATE.profile = cProfile.Profile()
+    _STATE.profile.enable()
+    worker.log.info("PROFILING %d: %s" % (worker.pid, req.uri))
+
+
+def profiler_summary(worker, req):
+    """Stop the current thread's profile and log its top PROFILE_LIMIT rows."""
+    profile = getattr(_STATE, "profile", None)
+    if profile is None:  # profiler_enable() did not run on this thread
+        return
+    _STATE.profile = None
+    profile.disable()
+    s = StringIO()
+    ps = pstats.Stats(profile, stream=s).sort_stats('time', 'cumulative')
+    ps.print_stats(PROFILE_LIMIT)
+    # Emitted at ERROR level, as before, so the output does not depend on the
+    # root logger's level being lowered to INFO.
+    logging.error("\n[%d] [INFO] [%s] URI %s" % (worker.pid, req.method, req.uri))
+    logging.error("[%d] [INFO] %s" % (worker.pid, s.getvalue()))
+
+
+def pre_request(worker, req):
+    """Gunicorn `pre_request` hook: log the start time, start profiling."""
+    _STATE.start_time = time.time()
+    _log_timing(req, ("start", _STATE.start_time))
+    if PROFILER:
+        profiler_enable(worker, req)
+
+
+def post_request(worker, req, *args):
+    """Gunicorn `post_request` hook: log end time, duration and profile."""
+    end_time = time.time()
+    # Fall back to a zero duration if pre_request did not run on this thread.
+    total_time = end_time - getattr(_STATE, "start_time", end_time)
+    _log_timing(req, ("end", end_time), ("duration", total_time * 1000))
+    logging.error("\n[%d] [INFO] [%s] Load Time: %.3fs\n" % (
+        worker.pid, req.method, total_time))
+    if PROFILER:
+        profiler_summary(worker, req)
diff --git a/build-images-async.sh b/build-images-async.sh index 26860aa..bc15c52 100755 --- a/build-images-async.sh +++ b/build-images-async.sh @@ -5,7 +5,7 @@ GIT_PASS=$2 #mimir-async cd ./mimir -sudo
docker build -t docker.mimirdb.info/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=mimir-vizier-api +sudo docker build -t docker.mimirdb.info/vizier-mimir-async-spark ./ --build-arg gituser=$GIT_USER --build-arg gitpass=$GIT_PASS --build-arg MIMIR_BRANCH=master #api-async cd ../api-async diff --git a/kubernetes/run-containers-async-microk8s.yaml b/kubernetes/run-containers-async-microk8s.yaml index edb2904..fb77e78 100644 --- a/kubernetes/run-containers-async-microk8s.yaml +++ b/kubernetes/run-containers-async-microk8s.yaml @@ -389,9 +389,11 @@ spec: - name: HDFS_CONF_dfs_client_use_datanode_hostname value: "true" - name: DATA_STAGING_TYPE - value: "s3" + value: "hdfs" - name: MIMIR_HOST value: "vizier-mimir" + - name: MIMIR_DATA_DIR + value: "/usr/local/source/web-api/vizier/.vizierdb/mimir" --- kind: Service apiVersion: v1 diff --git a/mimir/Dockerfile b/mimir/Dockerfile index 2b9f78e..e420702 100644 --- a/mimir/Dockerfile +++ b/mimir/Dockerfile @@ -88,7 +88,7 @@ RUN curl -sL "https://github.com/sbt/sbt/releases/download/v0.13.15/sbt-0.13.15. 
RUN cd /usr/local/source/mimir \ && ../sbt/bin/sbt "runMimirVizier -X LOG LOGM remoteSpark NO-VISTRAILS" -ENV PULL_CODE=4 +ENV PULL_CODE=5 RUN cd /usr/local/source/mimir \ && git pull \ diff --git a/run-containers-async.sh b/run-containers-async.sh index 12fb32e..21cce69 100755 --- a/run-containers-async.sh +++ b/run-containers-async.sh @@ -35,7 +35,7 @@ VIZIER_DATA_VOLUME="vizier-data" #to use an s3 bucket as the data directory for mimir instead of a volume use this: #sudo docker run -d -v mimir-data:/tmp/data/mimir -p 9002:9001 --expose 4041 --expose 33388 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" -e MIMIR_DATA_DIR="/tmp/data/mimir" --privileged --device /dev/fuse docker.mimirdb.info/vizier-mimir-spark #to use a local bind mount for the data directory instead of an s3 bucket use the following for mimir instead of the above: -sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 4041 --expose 8089 --network spark-net -h vizier-mimir --name vizier-mimir -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark +sudo docker run -d -v $VIZIER_DATA_VOLUME:/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 4041 --expose 8089 --network spark-net -h vizier-mimir --name vizier-mimir -e DATA_STAGING_TYPE="hdfs" -e MIMIR_DATA_DIR="/usr/local/source/web-api/vizier/.vizierdb/mimir" -e USE_S3_VOLUME=false -e MIMIR_HOST="vizier-mimir" -e SPARK_HOST=$MASTER_HOSTNAME -e RESTORE_BACKUP=false -e PULL_MIMIR=false -e 
AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark #api-async #to use an s3 bucket as the data directory for the api instead of a volume use this: diff --git a/run_containers_norn.sh b/run_containers_norn.sh index 4a309e9..337a93c 100644 --- a/run_containers_norn.sh +++ b/run_containers_norn.sh @@ -34,7 +34,7 @@ S3_BUCKET_NAME="vizier-data-ub" #sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir,target=/usr/local/source/web-api/.vizierdb -p 9002:9001 --expose 33388 -p 4041:4041 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e RESTORE_BACKUP=false -e USE_S3_VOLUME=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-spark #mimir-async -sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir-async,target=/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 8089 -p 4041:4041 --network spark-net -h vizier-mimir --name vizier-mimir -e MIMIR_HOST="vizier-mimir" -e RESTORE_BACKUP=false -e USE_S3_VOLUME=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark +sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir-async,target=/usr/local/source/web-api/vizier/.vizierdb -p 9002:9001 --expose 8089 -p 4041:4041 --network spark-net -h vizier-mimir --name vizier-mimir -e DATA_STAGING_TYPE="hdfs" -e MIMIR_HOST="vizier-mimir" -e RESTORE_BACKUP=false -e USE_S3_VOLUME=false -e PULL_MIMIR=false -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e 
S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-mimir-async-spark #api #sudo docker run -d --mount type=bind,source=/home/csestaff/mrb24/docker-mounts/vizier-api-mimir,target=/usr/local/source/web-api/.vizierdb -p 9003:9001 -p 5000:443 --network spark-net -h vizier-api --name vizier-api -e MIMIR_HOST="vizier-mimir" -e USE_S3_VOLUME=false -e APP_PATH="" -e API_SERVER=api.$VIZIER_DOMAIN -e API_LOCAL_PORT=443 -e API_PORT=443 -e API_SCHEME=https -e AWS_ACCESS_KEY_ID=$S3_AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$S3_AWS_SECRET_ACCESS_KEY -e S3_BUCKET_NAME="$S3_BUCKET_NAME" docker.mimirdb.info/vizier-api-spark