From db8af58cf99cbc99e7c242105b1e7e86df1b74b5 Mon Sep 17 00:00:00 2001 From: Rajiv Sambasivan Date: Fri, 4 Oct 2019 08:35:15 +0530 Subject: [PATCH 1/3] Test data generated on init. --- Changelog.md | 1 + README.md | 45 ++++++------------- arangopipe/startup_commands.sh | 5 ++- .../generate_model_data.py | 2 - 4 files changed, 18 insertions(+), 35 deletions(-) diff --git a/Changelog.md b/Changelog.md index 75751ef..e8d1afb 100644 --- a/Changelog.md +++ b/Changelog.md @@ -6,5 +6,6 @@ * Added Summary View feature to the UI * Migrated to the latest python-arango 5.2.0 * Minor UI tweaks and fixes +* Test data generated on init ## 0.1 (Initial Release) diff --git a/README.md b/README.md index e0d71c2..fdc00f9 100644 --- a/README.md +++ b/README.md @@ -68,19 +68,13 @@ To facilitate an easy start, docker containers for *torch* and *tensorflow* are ` docker run -p 6529:8529 -p 8888:8888 -p 3000:3000 -it arangopipe/ap_torch` -2. We will now setup the container with some test data. To do so: - * Execute a `docker ps` command to get the *CONTAINER ID* of the running container. - * You can then get to a shell in the container using the command: `docker exec`*CONTAINER ID*. - * Once you are in the container shell, you can generate test data to try **Arangopipe** using the `test_data_generator` utility provided with **Arangopipe**. Simply follow the steps below. - - `cd examples/test_data_generator/` - - `ipython` - - `from generate_model_data import generate_runs` - - `generate_runs()` - - `exit` - - - -3. Running an example in the *torch* container: The _pytorch_ example is a python script. To run it, you will have to use the `docker ps` command and get to the shell in the container using the `docker exec` command. These steps are similar to what you would have done in the previous step to generate test data. Change directory to the `examples/pytorch` directory. 
The *torch* container provides an example of a linear regression model that uses **Arangopipe** to log experiment metadata. The experiment meta data includes information about the dataset, featureset and optimization settings used to run the *pytorch* model. Once you are in the shell of the *torch* container, run the driver program that develops the torch model and logs the experiment meta-data to *arangopipe*. To run the driver program, launch an `ipython` shell. In the shell, execute the following: + + +2. Running an example in the *torch* container: The _pytorch_ example is a python script. To run it: + * Run the `docker ps` command to get the `CONTAINER ID` of the _pytorch_ container. + * Run the command ` docker exec -it [ CONTAINER ID ] /bin/bash ` where `CONTAINER ID` is obtained from the previous step. + * +Change directory to the `examples/pytorch` directory. The *torch* container provides an example of a linear regression model that uses **Arangopipe** to log experiment metadata. The experiment metadata includes information about the dataset, featureset and optimization settings used to run the *pytorch* model. To run the example, launch an `ipython` shell. In the shell, execute the following: 1. `from ch_torch_linear_regression_driver import run_driver` 2. `run_driver()` @@ -88,14 +82,14 @@ To facilitate an easy start, docker containers for *torch* and *tensorflow* are -4. Execute this step after the model development step above has completed. Point your browser http:localhost:3000. Login to the Arangopipe user interface with username root and password `open sesame`. Select `Models` in the `Search Metadata` content pane. You should see the model you developed in the previous step. The details are shown in the figure below. +3. Execute this step after the model development step above has completed. Point your browser to http://localhost:3000. Login to the Arangopipe user interface with username root and password `open sesame`. 
Select `Models` in the `Search Metadata` content pane. You should see the model you developed in the previous step. The details are shown in the figure below. -5. Explore Arangopipe [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/arangoml/arangopipe/0.1?filepath=arangopipe%2Farangopipe_examples_torch.ipynb). Examples that show **Arangopipe** can be used with *hyperopt*, *sklearn* and *mlfow* are provided. To get the details of where these examples are located in the container, use the binder link above. To access the notebook examples provided with the docker container, point your browser to: `http://localhost:8888` to get to a **Jupyter** notebook. The default notebook password is _root_ +4. Explore Arangopipe [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/arangoml/arangopipe/0.1?filepath=arangopipe%2Farangopipe_examples_torch.ipynb). Examples that show **Arangopipe** can be used with *hyperopt*, *sklearn* and *mlflow* are provided. To get the details of where these examples are located in the container, use the binder link above. To access the notebook examples provided with the docker container, point your browser to: `http://localhost:8888` to get to a **Jupyter** notebook. The default notebook password is _root_ -6. Point your browser to: `http://localhost:6529` to get to the **ArangoDB** web user interface. The `root` password is `open sesame`. +5. Point your browser to: `http://localhost:6529` to get to the **ArangoDB** web user interface. The `root` password is `open sesame`. ### Tensorflow @@ -103,27 +97,16 @@ To facilitate an easy start, docker containers for *torch* and *tensorflow* are ` docker run -p 6529:8529 -p 8888:8888 -p 3000:3000 -it arangopipe/ap_tensor_flow` -2. We will now setup the container with some test data. To do so: - * Execute a `docker ps` command to get the *CONTAINER ID* of the running container. 
- * You can then get to a shell in the container using the command: `docker exec`*CONTAINER ID*. - * Once you are in the container shell, you can generate test data to try **Arangopipe** using the `test_data_generator` utility provided with **Arangopipe**. Simply follow the steps below. - - `cd examples/test_data_generator/` - - `ipython` - - `from generate_model_data import generate_runs` - - `generate_runs()` - - `exit` - - -3. Running an example in the *tensorflow* container: Run the tensorflow container. Point your browser to http://localhost:8888. You will be prompted for a password. Use `root` for the password. In the file browser that is presented in the Jupyter notebook, open the `examples` directory and then open the `TFX` directory. Open the notebook `tfx_metadata_integration.ipynb`. Read the description of the notebook. This notebook provides an example of how **Arangopipe** can be used with *tensorflow*. The utility of the multi-model feature of **ArangoDB** is leveraged in this example. [Tensorflow Data Validation](https://www.tensorflow.org/tfx/data_validation/get_started) is used to generate the summary statistics for a dataset. This *tensorflow* artifact can be stored in **Arangopipe** and reused as needed. This capability is illustrated in this notebook. +2. Running an example in the *tensorflow* container: Run the tensorflow container. Point your browser to http://localhost:8888. You will be prompted for a password. Use `root` for the password. In the file browser that is presented in the Jupyter notebook, open the `examples` directory and then open the `TFX` directory. Open the notebook `tfx_metadata_integration.ipynb`. Read the description of the notebook. This notebook provides an example of how **Arangopipe** can be used with *tensorflow*. The utility of the multi-model feature of **ArangoDB** is leveraged in this example. 
[Tensorflow Data Validation](https://www.tensorflow.org/tfx/data_validation/get_started) is used to generate the summary statistics for a dataset. This *tensorflow* artifact can be stored in **Arangopipe** and reused as needed. This capability is illustrated in this notebook. -4. Execute this step after you have executed all the cells in the notebook discussed in the previous step. Point your browser to http://localhost:3000. Login to the Arangopipe user interface with username root and password `open sesame`. Select `Featursets` in the `Search Metadata` content pane. You should see the featureset logged with **Arangopipe** resulting from executing the notebook discussed in the previous step. +3. Execute this step after you have executed all the cells in the notebook discussed in the previous step. Point your browser to http://localhost:3000. Login to the Arangopipe user interface with username root and password `open sesame`. Select `Featursets` in the `Search Metadata` content pane. You should see the featureset logged with **Arangopipe** resulting from executing the notebook discussed in the previous step. -5. Explore Arangopipe [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/arangoml/arangopipe/0.1?filepath=arangopipe%2Farangopipe_examples.ipynb). Examples that show **Arangopipe** can be used with *hyperopt*, *sklearn* and *mlfow* are provided. To get the details of where these examples are located in the container, use the binder link above. To access the notebook examples provided with the docker container, point your browser to: `http://localhost:8888` to get to a **Jupyter** notebook. The default notebook password is _root_ +4. Explore Arangopipe [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/arangoml/arangopipe/0.1?filepath=arangopipe%2Farangopipe_examples.ipynb). Examples that show **Arangopipe** can be used with *hyperopt*, *sklearn* and *mlflow* are provided. 
To get the details of where these examples are located in the container, use the binder link above. To access the notebook examples provided with the docker container, point your browser to: `http://localhost:8888` to get to a **Jupyter** notebook. The default notebook password is _root_ -6. Point your browser to: `http://localhost:6529` to get to the **ArangoDB** web user interface. The `root` password is `open sesame`. +5. Point your browser to: `http://localhost:6529` to get to the **ArangoDB** web user interface. The `root` password is `open sesame`. diff --git a/arangopipe/startup_commands.sh b/arangopipe/startup_commands.sh index c163479..2162291 100644 --- a/arangopipe/startup_commands.sh +++ b/arangopipe/startup_commands.sh @@ -1,6 +1,7 @@ #!/bin/bash arangod --database.password="open sesame"& - jupyter notebook --allow-root --notebookdir=/workspace/experiments --ip=0.0.0.0 --port=8888 --no-browser& - +sleep 5 +export PYTHONPATH=$PYTHONPATH:/workspace/experiments/examples/test_data_generator +python -c "from generate_model_data import generate_runs; generate_runs()" npm start diff --git a/arangopipe/tests/test_data_generator/generate_model_data.py b/arangopipe/tests/test_data_generator/generate_model_data.py index fd7ef51..3a241f9 100644 --- a/arangopipe/tests/test_data_generator/generate_model_data.py +++ b/arangopipe/tests/test_data_generator/generate_model_data.py @@ -154,6 +154,4 @@ def generate_runs(clean = False): ap.log_serving_perf(ex_servingperf, deployment_tag, user_id) return - - From 696bfa108ba755510fb964cfd00f771e3e5f0b10 Mon Sep 17 00:00:00 2001 From: Rajiv Sambasivan Date: Fri, 4 Oct 2019 13:27:11 +0530 Subject: [PATCH 2/3] Fix to use curl to check arangod status in startup script. 
--- arangopipe/Dockerfile_Torch_FE | 1 + arangopipe/makefile | 2 +- arangopipe/startup_commands.sh | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arangopipe/Dockerfile_Torch_FE b/arangopipe/Dockerfile_Torch_FE index e141580..f6d1069 100644 --- a/arangopipe/Dockerfile_Torch_FE +++ b/arangopipe/Dockerfile_Torch_FE @@ -13,6 +13,7 @@ MAINTAINER Joerg Schad ENV GIT_PYTHON_REFRESH=quiet RUN apt-get update RUN apt-get install -y python-pip +RUN apt-get install -y curl #RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango==4.4.0 #RUN pip install -i https://test.pypi.org/simple/ arangopipe RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango diff --git a/arangopipe/makefile b/arangopipe/makefile index cf0d297..d56dbd3 100644 --- a/arangopipe/makefile +++ b/arangopipe/makefile @@ -11,7 +11,7 @@ python_arangopipe:$(SRC) upload_test_pypi: twine upload --repository-url https://test.pypi.org/legacy/ -u rajiv.sambasivan -p $(TEST_PYPI_PASSWORD) dist/* docker_APSI_build:$(DOCKER_SI_FILE) - docker build --no-cache -t $(DOCKER_SI_IMG_NAME) -f $(DOCKER_SI_FILE) . + docker build --no-cache -t $(DOCKER_SI_IMG_NAME) -f $(DOCKER_SI_FILE) . docker_publish_SI_latest: @echo 'starting docker SI build...' docker login --username arangopipe --password $(DOCKER_PASSWORD) diff --git a/arangopipe/startup_commands.sh b/arangopipe/startup_commands.sh index 2162291..47f5fba 100644 --- a/arangopipe/startup_commands.sh +++ b/arangopipe/startup_commands.sh @@ -1,7 +1,11 @@ #!/bin/bash arangod --database.password="open sesame"& jupyter notebook --allow-root --notebookdir=/workspace/experiments --ip=0.0.0.0 --port=8888 --no-browser& +while [[ "$(curl -sL -w "%{http_code}\\n" "http://localhost:8529" -o /dev/null)" != "200" ]]; do +echo "Waiting for arangod" sleep 5 +done +echo "arangod is up!" 
export PYTHONPATH=$PYTHONPATH:/workspace/experiments/examples/test_data_generator python -c "from generate_model_data import generate_runs; generate_runs()" npm start From cb17f6a8a32b4f51bbc66578f1e7b36a2bdc42de Mon Sep 17 00:00:00 2001 From: Rajiv Sambasivan Date: Fri, 4 Oct 2019 16:40:13 +0530 Subject: [PATCH 3/3] Updates to the Dockerfile for Torch and TF to combine installs. --- arangopipe/Dockerfile_TF | 32 ------------------------- arangopipe/Dockerfile_TFFE | 10 ++------ arangopipe/Dockerfile_Torch | 44 ---------------------------------- arangopipe/Dockerfile_Torch_FE | 13 +++------- arangopipe/makefile | 4 ++-- 5 files changed, 7 insertions(+), 96 deletions(-) delete mode 100644 arangopipe/Dockerfile_TF delete mode 100644 arangopipe/Dockerfile_Torch diff --git a/arangopipe/Dockerfile_TF b/arangopipe/Dockerfile_TF deleted file mode 100644 index 5d324d2..0000000 --- a/arangopipe/Dockerfile_TF +++ /dev/null @@ -1,32 +0,0 @@ -FROM arangodb:3.4 -# This image uses two base images arangodb (above) and tensorflow (below) -FROM tensorflow/tensorflow:latest-py3 -MAINTAINER Joerg Schad -ENV GIT_PYTHON_REFRESH=quiet -RUN apt-get update -RUN apt-get install -y python-pip -RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango==4.4.0 -RUN pip install -i https://test.pypi.org/simple/ arangopipe -RUN mkdir -p /workspace -RUN pip install jupyter -RUN pip install matplotlib -RUN pip install tensorflow-data-validation -RUN pip install PyYAML==5.1.1 -WORKDIR / -COPY --from=0 / . -WORKDIR /workspace/experiments -COPY tests/AQL/ examples/AQL/ -COPY tests/covariate_shift/ examples/covariate_shift/ -COPY tests/hyperopt/ examples/hyperopt/ -COPY tests/mlflow/ examples/mlflow/ -COPY tests/test_data_generator/ examples/test_data_generator/ -COPY tests/TFX/ examples/TFX/ -COPY arangopipe_examples.ipynb . 
-ENV PYTHONPATH=`pwd`:$PYTHONPATH -RUN jupyter notebook --generate-config --allow-root -RUN echo "c.NotebookApp.password = u'sha1:6a3f528eec40:6e896b6e4828f525a6e20e5411cd1c8075d68619'" >> /root/.jupyter/jupyter_notebook_config.py -EXPOSE 8888 8529 -COPY startup_commands.sh /workspace/scripts/startup_commands.sh -RUN ["chmod", "+x", "/workspace/scripts/startup_commands.sh"] -ENTRYPOINT ["/workspace/scripts/startup_commands.sh"] - diff --git a/arangopipe/Dockerfile_TFFE b/arangopipe/Dockerfile_TFFE index 1c08355..1043607 100644 --- a/arangopipe/Dockerfile_TFFE +++ b/arangopipe/Dockerfile_TFFE @@ -14,15 +14,9 @@ MAINTAINER Joerg Schad ENV GIT_PYTHON_REFRESH=quiet RUN apt-get update RUN apt-get install -y python-pip -#RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango==4.4.0 -#RUN pip install -i https://test.pypi.org/simple/ arangopipe -RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango -RUN pip install arangopipe +RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango arangopipe jupyter matplotlib tensorflow-data-validation PyYAML==5.1.1 RUN mkdir -p /workspace -RUN pip install jupyter -RUN pip install matplotlib -RUN pip install tensorflow-data-validation -RUN pip install PyYAML==5.1.1 + WORKDIR / COPY --from=0 / . 
WORKDIR /workspace/experiments diff --git a/arangopipe/Dockerfile_Torch b/arangopipe/Dockerfile_Torch deleted file mode 100644 index 2127010..0000000 --- a/arangopipe/Dockerfile_Torch +++ /dev/null @@ -1,44 +0,0 @@ -FROM arangodb:3.4 - -FROM node:8.7.0-alpine AS frontend -RUN mkdir -p /arangopipe_frontend -WORKDIR /arangopipe_frontend -COPY arangopipe_frontend/app/package.json /arangopipe_frontend -COPY arangopipe_frontend/app/package-lock.json /arangopipe_frontend -RUN npm install -COPY arangopipe_frontend/app/ /arangopipe_frontend - -FROM continuumio/miniconda3 -MAINTAINER Joerg Schad -ENV GIT_PYTHON_REFRESH=quiet -RUN apt-get update -RUN apt-get install -y python-pip -RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango==4.4.0 -RUN pip install -i https://test.pypi.org/simple/ arangopipe -RUN mkdir -p /workspace -RUN pip install jupyter -RUN pip install matplotlib -RUN pip install PyYAML==5.1.1 -RUN conda install pytorch cudatoolkit=10.0 -c pytorch -#RUN git clone git@github.com:arangoml/arangopipe.git /workspace -WORKDIR / -COPY --from=0 / . -WORKDIR /workspace/experiments -COPY --from=frontend /arangopipe_frontend . -COPY tests/AQL/ examples/AQL/ -COPY tests/covariate_shift/ examples/covariate_shift/ -COPY tests/hyperopt/ examples/hyperopt/ -COPY tests/mlflow/ examples/mlflow/ -COPY tests/test_data_generator/ examples/test_data_generator/ -COPY tests/pytorch/ examples/pytorch/ -COPY arangopipe_examples_torch.ipynb . 
-ENV PYTHONPATH=`pwd`:$PYTHONPATH -RUN jupyter notebook --generate-config --allow-root -RUN echo "c.NotebookApp.password = u'sha1:6a3f528eec40:6e896b6e4828f525a6e20e5411cd1c8075d68619'" >> /root/.jupyter/jupyter_notebook_config.py - -EXPOSE 8888 8529 3000 - -COPY startup_commands.sh /workspace/experiments/startup_commands.sh -RUN ["chmod", "+x", "/workspace/experiments/startup_commands.sh"] -ENTRYPOINT ["/workspace/experiments/startup_commands.sh"] - diff --git a/arangopipe/Dockerfile_Torch_FE b/arangopipe/Dockerfile_Torch_FE index f6d1069..e659f75 100644 --- a/arangopipe/Dockerfile_Torch_FE +++ b/arangopipe/Dockerfile_Torch_FE @@ -12,18 +12,11 @@ FROM continuumio/miniconda3 MAINTAINER Joerg Schad ENV GIT_PYTHON_REFRESH=quiet RUN apt-get update -RUN apt-get install -y python-pip -RUN apt-get install -y curl -#RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango==4.4.0 -#RUN pip install -i https://test.pypi.org/simple/ arangopipe -RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango -RUN pip install arangopipe +RUN apt-get install -y python-pip curl +RUN pip install mlflow hyperopt sklearn2 jsonpickle python-arango arangopipe jupyter matplotlib PyYAML==5.1.1 RUN mkdir -p /workspace -RUN pip install jupyter -RUN pip install matplotlib -RUN pip install PyYAML==5.1.1 RUN conda install pytorch cudatoolkit=10.0 -c pytorch -#RUN git clone git@github.com:arangoml/arangopipe.git /workspace + WORKDIR / COPY --from=0 / . WORKDIR /workspace/experiments diff --git a/arangopipe/makefile b/arangopipe/makefile index d56dbd3..d09b2bf 100644 --- a/arangopipe/makefile +++ b/arangopipe/makefile @@ -2,8 +2,8 @@ SRC = arangopipe/*.py DOCKER_FILE = Dockerfile DOCKER_PASSWORD = DOCKER_REPO = arangopipe -DOCKER_SI_FILE = Dockerfile_Torch_FE -DOCKER_SI_IMG_NAME = ap_torch +DOCKER_SI_FILE = Dockerfile_TFFE +DOCKER_SI_IMG_NAME = ap_tensor_flow TEST_PYPI_PASSWORD = python_arangopipe:$(SRC)