From adb9a3b5e9db4423d641f7484b4072da26428764 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 13 Jan 2022 15:15:00 +0000 Subject: [PATCH 01/66] Init CPP project --- mnist-cpp/.devcontainer/Dockerfile | 76 +++++++++++++++++++++++ mnist-cpp/.devcontainer/devcontainer.json | 24 +++++++ mnist-cpp/.gitignore | 2 + mnist-cpp/CMakeLists.txt | 9 +++ mnist-cpp/main.cpp | 7 +++ 5 files changed, 118 insertions(+) create mode 100644 mnist-cpp/.devcontainer/Dockerfile create mode 100644 mnist-cpp/.devcontainer/devcontainer.json create mode 100644 mnist-cpp/.gitignore create mode 100644 mnist-cpp/CMakeLists.txt create mode 100644 mnist-cpp/main.cpp diff --git a/mnist-cpp/.devcontainer/Dockerfile b/mnist-cpp/.devcontainer/Dockerfile new file mode 100644 index 0000000..6a973f3 --- /dev/null +++ b/mnist-cpp/.devcontainer/Dockerfile @@ -0,0 +1,76 @@ +# Base image +FROM debian:bookworm + +# Env versioning +ARG HADOLINT_VERSION=v2.8.0 +ARG CMAKE_VERSION=3.16.9 +ARG TORCH_VERSION=1.10.1 + +# Non-root user with sudo access +ARG USERNAME=default +ARG USER_UID=1000 +ARG USER_GID=$USER_UID + +# Avoid warnings by switching to noninteractive +ENV DEBIAN_FRONTEND=noninteractive + +# Other env +ENV TORCH_DIR=/opt/torch + +# Install apt deps +SHELL ["/bin/bash", "-c"] +RUN apt-get update \ + && apt-get -y install --no-install-recommends \ + apt-utils \ + dialog 2>&1 \ + # + # More apt deps + && apt-get install -y --no-install-recommends \ + sudo \ + ca-certificates \ + wget \ + curl \ + git \ + vim \ + build-essential \ + unzip \ + # + # Hadolint + && wget --progress=dot:giga -O /bin/hadolint \ + https://github.com/hadolint/hadolint/releases/download/${HADOLINT_VERSION}/hadolint-Linux-x86_64 \ + && chmod +x /bin/hadolint \ + # + # Install docker binary + && curl -L https://download.docker.com/linux/static/stable/x86_64/docker-19.03.9.tgz | tar xvz docker/docker \ + && cp docker/docker /usr/local/bin \ + && rm -R docker \ + # + # Install cmake + && wget -q -O cmake-linux.sh 
https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.sh \ + && sh cmake-linux.sh -- --skip-license \ + && rm cmake-linux.sh \ + # + # Install libtorch + && mkdir $TORCH_DIR \ + && pushd $TORCH_DIR \ + && wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}%2Bcpu.zip \ + && unzip libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ + && rm -r libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ + && popd \ + # + # Create a non-root user to use if preferred + && groupadd --gid $USER_GID $USERNAME \ + && useradd -s /bin/bash --uid $USER_UID --gid $USER_GID -m $USERNAME \ + && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ + && chmod 0440 /etc/sudoers.d/$USERNAME \ + # + # Cleanup + && apt-get autoremove -y \ + && apt-get clean -y \ + && rm -rf /var/lib/apt/lists/* + +# Set working directory +WORKDIR /home/$USERNAME + +# Switch back to dialog for any ad-hoc use of apt-get +ENV DEBIAN_FRONTEND=dialog \ No newline at end of file diff --git a/mnist-cpp/.devcontainer/devcontainer.json b/mnist-cpp/.devcontainer/devcontainer.json new file mode 100644 index 0000000..f0f0505 --- /dev/null +++ b/mnist-cpp/.devcontainer/devcontainer.json @@ -0,0 +1,24 @@ +{ + "name": "devcontainer", + "dockerFile": "Dockerfile", + "context": "..", + "remoteUser": "default", + // "workspaceFolder": "/mnist-cpp", + // "workspaceMount": "source=/home/ubuntu/git/mdn-poc,target=/mdn-poc,type=bind,consistency=default", // AWS + // "workspaceMount": "source=/home/marco/git/mdn-poc,target=/mdn-poc,type=bind,consistency=default", // Epicenter + // "workspaceMount": "source=/home/azureuser/git/mdn-poc,target=/mdn-poc,type=bind,consistency=default", // Azure + "extensions": [ + "ms-vscode.cpptools", + "ms-vscode.cpptools-extension-pack", + "ms-vscode.cpptools-themes", + "exiasr.hadolint", + // "zxh404.vscode-proto3", + "yzhang.markdown-all-in-one" + ], + "mounts": [ 
+ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind,consistency=default", + ], + // "runArgs": [ + // "--privileged" + // ], +} \ No newline at end of file diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore new file mode 100644 index 0000000..1899660 --- /dev/null +++ b/mnist-cpp/.gitignore @@ -0,0 +1,2 @@ +build +.vscode \ No newline at end of file diff --git a/mnist-cpp/CMakeLists.txt b/mnist-cpp/CMakeLists.txt new file mode 100644 index 0000000..f916bcf --- /dev/null +++ b/mnist-cpp/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.0 FATAL_ERROR) +project(mnist-cpp) + +list(APPEND CMAKE_PREFIX_PATH "$ENV{TORCH_DIR}/libtorch") +find_package(Torch REQUIRED) + +add_executable(main main.cpp) +target_link_libraries(main "${TORCH_LIBRARIES}") +set_property(TARGET main PROPERTY CXX_STANDARD 14) \ No newline at end of file diff --git a/mnist-cpp/main.cpp b/mnist-cpp/main.cpp new file mode 100644 index 0000000..71c05db --- /dev/null +++ b/mnist-cpp/main.cpp @@ -0,0 +1,7 @@ +#include +#include + +int main() { + torch::Tensor tensor = torch::rand({2, 3}); + std::cout << tensor << std::endl; +} \ No newline at end of file From f342fb7465b50c6953d2c5d4944fcae21207f667 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 14 Jan 2022 13:13:13 +0000 Subject: [PATCH 02/66] working C++ training --- mnist-cpp/.gitignore | 4 ++- mnist-cpp/bin/download_data.sh | 13 +++++++++ mnist-cpp/main.cpp | 52 ++++++++++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 4 deletions(-) create mode 100755 mnist-cpp/bin/download_data.sh diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 1899660..350c217 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -1,2 +1,4 @@ build -.vscode \ No newline at end of file +.vscode +data +net.pt \ No newline at end of file diff --git a/mnist-cpp/bin/download_data.sh b/mnist-cpp/bin/download_data.sh new file mode 100755 index 0000000..2240252 --- /dev/null +++ b/mnist-cpp/bin/download_data.sh 
@@ -0,0 +1,13 @@ +#!/bin/bash + +mkdir -p data +pushd data +wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz +wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz +wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz +wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz +gunzip train-images-idx3-ubyte.gz +gunzip train-labels-idx1-ubyte.gz +gunzip t10k-images-idx3-ubyte.gz +gunzip t10k-labels-idx1-ubyte.gz +popd \ No newline at end of file diff --git a/mnist-cpp/main.cpp b/mnist-cpp/main.cpp index 71c05db..353d170 100644 --- a/mnist-cpp/main.cpp +++ b/mnist-cpp/main.cpp @@ -1,7 +1,53 @@ #include -#include + +// Define neural network +struct Net : torch::nn::Module { + torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr}; + + Net() { + fc1 = register_module("fc1", torch::nn::Linear(784, 64)); + fc2 = register_module("fc2", torch::nn::Linear(64, 32)); + fc3 = register_module("fc3", torch::nn::Linear(32, 10)); + } + + torch::Tensor forward(torch::Tensor x) { + x = torch::relu(fc1->forward(x.reshape({x.size(0), 784}))); + x = torch::dropout(x, /*p=*/0.5, /*train=*/is_training()); + x = torch::relu(fc2->forward(x)); + x = torch::log_softmax(fc3->forward(x), /*dim=*/1); + return x; + } +}; int main() { - torch::Tensor tensor = torch::rand({2, 3}); - std::cout << tensor << std::endl; + // Init network + auto net = std::make_shared(); + + // Multi-threaded data loader for the MNIST dataset. 
+ auto data_loader = torch::data::make_data_loader( + torch::data::datasets::MNIST("./data").map( + torch::data::transforms::Stack<>()), + /*batch_size=*/64); + + // Init optimizer + torch::optim::SGD optimizer(net->parameters(), /*lr=*/0.01); + + // Train loop + for (size_t epoch = 1; epoch <= 10; ++epoch) { // epoch loop + size_t batch_index = 0; + for (auto& batch : *data_loader) { // batch loop + optimizer.zero_grad(); // reset gradients + torch::Tensor prediction = net->forward(batch.data); // forward pass + torch::Tensor loss = torch::nll_loss(prediction, batch.target); // compute loss + loss.backward(); // backprop + optimizer.step(); // update params + if (++batch_index % 100 == 0) { // every 100 baches + // Print logs + std::cout << "Epoch: " << epoch << " | Batch: " << batch_index + << " | Loss: " << loss.item() << std::endl; + // Checkpoint model + torch::save(net, "net.pt"); + } + } + } } \ No newline at end of file From b0529f45ee2e216830212a3e22593159c5d92e65 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 14 Jan 2022 13:17:29 +0000 Subject: [PATCH 03/66] add ssh --- mnist-cpp/.devcontainer/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/mnist-cpp/.devcontainer/Dockerfile b/mnist-cpp/.devcontainer/Dockerfile index 6a973f3..972054f 100644 --- a/mnist-cpp/.devcontainer/Dockerfile +++ b/mnist-cpp/.devcontainer/Dockerfile @@ -34,6 +34,7 @@ RUN apt-get update \ vim \ build-essential \ unzip \ + openssh-client \ # # Hadolint && wget --progress=dot:giga -O /bin/hadolint \ From 1ecfd42335ea421f1fbffb34a5238bd6d4eb772c Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 17 Jan 2022 10:37:26 +0100 Subject: [PATCH 04/66] remove json --- .../.devcontainer/{devcontainer.json => devcontainer.json.bkp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename mnist-cpp/.devcontainer/{devcontainer.json => devcontainer.json.bkp} (100%) diff --git a/mnist-cpp/.devcontainer/devcontainer.json b/mnist-cpp/.devcontainer/devcontainer.json.bkp similarity 
index 100% rename from mnist-cpp/.devcontainer/devcontainer.json rename to mnist-cpp/.devcontainer/devcontainer.json.bkp From 6442886ab5ccdadc0764d9bc7156e8a59dfce2fb Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 17 Jan 2022 10:38:23 +0100 Subject: [PATCH 05/66] Ignore devcontainer.json --- mnist-cpp/.devcontainer/devcontainer.json.bkp | 24 ------------------- mnist-cpp/.gitignore | 3 ++- 2 files changed, 2 insertions(+), 25 deletions(-) delete mode 100644 mnist-cpp/.devcontainer/devcontainer.json.bkp diff --git a/mnist-cpp/.devcontainer/devcontainer.json.bkp b/mnist-cpp/.devcontainer/devcontainer.json.bkp deleted file mode 100644 index f0f0505..0000000 --- a/mnist-cpp/.devcontainer/devcontainer.json.bkp +++ /dev/null @@ -1,24 +0,0 @@ -{ - "name": "devcontainer", - "dockerFile": "Dockerfile", - "context": "..", - "remoteUser": "default", - // "workspaceFolder": "/mnist-cpp", - // "workspaceMount": "source=/home/ubuntu/git/mdn-poc,target=/mdn-poc,type=bind,consistency=default", // AWS - // "workspaceMount": "source=/home/marco/git/mdn-poc,target=/mdn-poc,type=bind,consistency=default", // Epicenter - // "workspaceMount": "source=/home/azureuser/git/mdn-poc,target=/mdn-poc,type=bind,consistency=default", // Azure - "extensions": [ - "ms-vscode.cpptools", - "ms-vscode.cpptools-extension-pack", - "ms-vscode.cpptools-themes", - "exiasr.hadolint", - // "zxh404.vscode-proto3", - "yzhang.markdown-all-in-one" - ], - "mounts": [ - "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind,consistency=default", - ], - // "runArgs": [ - // "--privileged" - // ], -} \ No newline at end of file diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 350c217..3a9db49 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -1,4 +1,5 @@ build .vscode data -net.pt \ No newline at end of file +net.pt +devcontainer.json \ No newline at end of file From c8e1dc579052f1844f480e0b0eb5f9c514040c5d Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 17 Jan 
2022 10:54:22 +0100 Subject: [PATCH 06/66] ignore some stuff from docker ctx --- mnist-cpp/.dockerignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 mnist-cpp/.dockerignore diff --git a/mnist-cpp/.dockerignore b/mnist-cpp/.dockerignore new file mode 100644 index 0000000..abad8d4 --- /dev/null +++ b/mnist-cpp/.dockerignore @@ -0,0 +1,4 @@ +build +data +net.pt +devcontainer.json \ No newline at end of file From 3a8de192815a3e3afeee6100c8991446e0bf7459 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 17 Jan 2022 16:55:47 +0100 Subject: [PATCH 07/66] Add NumCpp --- mnist-cpp/.devcontainer/Dockerfile | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/mnist-cpp/.devcontainer/Dockerfile b/mnist-cpp/.devcontainer/Dockerfile index 972054f..9eb0b0a 100644 --- a/mnist-cpp/.devcontainer/Dockerfile +++ b/mnist-cpp/.devcontainer/Dockerfile @@ -5,6 +5,8 @@ FROM debian:bookworm ARG HADOLINT_VERSION=v2.8.0 ARG CMAKE_VERSION=3.16.9 ARG TORCH_VERSION=1.10.1 +ARG NUMCPP_VERSION=2.6.2 +ARG DOCKER_VERSION=19.03.9 # Non-root user with sudo access ARG USERNAME=default @@ -35,6 +37,7 @@ RUN apt-get update \ build-essential \ unzip \ openssh-client \ + libboost-all-dev \ # # Hadolint && wget --progress=dot:giga -O /bin/hadolint \ @@ -42,7 +45,7 @@ RUN apt-get update \ && chmod +x /bin/hadolint \ # # Install docker binary - && curl -L https://download.docker.com/linux/static/stable/x86_64/docker-19.03.9.tgz | tar xvz docker/docker \ + && curl -L https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz | tar xvz docker/docker \ && cp docker/docker /usr/local/bin \ && rm -R docker \ # @@ -59,6 +62,16 @@ RUN apt-get update \ && rm -r libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ && popd \ # + # Install NumCpp + && git clone -b Version_${NUMCPP_VERSION} https://github.com/dpilger26/NumCpp.git \ + && pushd NumCpp \ + && mkdir build \ + && pushd build \ + && cmake .. \ + && cmake --build . 
--target install \ + && popd \ + && popd \ + # # Create a non-root user to use if preferred && groupadd --gid $USER_GID $USERNAME \ && useradd -s /bin/bash --uid $USER_UID --gid $USER_GID -m $USERNAME \ From bc52b1ec46eb4d2b02ae3dcb67d25bf8fa796455 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 17 Jan 2022 16:02:19 +0000 Subject: [PATCH 08/66] Import NumCpp --- mnist-cpp/CMakeLists.txt | 3 +++ mnist-cpp/main.cpp | 1 + 2 files changed, 4 insertions(+) diff --git a/mnist-cpp/CMakeLists.txt b/mnist-cpp/CMakeLists.txt index f916bcf..563cb2e 100644 --- a/mnist-cpp/CMakeLists.txt +++ b/mnist-cpp/CMakeLists.txt @@ -4,6 +4,9 @@ project(mnist-cpp) list(APPEND CMAKE_PREFIX_PATH "$ENV{TORCH_DIR}/libtorch") find_package(Torch REQUIRED) +find_package(NumCpp 2.6.2 REQUIRED) + add_executable(main main.cpp) target_link_libraries(main "${TORCH_LIBRARIES}") +target_link_libraries(main NumCpp::NumCpp) set_property(TARGET main PROPERTY CXX_STANDARD 14) \ No newline at end of file diff --git a/mnist-cpp/main.cpp b/mnist-cpp/main.cpp index 353d170..bd6b5a9 100644 --- a/mnist-cpp/main.cpp +++ b/mnist-cpp/main.cpp @@ -1,3 +1,4 @@ +#include "NumCpp.hpp" #include // Define neural network From cfd922c18d901d23fa45d8c0b93dfb8273051dc7 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 17 Jan 2022 17:08:43 +0100 Subject: [PATCH 09/66] rollback numcpp --- mnist-cpp/.devcontainer/Dockerfile | 11 ----------- mnist-cpp/CMakeLists.txt | 3 --- mnist-cpp/main.cpp | 1 - 3 files changed, 15 deletions(-) diff --git a/mnist-cpp/.devcontainer/Dockerfile b/mnist-cpp/.devcontainer/Dockerfile index 9eb0b0a..ed69a25 100644 --- a/mnist-cpp/.devcontainer/Dockerfile +++ b/mnist-cpp/.devcontainer/Dockerfile @@ -37,7 +37,6 @@ RUN apt-get update \ build-essential \ unzip \ openssh-client \ - libboost-all-dev \ # # Hadolint && wget --progress=dot:giga -O /bin/hadolint \ @@ -62,16 +61,6 @@ RUN apt-get update \ && rm -r libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ && popd \ # - # Install 
NumCpp - && git clone -b Version_${NUMCPP_VERSION} https://github.com/dpilger26/NumCpp.git \ - && pushd NumCpp \ - && mkdir build \ - && pushd build \ - && cmake .. \ - && cmake --build . --target install \ - && popd \ - && popd \ - # # Create a non-root user to use if preferred && groupadd --gid $USER_GID $USERNAME \ && useradd -s /bin/bash --uid $USER_UID --gid $USER_GID -m $USERNAME \ diff --git a/mnist-cpp/CMakeLists.txt b/mnist-cpp/CMakeLists.txt index 563cb2e..f916bcf 100644 --- a/mnist-cpp/CMakeLists.txt +++ b/mnist-cpp/CMakeLists.txt @@ -4,9 +4,6 @@ project(mnist-cpp) list(APPEND CMAKE_PREFIX_PATH "$ENV{TORCH_DIR}/libtorch") find_package(Torch REQUIRED) -find_package(NumCpp 2.6.2 REQUIRED) - add_executable(main main.cpp) target_link_libraries(main "${TORCH_LIBRARIES}") -target_link_libraries(main NumCpp::NumCpp) set_property(TARGET main PROPERTY CXX_STANDARD 14) \ No newline at end of file diff --git a/mnist-cpp/main.cpp b/mnist-cpp/main.cpp index bd6b5a9..353d170 100644 --- a/mnist-cpp/main.cpp +++ b/mnist-cpp/main.cpp @@ -1,4 +1,3 @@ -#include "NumCpp.hpp" #include // Define neural network From 0065b273aaaf0be71a180a842cb8d3f7b6464edb Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 19 Jan 2022 12:47:24 +0000 Subject: [PATCH 10/66] params --- mnist-cpp/.gitignore | 2 +- mnist-cpp/bin/build.sh | 2 ++ mnist-cpp/main.cpp | 23 ++++++++++++++++------- 3 files changed, 19 insertions(+), 8 deletions(-) create mode 100755 mnist-cpp/bin/build.sh diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 3a9db49..6518fab 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -1,5 +1,5 @@ build .vscode data -net.pt +*.pt devcontainer.json \ No newline at end of file diff --git a/mnist-cpp/bin/build.sh b/mnist-cpp/bin/build.sh new file mode 100755 index 0000000..033d119 --- /dev/null +++ b/mnist-cpp/bin/build.sh @@ -0,0 +1,2 @@ +#!/bin/bash +/bin/cmake --build /examples/mnist-cpp/build --config Debug --target all -j 14 -- \ No newline at end of file 
diff --git a/mnist-cpp/main.cpp b/mnist-cpp/main.cpp index 353d170..d4a1c10 100644 --- a/mnist-cpp/main.cpp +++ b/mnist-cpp/main.cpp @@ -1,5 +1,9 @@ #include +#define DROPOUT 0.5 +#define BATCH_SIZE 64 +#define N_EPOCHS 10 + // Define neural network struct Net : torch::nn::Module { torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr}; @@ -12,28 +16,33 @@ struct Net : torch::nn::Module { torch::Tensor forward(torch::Tensor x) { x = torch::relu(fc1->forward(x.reshape({x.size(0), 784}))); - x = torch::dropout(x, /*p=*/0.5, /*train=*/is_training()); + x = torch::dropout(x, /*p=*/DROPOUT, /*train=*/is_training()); x = torch::relu(fc2->forward(x)); x = torch::log_softmax(fc3->forward(x), /*dim=*/1); return x; } }; -int main() { - // Init network - auto net = std::make_shared(); +int main(int argc, char** argv) { + // Init model + std::shared_ptr net = std::make_shared(); + if (argc == 3) { + torch::load(net, argv[1]); + } else if (argc > 3 || argc < 2) { + std::cerr << "Wrong number of arguments" << std::endl; + } // Multi-threaded data loader for the MNIST dataset. 
auto data_loader = torch::data::make_data_loader( torch::data::datasets::MNIST("./data").map( torch::data::transforms::Stack<>()), - /*batch_size=*/64); + BATCH_SIZE); // Init optimizer torch::optim::SGD optimizer(net->parameters(), /*lr=*/0.01); // Train loop - for (size_t epoch = 1; epoch <= 10; ++epoch) { // epoch loop + for (size_t epoch = 1; epoch <= N_EPOCHS; ++epoch) { // epoch loop size_t batch_index = 0; for (auto& batch : *data_loader) { // batch loop optimizer.zero_grad(); // reset gradients @@ -46,7 +55,7 @@ int main() { std::cout << "Epoch: " << epoch << " | Batch: " << batch_index << " | Loss: " << loss.item() << std::endl; // Checkpoint model - torch::save(net, "net.pt"); + torch::save(net, argv[2]); } } } From 47786ff50cf5230b9e6b1f894fb5dd6b6450256c Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 19 Jan 2022 13:48:42 +0000 Subject: [PATCH 11/66] Env --- mnist-cpp/environment.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 mnist-cpp/environment.yaml diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml new file mode 100644 index 0000000..39d69a0 --- /dev/null +++ b/mnist-cpp/environment.yaml @@ -0,0 +1,10 @@ +name: default + +channels: + - conda-forge + +dependencies: + - python=3.9 + - pip=21.3.1 + - pip: + - numpy==1.22.1 \ No newline at end of file From 6d99feae6c43934f59355964bdb8133579049176 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 19 Jan 2022 15:12:07 +0100 Subject: [PATCH 12/66] Add conda --- mnist-cpp/.devcontainer/Dockerfile | 21 +++++++++++++++++++++ mnist-cpp/environment.yaml | 1 + 2 files changed, 22 insertions(+) diff --git a/mnist-cpp/.devcontainer/Dockerfile b/mnist-cpp/.devcontainer/Dockerfile index ed69a25..edc79f6 100644 --- a/mnist-cpp/.devcontainer/Dockerfile +++ b/mnist-cpp/.devcontainer/Dockerfile @@ -7,6 +7,7 @@ ARG CMAKE_VERSION=3.16.9 ARG TORCH_VERSION=1.10.1 ARG NUMCPP_VERSION=2.6.2 ARG DOCKER_VERSION=19.03.9 +ARG CONDA_VERSION=4.9.2 # Non-root user with sudo access 
ARG USERNAME=default @@ -60,6 +61,13 @@ RUN apt-get update \ && unzip libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ && rm -r libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ && popd \ + # + # Install conda + && wget https://repo.anaconda.com/miniconda/Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh \ + && bash Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh -b -p /opt/conda \ + && rm -f Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh \ + && ln -s /opt/conda/bin/conda /bin/conda \ + && ln -s /opt/conda/bin/conda-env /bin/conda-env \ # # Create a non-root user to use if preferred && groupadd --gid $USER_GID $USERNAME \ @@ -72,6 +80,19 @@ RUN apt-get update \ && apt-get clean -y \ && rm -rf /var/lib/apt/lists/* +# Setup default environment +COPY environment.yaml /tmp/environment.yaml +RUN conda env create -f /tmp/environment.yaml \ + && rm /tmp/environment.yaml \ + && conda run -n default python -m ipykernel install --name default \ + && chown default /opt/conda/envs/default + +# Init conda for non-root user +USER $USERNAME +RUN conda init bash \ + && conda config --set auto_activate_base false \ + && echo "conda activate default" >> ~/.bashrc + # Set working directory WORKDIR /home/$USERNAME diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index 39d69a0..2d10c29 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -6,5 +6,6 @@ channels: dependencies: - python=3.9 - pip=21.3.1 + - ipykernel - pip: - numpy==1.22.1 \ No newline at end of file From 6173eb20dbf285fb944e1fa8c9913740fe22560d Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 19 Jan 2022 14:15:39 +0000 Subject: [PATCH 13/66] pin ipykernel --- mnist-cpp/environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index 2d10c29..d3b988d 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -6,6 +6,6 @@ channels: dependencies: - 
python=3.9 - pip=21.3.1 - - ipykernel + - ipykernel=6.7.0 - pip: - numpy==1.22.1 \ No newline at end of file From 7db9a5291d5a3c2778492712de1fa58712f59a55 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 19 Jan 2022 14:30:41 +0000 Subject: [PATCH 14/66] add packages --- mnist-cpp/environment.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index d3b988d..a9e2d27 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -8,4 +8,5 @@ dependencies: - pip=21.3.1 - ipykernel=6.7.0 - pip: - - numpy==1.22.1 \ No newline at end of file + - numpy==1.22.1 + - python-mnist==0.7 \ No newline at end of file From 5dafe028183664bf46a676b4ce62b571529bf14d Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 19 Jan 2022 15:53:30 +0000 Subject: [PATCH 15/66] Splitting --- mnist-cpp/main.cpp | 27 ++++++++----- mnist-cpp/test.ipynb | 95 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 9 deletions(-) create mode 100644 mnist-cpp/test.ipynb diff --git a/mnist-cpp/main.cpp b/mnist-cpp/main.cpp index d4a1c10..aa87c6b 100644 --- a/mnist-cpp/main.cpp +++ b/mnist-cpp/main.cpp @@ -25,10 +25,14 @@ struct Net : torch::nn::Module { int main(int argc, char** argv) { // Init model + std::string out_path; std::shared_ptr net = std::make_shared(); if (argc == 3) { torch::load(net, argv[1]); - } else if (argc > 3 || argc < 2) { + out_path = argv[2]; + } else if (argc == 2) { + out_path = argv[1]; + } else { std::cerr << "Wrong number of arguments" << std::endl; } @@ -41,22 +45,27 @@ int main(int argc, char** argv) { // Init optimizer torch::optim::SGD optimizer(net->parameters(), /*lr=*/0.01); + size_t n_splits = std::stoi(std::getenv("N_SPLITS")); + size_t split = std::stoi(std::getenv("SPLIT")); + // Train loop for (size_t epoch = 1; epoch <= N_EPOCHS; ++epoch) { // epoch loop size_t batch_index = 0; for (auto& batch : *data_loader) { // batch loop - 
optimizer.zero_grad(); // reset gradients - torch::Tensor prediction = net->forward(batch.data); // forward pass - torch::Tensor loss = torch::nll_loss(prediction, batch.target); // compute loss - loss.backward(); // backprop - optimizer.step(); // update params - if (++batch_index % 100 == 0) { // every 100 baches + if (batch_index % n_splits == split) { + optimizer.zero_grad(); // reset gradients + torch::Tensor prediction = net->forward(batch.data); // forward pass + torch::Tensor loss = torch::nll_loss(prediction, batch.target); // compute loss + loss.backward(); // backprop + optimizer.step(); // update params // Print logs std::cout << "Epoch: " << epoch << " | Batch: " << batch_index << " | Loss: " << loss.item() << std::endl; - // Checkpoint model - torch::save(net, argv[2]); } + batch_index++; } } + + // Save + torch::save(net, out_path); } \ No newline at end of file diff --git a/mnist-cpp/test.ipynb b/mnist-cpp/test.ipynb new file mode 100644 index 0000000..73228da --- /dev/null +++ b/mnist-cpp/test.ipynb @@ -0,0 +1,95 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from mnist import MNIST\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "mndata = MNIST('data')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "images, labels = mndata.load_training()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "np.array(images)[:1000].astype(np.ubyte).tofile('data0/train-images-idx3.ubyte')\n", + "np.array(images)[:1000].astype(np.ubyte).tofile('data0/train-labels-idx3.ubyte')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'list' object has no attribute 'shape'", + "output_type": "error", + 
"traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [5]\u001b[0m, in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mimages\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'" + ] + } + ], + "source": [ + "images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "b19c504e0abe9af0b80e78d873495a79ef781516362c519732adc3cb489b6869" + }, + "kernelspec": { + "display_name": "Python 3.9.9 64-bit ('default': conda)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c8f864343d3867260edd61398659d2496db0eaf0 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 20 Jan 2022 09:51:23 +0000 Subject: [PATCH 16/66] remove test --- mnist-cpp/test.ipynb | 95 -------------------------------------------- 1 file changed, 95 deletions(-) delete mode 100644 mnist-cpp/test.ipynb diff --git a/mnist-cpp/test.ipynb b/mnist-cpp/test.ipynb deleted file mode 100644 index 73228da..0000000 --- a/mnist-cpp/test.ipynb +++ /dev/null @@ -1,95 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "from mnist import MNIST\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "mndata = MNIST('data')" - ] - }, - { - "cell_type": 
"code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "images, labels = mndata.load_training()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "np.array(images)[:1000].astype(np.ubyte).tofile('data0/train-images-idx3.ubyte')\n", - "np.array(images)[:1000].astype(np.ubyte).tofile('data0/train-labels-idx3.ubyte')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'list' object has no attribute 'shape'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [5]\u001b[0m, in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mimages\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'" - ] - } - ], - "source": [ - "images." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "interpreter": { - "hash": "b19c504e0abe9af0b80e78d873495a79ef781516362c519732adc3cb489b6869" - }, - "kernelspec": { - "display_name": "Python 3.9.9 64-bit ('default': conda)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.9" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} From e1b879af0976847f0ccf4e1bddb758725774e47d Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 20 Jan 2022 10:06:53 +0000 Subject: [PATCH 17/66] install fedn --- mnist-cpp/environment.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index a9e2d27..d3cfe65 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -8,5 +8,4 @@ dependencies: - pip=21.3.1 - ipykernel=6.7.0 - pip: - - numpy==1.22.1 - - python-mnist==0.7 \ No newline at end of file + - "-e git://github.com/scaleoutsystems/fedn.git@develop#egg=fedn&subdirectory=fedn" \ No newline at end of file From 07684ccf8e582d9087f5e9811cebd027c3f0e7d3 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 20 Jan 2022 12:32:25 +0100 Subject: [PATCH 18/66] Add compose --- mnist-cpp/.devcontainer/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mnist-cpp/.devcontainer/Dockerfile b/mnist-cpp/.devcontainer/Dockerfile index edc79f6..c835a30 100644 --- a/mnist-cpp/.devcontainer/Dockerfile +++ b/mnist-cpp/.devcontainer/Dockerfile @@ -8,6 +8,7 @@ ARG TORCH_VERSION=1.10.1 ARG NUMCPP_VERSION=2.6.2 ARG DOCKER_VERSION=19.03.9 ARG CONDA_VERSION=4.9.2 +ARG COMPOSE_VERSION=1.29.2 # Non-root user with sudo access ARG 
USERNAME=default @@ -44,10 +45,12 @@ RUN apt-get update \ https://github.com/hadolint/hadolint/releases/download/${HADOLINT_VERSION}/hadolint-Linux-x86_64 \ && chmod +x /bin/hadolint \ # - # Install docker binary + # Install docker binaries && curl -L https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz | tar xvz docker/docker \ && cp docker/docker /usr/local/bin \ && rm -R docker \ + && curl -L https://github.com/docker/compose/releases/download/${COMPOSE_VERSION}/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose \ + && chmod +x /usr/local/bin/docker-compose \ # # Install cmake && wget -q -O cmake-linux.sh https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.sh \ From 5a37141c3178224617b74d2a69aa4b3648a0e009 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 20 Jan 2022 12:35:31 +0100 Subject: [PATCH 19/66] conda entrypoint --- mnist-cpp/.devcontainer/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mnist-cpp/.devcontainer/Dockerfile b/mnist-cpp/.devcontainer/Dockerfile index c835a30..7ae1bb5 100644 --- a/mnist-cpp/.devcontainer/Dockerfile +++ b/mnist-cpp/.devcontainer/Dockerfile @@ -100,4 +100,7 @@ RUN conda init bash \ WORKDIR /home/$USERNAME # Switch back to dialog for any ad-hoc use of apt-get -ENV DEBIAN_FRONTEND=dialog \ No newline at end of file +ENV DEBIAN_FRONTEND=dialog + +# Add entrypoint to conda environment for commands +ENTRYPOINT ["conda", "run", "-n", "default"] \ No newline at end of file From 2a035066839a3ab03f51a059c1fdc8ac26b7ca3c Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 20 Jan 2022 12:45:11 +0100 Subject: [PATCH 20/66] refactory --- mnist-cpp/.gitignore | 2 +- mnist-cpp/{.devcontainer => }/Dockerfile | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename mnist-cpp/{.devcontainer => }/Dockerfile (100%) diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 6518fab..6681a60 100644 --- 
a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -2,4 +2,4 @@ build .vscode data *.pt -devcontainer.json \ No newline at end of file +.devcontainer \ No newline at end of file diff --git a/mnist-cpp/.devcontainer/Dockerfile b/mnist-cpp/Dockerfile similarity index 100% rename from mnist-cpp/.devcontainer/Dockerfile rename to mnist-cpp/Dockerfile From ff7cdbbbcf7d90434089c07a3779a437dce1a20c Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 20 Jan 2022 12:46:45 +0100 Subject: [PATCH 21/66] no need to change user --- mnist-cpp/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index 7ae1bb5..e1d43fd 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -96,9 +96,6 @@ RUN conda init bash \ && conda config --set auto_activate_base false \ && echo "conda activate default" >> ~/.bashrc -# Set working directory -WORKDIR /home/$USERNAME - # Switch back to dialog for any ad-hoc use of apt-get ENV DEBIAN_FRONTEND=dialog From 2498a94bb8e49b0f159b099a989a18302fba367d Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 20 Jan 2022 15:13:03 +0000 Subject: [PATCH 22/66] Docker compose --- mnist-cpp/Dockerfile | 10 +++- mnist-cpp/config/settings-reducer.yaml | 24 +++++++++ mnist-cpp/docker-compose.yml | 69 ++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 mnist-cpp/config/settings-reducer.yaml create mode 100644 mnist-cpp/docker-compose.yml diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index e1d43fd..61402a4 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -78,17 +78,25 @@ RUN apt-get update \ && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ && chmod 0440 /etc/sudoers.d/$USERNAME \ # + # Configure FEDn directories + && mkdir -p /opt/FEDn/app/certs \ + && mkdir -p /opt/FEDn/app/config \ + && chown -R $USERNAME /opt/FEDn/app \ + # # Cleanup && apt-get autoremove -y \ && apt-get clean -y \ && rm -rf 
/var/lib/apt/lists/* +# Copy FEDn config +COPY config/settings-reducer.yaml /opt/FEDn/app/config + # Setup default environment COPY environment.yaml /tmp/environment.yaml RUN conda env create -f /tmp/environment.yaml \ && rm /tmp/environment.yaml \ && conda run -n default python -m ipykernel install --name default \ - && chown default /opt/conda/envs/default + && chown $USERNAME /opt/conda/envs/default # Init conda for non-root user USER $USERNAME diff --git a/mnist-cpp/config/settings-reducer.yaml b/mnist-cpp/config/settings-reducer.yaml new file mode 100644 index 0000000..ad76386 --- /dev/null +++ b/mnist-cpp/config/settings-reducer.yaml @@ -0,0 +1,24 @@ +network_id: fedn-test-network + +control: + state: idle + helper: pytorch + +statestore: + type: MongoDB + mongo_config: + username: fedn_admin + password: password + host: mongo + port: 6534 + +storage: + storage_type: S3 + storage_config: + storage_hostname: minio + storage_port: 9000 + storage_access_key: fedn_admin + storage_secret_key: password + storage_bucket: fedn-models + context_bucket: fedn-context + storage_secure_mode: False \ No newline at end of file diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml new file mode 100644 index 0000000..280d55c --- /dev/null +++ b/mnist-cpp/docker-compose.yml @@ -0,0 +1,69 @@ +version: '3.3' + +# Setup network +networks: + default: + name: fedn_default + +services: + # Base services + minio: + image: minio/minio:latest + hostname: minio + environment: + - GET_HOSTS_FROM=dns + - MINIO_HOST=minio + - MINIO_PORT=9000 + - MINIO_ROOT_USER=fedn_admin + - MINIO_ROOT_PASSWORD=password + command: server /data --console-address minio:9001 + healthcheck: + test: ["CMD", "curl", "-f", "http://minio:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + ports: + - 9000:9000 + - 9001:9001 + volumes: + - minio-data:/data + + mongo: + image: mongo:latest + restart: always + environment: + - MONGO_INITDB_ROOT_USERNAME=fedn_admin + - 
MONGO_INITDB_ROOT_PASSWORD=password + ports: + - 6534:6534 + command: mongod --port 6534 + + mongo-express: + image: mongo-express:latest + restart: always + depends_on: + - "mongo" + environment: + - ME_CONFIG_MONGODB_SERVER=mongo + - ME_CONFIG_MONGODB_PORT=6534 + - ME_CONFIG_MONGODB_ADMINUSERNAME=fedn_admin + - ME_CONFIG_MONGODB_ADMINPASSWORD=password + - ME_CONFIG_BASICAUTH_USERNAME=fedn_admin + - ME_CONFIG_BASICAUTH_PASSWORD=password + ports: + - 8081:8081 + + # Reducer + reducer: + environment: + - GET_HOSTS_FROM=dns + - USER=test + - PROJECT=project + build: . + working_dir: /opt/FEDn/app + command: fedn run reducer -n reducer --init=config/settings-reducer.yaml + ports: + - 8090:8090 + +volumes: # add config here to persist data across run + minio-data: \ No newline at end of file From 0ea532c392dce1a1a3d70fc2217b10f795b35e85 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 20 Jan 2022 15:53:21 +0000 Subject: [PATCH 23/66] Compose working --- mnist-cpp/Dockerfile | 2 +- mnist-cpp/config/settings-client.yaml | 5 +++++ mnist-cpp/config/settings-combiner.yaml | 11 +++++++++++ mnist-cpp/docker-compose.yml | 21 ++++++++++++++++++++- 4 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 mnist-cpp/config/settings-client.yaml create mode 100644 mnist-cpp/config/settings-combiner.yaml diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index 61402a4..a3f4dde 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -89,7 +89,7 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* # Copy FEDn config -COPY config/settings-reducer.yaml /opt/FEDn/app/config +COPY config/*.yaml /opt/FEDn/app/config/ # Setup default environment COPY environment.yaml /tmp/environment.yaml diff --git a/mnist-cpp/config/settings-client.yaml b/mnist-cpp/config/settings-client.yaml new file mode 100644 index 0000000..cbd0ae1 --- /dev/null +++ b/mnist-cpp/config/settings-client.yaml @@ -0,0 +1,5 @@ +network_id: reducer-network +controller: + discover_host: reducer 
+ discover_port: 8090 + token: reducer_token \ No newline at end of file diff --git a/mnist-cpp/config/settings-combiner.yaml b/mnist-cpp/config/settings-combiner.yaml new file mode 100644 index 0000000..80d90b0 --- /dev/null +++ b/mnist-cpp/config/settings-combiner.yaml @@ -0,0 +1,11 @@ +network_id: fedn-test-network +controller: + discover_host: reducer + discover_port: 8090 + token: token + +combiner: + name: combiner + host: combiner + port: 12080 + max_clients: 30 \ No newline at end of file diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index 280d55c..14e8cba 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -63,7 +63,26 @@ services: working_dir: /opt/FEDn/app command: fedn run reducer -n reducer --init=config/settings-reducer.yaml ports: - - 8090:8090 + - 8090:8090 + + # Combiner + combiner: + environment: + - PYTHONUNBUFFERED=0 + - GET_HOSTS_FROM=dns + build: . + working_dir: /opt/FEDn/app + command: fedn run combiner -in config/settings-combiner.yaml + ports: + - 12080:12080 + + # Client + client: + environment: + - GET_HOSTS_FROM=dns + build: . 
+ working_dir: /opt/FEDn/app + command: fedn run client -in config/settings-client.yaml volumes: # add config here to persist data across run minio-data: \ No newline at end of file From d43bb68b77b18c623fe70fbd7a1061be4a2be937 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 21 Jan 2022 10:55:09 +0100 Subject: [PATCH 24/66] Reducer code --- mnist-cpp/environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index d3cfe65..eb0bf0d 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -8,4 +8,4 @@ dependencies: - pip=21.3.1 - ipykernel=6.7.0 - pip: - - "-e git://github.com/scaleoutsystems/fedn.git@develop#egg=fedn&subdirectory=fedn" \ No newline at end of file + - "-e git://github.com/scaleoutsystems/fedn.git@master#egg=fedn&subdirectory=fedn" \ No newline at end of file From 237c94ea4e63e165fafb7e2e5c2114177389882a Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 21 Jan 2022 14:57:44 +0000 Subject: [PATCH 25/66] compose working --- mnist-cpp/Dockerfile | 11 ++++++----- mnist-cpp/docker-compose.yml | 6 +++--- mnist-cpp/environment.yaml | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index a3f4dde..5d557fe 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -79,9 +79,10 @@ RUN apt-get update \ && chmod 0440 /etc/sudoers.d/$USERNAME \ # # Configure FEDn directories - && mkdir -p /opt/FEDn/app/certs \ - && mkdir -p /opt/FEDn/app/config \ - && chown -R $USERNAME /opt/FEDn/app \ + && mkdir -p /app/certs \ + && mkdir -p /app/client/package \ + && mkdir -p /app/config \ + && chown -R $USERNAME /app \ # # Cleanup && apt-get autoremove -y \ @@ -89,7 +90,7 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* # Copy FEDn config -COPY config/*.yaml /opt/FEDn/app/config/ +COPY config/*.yaml /app/config/ # Setup default environment COPY environment.yaml /tmp/environment.yaml @@ -108,4 +109,4 
@@ RUN conda init bash \ ENV DEBIAN_FRONTEND=dialog # Add entrypoint to conda environment for commands -ENTRYPOINT ["conda", "run", "-n", "default"] \ No newline at end of file +ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "default"] \ No newline at end of file diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index 14e8cba..af3c8ba 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -60,7 +60,7 @@ services: - USER=test - PROJECT=project build: . - working_dir: /opt/FEDn/app + working_dir: /app command: fedn run reducer -n reducer --init=config/settings-reducer.yaml ports: - 8090:8090 @@ -71,7 +71,7 @@ services: - PYTHONUNBUFFERED=0 - GET_HOSTS_FROM=dns build: . - working_dir: /opt/FEDn/app + working_dir: /app command: fedn run combiner -in config/settings-combiner.yaml ports: - 12080:12080 @@ -81,7 +81,7 @@ services: environment: - GET_HOSTS_FROM=dns build: . - working_dir: /opt/FEDn/app + working_dir: /app command: fedn run client -in config/settings-client.yaml volumes: # add config here to persist data across run diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index eb0bf0d..5ce1d09 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -8,4 +8,4 @@ dependencies: - pip=21.3.1 - ipykernel=6.7.0 - pip: - - "-e git://github.com/scaleoutsystems/fedn.git@master#egg=fedn&subdirectory=fedn" \ No newline at end of file + - "-e git://github.com/scaleoutsystems/fedn.git@v0.3.1#egg=fedn&subdirectory=fedn" \ No newline at end of file From 6b02481954c2c5256d123f8d908aede42c1bf245 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 21 Jan 2022 15:00:37 +0000 Subject: [PATCH 26/66] minor refactory --- mnist-cpp/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/main.cpp b/mnist-cpp/main.cpp index aa87c6b..48ddd9c 100644 --- a/mnist-cpp/main.cpp +++ b/mnist-cpp/main.cpp @@ -59,7 +59,7 @@ int main(int argc, char** argv) { loss.backward(); 
// backprop optimizer.step(); // update params // Print logs - std::cout << "Epoch: " << epoch << " | Batch: " << batch_index + std::cerr << "Epoch: " << epoch << " | Batch: " << batch_index << " | Loss: " << loss.item() << std::endl; } batch_index++; From d266d38f8f804cbaf61c427aa3937fbf64587c2e Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 24 Jan 2022 14:23:36 +0000 Subject: [PATCH 27/66] Binaries working --- mnist-cpp/.gitignore | 3 +- mnist-cpp/CMakeLists.txt | 11 ++++++-- mnist-cpp/metric.json | 1 + mnist-cpp/net.h | 21 ++++++++++++++ mnist-cpp/{main.cpp => train.cpp} | 46 +++++++++++-------------------- mnist-cpp/validate.cpp | 32 +++++++++++++++++++++ 6 files changed, 80 insertions(+), 34 deletions(-) create mode 100644 mnist-cpp/metric.json create mode 100644 mnist-cpp/net.h rename mnist-cpp/{main.cpp => train.cpp} (57%) create mode 100644 mnist-cpp/validate.cpp diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 6681a60..7af4031 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -2,4 +2,5 @@ build .vscode data *.pt -.devcontainer \ No newline at end of file +.devcontainer +./*.json \ No newline at end of file diff --git a/mnist-cpp/CMakeLists.txt b/mnist-cpp/CMakeLists.txt index f916bcf..a016b73 100644 --- a/mnist-cpp/CMakeLists.txt +++ b/mnist-cpp/CMakeLists.txt @@ -4,6 +4,11 @@ project(mnist-cpp) list(APPEND CMAKE_PREFIX_PATH "$ENV{TORCH_DIR}/libtorch") find_package(Torch REQUIRED) -add_executable(main main.cpp) -target_link_libraries(main "${TORCH_LIBRARIES}") -set_property(TARGET main PROPERTY CXX_STANDARD 14) \ No newline at end of file +set(HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/net.h") + +foreach(_target + train validate) + add_executable(${_target} "${_target}.cpp" "${HEADER_FILES}") + target_link_libraries(${_target} "${TORCH_LIBRARIES}") + set_property(TARGET ${_target} PROPERTY CXX_STANDARD 14) +endforeach() \ No newline at end of file diff --git a/mnist-cpp/metric.json b/mnist-cpp/metric.json new file mode 
100644 index 0000000..d2f8881 --- /dev/null +++ b/mnist-cpp/metric.json @@ -0,0 +1 @@ +{"loss": 0.365214, "acc": 0.8923} \ No newline at end of file diff --git a/mnist-cpp/net.h b/mnist-cpp/net.h new file mode 100644 index 0000000..65139c7 --- /dev/null +++ b/mnist-cpp/net.h @@ -0,0 +1,21 @@ +#include + +#define DROPOUT 0.5 + +struct Net : torch::nn::Module { + torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr}; + + Net() { + fc1 = register_module("fc1", torch::nn::Linear(784, 64)); + fc2 = register_module("fc2", torch::nn::Linear(64, 32)); + fc3 = register_module("fc3", torch::nn::Linear(32, 10)); + } + + torch::Tensor forward(torch::Tensor x) { + x = torch::relu(fc1->forward(x.reshape({x.size(0), 784}))); + x = torch::dropout(x, /*p=*/DROPOUT, /*train=*/is_training()); + x = torch::relu(fc2->forward(x)); + x = torch::log_softmax(fc3->forward(x), /*dim=*/1); + return x; + } +}; \ No newline at end of file diff --git a/mnist-cpp/main.cpp b/mnist-cpp/train.cpp similarity index 57% rename from mnist-cpp/main.cpp rename to mnist-cpp/train.cpp index 48ddd9c..1e08ce6 100644 --- a/mnist-cpp/main.cpp +++ b/mnist-cpp/train.cpp @@ -1,41 +1,28 @@ #include +#include "net.h" -#define DROPOUT 0.5 #define BATCH_SIZE 64 #define N_EPOCHS 10 - -// Define neural network -struct Net : torch::nn::Module { - torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr}; - - Net() { - fc1 = register_module("fc1", torch::nn::Linear(784, 64)); - fc2 = register_module("fc2", torch::nn::Linear(64, 32)); - fc3 = register_module("fc3", torch::nn::Linear(32, 10)); - } - - torch::Tensor forward(torch::Tensor x) { - x = torch::relu(fc1->forward(x.reshape({x.size(0), 784}))); - x = torch::dropout(x, /*p=*/DROPOUT, /*train=*/is_training()); - x = torch::relu(fc2->forward(x)); - x = torch::log_softmax(fc3->forward(x), /*dim=*/1); - return x; - } -}; +#define LEARNING_RATE 0.01 int main(int argc, char** argv) { // Init model std::string out_path; std::shared_ptr net = std::make_shared(); - if 
(argc == 3) { - torch::load(net, argv[1]); - out_path = argv[2]; + if (argc == 3) { // if 3 args + torch::load(net, argv[1]); // load from arg 1 + out_path = argv[2]; // save to arg 2 } else if (argc == 2) { - out_path = argv[1]; + out_path = argv[1]; // save to arg 1, no initial model } else { std::cerr << "Wrong number of arguments" << std::endl; + exit(1); } + // Get other params from environment vars + size_t n_splits = std::stoi(std::getenv("N_SPLITS")); + size_t split = std::stoi(std::getenv("SPLIT")); + // Multi-threaded data loader for the MNIST dataset. auto data_loader = torch::data::make_data_loader( torch::data::datasets::MNIST("./data").map( @@ -43,10 +30,7 @@ int main(int argc, char** argv) { BATCH_SIZE); // Init optimizer - torch::optim::SGD optimizer(net->parameters(), /*lr=*/0.01); - - size_t n_splits = std::stoi(std::getenv("N_SPLITS")); - size_t split = std::stoi(std::getenv("SPLIT")); + torch::optim::SGD optimizer(net->parameters(), LEARNING_RATE); // Train loop for (size_t epoch = 1; epoch <= N_EPOCHS; ++epoch) { // epoch loop @@ -59,8 +43,10 @@ int main(int argc, char** argv) { loss.backward(); // backprop optimizer.step(); // update params // Print logs - std::cerr << "Epoch: " << epoch << " | Batch: " << batch_index - << " | Loss: " << loss.item() << std::endl; + if(batch_index % (100 + split) == 0) { + std::cerr << "Epoch: " << epoch << " | Batch: " << batch_index + << " | Loss: " << loss.item() << std::endl; + } } batch_index++; } diff --git a/mnist-cpp/validate.cpp b/mnist-cpp/validate.cpp new file mode 100644 index 0000000..382f5df --- /dev/null +++ b/mnist-cpp/validate.cpp @@ -0,0 +1,32 @@ +#include +#include "net.h" + +int main(int argc, char** argv) { + + // Init model + std::string out_path; + std::shared_ptr net = std::make_shared(); + if (argc == 3) { + torch::load(net, argv[1]); // load from arg 1 + out_path = argv[2]; // save json to arg 2 + } else { + std::cerr << "Wrong number of arguments" << std::endl; + exit(1); + } + + // 
Load data + auto images = torch::data::datasets::MNIST("./data").map( + torch::data::transforms::Stack<>()).dataset().images(); + auto targets = torch::data::datasets::MNIST("./data").map( + torch::data::transforms::Stack<>()).dataset().targets(); + + // Compute metrics + auto prediction = net->forward(images); // forward pass + auto loss = torch::nll_loss(prediction, targets); // compute loss + auto acc = torch::mean((std::get<1>(prediction.max(1))==targets).to(torch::kFloat)); // compute acc + + // Print to file + std::ofstream out(out_path); + out << "{" << "\"loss\": " << loss.item() << ", \"acc\": " << acc.item() << "}"; + out.close(); +} \ No newline at end of file From eef1ee1d8710e19907347c4faf74589ee68c536c Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 24 Jan 2022 14:34:43 +0000 Subject: [PATCH 28/66] fix --- mnist-cpp/metric.json | 1 - mnist-cpp/validate.cpp | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 mnist-cpp/metric.json diff --git a/mnist-cpp/metric.json b/mnist-cpp/metric.json deleted file mode 100644 index d2f8881..0000000 --- a/mnist-cpp/metric.json +++ /dev/null @@ -1 +0,0 @@ -{"loss": 0.365214, "acc": 0.8923} \ No newline at end of file diff --git a/mnist-cpp/validate.cpp b/mnist-cpp/validate.cpp index 382f5df..4ae9304 100644 --- a/mnist-cpp/validate.cpp +++ b/mnist-cpp/validate.cpp @@ -15,9 +15,9 @@ int main(int argc, char** argv) { } // Load data - auto images = torch::data::datasets::MNIST("./data").map( + auto images = torch::data::datasets::MNIST("./data", torch::data::datasets::MNIST::Mode::kTest).map( torch::data::transforms::Stack<>()).dataset().images(); - auto targets = torch::data::datasets::MNIST("./data").map( + auto targets = torch::data::datasets::MNIST("./data", torch::data::datasets::MNIST::Mode::kTest).map( torch::data::transforms::Stack<>()).dataset().targets(); // Compute metrics From 3266f4f39faed9a0fc096d148ba41bd043feacfa Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 24 Jan 2022 
14:42:21 +0000 Subject: [PATCH 29/66] ignore --- mnist-cpp/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 7af4031..94402b9 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -3,4 +3,4 @@ build data *.pt .devcontainer -./*.json \ No newline at end of file +/*.json \ No newline at end of file From 82f9de39d6165a8fc40a98bbbae01169d8c2bda2 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 24 Jan 2022 15:13:39 +0000 Subject: [PATCH 30/66] build package --- mnist-cpp/.gitignore | 5 ++++- mnist-cpp/bin/build.sh | 20 +++++++++++++++++++- mnist-cpp/client/fedn.yaml | 5 +++++ 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 mnist-cpp/client/fedn.yaml diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 94402b9..c96dcbc 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -3,4 +3,7 @@ build data *.pt .devcontainer -/*.json \ No newline at end of file +/*.json +train +validate +package \ No newline at end of file diff --git a/mnist-cpp/bin/build.sh b/mnist-cpp/bin/build.sh index 033d119..79cc894 100755 --- a/mnist-cpp/bin/build.sh +++ b/mnist-cpp/bin/build.sh @@ -1,2 +1,20 @@ #!/bin/bash -/bin/cmake --build /examples/mnist-cpp/build --config Debug --target all -j 14 -- \ No newline at end of file +# Configure +cmake --no-warn-unused-cli \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE \ + -DCMAKE_BUILD_TYPE:STRING=Debug \ + -DCMAKE_C_COMPILER:FILEPATH=/usr/bin/x86_64-linux-gnu-gcc-11 \ + -DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/x86_64-linux-gnu-g++-11 \ + -H$PWD \ + -B$PWD/build \ + -G "Unix Makefiles" + +# Build +cmake --build $PWD/build --config Debug --target all -j $(nproc) -- + +# Copy binaries to right folder +cp build/train build/validate client + +# Make package +mkdir -p package +tar -czvf package/package.tar.gz client \ No newline at end of file diff --git a/mnist-cpp/client/fedn.yaml b/mnist-cpp/client/fedn.yaml new file mode 100644 
index 0000000..cf79039 --- /dev/null +++ b/mnist-cpp/client/fedn.yaml @@ -0,0 +1,5 @@ +entry_points: + train: + command: N_SPLITS=1 SPLIT=0 train + validate: + command: validate \ No newline at end of file From 7c73994628834fd0721a7351cc20abc36cd1fe4e Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Tue, 25 Jan 2022 14:18:06 +0000 Subject: [PATCH 31/66] refining package --- mnist-cpp/.gitignore | 6 +++-- mnist-cpp/bin/train_seed.sh | 8 ++++++ mnist-cpp/client/fedn.yaml | 4 +-- mnist-cpp/client/helper.py | 51 ++++++++++++++++++++++++++++++++++++ mnist-cpp/client/train.sh | 16 +++++++++++ mnist-cpp/client/validate.sh | 13 +++++++++ mnist-cpp/docker-compose.yml | 7 ++--- mnist-cpp/environment.yaml | 4 ++- 8 files changed, 99 insertions(+), 10 deletions(-) create mode 100755 mnist-cpp/bin/train_seed.sh create mode 100644 mnist-cpp/client/helper.py create mode 100755 mnist-cpp/client/train.sh create mode 100755 mnist-cpp/client/validate.sh diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index c96dcbc..271114d 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -3,7 +3,9 @@ build data *.pt .devcontainer -/*.json +/**/*.json train validate -package \ No newline at end of file +package +*.npz +src/fedn \ No newline at end of file diff --git a/mnist-cpp/bin/train_seed.sh b/mnist-cpp/bin/train_seed.sh new file mode 100755 index 0000000..92455d6 --- /dev/null +++ b/mnist-cpp/bin/train_seed.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -e + +# Train +N_SPLITS=1 SPLIT=0 client/train seed.pt + +# Make npz +python client/helper.py pt2np seed.pt seed \ No newline at end of file diff --git a/mnist-cpp/client/fedn.yaml b/mnist-cpp/client/fedn.yaml index cf79039..ca59c7c 100644 --- a/mnist-cpp/client/fedn.yaml +++ b/mnist-cpp/client/fedn.yaml @@ -1,5 +1,5 @@ entry_points: train: - command: N_SPLITS=1 SPLIT=0 train + command: N_SPLITS=1 SPLIT=0 ./train.sh validate: - command: validate \ No newline at end of file + command: ./validate.sh \ No newline at end of file diff 
--git a/mnist-cpp/client/helper.py b/mnist-cpp/client/helper.py new file mode 100644 index 0000000..2b1c31e --- /dev/null +++ b/mnist-cpp/client/helper.py @@ -0,0 +1,51 @@ +import fire +import collections + +from fedn.utils.pytorchhelper import PytorchHelper + +import torch +from torch import nn +from torch.nn import functional as F + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.fc1 = nn.Linear(784, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, 10) + + def forward(self, x): + x = F.relu(self.fc1(x.reshape(x.size(0), 784))) + x = F.dropout(x,p=0.5, training=self.training) + x = F.relu(self.fc2(x)) + x = F.log_softmax(self.fc3(x), dim=1) + return x + +def np2pt(np_path, pt_path): + # Load weights + helper = PytorchHelper() + weights_np = helper.load_model(np_path) + weights = collections.OrderedDict() + for w in weights_np: + weights[w] = torch.tensor(weights_np[w]) + + # Save model + model = Net() + model.load_state_dict(weights) + model.eval() + torch.jit.script(model).save(pt_path) + +def pt2np(pt_path, np_path): + # Load weights + weights = torch.jit.load(pt_path).state_dict() + weights_np = collections.OrderedDict() + for w in weights: + weights_np[w] = weights[w].cpu().detach().numpy() + + # Save + helper = PytorchHelper() + helper.save_model(weights_np, np_path) + +if __name__ == '__main__': + fire.Fire() \ No newline at end of file diff --git a/mnist-cpp/client/train.sh b/mnist-cpp/client/train.sh new file mode 100755 index 0000000..4f00375 --- /dev/null +++ b/mnist-cpp/client/train.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +# Parse args +model_in="$1" +model_in_name="$(basename ${model_in%.*})" +model_out="$2" + +# Convert npz to pt +python helper.py np2pt "$model_in" "$model_in_name".pt + +# Train +N_SPLITS=1 SPLIT=0 ./train "$model_in_name".pt "$model_in_name".retrain.pt + +# Convert pt to npz +python helper.py pt2np "$model_in_name".retrain.pt "$model_out" \ No newline at end of file diff --git 
a/mnist-cpp/client/validate.sh b/mnist-cpp/client/validate.sh new file mode 100755 index 0000000..6c388c7 --- /dev/null +++ b/mnist-cpp/client/validate.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +# Parse args +model_in="$1" +model_in_name="$(basename ${model_in%.*})" +json_out="$2" + +# Convert npz to pt +python helper.py np2pt "$model_in" "$model_in_name".pt + +# Train +./validate "$model_in_name".pt "$json_out" \ No newline at end of file diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index af3c8ba..65f5fcc 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -25,8 +25,6 @@ services: ports: - 9000:9000 - 9001:9001 - volumes: - - minio-data:/data mongo: image: mongo:latest @@ -83,6 +81,5 @@ services: build: . working_dir: /app command: fedn run client -in config/settings-client.yaml - -volumes: # add config here to persist data across run - minio-data: \ No newline at end of file + volumes: + - ./data:/app/data \ No newline at end of file diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index 5ce1d09..866ffff 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -8,4 +8,6 @@ dependencies: - pip=21.3.1 - ipykernel=6.7.0 - pip: - - "-e git://github.com/scaleoutsystems/fedn.git@v0.3.1#egg=fedn&subdirectory=fedn" \ No newline at end of file + - "-e git://github.com/scaleoutsystems/fedn.git@v0.3.1#egg=fedn&subdirectory=fedn" + - torch==1.10.1 + - fire==0.3.1 \ No newline at end of file From 042744c64bbacfe23c777a372a3f209d4ceb6672 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 26 Jan 2022 16:10:13 +0100 Subject: [PATCH 32/66] configure app dir --- mnist-cpp/Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index 5d557fe..d4d09c5 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -20,6 +20,7 @@ ENV DEBIAN_FRONTEND=noninteractive # Other env ENV TORCH_DIR=/opt/torch +ENV DATA_DIR=/app/data # Install 
apt deps SHELL ["/bin/bash", "-c"] @@ -84,6 +85,10 @@ RUN apt-get update \ && mkdir -p /app/config \ && chown -R $USERNAME /app \ # + # Configure data dir + && mkdir -p $DATA_DIR \ + && chown $USERNAME $DATA_DIR \ + # # Cleanup && apt-get autoremove -y \ && apt-get clean -y \ From 549982c9c7404c25c5aa88fab9784954002d60bf Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 26 Jan 2022 15:59:28 +0000 Subject: [PATCH 33/66] Working with one combiner --- mnist-cpp/.gitignore | 4 ++-- mnist-cpp/bin/download_data.sh | 3 +-- mnist-cpp/client/fedn.yaml | 2 +- mnist-cpp/client/train.sh | 2 +- mnist-cpp/config/settings-client.yaml | 5 ++--- mnist-cpp/config/settings-combiner.yaml | 1 - mnist-cpp/docker-compose.yml | 8 ++++++-- mnist-cpp/train.cpp | 7 ++++--- mnist-cpp/validate.cpp | 7 +++++-- 9 files changed, 22 insertions(+), 17 deletions(-) diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 271114d..043733b 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -1,6 +1,5 @@ build .vscode -data *.pt .devcontainer /**/*.json @@ -8,4 +7,5 @@ train validate package *.npz -src/fedn \ No newline at end of file +src/fedn +.env \ No newline at end of file diff --git a/mnist-cpp/bin/download_data.sh b/mnist-cpp/bin/download_data.sh index 2240252..a4fc344 100755 --- a/mnist-cpp/bin/download_data.sh +++ b/mnist-cpp/bin/download_data.sh @@ -1,7 +1,6 @@ #!/bin/bash -mkdir -p data -pushd data +pushd $DATA_DIR wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz diff --git a/mnist-cpp/client/fedn.yaml b/mnist-cpp/client/fedn.yaml index ca59c7c..fcfb597 100644 --- a/mnist-cpp/client/fedn.yaml +++ b/mnist-cpp/client/fedn.yaml @@ -1,5 +1,5 @@ entry_points: train: - command: N_SPLITS=1 SPLIT=0 ./train.sh + command: ./train.sh validate: command: ./validate.sh \ No newline at end of file diff --git a/mnist-cpp/client/train.sh 
b/mnist-cpp/client/train.sh index 4f00375..c32d49e 100755 --- a/mnist-cpp/client/train.sh +++ b/mnist-cpp/client/train.sh @@ -10,7 +10,7 @@ model_out="$2" python helper.py np2pt "$model_in" "$model_in_name".pt # Train -N_SPLITS=1 SPLIT=0 ./train "$model_in_name".pt "$model_in_name".retrain.pt +./train "$model_in_name".pt "$model_in_name".retrain.pt # Convert pt to npz python helper.py pt2np "$model_in_name".retrain.pt "$model_out" \ No newline at end of file diff --git a/mnist-cpp/config/settings-client.yaml b/mnist-cpp/config/settings-client.yaml index cbd0ae1..4254685 100644 --- a/mnist-cpp/config/settings-client.yaml +++ b/mnist-cpp/config/settings-client.yaml @@ -1,5 +1,4 @@ -network_id: reducer-network +network_id: fedn-test-network controller: discover_host: reducer - discover_port: 8090 - token: reducer_token \ No newline at end of file + discover_port: 8090 \ No newline at end of file diff --git a/mnist-cpp/config/settings-combiner.yaml b/mnist-cpp/config/settings-combiner.yaml index 80d90b0..3a19ad9 100644 --- a/mnist-cpp/config/settings-combiner.yaml +++ b/mnist-cpp/config/settings-combiner.yaml @@ -2,7 +2,6 @@ network_id: fedn-test-network controller: discover_host: reducer discover_port: 8090 - token: token combiner: name: combiner diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index 65f5fcc..dc4c619 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -27,7 +27,7 @@ services: - 9001:9001 mongo: - image: mongo:latest + image: mongo:5.0.2 restart: always environment: - MONGO_INITDB_ROOT_USERNAME=fedn_admin @@ -78,8 +78,12 @@ services: client: environment: - GET_HOSTS_FROM=dns + - N_SPLITS=1 + - SPLIT=0 build: . 
working_dir: /app command: fedn run client -in config/settings-client.yaml volumes: - - ./data:/app/data \ No newline at end of file + - ${DATA_DIR}:/app/data + deploy: + replicas: 0 \ No newline at end of file diff --git a/mnist-cpp/train.cpp b/mnist-cpp/train.cpp index 1e08ce6..b6a0148 100644 --- a/mnist-cpp/train.cpp +++ b/mnist-cpp/train.cpp @@ -20,12 +20,13 @@ int main(int argc, char** argv) { } // Get other params from environment vars - size_t n_splits = std::stoi(std::getenv("N_SPLITS")); - size_t split = std::stoi(std::getenv("SPLIT")); + size_t n_splits = std::stoi(std::getenv("N_SPLITS")); + size_t split = std::stoi(std::getenv("SPLIT")); + std::string data_dir = std::getenv("DATA_DIR"); // Multi-threaded data loader for the MNIST dataset. auto data_loader = torch::data::make_data_loader( - torch::data::datasets::MNIST("./data").map( + torch::data::datasets::MNIST(data_dir).map( torch::data::transforms::Stack<>()), BATCH_SIZE); diff --git a/mnist-cpp/validate.cpp b/mnist-cpp/validate.cpp index 4ae9304..5ef556e 100644 --- a/mnist-cpp/validate.cpp +++ b/mnist-cpp/validate.cpp @@ -13,11 +13,14 @@ int main(int argc, char** argv) { std::cerr << "Wrong number of arguments" << std::endl; exit(1); } + + // Parse env + std::string data_dir = std::getenv("DATA_DIR"); // Load data - auto images = torch::data::datasets::MNIST("./data", torch::data::datasets::MNIST::Mode::kTest).map( + auto images = torch::data::datasets::MNIST(data_dir, torch::data::datasets::MNIST::Mode::kTest).map( torch::data::transforms::Stack<>()).dataset().images(); - auto targets = torch::data::datasets::MNIST("./data", torch::data::datasets::MNIST::Mode::kTest).map( + auto targets = torch::data::datasets::MNIST(data_dir, torch::data::datasets::MNIST::Mode::kTest).map( torch::data::transforms::Stack<>()).dataset().targets(); // Compute metrics From 96739042ce81e0f6c0ab44b6f59b0aa589e54e14 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 27 Jan 2022 14:29:22 +0000 Subject: [PATCH 34/66] 
multicontainer support --- mnist-cpp/client/train.sh | 3 +++ mnist-cpp/docker-compose.yml | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/mnist-cpp/client/train.sh b/mnist-cpp/client/train.sh index c32d49e..ed821c8 100755 --- a/mnist-cpp/client/train.sh +++ b/mnist-cpp/client/train.sh @@ -10,6 +10,9 @@ model_out="$2" python helper.py np2pt "$model_in" "$model_in_name".pt # Train +export N_SPLITS=$(sudo docker ps --format "{{ .Names }}" | grep client | wc -l) +SPLIT=$(sudo docker ps | grep $(hostname) | awk '{print substr($NF, length($NF), length($NF))}') +export SPLIT=$(($SPLIT - 1)) ./train "$model_in_name".pt "$model_in_name".retrain.pt # Convert pt to npz diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index dc4c619..ec7defb 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -78,12 +78,11 @@ services: client: environment: - GET_HOSTS_FROM=dns - - N_SPLITS=1 - - SPLIT=0 build: . working_dir: /app command: fedn run client -in config/settings-client.yaml volumes: - ${DATA_DIR}:/app/data + - /var/run/docker.sock:/var/run/docker.sock deploy: replicas: 0 \ No newline at end of file From 283ae16abc979305b76e872ca1aeef342f4b4c07 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 27 Jan 2022 14:51:07 +0000 Subject: [PATCH 35/66] consolidate build script --- mnist-cpp/bin/build.sh | 8 ++++++-- mnist-cpp/bin/train_seed.sh | 8 -------- 2 files changed, 6 insertions(+), 10 deletions(-) delete mode 100755 mnist-cpp/bin/train_seed.sh diff --git a/mnist-cpp/bin/build.sh b/mnist-cpp/bin/build.sh index 79cc894..b36fc9f 100755 --- a/mnist-cpp/bin/build.sh +++ b/mnist-cpp/bin/build.sh @@ -12,9 +12,13 @@ cmake --no-warn-unused-cli \ # Build cmake --build $PWD/build --config Debug --target all -j $(nproc) -- -# Copy binaries to right folder +# Copy binaries to the right folder cp build/train build/validate client # Make package mkdir -p package -tar -czvf package/package.tar.gz client \ No newline at end of 
file +tar -czvf package/package.tar.gz client + +# Make seed +SPLIT=0 N_SPLITS=1 build/train seed.pt +python client/helper.py pt2np seed.pt seed \ No newline at end of file diff --git a/mnist-cpp/bin/train_seed.sh b/mnist-cpp/bin/train_seed.sh deleted file mode 100755 index 92455d6..0000000 --- a/mnist-cpp/bin/train_seed.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -set -e - -# Train -N_SPLITS=1 SPLIT=0 client/train seed.pt - -# Make npz -python client/helper.py pt2np seed.pt seed \ No newline at end of file From 1513fba7945a25c52f363423f566c317301551f8 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 27 Jan 2022 14:51:45 +0000 Subject: [PATCH 36/66] vscode settings --- mnist-cpp/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 043733b..75939e0 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -1,5 +1,4 @@ build -.vscode *.pt .devcontainer /**/*.json From 5ed3035fc1c2156268341d658c909558570963b0 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 27 Jan 2022 16:10:55 +0000 Subject: [PATCH 37/66] Linting/formatting --- mnist-cpp/.clang-format | 2 + mnist-cpp/.gitignore | 3 +- mnist-cpp/.hadolint.yaml | 7 +++ mnist-cpp/.vscode/settings.json | 79 +++++++++++++++++++++++++++++++++ mnist-cpp/Dockerfile | 4 +- mnist-cpp/client/helper.py | 12 ++--- mnist-cpp/docker-compose.yml | 10 ++++- mnist-cpp/environment.yaml | 3 +- mnist-cpp/train.cpp | 36 ++++++++------- mnist-cpp/validate.cpp | 61 ++++++++++++++----------- 10 files changed, 163 insertions(+), 54 deletions(-) create mode 100644 mnist-cpp/.clang-format create mode 100644 mnist-cpp/.hadolint.yaml create mode 100644 mnist-cpp/.vscode/settings.json diff --git a/mnist-cpp/.clang-format b/mnist-cpp/.clang-format new file mode 100644 index 0000000..68740ba --- /dev/null +++ b/mnist-cpp/.clang-format @@ -0,0 +1,2 @@ +--- +BasedOnStyle: Google \ No newline at end of file diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 
75939e0..a173e57 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -7,4 +7,5 @@ validate package *.npz src/fedn -.env \ No newline at end of file +.env +!.vscode/settings.json \ No newline at end of file diff --git a/mnist-cpp/.hadolint.yaml b/mnist-cpp/.hadolint.yaml new file mode 100644 index 0000000..e5ed1fb --- /dev/null +++ b/mnist-cpp/.hadolint.yaml @@ -0,0 +1,7 @@ +ignored: + - DL3008 + - DL4001 + - SC2046 + - DL4006 + - SC2164 + - DL3004 \ No newline at end of file diff --git a/mnist-cpp/.vscode/settings.json b/mnist-cpp/.vscode/settings.json new file mode 100644 index 0000000..78d7bec --- /dev/null +++ b/mnist-cpp/.vscode/settings.json @@ -0,0 +1,79 @@ +{ + "files.associations": { + "array": "cpp", + "atomic": "cpp", + "bit": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "cctype": "cpp", + "chrono": "cpp", + "cinttypes": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "compare": "cpp", + "complex": "cpp", + "concepts": "cpp", + "condition_variable": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "forward_list": "cpp", + "list": "cpp", + "map": "cpp", + "set": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "netfwd": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "ratio": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "fstream": "cpp", + "initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "mutex": "cpp", + "new": "cpp", + "numbers": "cpp", + "ostream": "cpp", + "semaphore": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "stop_token": 
"cpp", + "streambuf": "cpp", + "thread": "cpp", + "typeindex": "cpp", + "typeinfo": "cpp", + "valarray": "cpp", + "variant": "cpp" + }, + "python.languageServer": "None", + "python.pythonPath": "/opt/conda/envs/default/bin/python", + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": true + } +} \ No newline at end of file diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index d4d09c5..dbfe17b 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -61,13 +61,13 @@ RUN apt-get update \ # Install libtorch && mkdir $TORCH_DIR \ && pushd $TORCH_DIR \ - && wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}%2Bcpu.zip \ + && wget --progress=dot:giga https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}%2Bcpu.zip \ && unzip libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ && rm -r libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ && popd \ # # Install conda - && wget https://repo.anaconda.com/miniconda/Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh \ + && wget --progress=dot:giga https://repo.anaconda.com/miniconda/Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh \ && bash Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh -b -p /opt/conda \ && rm -f Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh \ && ln -s /opt/conda/bin/conda /bin/conda \ diff --git a/mnist-cpp/client/helper.py b/mnist-cpp/client/helper.py index 2b1c31e..3198fc7 100644 --- a/mnist-cpp/client/helper.py +++ b/mnist-cpp/client/helper.py @@ -1,9 +1,8 @@ -import fire import collections -from fedn.utils.pytorchhelper import PytorchHelper - +import fire import torch +from fedn.utils.pytorchhelper import PytorchHelper from torch import nn from torch.nn import functional as F @@ -17,11 +16,12 @@ def __init__(self): def forward(self, x): x = F.relu(self.fc1(x.reshape(x.size(0), 784))) - x = F.dropout(x,p=0.5, training=self.training) + x = 
F.dropout(x, p=0.5, training=self.training) x = F.relu(self.fc2(x)) x = F.log_softmax(self.fc3(x), dim=1) return x + def np2pt(np_path, pt_path): # Load weights helper = PytorchHelper() @@ -36,6 +36,7 @@ def np2pt(np_path, pt_path): model.eval() torch.jit.script(model).save(pt_path) + def pt2np(pt_path, np_path): # Load weights weights = torch.jit.load(pt_path).state_dict() @@ -47,5 +48,6 @@ def pt2np(pt_path, np_path): helper = PytorchHelper() helper.save_model(weights_np, np_path) + if __name__ == '__main__': - fire.Fire() \ No newline at end of file + fire.Fire() diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index ec7defb..a714738 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -18,7 +18,13 @@ services: - MINIO_ROOT_PASSWORD=password command: server /data --console-address minio:9001 healthcheck: - test: ["CMD", "curl", "-f", "http://minio:9000/minio/health/live"] + test: + [ + "CMD", + "curl", + "-f", + "http://minio:9000/minio/health/live" + ] interval: 30s timeout: 20s retries: 3 @@ -85,4 +91,4 @@ services: - ${DATA_DIR}:/app/data - /var/run/docker.sock:/var/run/docker.sock deploy: - replicas: 0 \ No newline at end of file + replicas: 0 diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index 866ffff..ad607a3 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -10,4 +10,5 @@ dependencies: - pip: - "-e git://github.com/scaleoutsystems/fedn.git@v0.3.1#egg=fedn&subdirectory=fedn" - torch==1.10.1 - - fire==0.3.1 \ No newline at end of file + - fire==0.3.1 + - autopep8==1.6.0 \ No newline at end of file diff --git a/mnist-cpp/train.cpp b/mnist-cpp/train.cpp index b6a0148..20464ab 100644 --- a/mnist-cpp/train.cpp +++ b/mnist-cpp/train.cpp @@ -1,19 +1,20 @@ #include + #include "net.h" #define BATCH_SIZE 64 #define N_EPOCHS 10 #define LEARNING_RATE 0.01 -int main(int argc, char** argv) { +int main(int argc, char **argv) { // Init model std::string out_path; 
std::shared_ptr net = std::make_shared(); - if (argc == 3) { // if 3 args - torch::load(net, argv[1]); // load from arg 1 - out_path = argv[2]; // save to arg 2 + if (argc == 3) { // if 3 args + torch::load(net, argv[1]); // load from arg 1 + out_path = argv[2]; // save to arg 2 } else if (argc == 2) { - out_path = argv[1]; // save to arg 1, no initial model + out_path = argv[1]; // save to arg 1, no initial model } else { std::cerr << "Wrong number of arguments" << std::endl; exit(1); @@ -25,26 +26,27 @@ int main(int argc, char** argv) { std::string data_dir = std::getenv("DATA_DIR"); // Multi-threaded data loader for the MNIST dataset. - auto data_loader = torch::data::make_data_loader( - torch::data::datasets::MNIST(data_dir).map( - torch::data::transforms::Stack<>()), - BATCH_SIZE); + auto data_loader = + torch::data::make_data_loader(torch::data::datasets::MNIST(data_dir).map( + torch::data::transforms::Stack<>()), + BATCH_SIZE); // Init optimizer torch::optim::SGD optimizer(net->parameters(), LEARNING_RATE); // Train loop - for (size_t epoch = 1; epoch <= N_EPOCHS; ++epoch) { // epoch loop + for (size_t epoch = 1; epoch <= N_EPOCHS; ++epoch) { // epoch loop size_t batch_index = 0; - for (auto& batch : *data_loader) { // batch loop + for (auto &batch : *data_loader) { // batch loop if (batch_index % n_splits == split) { - optimizer.zero_grad(); // reset gradients - torch::Tensor prediction = net->forward(batch.data); // forward pass - torch::Tensor loss = torch::nll_loss(prediction, batch.target); // compute loss - loss.backward(); // backprop - optimizer.step(); // update params + optimizer.zero_grad(); // reset gradients + torch::Tensor prediction = net->forward(batch.data); // forward pass + torch::Tensor loss = + torch::nll_loss(prediction, batch.target); // compute loss + loss.backward(); // backprop + optimizer.step(); // update params // Print logs - if(batch_index % (100 + split) == 0) { + if (batch_index % (100 + split) == 0) { std::cerr << "Epoch: " 
<< epoch << " | Batch: " << batch_index << " | Loss: " << loss.item() << std::endl; } diff --git a/mnist-cpp/validate.cpp b/mnist-cpp/validate.cpp index 5ef556e..80e8313 100644 --- a/mnist-cpp/validate.cpp +++ b/mnist-cpp/validate.cpp @@ -1,35 +1,44 @@ #include + #include "net.h" int main(int argc, char** argv) { + // Init model + std::string out_path; + std::shared_ptr net = std::make_shared(); + if (argc == 3) { + torch::load(net, argv[1]); // load from arg 1 + out_path = argv[2]; // save json to arg 2 + } else { + std::cerr << "Wrong number of arguments" << std::endl; + exit(1); + } - // Init model - std::string out_path; - std::shared_ptr net = std::make_shared(); - if (argc == 3) { - torch::load(net, argv[1]); // load from arg 1 - out_path = argv[2]; // save json to arg 2 - } else { - std::cerr << "Wrong number of arguments" << std::endl; - exit(1); - } - - // Parse env - std::string data_dir = std::getenv("DATA_DIR"); + // Parse env + std::string data_dir = std::getenv("DATA_DIR"); - // Load data - auto images = torch::data::datasets::MNIST(data_dir, torch::data::datasets::MNIST::Mode::kTest).map( - torch::data::transforms::Stack<>()).dataset().images(); - auto targets = torch::data::datasets::MNIST(data_dir, torch::data::datasets::MNIST::Mode::kTest).map( - torch::data::transforms::Stack<>()).dataset().targets(); + // Load data + auto images = torch::data::datasets::MNIST( + data_dir, torch::data::datasets::MNIST::Mode::kTest) + .map(torch::data::transforms::Stack<>()) + .dataset() + .images(); + auto targets = torch::data::datasets::MNIST( + data_dir, torch::data::datasets::MNIST::Mode::kTest) + .map(torch::data::transforms::Stack<>()) + .dataset() + .targets(); - // Compute metrics - auto prediction = net->forward(images); // forward pass - auto loss = torch::nll_loss(prediction, targets); // compute loss - auto acc = torch::mean((std::get<1>(prediction.max(1))==targets).to(torch::kFloat)); // compute acc + // Compute metrics + auto prediction = 
net->forward(images); // forward pass + auto loss = torch::nll_loss(prediction, targets); // compute loss + auto acc = torch::mean((std::get<1>(prediction.max(1)) == targets) + .to(torch::kFloat)); // compute acc - // Print to file - std::ofstream out(out_path); - out << "{" << "\"loss\": " << loss.item() << ", \"acc\": " << acc.item() << "}"; - out.close(); + // Print to file + std::ofstream out(out_path); + out << "{" + << "\"loss\": " << loss.item() + << ", \"acc\": " << acc.item() << "}"; + out.close(); } \ No newline at end of file From 4735beabd17f7c79ea93fae9693d9fe381a93c9a Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 28 Jan 2022 10:48:04 +0100 Subject: [PATCH 38/66] fixes --- mnist-cpp/.devcontainer/devcontainer.json.tpl | 23 +++++++++++++++++++ mnist-cpp/.gitignore | 1 - mnist-cpp/bin/launch.sh | 18 +++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 mnist-cpp/.devcontainer/devcontainer.json.tpl create mode 100755 mnist-cpp/bin/launch.sh diff --git a/mnist-cpp/.devcontainer/devcontainer.json.tpl b/mnist-cpp/.devcontainer/devcontainer.json.tpl new file mode 100644 index 0000000..61c5355 --- /dev/null +++ b/mnist-cpp/.devcontainer/devcontainer.json.tpl @@ -0,0 +1,23 @@ +{ + "name": "devcontainer", + "dockerFile": "../Dockerfile", + "context": "..", + "remoteUser": "default", + "extensions": [ + "ms-vscode.cpptools", + "ms-vscode.cpptools-extension-pack", + "ms-vscode.cpptools-themes", + "exiasr.hadolint", + "yzhang.markdown-all-in-one", + "ms-python.python", + "ms-toolsai.jupyter", + "ms-azuretools.vscode-docker" + ], + "mounts": [ + "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind,consistency=default", + ], + "runArgs": [ + "--net=host" + ], + "forwardPorts": [8090, 9000, 9001, 8081], +} \ No newline at end of file diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index a173e57..0f91018 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -1,6 +1,5 @@ build *.pt -.devcontainer 
/**/*.json train validate diff --git a/mnist-cpp/bin/launch.sh b/mnist-cpp/bin/launch.sh new file mode 100755 index 0000000..6ac62bd --- /dev/null +++ b/mnist-cpp/bin/launch.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Build container +docker build \ + -f Dockerfile \ + -t local/mnist-cpp \ + --build-arg DOCKER_USER=$(whoami) \ + --build-arg USER_UID=$UID \ + --build-arg $(id -u $USER) \ + . + +# Run +docker run --rm -it \ + -v $PWD:/mnist-cpp -w /mnist-cpp \ + -v /var/run/docker.sock:/var/run/docker.sock \ + --net=host \ + -u default \ + local/mnist-cpp /bin/bash \ No newline at end of file From cee907fb75a6bc7463bfbf614c0a2edbef12cc22 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 28 Jan 2022 11:03:31 +0100 Subject: [PATCH 39/66] fix --- mnist-cpp/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index a714738..36f52b9 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -88,7 +88,7 @@ services: working_dir: /app command: fedn run client -in config/settings-client.yaml volumes: - - ${DATA_DIR}:/app/data + - ${DATA_DIR}/:/app/data - /var/run/docker.sock:/var/run/docker.sock deploy: replicas: 0 From f36510ff1da45b2cbc34407301abf39469df44ca Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 28 Jan 2022 11:19:10 +0100 Subject: [PATCH 40/66] add env --- mnist-cpp/.env | 1 + mnist-cpp/.gitignore | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 mnist-cpp/.env diff --git a/mnist-cpp/.env b/mnist-cpp/.env new file mode 100644 index 0000000..c350ea1 --- /dev/null +++ b/mnist-cpp/.env @@ -0,0 +1 @@ +DATA_DIR=/app/data \ No newline at end of file diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 0f91018..cb17014 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -6,5 +6,5 @@ validate package *.npz src/fedn -.env +!.env !.vscode/settings.json \ No newline at end of file From 
dd03e2faea4fd9fedf38e6a170c88b572bc075ce Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 28 Jan 2022 13:24:22 +0100 Subject: [PATCH 41/66] delete --- mnist-cpp/.env | 1 - 1 file changed, 1 deletion(-) delete mode 100644 mnist-cpp/.env diff --git a/mnist-cpp/.env b/mnist-cpp/.env deleted file mode 100644 index c350ea1..0000000 --- a/mnist-cpp/.env +++ /dev/null @@ -1 +0,0 @@ -DATA_DIR=/app/data \ No newline at end of file From 3fe24a557f41a3245113ec3741f062716b300d07 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 28 Jan 2022 13:49:07 +0100 Subject: [PATCH 42/66] Example working with lanch --- mnist-cpp/.gitignore | 5 +++-- mnist-cpp/bin/download_data.sh | 3 ++- mnist-cpp/bin/launch.sh | 4 +++- mnist-cpp/docker-compose.yml | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index cb17014..171fa0a 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -6,5 +6,6 @@ validate package *.npz src/fedn -!.env -!.vscode/settings.json \ No newline at end of file +!.vscode/settings.json +data +.env \ No newline at end of file diff --git a/mnist-cpp/bin/download_data.sh b/mnist-cpp/bin/download_data.sh index a4fc344..2240252 100755 --- a/mnist-cpp/bin/download_data.sh +++ b/mnist-cpp/bin/download_data.sh @@ -1,6 +1,7 @@ #!/bin/bash -pushd $DATA_DIR +mkdir -p data +pushd data wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz diff --git a/mnist-cpp/bin/launch.sh b/mnist-cpp/bin/launch.sh index 6ac62bd..05785ef 100755 --- a/mnist-cpp/bin/launch.sh +++ b/mnist-cpp/bin/launch.sh @@ -13,6 +13,8 @@ docker build \ docker run --rm -it \ -v $PWD:/mnist-cpp -w /mnist-cpp \ -v /var/run/docker.sock:/var/run/docker.sock \ + -v $PWD/data:/app/data \ --net=host \ -u default \ - local/mnist-cpp /bin/bash \ No newline at end of file + local/mnist-cpp \ + /bin/bash -c "echo 
HOST_DATA_DIR=$PWD/data > .env && /bin/bash" \ No newline at end of file diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index 36f52b9..b9c4c8c 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -88,7 +88,7 @@ services: working_dir: /app command: fedn run client -in config/settings-client.yaml volumes: - - ${DATA_DIR}/:/app/data + - ${HOST_DATA_DIR}/:/app/data - /var/run/docker.sock:/var/run/docker.sock deploy: replicas: 0 From 3234be19d3dadb811dd0c9fbb16d6a9ef4847140 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 28 Jan 2022 14:21:49 +0100 Subject: [PATCH 43/66] docs --- mnist-cpp/README.md | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 mnist-cpp/README.md diff --git a/mnist-cpp/README.md b/mnist-cpp/README.md new file mode 100644 index 0000000..04bdb23 --- /dev/null +++ b/mnist-cpp/README.md @@ -0,0 +1,48 @@ +# MNIST example - Pytorch C++ +This is an example of the classic MNIST hand-written text recognition task using FEDn with the PyTorch C++ API. + +## Table of Contents +- [MNIST example - Pytorch C++](#mnist-example---pytorch-c) + - [Table of Contents](#table-of-contents) + - [Prerequisites](#prerequisites) + - [Running the example](#running-the-example) + - [Clean up](#clean-up) + +## Prerequisites +The only prerequisite to run this example is [Docker](https://www.docker.com). + +## Running the example + +Start by downloading the data: + +``` +bin/download_data.sh +``` + +Start the Docker environment: +``` +bin/launch.sh +``` +> This may take a few minutes. + +Build the compute package and train the seed model: +``` +bin/build.sh +``` +> This may take a few minutes. After completion `package/package.tgz` and `seed.npz` should be built in your current working directory. + +Start reduce and combiner network: +``` +sudo docker-compose up -d +``` +> This may take a few minutes. 
After this is done you should be able to access the reducer interface at https://localhost:8090. + +Now navigate to https://localhost:8090 and upload `package/package.tgz` and `seed.npz`. After you are done you can deploy two clients by running: +``` +sudo docker-compose up -d --scale client=2 +``` + +Finally, you can navigate again to https://localhost:8090 and start the experiment from the "control" tab. + +## Clean up +To clean up you can run: `docker-compose down`. To exit the Docker environment simply run `exit`. \ No newline at end of file From b91b2577d6ae78e2a51573d38835782905406b68 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 28 Jan 2022 14:24:54 +0100 Subject: [PATCH 44/66] Docker compose --- mnist-cpp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/README.md b/mnist-cpp/README.md index 04bdb23..5779542 100644 --- a/mnist-cpp/README.md +++ b/mnist-cpp/README.md @@ -45,4 +45,4 @@ sudo docker-compose up -d --scale client=2 Finally, you can navigate again to https://localhost:8090 and start the experiment from the "control" tab. ## Clean up -To clean up you can run: `docker-compose down`. To exit the Docker environment simply run `exit`. \ No newline at end of file +To clean up you can run: `sudo docker-compose down`. To exit the Docker environment simply run `exit`. \ No newline at end of file From 21d1f8543bbe55d91bb8f207ecf544181cad74a2 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 31 Jan 2022 11:12:53 +0100 Subject: [PATCH 45/66] minor --- mnist-cpp/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/mnist-cpp/README.md b/mnist-cpp/README.md index 5779542..3e105d3 100644 --- a/mnist-cpp/README.md +++ b/mnist-cpp/README.md @@ -14,7 +14,6 @@ The only prerequisite to run this example is [Docker](https://www.docker.com). 
## Running the example Start by downloading the data: - ``` bin/download_data.sh ``` From 0296f9107990ad414cfd73152d2f802dd5895a38 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 31 Jan 2022 11:13:25 +0100 Subject: [PATCH 46/66] typo --- mnist-cpp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/README.md b/mnist-cpp/README.md index 3e105d3..eaf0042 100644 --- a/mnist-cpp/README.md +++ b/mnist-cpp/README.md @@ -30,7 +30,7 @@ bin/build.sh ``` > This may take a few minutes. After completion `package/package.tgz` and `seed.npz` should be built in your current working directory. -Start reduce and combiner network: +Start reducer and combiner network: ``` sudo docker-compose up -d ``` From a0da0ebba8a783b06121b5f7ee9e8a1bd3d17a00 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 31 Jan 2022 13:59:31 +0100 Subject: [PATCH 47/66] gramine deps --- mnist-cpp/.hadolint.yaml | 3 ++- mnist-cpp/Dockerfile | 56 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/mnist-cpp/.hadolint.yaml b/mnist-cpp/.hadolint.yaml index e5ed1fb..a9c6338 100644 --- a/mnist-cpp/.hadolint.yaml +++ b/mnist-cpp/.hadolint.yaml @@ -4,4 +4,5 @@ ignored: - SC2046 - DL4006 - SC2164 - - DL3004 \ No newline at end of file + - DL3004 + - SC2086 \ No newline at end of file diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index dbfe17b..f4b57fb 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -9,6 +9,8 @@ ARG NUMCPP_VERSION=2.6.2 ARG DOCKER_VERSION=19.03.9 ARG CONDA_VERSION=4.9.2 ARG COMPOSE_VERSION=1.29.2 +ARG SGX_SDK_VERSION=2.15.101.1 +ARG SGX_PSW_VERSION=2.15.1 # Non-root user with sudo access ARG USERNAME=default @@ -37,9 +39,28 @@ RUN apt-get update \ curl \ git \ vim \ + openssh-client \ build-essential \ + autoconf \ + libtool \ + pkg-config \ + clang-format \ + googletest \ + libgtest-dev \ + autoconf \ + bison \ + gawk \ + ninja-build \ + python3 \ + python3-pip \ + python3-click \ + 
python3-jinja2 \ + libcurl4-openssl-dev \ + libprotobuf-c-dev \ + protobuf-c-compiler \ + python3-protobuf \ + linux-headers-amd64 \ unzip \ - openssh-client \ # # Hadolint && wget --progress=dot:giga -O /bin/hadolint \ @@ -65,6 +86,39 @@ RUN apt-get update \ && unzip libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ && rm -r libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+cpu.zip \ && popd \ + # + # SGX SDK + && wget --progress=dot:giga https://download.01.org/intel-sgx/latest/linux-latest/distro/ubuntu20.04-server/sgx_linux_x64_sdk_${SGX_SDK_VERSION}.bin \ + && chmod +x sgx_linux_x64_sdk_${SGX_SDK_VERSION}.bin \ + && ./sgx_linux_x64_sdk_${SGX_SDK_VERSION}.bin --prefix /opt/sgx-sdk \ + && rm sgx_linux_x64_sdk_${SGX_SDK_VERSION}.bin \ + # + # SGX PSW + && wget --progress=dot:giga https://download.01.org/intel-sgx/sgx-linux/${SGX_PSW_VERSION}/distro/ubuntu20.04-server/sgx_debian_local_repo.tgz \ + && tar xzvf sgx_debian_local_repo.tgz \ + && mv sgx_debian_local_repo /opt \ + && echo 'deb [trusted=yes] file:///opt/sgx_debian_local_repo focal main' >> /etc/apt/sources.list \ + && echo 'deb [trusted=yes] http://archive.ubuntu.com/ubuntu focal main' >> /etc/apt/sources.list \ + && apt-get update \ + && sudo apt-get install -y --no-install-recommends \ + ubuntu-keyring \ + libsgx-urts \ + libsgx-launch \ + libsgx-epid \ + libsgx-quote-ex \ + libsgx-dcap-ql \ + # + # Gramine + && python3 -m pip install --no-cache-dir 'meson>=0.55' 'toml>=0.10' \ + && git clone https://github.com/gramineproject/gramine.git /opt/gramine \ + && pushd /opt/gramine \ + && git checkout $GRAMINE_HEAD \ + && mkdir -p /usr/include/asm \ + && ln -s /usr/src/linux-headers-*/arch/x86/include/uapi/asm/sgx.h /usr/include/asm/sgx.h \ + && meson setup build/ --buildtype=release -Ddirect=enabled -Dsgx=enabled \ + && ninja -C build/ \ + && ninja -C build/ install \ + && popd \ # # Install conda && wget --progress=dot:giga 
https://repo.anaconda.com/miniconda/Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh \ From 8768a21f6b1039cd18e3bfe207e99ab8cadc7303 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Tue, 1 Feb 2022 13:46:10 +0100 Subject: [PATCH 48/66] separate fedn env --- mnist-cpp/Dockerfile | 40 +++++++++++++++++++++++++------------- mnist-cpp/environment.yaml | 7 +++++-- mnist-cpp/fedn.yaml | 10 ++++++++++ 3 files changed, 41 insertions(+), 16 deletions(-) create mode 100644 mnist-cpp/fedn.yaml diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index f4b57fb..8b85fa0 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -107,18 +107,6 @@ RUN apt-get update \ libsgx-epid \ libsgx-quote-ex \ libsgx-dcap-ql \ - # - # Gramine - && python3 -m pip install --no-cache-dir 'meson>=0.55' 'toml>=0.10' \ - && git clone https://github.com/gramineproject/gramine.git /opt/gramine \ - && pushd /opt/gramine \ - && git checkout $GRAMINE_HEAD \ - && mkdir -p /usr/include/asm \ - && ln -s /usr/src/linux-headers-*/arch/x86/include/uapi/asm/sgx.h /usr/include/asm/sgx.h \ - && meson setup build/ --buildtype=release -Ddirect=enabled -Dsgx=enabled \ - && ninja -C build/ \ - && ninja -C build/ install \ - && popd \ # # Install conda && wget --progress=dot:giga https://repo.anaconda.com/miniconda/Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh \ @@ -143,6 +131,12 @@ RUN apt-get update \ && mkdir -p $DATA_DIR \ && chown $USERNAME $DATA_DIR \ # + # Configure for gramine + && mkdir -p /usr/include/asm \ + && ln -s /usr/src/linux-headers-*/arch/x86/include/uapi/asm/sgx.h /usr/include/asm/sgx.h \ + && mkdir -p /opt/gramine \ + && chown -R $USERNAME /opt/gramine \ + # # Cleanup && apt-get autoremove -y \ && apt-get clean -y \ @@ -156,7 +150,25 @@ COPY environment.yaml /tmp/environment.yaml RUN conda env create -f /tmp/environment.yaml \ && rm /tmp/environment.yaml \ && conda run -n default python -m ipykernel install --name default \ - && chown $USERNAME /opt/conda/envs/default + && chown 
$USERNAME /opt/conda/envs/default \ + # + # Gramine + && git clone https://github.com/gramineproject/gramine.git /opt/gramine \ + && pushd /opt/gramine \ + && git checkout $GRAMINE_HEAD \ + && conda run --no-capture-output -n default \ + meson setup build/ --buildtype=release -Ddirect=enabled -Dsgx=enabled \ + && conda run --no-capture-output -n default \ + ninja -C build/ \ + && conda run --no-capture-output -n default \ + ninja -C build/ install \ + && popd + +# Setup fedn environment +COPY fedn.yaml /tmp/fedn.yaml +RUN conda env create -f /tmp/fedn.yaml \ + && rm /tmp/fedn.yaml \ + && chown $USERNAME /opt/conda/envs/fedn # Init conda for non-root user USER $USERNAME @@ -168,4 +180,4 @@ RUN conda init bash \ ENV DEBIAN_FRONTEND=dialog # Add entrypoint to conda environment for commands -ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "default"] \ No newline at end of file +ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "fedn"] \ No newline at end of file diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index ad607a3..eee9a56 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -8,7 +8,10 @@ dependencies: - pip=21.3.1 - ipykernel=6.7.0 - pip: - - "-e git://github.com/scaleoutsystems/fedn.git@v0.3.1#egg=fedn&subdirectory=fedn" - torch==1.10.1 - fire==0.3.1 - - autopep8==1.6.0 \ No newline at end of file + - autopep8==1.6.0 + - click==8.0.3 + - meson>=0.55 + - toml>=0.10 + - jinja2==3.0.3 \ No newline at end of file diff --git a/mnist-cpp/fedn.yaml b/mnist-cpp/fedn.yaml new file mode 100644 index 0000000..5f68d6e --- /dev/null +++ b/mnist-cpp/fedn.yaml @@ -0,0 +1,10 @@ +name: fedn + +channels: + - conda-forge + +dependencies: + - python=3.9 + - pip=21.3.1 + - pip: + - "-e git://github.com/scaleoutsystems/fedn.git@v0.3.1#egg=fedn&subdirectory=fedn" \ No newline at end of file From 44520eb3779430b2672a5cd68554f09d968fccd9 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Tue, 1 Feb 2022 13:01:17 +0000 Subject: 
[PATCH 49/66] change version and add numpy --- mnist-cpp/environment.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index eee9a56..34303ce 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - - python=3.9 + - python=3.6 - pip=21.3.1 - ipykernel=6.7.0 - pip: @@ -14,4 +14,5 @@ dependencies: - click==8.0.3 - meson>=0.55 - toml>=0.10 - - jinja2==3.0.3 \ No newline at end of file + - jinja2==3.0.3 + - numpy==1.13.1 \ No newline at end of file From 3230339658808d3ceb6949d8ab779e17a91c8512 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 2 Feb 2022 13:34:32 +0100 Subject: [PATCH 50/66] fixed gramine in conda --- mnist-cpp/Dockerfile | 41 ++++++++++++-------------------------- mnist-cpp/environment.yaml | 9 +++++---- mnist-cpp/fedn.yaml | 10 ---------- 3 files changed, 18 insertions(+), 42 deletions(-) delete mode 100644 mnist-cpp/fedn.yaml diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index 8b85fa0..22aefb7 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -11,6 +11,7 @@ ARG CONDA_VERSION=4.9.2 ARG COMPOSE_VERSION=1.29.2 ARG SGX_SDK_VERSION=2.15.101.1 ARG SGX_PSW_VERSION=2.15.1 +ARG GRAMINE_HEAD=e7429e13bb3e914c5f582ead4ca2e3f81d145a8f # Non-root user with sudo access ARG USERNAME=default @@ -44,23 +45,16 @@ RUN apt-get update \ autoconf \ libtool \ pkg-config \ - clang-format \ googletest \ libgtest-dev \ autoconf \ - bison \ gawk \ - ninja-build \ - python3 \ - python3-pip \ - python3-click \ - python3-jinja2 \ libcurl4-openssl-dev \ libprotobuf-c-dev \ protobuf-c-compiler \ - python3-protobuf \ linux-headers-amd64 \ unzip \ + bison \ # # Hadolint && wget --progress=dot:giga -O /bin/hadolint \ @@ -150,25 +144,7 @@ COPY environment.yaml /tmp/environment.yaml RUN conda env create -f /tmp/environment.yaml \ && rm /tmp/environment.yaml \ && conda run -n default python -m ipykernel 
install --name default \ - && chown $USERNAME /opt/conda/envs/default \ - # - # Gramine - && git clone https://github.com/gramineproject/gramine.git /opt/gramine \ - && pushd /opt/gramine \ - && git checkout $GRAMINE_HEAD \ - && conda run --no-capture-output -n default \ - meson setup build/ --buildtype=release -Ddirect=enabled -Dsgx=enabled \ - && conda run --no-capture-output -n default \ - ninja -C build/ \ - && conda run --no-capture-output -n default \ - ninja -C build/ install \ - && popd - -# Setup fedn environment -COPY fedn.yaml /tmp/fedn.yaml -RUN conda env create -f /tmp/fedn.yaml \ - && rm /tmp/fedn.yaml \ - && chown $USERNAME /opt/conda/envs/fedn + && chown $USERNAME /opt/conda/envs/default # Init conda for non-root user USER $USERNAME @@ -176,8 +152,17 @@ RUN conda init bash \ && conda config --set auto_activate_base false \ && echo "conda activate default" >> ~/.bashrc +# Gramine +RUN git clone https://github.com/gramineproject/gramine.git /opt/gramine \ + && pushd /opt/gramine \ + && git checkout $GRAMINE_HEAD \ + && conda run -n default meson setup build/ --buildtype=release -Ddirect=enabled -Dsgx=enabled \ + && conda run -n default ninja -C build/ \ + && sudo /opt/conda/envs/default/bin/ninja -C build/ install \ + && popd + # Switch back to dialog for any ad-hoc use of apt-get ENV DEBIAN_FRONTEND=dialog # Add entrypoint to conda environment for commands -ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "fedn"] \ No newline at end of file +ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "default"] \ No newline at end of file diff --git a/mnist-cpp/environment.yaml b/mnist-cpp/environment.yaml index 34303ce..f8c465f 100644 --- a/mnist-cpp/environment.yaml +++ b/mnist-cpp/environment.yaml @@ -4,15 +4,16 @@ channels: - conda-forge dependencies: - - python=3.6 + - python=3.9 - pip=21.3.1 - ipykernel=6.7.0 - pip: + - "-e git://github.com/scaleoutsystems/fedn.git@v0.3.1#egg=fedn&subdirectory=fedn" - torch==1.10.1 - fire==0.3.1 - 
autopep8==1.6.0 - - click==8.0.3 - meson>=0.55 - toml>=0.10 - - jinja2==3.0.3 - - numpy==1.13.1 \ No newline at end of file + - protobuf==3.19.4 + - ninja==1.10.0 + - clang-format==13.0.0 \ No newline at end of file diff --git a/mnist-cpp/fedn.yaml b/mnist-cpp/fedn.yaml deleted file mode 100644 index 5f68d6e..0000000 --- a/mnist-cpp/fedn.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: fedn - -channels: - - conda-forge - -dependencies: - - python=3.9 - - pip=21.3.1 - - pip: - - "-e git://github.com/scaleoutsystems/fedn.git@v0.3.1#egg=fedn&subdirectory=fedn" \ No newline at end of file From c4c29e846f0fdbdb0ebe7007d75519a156c42c54 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 2 Feb 2022 15:00:35 +0100 Subject: [PATCH 51/66] fix conda prefix --- mnist-cpp/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index 22aefb7..fd2ca63 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -156,7 +156,8 @@ RUN conda init bash \ RUN git clone https://github.com/gramineproject/gramine.git /opt/gramine \ && pushd /opt/gramine \ && git checkout $GRAMINE_HEAD \ - && conda run -n default meson setup build/ --buildtype=release -Ddirect=enabled -Dsgx=enabled \ + && conda run -n default meson setup build/ \ + --buildtype=release -Ddirect=enabled -Dsgx=enabled --prefix=/opt/conda/envs/default \ && conda run -n default ninja -C build/ \ && sudo /opt/conda/envs/default/bin/ninja -C build/ install \ && popd From 63821813e73a64fbf235b669549e2cafeb2dd026 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 2 Feb 2022 15:11:31 +0000 Subject: [PATCH 52/66] generage gramine files --- mnist-cpp/bin/build.sh | 21 +++++++++++++++++++-- mnist-cpp/bin/download_data.sh | 1 + mnist-cpp/bin/launch.sh | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/mnist-cpp/bin/build.sh b/mnist-cpp/bin/build.sh index b36fc9f..15a5872 100755 --- a/mnist-cpp/bin/build.sh +++ b/mnist-cpp/bin/build.sh @@ -1,4 +1,6 @@ 
#!/bin/bash +set -e + # Configure cmake --no-warn-unused-cli \ -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE \ @@ -7,7 +9,7 @@ cmake --no-warn-unused-cli \ -DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/x86_64-linux-gnu-g++-11 \ -H$PWD \ -B$PWD/build \ - -G "Unix Makefiles" + -G Ninja # Build cmake --build $PWD/build --config Debug --target all -j $(nproc) -- @@ -21,4 +23,19 @@ tar -czvf package/package.tar.gz client # Make seed SPLIT=0 N_SPLITS=1 build/train seed.pt -python client/helper.py pt2np seed.pt seed \ No newline at end of file +python client/helper.py pt2np seed.pt seed + +# Generate sig key if necessary +if [ ! -e "$HOME/.gramine/enclave-key.pem" ]; then + mkdir -p $HOME/.gramine + openssl genrsa -3 -out $HOME/.gramine/enclave-key.pem 3072 +fi + +# Generate SGX-related files +pushd client +for src in train validate; do + gramine-manifest -Dlog_level=debug $src.manifest.template $src.manifest + gramine-sgx-sign --key $HOME/.gramine/enclave-key.pem --manifest $src.manifest --output $src.manifest.sgx + gramine-sgx-get-token --output $src.token --sig $src.sig +done +popd \ No newline at end of file diff --git a/mnist-cpp/bin/download_data.sh b/mnist-cpp/bin/download_data.sh index 2240252..007c9c0 100755 --- a/mnist-cpp/bin/download_data.sh +++ b/mnist-cpp/bin/download_data.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e mkdir -p data pushd data diff --git a/mnist-cpp/bin/launch.sh b/mnist-cpp/bin/launch.sh index 05785ef..e345354 100755 --- a/mnist-cpp/bin/launch.sh +++ b/mnist-cpp/bin/launch.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e # Build container docker build \ From 2b60d6e06b1441f00a8beb864fec48aa34f51885 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 2 Feb 2022 15:27:09 +0000 Subject: [PATCH 53/66] add gramine templates --- mnist-cpp/.gitignore | 6 ++++- mnist-cpp/client/train.manifest.template | 30 +++++++++++++++++++++ mnist-cpp/client/validate.manifest.template | 30 +++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 
mnist-cpp/client/train.manifest.template create mode 100644 mnist-cpp/client/validate.manifest.template diff --git a/mnist-cpp/.gitignore b/mnist-cpp/.gitignore index 171fa0a..7aa41c2 100644 --- a/mnist-cpp/.gitignore +++ b/mnist-cpp/.gitignore @@ -8,4 +8,8 @@ package src/fedn !.vscode/settings.json data -.env \ No newline at end of file +.env +*.sgx +*.manifest +*.sig +*.token \ No newline at end of file diff --git a/mnist-cpp/client/train.manifest.template b/mnist-cpp/client/train.manifest.template new file mode 100644 index 0000000..b19f51a --- /dev/null +++ b/mnist-cpp/client/train.manifest.template @@ -0,0 +1,30 @@ +loader.entrypoint = "file:{{ gramine.libos }}" +libos.entrypoint = "train" +loader.log_level = "{{ log_level }}" +loader.argv0_override = "train" +loader.insecure__use_cmdline_argv = true +loader.env.LD_LIBRARY_PATH = "/lib:/usr/lib/x86_64-linux-gnu:/lib/x86_64-linux-gnu" + +fs.mount.lib.type = "chroot" +fs.mount.lib.path = "/lib" +fs.mount.lib.uri = "file:{{ gramine.runtimedir() }}" + +fs.mount.usr_lib64.type = "chroot" +fs.mount.usr_lib64.path = "/usr/lib/x86_64-linux-gnu" +fs.mount.usr_lib64.uri = "file:/usr/lib/x86_64-linux-gnu" + +fs.mount.lib64.type = "chroot" +fs.mount.lib64.path = "/lib/x86_64-linux-gnu" +fs.mount.lib64.uri = "file:/lib/x86_64-linux-gnu" + +sgx.thread_num = 8 +sgx.debug = true +sgx.nonpie_binary = true + +sgx.trusted_files = [ + "file:train", + "file:/usr/lib/x86_64-linux-gnu/", + "file:/lib/x86_64-linux-gnu/", + "file:{{ gramine.libos }}", + "file:{{ gramine.runtimedir() }}/", +] \ No newline at end of file diff --git a/mnist-cpp/client/validate.manifest.template b/mnist-cpp/client/validate.manifest.template new file mode 100644 index 0000000..a512f71 --- /dev/null +++ b/mnist-cpp/client/validate.manifest.template @@ -0,0 +1,30 @@ +loader.entrypoint = "file:{{ gramine.libos }}" +libos.entrypoint = "validate" +loader.log_level = "{{ log_level }}" +loader.argv0_override = "validate" +loader.insecure__use_cmdline_argv = true 
+loader.env.LD_LIBRARY_PATH = "/lib:/usr/lib/x86_64-linux-gnu:/lib/x86_64-linux-gnu" + +fs.mount.lib.type = "chroot" +fs.mount.lib.path = "/lib" +fs.mount.lib.uri = "file:{{ gramine.runtimedir() }}" + +fs.mount.usr_lib64.type = "chroot" +fs.mount.usr_lib64.path = "/usr/lib/x86_64-linux-gnu" +fs.mount.usr_lib64.uri = "file:/usr/lib/x86_64-linux-gnu" + +fs.mount.lib64.type = "chroot" +fs.mount.lib64.path = "/lib/x86_64-linux-gnu" +fs.mount.lib64.uri = "file:/lib/x86_64-linux-gnu" + +sgx.thread_num = 8 +sgx.debug = true +sgx.nonpie_binary = true + +sgx.trusted_files = [ + "file:validate", + "file:/usr/lib/x86_64-linux-gnu/", + "file:/lib/x86_64-linux-gnu/", + "file:{{ gramine.libos }}", + "file:{{ gramine.runtimedir() }}/", +] \ No newline at end of file From fc6426fe4e9911358451d8c7643dbd5a5bbca8f9 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Thu, 3 Feb 2022 13:11:06 +0100 Subject: [PATCH 54/66] change gramine head --- mnist-cpp/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index fd2ca63..80ad0d9 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -11,7 +11,7 @@ ARG CONDA_VERSION=4.9.2 ARG COMPOSE_VERSION=1.29.2 ARG SGX_SDK_VERSION=2.15.101.1 ARG SGX_PSW_VERSION=2.15.1 -ARG GRAMINE_HEAD=e7429e13bb3e914c5f582ead4ca2e3f81d145a8f +ARG GRAMINE_HEAD=c9d75597b3586cddcef1604bab58d4780586a71c # Non-root user with sudo access ARG USERNAME=default From ece6835648add2aeddf0457bbe8876f6cee63092 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Fri, 4 Feb 2022 14:53:41 +0000 Subject: [PATCH 55/66] fix manifest --- mnist-cpp/bin/launch.sh | 9 +++++---- mnist-cpp/client/train.manifest.template | 18 +++++++++++++++++- mnist-cpp/client/train.sh | 2 +- mnist-cpp/client/validate.manifest.template | 18 +++++++++++++++++- mnist-cpp/client/validate.sh | 2 +- mnist-cpp/docker-compose.yml | 2 ++ 6 files changed, 43 insertions(+), 8 deletions(-) diff --git a/mnist-cpp/bin/launch.sh 
b/mnist-cpp/bin/launch.sh index e345354..fdb2b5b 100755 --- a/mnist-cpp/bin/launch.sh +++ b/mnist-cpp/bin/launch.sh @@ -7,15 +7,16 @@ docker build \ -t local/mnist-cpp \ --build-arg DOCKER_USER=$(whoami) \ --build-arg USER_UID=$UID \ - --build-arg $(id -u $USER) \ . # Run +if [ -z "$HOST_DATA_DIR" ]; then HOST_DATA_DIR="$PWD/data"; fi +if [ -z "$HOST_WRKSPC_DIR" ]; then HOST_WRKSPC_DIR="$PWD"; fi docker run --rm -it \ - -v $PWD:/mnist-cpp -w /mnist-cpp \ + -v "$HOST_WRKSPC_DIR:/mnist-cpp" -w /mnist-cpp \ -v /var/run/docker.sock:/var/run/docker.sock \ - -v $PWD/data:/app/data \ + -v "$HOST_DATA_DIR:/app/data" \ --net=host \ -u default \ local/mnist-cpp \ - /bin/bash -c "echo HOST_DATA_DIR=$PWD/data > .env && /bin/bash" \ No newline at end of file + /bin/bash -c "set -C; echo HOST_DATA_DIR=$HOST_DATA_DIR > .env; /bin/bash" \ No newline at end of file diff --git a/mnist-cpp/client/train.manifest.template b/mnist-cpp/client/train.manifest.template index b19f51a..b735255 100644 --- a/mnist-cpp/client/train.manifest.template +++ b/mnist-cpp/client/train.manifest.template @@ -3,7 +3,8 @@ libos.entrypoint = "train" loader.log_level = "{{ log_level }}" loader.argv0_override = "train" loader.insecure__use_cmdline_argv = true -loader.env.LD_LIBRARY_PATH = "/lib:/usr/lib/x86_64-linux-gnu:/lib/x86_64-linux-gnu" +loader.insecure__use_host_env = true +loader.env.LD_LIBRARY_PATH = "/lib:/usr/lib/x86_64-linux-gnu:/lib/x86_64-linux-gnu:/opt/torch/libtorch/lib" fs.mount.lib.type = "chroot" fs.mount.lib.path = "/lib" @@ -17,12 +18,27 @@ fs.mount.lib64.type = "chroot" fs.mount.lib64.path = "/lib/x86_64-linux-gnu" fs.mount.lib64.uri = "file:/lib/x86_64-linux-gnu" +fs.mount.libtorch.type = "chroot" +fs.mount.libtorch.path = "/opt/torch/libtorch/lib" +fs.mount.libtorch.uri = "file:/opt/torch/libtorch/lib" + +fs.mount.app.type = "chroot" +fs.mount.app.path = "/app" +fs.mount.app.uri = "file:/app" + +fs.mount.tmp.type = "chroot" +fs.mount.tmp.path = "/tmp" +fs.mount.tmp.uri = "file:/tmp" + 
sgx.thread_num = 8 sgx.debug = true sgx.nonpie_binary = true sgx.trusted_files = [ "file:train", + "file:/app/", + "file:/tmp/", + "file:/opt/torch/libtorch/lib/", "file:/usr/lib/x86_64-linux-gnu/", "file:/lib/x86_64-linux-gnu/", "file:{{ gramine.libos }}", diff --git a/mnist-cpp/client/train.sh b/mnist-cpp/client/train.sh index ed821c8..a11aa0c 100755 --- a/mnist-cpp/client/train.sh +++ b/mnist-cpp/client/train.sh @@ -13,7 +13,7 @@ python helper.py np2pt "$model_in" "$model_in_name".pt export N_SPLITS=$(sudo docker ps --format "{{ .Names }}" | grep client | wc -l) SPLIT=$(sudo docker ps | grep $(hostname) | awk '{print substr($NF, length($NF), length($NF))}') export SPLIT=$(($SPLIT - 1)) -./train "$model_in_name".pt "$model_in_name".retrain.pt +$LOADER ./train "$model_in_name".pt "$model_in_name".retrain.pt # Convert pt to npz python helper.py pt2np "$model_in_name".retrain.pt "$model_out" \ No newline at end of file diff --git a/mnist-cpp/client/validate.manifest.template b/mnist-cpp/client/validate.manifest.template index a512f71..235ca37 100644 --- a/mnist-cpp/client/validate.manifest.template +++ b/mnist-cpp/client/validate.manifest.template @@ -3,7 +3,8 @@ libos.entrypoint = "validate" loader.log_level = "{{ log_level }}" loader.argv0_override = "validate" loader.insecure__use_cmdline_argv = true -loader.env.LD_LIBRARY_PATH = "/lib:/usr/lib/x86_64-linux-gnu:/lib/x86_64-linux-gnu" +loader.insecure__use_host_env = true +loader.env.LD_LIBRARY_PATH = "/lib:/usr/lib/x86_64-linux-gnu:/lib/x86_64-linux-gnu:/opt/torch/libtorch/lib" fs.mount.lib.type = "chroot" fs.mount.lib.path = "/lib" @@ -17,12 +18,27 @@ fs.mount.lib64.type = "chroot" fs.mount.lib64.path = "/lib/x86_64-linux-gnu" fs.mount.lib64.uri = "file:/lib/x86_64-linux-gnu" +fs.mount.libtorch.type = "chroot" +fs.mount.libtorch.path = "/opt/torch/libtorch/lib" +fs.mount.libtorch.uri = "file:/opt/torch/libtorch/lib" + +fs.mount.app.type = "chroot" +fs.mount.app.path = "/app" +fs.mount.app.uri = "file:/app" + 
+fs.mount.tmp.type = "chroot" +fs.mount.tmp.path = "/tmp" +fs.mount.tmp.uri = "file:/tmp" + sgx.thread_num = 8 sgx.debug = true sgx.nonpie_binary = true sgx.trusted_files = [ "file:validate", + "file:/app/", + "file:/tmp/", + "file:/opt/torch/libtorch/lib/", "file:/usr/lib/x86_64-linux-gnu/", "file:/lib/x86_64-linux-gnu/", "file:{{ gramine.libos }}", diff --git a/mnist-cpp/client/validate.sh b/mnist-cpp/client/validate.sh index 6c388c7..fd25ff0 100755 --- a/mnist-cpp/client/validate.sh +++ b/mnist-cpp/client/validate.sh @@ -10,4 +10,4 @@ json_out="$2" python helper.py np2pt "$model_in" "$model_in_name".pt # Train -./validate "$model_in_name".pt "$json_out" \ No newline at end of file +$LOADER ./validate "$model_in_name".pt "$json_out" \ No newline at end of file diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index b9c4c8c..e86be63 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -84,11 +84,13 @@ services: client: environment: - GET_HOSTS_FROM=dns + - LOADER=${LOADER} build: . 
working_dir: /app command: fedn run client -in config/settings-client.yaml volumes: - ${HOST_DATA_DIR}/:/app/data - /var/run/docker.sock:/var/run/docker.sock + privileged: true deploy: replicas: 0 From 1409a3350ede583862d88058424d64527f13c977 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 7 Feb 2022 12:02:27 +0000 Subject: [PATCH 56/66] limit threads --- mnist-cpp/client/train.sh | 1 + mnist-cpp/client/validate.sh | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mnist-cpp/client/train.sh b/mnist-cpp/client/train.sh index a11aa0c..4764c49 100755 --- a/mnist-cpp/client/train.sh +++ b/mnist-cpp/client/train.sh @@ -13,6 +13,7 @@ python helper.py np2pt "$model_in" "$model_in_name".pt export N_SPLITS=$(sudo docker ps --format "{{ .Names }}" | grep client | wc -l) SPLIT=$(sudo docker ps | grep $(hostname) | awk '{print substr($NF, length($NF), length($NF))}') export SPLIT=$(($SPLIT - 1)) +export OMP_NUM_THREADS=4 $LOADER ./train "$model_in_name".pt "$model_in_name".retrain.pt # Convert pt to npz diff --git a/mnist-cpp/client/validate.sh b/mnist-cpp/client/validate.sh index fd25ff0..681ca36 100755 --- a/mnist-cpp/client/validate.sh +++ b/mnist-cpp/client/validate.sh @@ -9,5 +9,6 @@ json_out="$2" # Convert npz to pt python helper.py np2pt "$model_in" "$model_in_name".pt -# Train +# Validate +export OMP_NUM_THREADS=4 $LOADER ./validate "$model_in_name".pt "$json_out" \ No newline at end of file From e1cff47dc7bbea64219080823117c2a3005f622d Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Mon, 7 Feb 2022 12:26:55 +0000 Subject: [PATCH 57/66] train on 2 threads only --- mnist-cpp/client/train.sh | 2 +- mnist-cpp/client/validate.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mnist-cpp/client/train.sh b/mnist-cpp/client/train.sh index 4764c49..81aeabe 100755 --- a/mnist-cpp/client/train.sh +++ b/mnist-cpp/client/train.sh @@ -13,7 +13,7 @@ python helper.py np2pt "$model_in" "$model_in_name".pt export N_SPLITS=$(sudo docker ps 
--format "{{ .Names }}" | grep client | wc -l) SPLIT=$(sudo docker ps | grep $(hostname) | awk '{print substr($NF, length($NF), length($NF))}') export SPLIT=$(($SPLIT - 1)) -export OMP_NUM_THREADS=4 +export OMP_NUM_THREADS=2 $LOADER ./train "$model_in_name".pt "$model_in_name".retrain.pt # Convert pt to npz diff --git a/mnist-cpp/client/validate.sh b/mnist-cpp/client/validate.sh index 681ca36..154c45e 100755 --- a/mnist-cpp/client/validate.sh +++ b/mnist-cpp/client/validate.sh @@ -10,5 +10,5 @@ json_out="$2" python helper.py np2pt "$model_in" "$model_in_name".pt # Validate -export OMP_NUM_THREADS=4 +export OMP_NUM_THREADS=2 $LOADER ./validate "$model_in_name".pt "$json_out" \ No newline at end of file From 3775bcbbf3badc1c336eab22000d5105188fe603 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Tue, 8 Feb 2022 13:51:30 +0000 Subject: [PATCH 58/66] change tmpdir --- mnist-cpp/bin/build.sh | 18 +++++++++--------- mnist-cpp/bin/launch.sh | 7 ++++--- mnist-cpp/client/train.manifest.template | 6 +++--- mnist-cpp/client/validate.manifest.template | 6 +++--- mnist-cpp/docker-compose.yml | 1 + 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/mnist-cpp/bin/build.sh b/mnist-cpp/bin/build.sh index 15a5872..308b17d 100755 --- a/mnist-cpp/bin/build.sh +++ b/mnist-cpp/bin/build.sh @@ -17,14 +17,6 @@ cmake --build $PWD/build --config Debug --target all -j $(nproc) -- # Copy binaries to the right folder cp build/train build/validate client -# Make package -mkdir -p package -tar -czvf package/package.tar.gz client - -# Make seed -SPLIT=0 N_SPLITS=1 build/train seed.pt -python client/helper.py pt2np seed.pt seed - # Generate sig key if necessary if [ ! 
-e "$HOME/.gramine/enclave-key.pem" ]; then mkdir -p $HOME/.gramine @@ -38,4 +30,12 @@ for src in train validate; do gramine-sgx-sign --key $HOME/.gramine/enclave-key.pem --manifest $src.manifest --output $src.manifest.sgx gramine-sgx-get-token --output $src.token --sig $src.sig done -popd \ No newline at end of file +popd + +# Make package +mkdir -p package +tar -czvf package/package.tar.gz client + +# Make seed +SPLIT=0 N_SPLITS=1 build/train seed.pt +python client/helper.py pt2np seed.pt seed \ No newline at end of file diff --git a/mnist-cpp/bin/launch.sh b/mnist-cpp/bin/launch.sh index fdb2b5b..bb57188 100755 --- a/mnist-cpp/bin/launch.sh +++ b/mnist-cpp/bin/launch.sh @@ -2,10 +2,11 @@ set -e # Build container +if [ -z "$USERNAME" ]; then USERNAME="$(whoami)"; fi docker build \ -f Dockerfile \ -t local/mnist-cpp \ - --build-arg DOCKER_USER=$(whoami) \ + --build-arg USERNAME=$USERNAME \ --build-arg USER_UID=$UID \ . @@ -17,6 +18,6 @@ docker run --rm -it \ -v /var/run/docker.sock:/var/run/docker.sock \ -v "$HOST_DATA_DIR:/app/data" \ --net=host \ - -u default \ + -u $USERNAME \ local/mnist-cpp \ - /bin/bash -c "set -C; echo HOST_DATA_DIR=$HOST_DATA_DIR > .env; /bin/bash" \ No newline at end of file + /bin/bash -c "echo HOST_DATA_DIR=$HOST_DATA_DIR > .env; /bin/bash" \ No newline at end of file diff --git a/mnist-cpp/client/train.manifest.template b/mnist-cpp/client/train.manifest.template index b735255..63b806b 100644 --- a/mnist-cpp/client/train.manifest.template +++ b/mnist-cpp/client/train.manifest.template @@ -27,8 +27,8 @@ fs.mount.app.path = "/app" fs.mount.app.uri = "file:/app" fs.mount.tmp.type = "chroot" -fs.mount.tmp.path = "/tmp" -fs.mount.tmp.uri = "file:/tmp" +fs.mount.tmp.path = "/var/tmp" +fs.mount.tmp.uri = "file:/var/tmp" sgx.thread_num = 8 sgx.debug = true @@ -37,7 +37,7 @@ sgx.nonpie_binary = true sgx.trusted_files = [ "file:train", "file:/app/", - "file:/tmp/", + "file:/var/tmp/", "file:/opt/torch/libtorch/lib/", 
"file:/usr/lib/x86_64-linux-gnu/", "file:/lib/x86_64-linux-gnu/", diff --git a/mnist-cpp/client/validate.manifest.template b/mnist-cpp/client/validate.manifest.template index 235ca37..dbe7def 100644 --- a/mnist-cpp/client/validate.manifest.template +++ b/mnist-cpp/client/validate.manifest.template @@ -27,8 +27,8 @@ fs.mount.app.path = "/app" fs.mount.app.uri = "file:/app" fs.mount.tmp.type = "chroot" -fs.mount.tmp.path = "/tmp" -fs.mount.tmp.uri = "file:/tmp" +fs.mount.tmp.path = "/var/tmp" +fs.mount.tmp.uri = "file:/var/tmp" sgx.thread_num = 8 sgx.debug = true @@ -37,7 +37,7 @@ sgx.nonpie_binary = true sgx.trusted_files = [ "file:validate", "file:/app/", - "file:/tmp/", + "file:/var/tmp/", "file:/opt/torch/libtorch/lib/", "file:/usr/lib/x86_64-linux-gnu/", "file:/lib/x86_64-linux-gnu/", diff --git a/mnist-cpp/docker-compose.yml b/mnist-cpp/docker-compose.yml index e86be63..0c9d2f3 100644 --- a/mnist-cpp/docker-compose.yml +++ b/mnist-cpp/docker-compose.yml @@ -85,6 +85,7 @@ services: environment: - GET_HOSTS_FROM=dns - LOADER=${LOADER} + - TMPDIR=/var/tmp build: . 
working_dir: /app command: fedn run client -in config/settings-client.yaml From 21955ac227cffecaa983e73098d08c049c21c15f Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Tue, 8 Feb 2022 14:30:45 +0000 Subject: [PATCH 59/66] fix template data path --- mnist-cpp/client/train.manifest.template | 6 +++--- mnist-cpp/client/validate.manifest.template | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mnist-cpp/client/train.manifest.template b/mnist-cpp/client/train.manifest.template index 63b806b..b2bce73 100644 --- a/mnist-cpp/client/train.manifest.template +++ b/mnist-cpp/client/train.manifest.template @@ -23,8 +23,8 @@ fs.mount.libtorch.path = "/opt/torch/libtorch/lib" fs.mount.libtorch.uri = "file:/opt/torch/libtorch/lib" fs.mount.app.type = "chroot" -fs.mount.app.path = "/app" -fs.mount.app.uri = "file:/app" +fs.mount.app.path = "/app/data" +fs.mount.app.uri = "file:/app/data" fs.mount.tmp.type = "chroot" fs.mount.tmp.path = "/var/tmp" @@ -36,7 +36,7 @@ sgx.nonpie_binary = true sgx.trusted_files = [ "file:train", - "file:/app/", + "file:/app/data/", "file:/var/tmp/", "file:/opt/torch/libtorch/lib/", "file:/usr/lib/x86_64-linux-gnu/", diff --git a/mnist-cpp/client/validate.manifest.template b/mnist-cpp/client/validate.manifest.template index dbe7def..eaed62a 100644 --- a/mnist-cpp/client/validate.manifest.template +++ b/mnist-cpp/client/validate.manifest.template @@ -23,8 +23,8 @@ fs.mount.libtorch.path = "/opt/torch/libtorch/lib" fs.mount.libtorch.uri = "file:/opt/torch/libtorch/lib" fs.mount.app.type = "chroot" -fs.mount.app.path = "/app" -fs.mount.app.uri = "file:/app" +fs.mount.app.path = "/app/data" +fs.mount.app.uri = "file:/app/data" fs.mount.tmp.type = "chroot" fs.mount.tmp.path = "/var/tmp" @@ -36,7 +36,7 @@ sgx.nonpie_binary = true sgx.trusted_files = [ "file:validate", - "file:/app/", + "file:/app/data/", "file:/var/tmp/", "file:/opt/torch/libtorch/lib/", "file:/usr/lib/x86_64-linux-gnu/", From e41c4016744a7f2bb7ad841e63e7df5b4dc50dd5 
Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Tue, 8 Feb 2022 15:23:41 +0000 Subject: [PATCH 60/66] fix template for SGX --- mnist-cpp/client/train.manifest.template | 6 +++++- mnist-cpp/client/validate.manifest.template | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/mnist-cpp/client/train.manifest.template b/mnist-cpp/client/train.manifest.template index b2bce73..2a2e5fa 100644 --- a/mnist-cpp/client/train.manifest.template +++ b/mnist-cpp/client/train.manifest.template @@ -33,11 +33,15 @@ fs.mount.tmp.uri = "file:/var/tmp" sgx.thread_num = 8 sgx.debug = true sgx.nonpie_binary = true +sgx.enclave_size = "1G" + +sgx.allowed_files = [ + "file:/var/tmp/", +] sgx.trusted_files = [ "file:train", "file:/app/data/", - "file:/var/tmp/", "file:/opt/torch/libtorch/lib/", "file:/usr/lib/x86_64-linux-gnu/", "file:/lib/x86_64-linux-gnu/", diff --git a/mnist-cpp/client/validate.manifest.template b/mnist-cpp/client/validate.manifest.template index eaed62a..d2d695d 100644 --- a/mnist-cpp/client/validate.manifest.template +++ b/mnist-cpp/client/validate.manifest.template @@ -33,11 +33,15 @@ fs.mount.tmp.uri = "file:/var/tmp" sgx.thread_num = 8 sgx.debug = true sgx.nonpie_binary = true +sgx.enclave_size = "1G" + +sgx.allowed_files = [ + "file:/var/tmp/", +] sgx.trusted_files = [ "file:validate", "file:/app/data/", - "file:/var/tmp/", "file:/opt/torch/libtorch/lib/", "file:/usr/lib/x86_64-linux-gnu/", "file:/lib/x86_64-linux-gnu/", From ed7af23ca47171dc49cc00fe9d421bcbc2273f6f Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Tue, 8 Feb 2022 15:38:26 +0000 Subject: [PATCH 61/66] increasing threads --- mnist-cpp/client/train.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/client/train.sh b/mnist-cpp/client/train.sh index 81aeabe..4764c49 100755 --- a/mnist-cpp/client/train.sh +++ b/mnist-cpp/client/train.sh @@ -13,7 +13,7 @@ python helper.py np2pt "$model_in" "$model_in_name".pt export N_SPLITS=$(sudo docker ps --format "{{ 
.Names }}" | grep client | wc -l) SPLIT=$(sudo docker ps | grep $(hostname) | awk '{print substr($NF, length($NF), length($NF))}') export SPLIT=$(($SPLIT - 1)) -export OMP_NUM_THREADS=2 +export OMP_NUM_THREADS=4 $LOADER ./train "$model_in_name".pt "$model_in_name".retrain.pt # Convert pt to npz From d5b41f8ee49ae1f0f126df7d42811a447c4417b0 Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 9 Feb 2022 13:58:26 +0100 Subject: [PATCH 62/66] bumpi gramine --- mnist-cpp/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mnist-cpp/Dockerfile b/mnist-cpp/Dockerfile index 80ad0d9..5821fd0 100644 --- a/mnist-cpp/Dockerfile +++ b/mnist-cpp/Dockerfile @@ -11,7 +11,7 @@ ARG CONDA_VERSION=4.9.2 ARG COMPOSE_VERSION=1.29.2 ARG SGX_SDK_VERSION=2.15.101.1 ARG SGX_PSW_VERSION=2.15.1 -ARG GRAMINE_HEAD=c9d75597b3586cddcef1604bab58d4780586a71c +ARG GRAMINE_HEAD=v1.1 # Non-root user with sudo access ARG USERNAME=default From 31066524d649c577b0da5ae02032905c8bf85f4e Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 9 Feb 2022 14:24:30 +0000 Subject: [PATCH 63/66] training in enclave working --- mnist-cpp/bin/build.sh | 15 --------------- mnist-cpp/client/sgx-setup.sh | 10 ++++++++++ mnist-cpp/client/train.sh | 11 ++++++++--- mnist-cpp/client/validate.sh | 9 +++++++-- 4 files changed, 25 insertions(+), 20 deletions(-) create mode 100755 mnist-cpp/client/sgx-setup.sh diff --git a/mnist-cpp/bin/build.sh b/mnist-cpp/bin/build.sh index 308b17d..7ee6b84 100755 --- a/mnist-cpp/bin/build.sh +++ b/mnist-cpp/bin/build.sh @@ -17,21 +17,6 @@ cmake --build $PWD/build --config Debug --target all -j $(nproc) -- # Copy binaries to the right folder cp build/train build/validate client -# Generate sig key if necessary -if [ ! 
-e "$HOME/.gramine/enclave-key.pem" ]; then - mkdir -p $HOME/.gramine - openssl genrsa -3 -out $HOME/.gramine/enclave-key.pem 3072 -fi - -# Generate SGX-related files -pushd client -for src in train validate; do - gramine-manifest -Dlog_level=debug $src.manifest.template $src.manifest - gramine-sgx-sign --key $HOME/.gramine/enclave-key.pem --manifest $src.manifest --output $src.manifest.sgx - gramine-sgx-get-token --output $src.token --sig $src.sig -done -popd - # Make package mkdir -p package tar -czvf package/package.tar.gz client diff --git a/mnist-cpp/client/sgx-setup.sh b/mnist-cpp/client/sgx-setup.sh new file mode 100755 index 0000000..79ddf0f --- /dev/null +++ b/mnist-cpp/client/sgx-setup.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Parse args +src=$1 + +# Generate SGX files +openssl genrsa -3 -out enclave-key.pem 3072 +gramine-manifest -Dlog_level=debug $src.manifest.template $src.manifest +gramine-sgx-sign --key enclave-key.pem --manifest $src.manifest --output $src.manifest.sgx +gramine-sgx-get-token --output $src.token --sig $src.sig \ No newline at end of file diff --git a/mnist-cpp/client/train.sh b/mnist-cpp/client/train.sh index 4764c49..b472526 100755 --- a/mnist-cpp/client/train.sh +++ b/mnist-cpp/client/train.sh @@ -6,15 +6,20 @@ model_in="$1" model_in_name="$(basename ${model_in%.*})" model_out="$2" +# Setup SGX if necessary +if [[ ! 
-f train.token && "$LOADER" == "gramine-sgx" ]]; then + ./sgx-setup.sh train +fi + # Convert npz to pt -python helper.py np2pt "$model_in" "$model_in_name".pt +python helper.py np2pt "$model_in" "$TMPDIR/$model_in_name".pt # Train export N_SPLITS=$(sudo docker ps --format "{{ .Names }}" | grep client | wc -l) SPLIT=$(sudo docker ps | grep $(hostname) | awk '{print substr($NF, length($NF), length($NF))}') export SPLIT=$(($SPLIT - 1)) export OMP_NUM_THREADS=4 -$LOADER ./train "$model_in_name".pt "$model_in_name".retrain.pt +$LOADER ./train "$TMPDIR/$model_in_name".pt "$TMPDIR/$model_in_name".retrain.pt # Convert pt to npz -python helper.py pt2np "$model_in_name".retrain.pt "$model_out" \ No newline at end of file +python helper.py pt2np "$TMPDIR/$model_in_name".retrain.pt "$model_out" \ No newline at end of file diff --git a/mnist-cpp/client/validate.sh b/mnist-cpp/client/validate.sh index 154c45e..e702b57 100755 --- a/mnist-cpp/client/validate.sh +++ b/mnist-cpp/client/validate.sh @@ -6,9 +6,14 @@ model_in="$1" model_in_name="$(basename ${model_in%.*})" json_out="$2" +# Setup SGX if necessary +if [[ ! 
-f validate.token && "$LOADER" == "gramine-sgx" ]]; then + ./sgx-setup.sh validate +fi + # Convert npz to pt -python helper.py np2pt "$model_in" "$model_in_name".pt +python helper.py np2pt "$model_in" "$TMPDIR/$model_in_name".pt # Validate export OMP_NUM_THREADS=2 -$LOADER ./validate "$model_in_name".pt "$json_out" \ No newline at end of file +$LOADER ./validate "$TMPDIR/$model_in_name".pt "$json_out" \ No newline at end of file From a38ccf0caf77aabc4291e7c63abcaec56bb4b08c Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 9 Feb 2022 14:57:30 +0000 Subject: [PATCH 64/66] add TEE to docs --- mnist-cpp/README.md | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/mnist-cpp/README.md b/mnist-cpp/README.md index eaf0042..5a848f5 100644 --- a/mnist-cpp/README.md +++ b/mnist-cpp/README.md @@ -6,6 +6,7 @@ This is an example of the classic MNIST hand-written text recognition task using - [Table of Contents](#table-of-contents) - [Prerequisites](#prerequisites) - [Running the example](#running-the-example) + - [Running in Truested Execution Environment (TEE)](#running-in-truested-execution-environment-tee) - [Clean up](#clean-up) ## Prerequisites @@ -13,17 +14,17 @@ The only prerequisite to run this example is [Docker](https://www.docker.com). ## Running the example -Start by downloading the data: -``` -bin/download_data.sh -``` - Start the Docker environment: ``` bin/launch.sh ``` > This may take a few minutes. +Download the data: +``` +bin/download_data.sh +``` + Build the compute package and train the seed model: ``` bin/build.sh @@ -43,5 +44,11 @@ sudo docker-compose up -d --scale client=2 Finally, you can navigate again to https://localhost:8090 and start the experiment from the "control" tab. 
+## Running in Truested Execution Environment (TEE) +The compute package in this example supports running training and validation in [Intel SGX TEE](https://www.intel.com/content/www/us/en/developer/tools/software-guard-extensions/overview.html) via [Gramine](https://grapheneproject.io). The code was tested using [Azure Confidential Computing](https://azure.microsoft.com/en-us/solutions/confidential-compute). To enable this running mode, after starting the development container with `bin/launch.sh` you can run: +``` +echo "LOADER=gramine-sgx" >> .env +``` + ## Clean up To clean up you can run: `sudo docker-compose down`. To exit the Docker environment simply run `exit`. \ No newline at end of file From 8184be6dd732512c418966b5bc73d0be151e0e2e Mon Sep 17 00:00:00 2001 From: mcapuccini Date: Wed, 9 Feb 2022 16:25:16 +0100 Subject: [PATCH 65/66] improve guide --- mnist-cpp/README.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/mnist-cpp/README.md b/mnist-cpp/README.md index 5a848f5..2232391 100644 --- a/mnist-cpp/README.md +++ b/mnist-cpp/README.md @@ -6,8 +6,8 @@ This is an example of the classic MNIST hand-written text recognition task using - [Table of Contents](#table-of-contents) - [Prerequisites](#prerequisites) - [Running the example](#running-the-example) - - [Running in Truested Execution Environment (TEE)](#running-in-truested-execution-environment-tee) - [Clean up](#clean-up) + - [Running in Truested Execution Environment (TEE)](#running-in-truested-execution-environment-tee) ## Prerequisites The only prerequisite to run this example is [Docker](https://www.docker.com). @@ -44,11 +44,8 @@ sudo docker-compose up -d --scale client=2 Finally, you can navigate again to https://localhost:8090 and start the experiment from the "control" tab. 
-## Running in Truested Execution Environment (TEE) -The compute package in this example supports running training and validation in [Intel SGX TEE](https://www.intel.com/content/www/us/en/developer/tools/software-guard-extensions/overview.html) via [Gramine](https://grapheneproject.io). The code was tested using [Azure Confidential Computing](https://azure.microsoft.com/en-us/solutions/confidential-compute). To enable this running mode, after starting the development container with `bin/launch.sh` you can run: -``` -echo "LOADER=gramine-sgx" >> .env -``` - ## Clean up -To clean up you can run: `sudo docker-compose down`. To exit the Docker environment simply run `exit`. \ No newline at end of file +To clean up you can run: `sudo docker-compose down`. To exit the Docker environment simply run `exit`. + +## Running in Truested Execution Environment (TEE) +The compute package in this example supports running training and validation in [Intel SGX TEE](https://www.intel.com/content/www/us/en/developer/tools/software-guard-extensions/overview.html) via [Gramine](https://grapheneproject.io). The code was tested using [Azure Confidential Computing](https://azure.microsoft.com/en-us/solutions/confidential-compute). To enable this running mode, after starting the development container with `bin/launch.sh` you can run: `echo "LOADER=gramine-sgx" >> .env` and repeat all of the subsequent steps. 
\ No newline at end of file From 738097b95729cc6e45ac418f6c8c387bdbbf56db Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 9 Feb 2022 15:27:26 +0000 Subject: [PATCH 66/66] minor fix --- mnist-cpp/bin/launch.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mnist-cpp/bin/launch.sh b/mnist-cpp/bin/launch.sh index bb57188..28c6b2a 100755 --- a/mnist-cpp/bin/launch.sh +++ b/mnist-cpp/bin/launch.sh @@ -13,6 +13,7 @@ docker build \ # Run if [ -z "$HOST_DATA_DIR" ]; then HOST_DATA_DIR="$PWD/data"; fi if [ -z "$HOST_WRKSPC_DIR" ]; then HOST_WRKSPC_DIR="$PWD"; fi +mkdir -p ./data docker run --rm -it \ -v "$HOST_WRKSPC_DIR:/mnist-cpp" -w /mnist-cpp \ -v /var/run/docker.sock:/var/run/docker.sock \ @@ -20,4 +21,4 @@ docker run --rm -it \ --net=host \ -u $USERNAME \ local/mnist-cpp \ - /bin/bash -c "echo HOST_DATA_DIR=$HOST_DATA_DIR > .env; /bin/bash" \ No newline at end of file + /bin/bash -c "echo HOST_DATA_DIR=$HOST_DATA_DIR > .env; /bin/bash"