diff --git a/.github/workflows/interactive-kvasir.yml b/.github/workflows/interactive-kvasir.yml deleted file mode 100644 index 12b38d618f..0000000000 --- a/.github/workflows/interactive-kvasir.yml +++ /dev/null @@ -1,36 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Interactive API - Pytorch Kvasir UNet - -on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] - -permissions: - contents: read - -jobs: - build: - if: github.event.pull_request.draft == false - strategy: - matrix: - os: ['ubuntu-latest', 'windows-latest'] - runs-on: ${{ matrix.os }} - - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v3 - with: - python-version: "3.8" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install . - - name: Interactive API - pytorch_kvasir_unet - run: | - python setup.py build_grpc - pip install torch==1.13.1 - pip install torchvision==0.14.1 - python -m tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.run diff --git a/.github/workflows/interactive-tensorflow.yml b/.github/workflows/interactive-tensorflow.yml deleted file mode 100644 index 8ff870b09f..0000000000 --- a/.github/workflows/interactive-tensorflow.yml +++ /dev/null @@ -1,32 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Interactive API - Tensorflow MNIST - -on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] - -permissions: - contents: read - -jobs: - build: - if: github.event.pull_request.draft == false - runs-on: ubuntu-latest # Add Windows support after https://github.com/keras-team/keras/issues/16308 is merged - - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v3 - with: - python-version: "3.8" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install . - - name: Interactive API - tensorflow_mnist - run: | - python setup.py build_grpc - pip install tensorflow==2.13 - python -m tests.github.interactive_api_director.experiments.tensorflow_mnist.run diff --git a/.github/workflows/pki.yml b/.github/workflows/pki.yml deleted file mode 100644 index e8b650e31a..0000000000 --- a/.github/workflows/pki.yml +++ /dev/null @@ -1,35 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Private Key Infrastructure - -on: - workflow_call: - workflow_dispatch: - -permissions: - contents: read - -env: - # A workaround for long FQDN names provided by GitHub actions. - FQDN: "localhost" - -jobs: - test_insecure_client: - if: github.event.pull_request.draft == false - runs-on: ubuntu-latest - timeout-minutes: 15 - - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install . 
- - name: Test PKI - run: | - python tests/github/pki_insecure_client.py diff --git a/.github/workflows/pr_pipeline.yml b/.github/workflows/pr_pipeline.yml index 3c56db1af7..a910031f9e 100644 --- a/.github/workflows/pr_pipeline.yml +++ b/.github/workflows/pr_pipeline.yml @@ -43,10 +43,6 @@ jobs: name: Hadolint Security Scan uses: ./.github/workflows/hadolint.yml - private_key_infrastructure: - name: Private Key Infrastructure - uses: ./.github/workflows/pki.yml - pytest_coverage: name: Pytest and code coverage uses: ./.github/workflows/pytest_coverage.yml diff --git a/Jenkinsfile b/Jenkinsfile index ddd2ea3e2c..5bdb50bac0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,20 +10,6 @@ def snykData = [ 'openfl-workspace_torch_cnn_mnist': 'openfl-workspace/torch/mnist/requirements.txt', 'openfl-workspace_torch_unet_kvasir': 'openfl-workspace/torch/unet_kvasir/requirements.txt', 'openfl-workspace_keras_cnn_mnist': 'openfl-workspace/keras/mnist/requirements.txt', - 'openfl-tutorials_interactive_api_pytorch_medmnist_2d_envoy': 'openfl-tutorials/interactive_api/PyTorch_MedMNIST_2D/envoy/requirements.txt', - 'openfl-tutorials_interactive_api_pytorch_dogscats_vit_workspace': 'openfl-tutorials/interactive_api/PyTorch_DogsCats_ViT/workspace/requirements.txt', - 'openfl-tutorials_interactive_api_pytorch_histology_envoy': 'openfl-tutorials/interactive_api/PyTorch_Histology/envoy/requirements.txt', - 'openfl-tutorials_interactive_api_mxnet_landmarks_workspace': 'openfl-tutorials/interactive_api/MXNet_landmarks/workspace/requirements.txt', - 'openfl-tutorials_interactive_api_pytorch_histology_fedcurv_envoy': 'openfl-tutorials/interactive_api/PyTorch_Histology_FedCurv/envoy/requirements.txt', - 'openfl-tutorials_interactive_api_tensorflow_word_prediction_workspace': 'openfl-tutorials/interactive_api/Tensorflow_Word_Prediction/workspace/requirements.txt', - 'openfl-tutorials_interactive_api_jax_linear_regression_envoy': 'openfl-tutorials/interactive_api/jax_linear_regression/envoy/requirements.txt', - 'openfl-tutorials_interactive_api_pytorch_medmnist_3d_envoy': 'openfl-tutorials/interactive_api/PyTorch_MedMNIST_3D/envoy/requirements.txt', - 'openfl-tutorials_interactive_api_numpy_linear_regression_workspace': 'openfl-tutorials/interactive_api/numpy_linear_regression/workspace/requirements.txt', - 'openfl-tutorials_interactive_api_numpy_linear_regression_envoy': 'openfl-tutorials/interactive_api/numpy_linear_regression/envoy/requirements.txt', - 'openfl-tutorials_interactive_api_pytorch_market_re-id_workspace': 'openfl-tutorials/interactive_api/PyTorch_Market_Re-ID/workspace/requirements.txt', - 'openfl-tutorials_interactive_api_pytorch_market_re-id_envoy': 'openfl-tutorials/interactive_api/PyTorch_Market_Re-ID/envoy/requirements.txt', - 'openfl-tutorials_interactive_api_pytorch_tinyimagenet_workspace': 'openfl-tutorials/interactive_api/PyTorch_TinyImageNet/workspace/requirements.txt', - 'openfl-tutorials_interactive_api_pytorch_tinyimagenet_envoy': 'openfl-tutorials/interactive_api/PyTorch_TinyImageNet/envoy/requirements.txt' ] pipeline { agent { diff --git a/docs/about/features.rst b/docs/about/features.rst index bd3e05581d..b632cbd1f3 100644 --- a/docs/about/features.rst +++ b/docs/about/features.rst @@ -19,15 +19,6 @@ Task Runner features_index/taskrunner -Interactive (Deprecated) - Setup long-lived components to run many experiments in series. Recommended for FL research when many changes to model, dataloader, or hyperparameters are expected. - Formerly known as the director-based workflow. 
- For more info see :doc:`features_index/interactive` - - .. toctree:: - :hidden: - - features_index/interactive Workflow Interface (Experimental) Formulate the experiment as a series of tasks, or a flow. Every flow begins with the start task and concludes with end. diff --git a/docs/about/features_index/interactive.rst b/docs/about/features_index/interactive.rst deleted file mode 100644 index 3cf05571d5..0000000000 --- a/docs/about/features_index/interactive.rst +++ /dev/null @@ -1,673 +0,0 @@ -.. # Copyright (C) 2020-2023 Intel Corporation -.. # SPDX-License-Identifier: Apache-2.0 - -.. _running_interactive: - -============================ -Interactive API (Deprecated) -============================ - -A director-based workflow uses long-lived components in a federation. These components continue to be available to distribute more experiments in the federation. - -- The *Director* is the central node of the federation. This component starts an *Aggregator* for each experiment, sends data to connected collaborator nodes, and provides updates on the status. -- The *Envoy* runs on collaborator nodes connected to the *Director*. When the *Director* starts an experiment, the *Envoy* starts the *Collaborator* to train the global model. - - -The director-based workflow comprises the following roles and their tasks: - - - `Director Manager: Set Up the Director`_ - - `Collaborator Manager: Set Up the Envoy`_ - - `Experiment Manager: Describe an Experiment`_ - -Follow the procedure in the director-based workflow to become familiar with the setup required and APIs provided for each role in the federation: *Experiment manager (Data scientist)*, *Director manager*, and *Collaborator manager*. - -- *Experiment manager* (or Data scientist) is a person or group of people using OpenFL. -- *Director Manager* is the ML model creator's representative controlling the Director. -- *Collaborator manager* is the data owner's representative controlling the Envoy. - -.. note:: - The Open Federated Learning (OpenFL) interactive Python API enables the Experiment manager (data scientist) to define and start a federated learning experiment from a single entry point: a Jupyter\*\ notebook or a Python\*\ script. - - See `Interactive Python API (Beta)`_ for details. - -An overview of this workflow is shown below. - -.. figure:: ../../source/openfl/director_workflow.svg - -.. centered:: Overview of the Director-Based Workflow - - -.. # Copyright (C) 2020-2023 Intel Corporation -.. # SPDX-License-Identifier: Apache-2.0 - - -.. _establishing_federation_director: - -Director Manager: Set Up the Director ------------------------------------- - -The *Director manager* sets up the *Director*, which is the central node of the federation. - - - :ref:`plan_agreement_director` - - :ref:`optional_step_create_pki_using_step_ca` - - :ref:`step0_install_director_prerequisites` - - :ref:`step1_start_the_director` - -.. _plan_agreement_director: - -OPTIONAL STEP: Director's Plan Agreement -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In order to carry out a secure federation, the Director must approve the FL Plan before starting the experiment. This check can be enforced with the setting :code:`review_experiment: True` in the Director config. Refer to the **director_config_review_exp.yaml** file under the **PyTorch_Histology** interactive API example. -After the Director approves the experiment, it starts the aggregator and sends the experiment archive to all the participating Envoys for review.
-On the other hand, if the Director rejects the experiment, it is aborted right away: no aggregator is started, and the Envoys do not receive the experiment archive at all. - -.. _optional_step_create_pki_using_step_ca: - -OPTIONAL STEP: Create PKI Certificates Using Step-CA -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The use of mutual Transport Layer Security (mTLS) is recommended for deployments in untrusted environments to establish participant identity and to encrypt communication. You may either import certificates provided by your organization or generate certificates with the :ref:`semi-automatic PKI ` provided by OpenFL. - -.. _step0_install_director_prerequisites: - -STEP 1: Install Open Federated Learning (OpenFL) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Install OpenFL in a virtual Python\*\ environment. See :ref:`installation` for details. - -.. _step1_start_the_director: - -STEP 2: Start the Director -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Start the Director on a node with at least two open ports. See :ref:`openfl_ll_components` to learn more about the Director entity. - -1. Create a Director workspace with a default config file. - - .. code-block:: shell - - $ fx director create-workspace -p path/to/director_workspace_dir - - This workspace will contain received experiments and supplementary files (Director config file and certificates). - -2. Modify the Director config file according to your federation setup. - - The default config file contains the Director node FQDN, an open port, the path to certificates, and the :code:`sample_shape` and :code:`target_shape` fields with a string representation of the unified data interface of the federation. - -3. Start the Director. - - If mTLS protection is not set up, run this command. - - .. code-block:: shell - - $ fx director start --disable-tls -c director_config.yaml - - If you have a federation with PKI certificates, run this command. - - .. code-block:: shell - - $ fx director start -c director_config.yaml \ - -rc cert/root_ca.crt \ - -pk cert/priv.key \ - -oc cert/open.crt - - - -.. _establishing_federation_envoy: - -Collaborator Manager: Set Up the Envoy -------------------------------------- - -The *Collaborator manager* sets up the *Envoys*, which are long-lived components on collaborator nodes. When started, Envoys will try to connect to the Director. Envoys receive an experiment archive and provide access to local data. - - - :ref:`plan_agreement_envoy` - - :ref:`optional_step_sign_pki_envoy` - - :ref:`step0_install_envoy_prerequisites` - - :ref:`step1_start_the_envoy` - -.. _plan_agreement_envoy: - -OPTIONAL STEP: Envoy's Plan Agreement -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In order to carry out a secure federation, each of the Envoys must approve the experiment before it is started, after the Director's approval. This check can be enforced with the parameter :code:`review_experiment: True` in the Envoy config. Refer to the **envoy_config_review_exp.yaml** file under the **PyTorch_Histology** interactive API example. -If any of the Envoys rejects the experiment, a :code:`set_experiment_failed` request is sent to the Director to stop the aggregator. - -.. _optional_step_sign_pki_envoy: - -OPTIONAL STEP: Sign PKI Certificates -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The use of mTLS is recommended for deployments in untrusted environments to establish participant identity and to encrypt communication.
You may either import certificates provided by your organization or use the :ref:`semi-automatic PKI certificate ` provided by OpenFL. - - -.. _step0_install_envoy_prerequisites: - -STEP 1: Install OpenFL -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Install OpenFL in a Python\*\ virtual environment. See :ref:`installation` for details. - - -.. _step1_start_the_envoy: - -STEP 2: Start the Envoy -^^^^^^^^^^^^^^^^^^^^^^^ - -1. Create an Envoy workspace with a default config file and shard descriptor Python\*\ script. - - .. code-block:: shell - - $ fx envoy create-workspace -p path/to/envoy_workspace_dir - -2. Modify the Envoy config file and local shard descriptor template. - - - Provide the settings field with the arbitrary settings required to initialize the shard descriptor. - - Complete the shard descriptor template field with the address of the local shard descriptor class. - - .. note:: - The shard descriptor is an object that provides a unified data interface for FL experiments. - The shard descriptor implements the :code:`get_dataset()` method as well as several additional - methods to access **sample shape**, **target shape**, and **shard description** that may be used to identify - participants during experiment definition and execution. - - The :code:`get_dataset()` method accepts a dataset type (for instance train, validation, query, gallery) and returns - an iterable object with samples and targets. - - A user's implementation of ShardDescriptor should inherit from :code:`openfl.interface.interactive_api.shard_descriptor.ShardDescriptor`. It should implement the :code:`get_dataset`, :code:`sample_shape` and :code:`target_shape` methods to describe the way data samples and labels will be loaded from disk during training. - -3. Start the Envoy. - - If mTLS protection is not set up, run this command. - - .. code-block:: shell - - $ ENVOY_NAME=envoy_example_name - - $ fx envoy start \ - -n "$ENVOY_NAME" \ - --disable-tls \ - --envoy-config-path envoy_config.yaml \ - -dh director_fqdn \ - -dp port - - If you have a federation with PKI certificates, run this command. - - .. code-block:: shell - - $ ENVOY_NAME=envoy_example_name - - $ fx envoy start \ - -n "$ENVOY_NAME" \ - --envoy-config-path envoy_config.yaml \ - -dh director_fqdn \ - -dp port \ - -rc cert/root_ca.crt \ - -pk cert/"$ENVOY_NAME".key \ - -oc cert/"$ENVOY_NAME".crt - - -.. _establishing_federation_experiment_manager: - -Experiment Manager: Describe an Experiment ------------------------------------------- - -The process of defining an experiment is decoupled from the process of establishing a federation. -The Experiment manager (or data scientist) is able to prepare an experiment in a Python environment. -Then the Experiment manager registers experiments into the federation using the `Interactive Python API (Beta)`_, -which communicates with the Director through a gRPC client. - - -.. _interactive_python_api: - -Interactive Python API (Beta) ----------------------------- - -The Open Federated Learning (OpenFL) interactive Python API enables the Experiment manager (data scientist) to define and start a federated learning experiment from a single entry point: a Jupyter\*\ notebook or a Python script. - - - `Prerequisites`_ - - `Define a Federated Learning Experiment`_ - - `Federation API`_ - - `Experiment API`_ - - `Start an FL Experiment`_ - - -..
_prerequisites: - -Prerequisites -^^^^^^^^^^^^^ - -The Experiment manager requires the following: - -Python Interpreter - Create a virtual Python environment with packages required for conducting the experiment. The Python environment is replicated on collaborator nodes. - -A Local Experiment Workspace - Initialize a workspace by creating an empty directory and placing inside the workspace a Jupyter\*\ notebook or a Python script. - - Items in the workspace may include: - - - source code of objects imported into the notebook from local modules - - local test data stored in a **data** directory - - certificates stored in a **cert** directory - - .. note:: - - This workspace will be archived and transferred to collaborator nodes. Ensure only relevant source code or resources are stored in the workspace. - **data** and **cert** directories will not be included in the archive. - - -.. _federation_api_define_fl_experiment: - -Define a Federated Learning Experiment -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The definition process of a federated learning experiment uses the interactive Python API to set up several interface entities and experiment parameters. - -The following interactive Python API entities are used to define an experiment: - - - `Federation API`_ - - `Experiment API`_ - - `Start an FL Experiment`_ - - `Observe the Experiment Execution`_ - -.. note:: - Each federation is bound to a particular machine learning problem, in the sense that all collaborators' dataset shards should allow solving the same data science problem. - For example, object detection and semantic segmentation problems should be solved in different federations. - - -.. _federation_api: - -Federation API -"""""""""""""" - -The *Federation* entity is designed to be a bridge between a notebook and the *Director*. - - -1. Import the Federation class from the openfl package - - .. code-block:: python - - from openfl.interface.interactive_api.federation import Federation - - -2. Initialize the Federation object with the Director node network address and encryption settings. - - .. code-block:: python - - federation = Federation( - client_id: str, director_node_fqdn: str, director_port: str, - tls: bool, cert_chain: str, api_cert: str, api_private_key: str) - - .. note:: - You may disable mTLS in trusted environments or enable mTLS by providing paths to the certificate chain of the API authority, aggregator certificate, and a private key. - - -.. note:: - Methods available in the Federation API: - - - :code:`get_dummy_shard_descriptor`: creates a dummy shard descriptor for debugging the experiment pipeline - - :code:`get_shard_registry`: returns information about the Envoys connected to the Director and their shard descriptors - -.. _experiment_api: - -Experiment API -"""""""""""""" - -The *Experiment* entity registers training-related objects, federated learning (FL) tasks, and settings. - -1. Import the FLExperiment class from the openfl package - - .. code-block:: python - - from openfl.interface.interactive_api.experiment import FLExperiment - -2. Initialize the experiment with the following parameters: a federation object and a unique experiment name. - - .. code-block:: python - - fl_experiment = FLExperiment(federation: Federation, experiment_name: str) - -3. Import these supplementary interface classes: :code:`TaskInterface`, :code:`DataInterface`, and :code:`ModelInterface`. - - .. code-block:: python - - from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface - - -..
_experiment_api_modelinterface: - -Register the Model and Optimizer ( :code:`ModelInterface` ) - -Instantiate and initialize a model and optimizer in your preferred deep learning framework. - - .. code-block:: python - - from openfl.interface.interactive_api.experiment import ModelInterface - MI = ModelInterface(model, optimizer, framework_plugin: str) - -The initialized model and optimizer objects should be passed to the :code:`ModelInterface` along with the path to the correct Framework Adapter plugin inside the OpenFL package -or the local workspace. - -.. note:: - The OpenFL interactive API supports *TensorFlow* and *PyTorch* frameworks via existing plugins. - Users can add support for other deep learning frameworks via the plugin interface by pointing to their own implementation of a :code:`framework_plugin` in :code:`ModelInterface`. - - -.. _experiment_api_taskinterface: - -Register FL Tasks ( :code:`TaskInterface` ) - -An FL task accepts the following objects: - - - :code:`model` - will be rebuilt with relevant weights for every task by `TaskRunner` - - :code:`data_loader` - data loader that will provide local data - - :code:`device` - a device to be used for execution on collaborator machines - - :code:`optimizer` (optional) - model optimizer; only for training tasks - -Register an FL task and accompanying information. - - .. code-block:: python - - TI = TaskInterface() - - task_settings = { - 'batch_size': 32, - 'some_arg': 228, - } - @TI.add_kwargs(**task_settings) - @TI.register_fl_task(model='my_model', data_loader='train_loader', - device='device', optimizer='my_Adam_opt') - def foo(my_model, train_loader, my_Adam_opt, device, batch_size, some_arg=356): - # training or validation logic - ... - -FL tasks return a dictionary object with metrics: :code:`{metric name: metric value for this task}`. - -.. note:: - The OpenFL interactive API currently allows registering only standalone functions defined in the main module or imported from other modules inside the workspace. - - The :code:`TaskInterface` class must be instantiated before you can use its methods to register FL tasks. - - - :code:`@TI.register_fl_task()` needs the task argument names for :code:`model`, :code:`data_loader`, :code:`device`, and :code:`optimizer` (optional) that constitute a *task contract*. This method adds the callable and the task contract to the task registry. - - :code:`@TI.add_kwargs()` should be used to set up arguments that are not included in the contract. - - -.. _experiment_api_datainterface: - -Register Federated Data Loader ( :code:`DataInterface` ) - -A *shard descriptor* defines how to read and format the local data, while the *data loader* contains the batching and augmentation logic, which is common to all collaborators. - -Subclass :code:`DataInterface` and implement the following methods. - - .. code-block:: python - - class CustomDataLoader(DataInterface): - def __init__(self, **kwargs): - # Initialize superclass with kwargs: this dict will be passed - # to get_data_loader methods - super().__init__(**kwargs) - # Set up augmentation, save required parameters, - # use it as your regular dataset class - validation_fraction = kwargs.get('validation_fraction', 0.5) - ...
- - @property - def shard_descriptor(self): - return self._shard_descriptor - - @shard_descriptor.setter - def shard_descriptor(self, shard_descriptor): - self._shard_descriptor = shard_descriptor - # You can implement data splitting logic here - # Or update your dataset according to local Shard Descriptor attributes if required - - def get_train_loader(self, **kwargs): - # these are the same kwargs you provided to __init__, - # but passed on a collaborator machine - bs = kwargs.get('train_batch_size', 32) - return foo_loader() - - # so on, see the full list of methods below - - -The following are shard descriptor setter and getter methods: - - - :code:`shard_descriptor(self, shard_descriptor)` is called during the *Collaborator* initialization procedure with the local shard descriptor. Include in this method any logic that is triggered with the shard descriptor replacement. - - :code:`get_train_loader(self, **kwargs)` is called before the execution of training tasks. This method returns the train loader, which is passed to the task via the :code:`data_loader` contract argument. The :code:`kwargs` dict contains the same information that was provided during the :code:`DataInterface` initialization. - - :code:`get_valid_loader(self, **kwargs)` is called before the execution of validation tasks. This method returns the validation loader, which is passed to the task via the :code:`data_loader` contract argument. The :code:`kwargs` dict contains the same information that was provided during the :code:`DataInterface` initialization. - - :code:`get_train_data_size(self)` returns the number of samples in the local dataset for training. Use the information provided by the shard descriptor to determine the split between training and validation data. - - :code:`get_valid_data_size(self)` returns the number of samples in the local dataset for validation. - - -.. note:: - - - The *User Dataset* class should be instantiated and passed on to the *Experiment* object. - - A dummy *shard descriptor* (or a custom local one) may be set up to test the augmentation or batching pipeline. - - Keyword arguments used during initialization on the frontend node may be used during dataloader construction on collaborator machines. - - - -.. _federation_api_start_fl_experiment: - -Start an FL Experiment -^^^^^^^^^^^^^^^^^^^^^^ - -Use the Experiment API to prepare a workspace archive to transfer to the *Director*. - - .. code-block:: python - - FLExperiment.start() - - .. note:: - Instances of interface classes :code:`(TaskInterface, DataInterface, ModelInterface)` must be passed to the :code:`FLExperiment.start()` method along with other parameters. - - This method: - - - Compiles all provided settings to a Plan object. The Plan is the central place where all actors in the federation look up their parameters. - - Saves **plan.yaml** to the :code:`plan` folder inside the workspace. - - Serializes interface objects to disk. - - Prepares **requirements.txt** for remote Python environment setup. - - Compresses the whole workspace to an archive. - - Sends the experiment archive to the *Director* so it may distribute the archive across the federation and start the *Aggregator*. - -FLExperiment :code:`start()` Method Parameters -"""""""""""""""""""""""""""""""""""""""""""""" - -The following are parameters of the :code:`start()` method in FLExperiment: - -:code:`model_provider` - This parameter is defined earlier by the :code:`ModelInterface` object.
- -:code:`task_keeper` - This parameter is defined earlier by the :code:`TaskInterface` object. - -:code:`data_loader` - This parameter is defined earlier by the :code:`DataInterface` object. - -:code:`task_assigner` - This parameter is optional. You can pass a `Custom task assigner function`_. - -:code:`rounds_to_train` - This parameter defines the number of aggregation rounds to be conducted before the experiment is considered finished. - -:code:`delta_updates` - This parameter sets up the aggregation to use calculated gradients instead of model checkpoints. - -:code:`opt_treatment` - This parameter defines the optimizer state treatment in the federation. The following are available values: - - - **RESET**: the optimizer state is initialized each round from noise - - **CONTINUE_LOCAL**: the optimizer state will be reused locally by every collaborator - - **CONTINUE_GLOBAL**: the optimizer's state will be aggregated - -:code:`device_assignment_policy` - The following are available values: - - - **CPU_ONLY**: the :code:`device` parameter (which is a part of a task contract) that is passed to an FL task each round will be **cpu** - - **CUDA_PREFERRED**: the :code:`device` parameter will be **cuda:{index}** if CUDA devices are enabled in the Envoy config and **cpu** otherwise. - - -.. _federation_api_observe_fl_experiment: - -Observe the Experiment Execution -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If the experiment was accepted by the *Director*, you can oversee its execution with the :code:`FLExperiment.stream_metrics()` method. This method prints metrics from the FL tasks (and saves TensorBoard logs). - -.. _federation_api_get_fl_experiment_status: - -Get Experiment Status -^^^^^^^^^^^^^^^^^^^^^ - -You can get the current experiment status with the :code:`FLExperiment.get_experiment_status()` method. The status can be pending, in progress, finished, rejected, or failed. - -.. _federation_api_complete_fl_experiment: - -Complete the Experiment -^^^^^^^^^^^^^^^^^^^^^^^ - -When the experiment has completed: - - - retrieve trained models in the native format using :code:`FLExperiment.get_best_model()` and :code:`FLExperiment.get_last_model()`. - - erase experiment artifacts from the Director with :code:`FLExperiment.remove_experiment_data()`. - - -You may use the same federation object to register another experiment or even schedule several experiments that will be executed in series. - -Custom task assigner function -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -OpenFL has an entity named Task Assigner, which is responsible for assigning aggregator tasks to collaborators. -There are three default tasks that are used: :code:`train`, :code:`locally_tuned_model_validate`, -:code:`aggregated_model_validate`. -When you register a train function and pass an optimizer, a train task is generated: - - .. code-block:: python - - task_keeper = TaskInterface() - - - @task_keeper.register_fl_task(model='net_model', data_loader='train_loader', - device='device', optimizer='optimizer') - def train(net_model, train_loader, optimizer, device, loss_fn=cross_entropy, some_parameter=None): - torch.manual_seed(0) - ... - -When you register a validate function, it generates two tasks: :code:`locally_tuned_model_validate` and -:code:`aggregated_model_validate`. -:code:`locally_tuned_model_validate` is applied by a collaborator to the locally trained model, and -:code:`aggregated_model_validate` to the globally aggregated model. -If there is no train task, only :code:`aggregated_model_validate` is generated.
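For example, a validation function registered without an optimizer in its contract produces both validation tasks from a single callable. Below is a minimal sketch in the style of the train example above; the PyTorch-flavored body and the metric name are illustrative assumptions, not part of the original reference:

 .. code-block:: python

    import torch

    @task_keeper.register_fl_task(model='net_model', data_loader='val_loader',
                                  device='device')
    def validate(net_model, val_loader, device):
        net_model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for samples, targets in val_loader:
                outputs = net_model(samples.to(device))
                predictions = outputs.argmax(dim=1)
                correct += (predictions == targets.to(device)).sum().item()
                total += targets.shape[0]
        # Registered tasks report metrics as {metric name: metric value}
        return {'acc': correct / total}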
- -Since version 1.3, it is possible to create a custom task assigner function implementing your own task assignment logic. -You can get the registered tasks from :code:`task_keeper` by calling the :code:`get_registered_tasks` method: - - .. code-block:: python - - tasks = task_keeper.get_registered_tasks() - - -Then implement your own assigner function: - - .. code-block:: python - - def random_assigner(collaborators, round_number, **kwargs): - """Assigning task groups randomly while ensuring target distribution""" - import random - random.shuffle(collaborators) - collaborator_task_map = {} - for idx, col in enumerate(collaborators): - # select 70% of collaborators for training and validation; the remaining 30% only validate the aggregated model - if (idx+1)/len(collaborators) <= 0.7: - collaborator_task_map[col] = tasks.values() # all three tasks - else: - collaborator_task_map[col] = [tasks['aggregated_model_validate']] - return collaborator_task_map - -Then pass that function to the fl_experiment :code:`start` method: - .. code-block:: python - - fl_experiment.start( - model_provider=model_interface, - task_keeper=task_keeper, - data_loader=fed_dataset, - task_assigner=random_assigner, - rounds_to_train=50, - opt_treatment='CONTINUE_GLOBAL', - device_assignment_policy='CUDA_PREFERRED' - ) - - -Tasks will then be assigned to collaborators using this function. - -Another example: -if you only want to exclude some collaborators from the experiment, you can define the following assigner function: - - .. code-block:: python - - def filter_assigner(collaborators, round_number, **kwargs): - collaborator_task_map = {} - exclude_collaborators = ['env_two', 'env_three'] - for collaborator_name in collaborators: - if collaborator_name in exclude_collaborators: - continue - collaborator_task_map[collaborator_name] = [ - tasks['train'], - tasks['locally_tuned_model_validate'], - tasks['aggregated_model_validate'] - ] - return collaborator_task_map - - -You can also use static shard information to exclude collaborators without CUDA devices from training: - - .. code-block:: python - - shard_registry = federation.get_shard_registry() - def filter_by_shard_registry_assigner(collaborators, round_number, **kwargs): - collaborator_task_map = {} - for collaborator in collaborators: - col_status = shard_registry.get(collaborator) - if not col_status or not col_status['is_online']: - continue - node_info = col_status['shard_info'].node_info - # Assign train task if collaborator has GPU with total memory more than 8 GB - if len(node_info.cuda_devices) > 0 and node_info.cuda_devices[0].memory_total > 8 * 1024**3: - collaborator_task_map[collaborator] = [ - tasks['train'], - tasks['locally_tuned_model_validate'], - tasks['aggregated_model_validate'], - ] - else: - collaborator_task_map[collaborator] = [ - tasks['aggregated_model_validate'], - ] - return collaborator_task_map - - -An assigner with an additional validation round: - - .. code-block:: python - - rounds_to_train = 3 - total_rounds = rounds_to_train + 1 # use fl_experiment.start(..., rounds_to_train=total_rounds,...) - - def assigner_with_last_round_validation(collaborators, round_number, **kwargs): - collaborator_task_map = {} - for collaborator in collaborators: - if round_number == total_rounds - 1: - collaborator_task_map[collaborator] = [ - tasks['aggregated_model_validate'], - ] - else: - collaborator_task_map[collaborator] = [ - tasks['train'], - tasks['locally_tuned_model_validate'], - tasks['aggregated_model_validate'] - ] - return collaborator_task_map - - -.. toctree -..
overview.how_can_intel_protect_federated_learning -.. overview.what_is_intel_federated_learning diff --git a/docs/about/features_index/taskrunner.rst b/docs/about/features_index/taskrunner.rst index 9deaa18add..86caeb59c0 100644 --- a/docs/about/features_index/taskrunner.rst +++ b/docs/about/features_index/taskrunner.rst @@ -108,10 +108,6 @@ Each task subsection contains the following: .. _running_the_federation_manual: -.. _interactive_api (Deprecated): - - - Bare Metal Approach ------------------- diff --git a/docs/about/features_index/workflowinterface.rst b/docs/about/features_index/workflowinterface.rst index 8ef440f4ce..4f567b4342 100644 --- a/docs/about/features_index/workflowinterface.rst +++ b/docs/about/features_index/workflowinterface.rst @@ -117,7 +117,7 @@ Prior interfaces in OpenFL support the standard horizontal FL training workflow: 4. The collaborator performs validation with their local validation dataset on their locally trained model, and sends their validation metrics to the aggregator (locally_tuned_model_validation task) 5. The aggregator applies an aggregation function (weighted average, FedCurv, FedProx, etc.) to the model weights, and reports the aggregate metrics. -The Task Assigner determines the list of collaborator tasks to be performed, and both in the task runner API as well as the interactive API these tasks can be modified (to varying degrees). For example, to perform federated evaluation of a model, only the aggregated_model_validation task would be selected for the assigner's block of the federated plan. Equivalently for the interactive API, this can be done by only registering a single validation task. But there are many other types of workflows that can't be easily represented purely by training / validation tasks performed on a collaborator with a single model. An example is training a Federated Generative Adversarial Network (GAN); because this may be represented by separate generative and discriminator models, and could leak information about a collaborator dataset, the interface we provide should allow for better control over what gets sent over the network and how. Another common request we get is for validation with an aggregator's dataset after training. Prior to OpenFL 1.5, there has not a great way to support this in OpenFL. +The Task Assigner determines the list of collaborator tasks to be performed, and these tasks can be modified to varying degrees. For example, to perform federated evaluation of a model, only the aggregated_model_validation task would be selected for the assigner's block of the federated plan. But there are many other types of workflows that can't be easily represented purely by training/validation tasks performed on a collaborator with a single model. An example is training a Federated Generative Adversarial Network (GAN); because this may be represented by separate generative and discriminator models, and could leak information about a collaborator dataset, the interface we provide should allow for better control over what gets sent over the network and how. Another common request we get is for validation with an aggregator's dataset after training. Prior to OpenFL 1.5, there has not been a great way to support this in OpenFL. 
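As a rough sketch of the shape this flexibility takes (a minimal flow under stated assumptions: the import paths follow the experimental workflow package and may differ between OpenFL versions, and the step bodies are placeholders), a flow can place a post-training validation step directly on the aggregator:

.. code-block:: python

    from openfl.experimental.workflow.interface import FLSpec
    from openfl.experimental.workflow.placement import aggregator, collaborator

    class FederatedValidationFlow(FLSpec):

        @aggregator
        def start(self):
            # Fan training out to every collaborator in the federation.
            self.next(self.train, foreach='collaborators')

        @collaborator
        def train(self):
            # Local training on collaborator-private data happens here.
            self.next(self.join)

        @aggregator
        def join(self, inputs):
            # Aggregate collaborator updates, then validate the merged model
            # with the aggregator's own dataset, the use case noted above.
            self.next(self.end)

        @aggregator
        def end(self):
            pass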
Goals ===== diff --git a/docs/developer_guide/advanced_topics/overriding_agg_fn.rst b/docs/developer_guide/advanced_topics/overriding_agg_fn.rst index 253a437bf6..2befc76a6e 100644 --- a/docs/developer_guide/advanced_topics/overriding_agg_fn.rst +++ b/docs/developer_guide/advanced_topics/overriding_agg_fn.rst @@ -183,34 +183,6 @@ The following is an example of a **plan.yaml** with a modified aggregation funct - loss -Interactive API (Deprecated) -============================ -You can override the aggregation function that will be used for the task this function corresponds to. -To do this, call the ``set_aggregation_function`` decorator method of ``TaskInterface`` and pass an ``AggregationFunction`` subclass instance as a parameter. -For example, you can try: - -.. code-block:: python - - from openfl.interface.aggregation_functions import Median - TI = TaskInterface() - agg_fn = Median() - @TI.register_fl_task(model='model', data_loader='train_loader', \ - device='device', optimizer='optimizer') - @TI.set_aggregation_function(agg_fn) - def train(model, train_loader, optimizer, device): - # training logic for the task goes here - ... - -.. warning:: - All tasks with the same type of aggregation use the same class instance. - If an ``AggregationFunction`` implementation has its own state, then this state will be shared across tasks. - - -``AggregationFunction`` requires a single ``call`` function. -This function receives tensors for a single parameter from multiple collaborators with additional metadata (see definition of :meth:`openfl.interface.aggregation_functions.core.AggregationFunction.call`) and returns a single tensor that represents the result of aggregation. - - -.. note:: - See the `definition `_ of :class:`openfl.interface.aggregation_functions.core.AggregationFunction.call` for details. - - Example of a Custom Aggregation Function ======================================== diff --git a/docs/developer_guide/structure/communication.rst b/docs/developer_guide/structure/communication.rst deleted file mode 100644 index d189a1cfb0..0000000000 --- a/docs/developer_guide/structure/communication.rst +++ /dev/null @@ -1,37 +0,0 @@ -.. # Copyright (C) 2020-2023 Intel Corporation -.. # SPDX-License-Identifier: Apache-2.0 - -.. _director_communications: - -*************************************** -Director Service Communication Diagrams -*************************************** - -The following diagrams depict existing procedure calls to the Director service. Included are interactions with the Director's inner representations to better understand their signatures. - -Director-Envoy Communication -============================ - -The following diagram depicts a typical process of establishing a Federation and registering an experiment. - -.. mermaid:: ../../mermaid/director_envoy.mmd - :caption: Basic Scenario of Director-Envoy Communication - :align: center - -Director Side Envoy Representation and Related Remote Procedure Calls -===================================================================== - -This diagram shows possible interactions with Envoy handles on the Director side. - -.. mermaid:: ../../mermaid/envoy_representation_and_RPCs.mmd - :caption: Communications Altering or Requesting Envoy-Related Information - :align: center - -Director Side Experiment Representation and Related Remote Procedure Calls -========================================================================== - -This diagram shows possible interactions with Experiment handles on the Director side. - -..
mermaid:: ../../mermaid/experiment_representation_and_RPCs.mmd - :caption: Communications Altering or Requesting Experiment-Related Information - :align: center diff --git a/docs/developer_guide/structure/components.rst b/docs/developer_guide/structure/components.rst index cc59e95949..f04376b3a4 100644 --- a/docs/developer_guide/structure/components.rst +++ b/docs/developer_guide/structure/components.rst @@ -10,8 +10,6 @@ Core Components Open Federated Learning (OpenFL) has the following components: - :ref:`openfl_short_lived_components` - - :ref:`openfl_ll_components` - .. _openfl_short_lived_components: @@ -60,54 +58,3 @@ Converting tensor objects is handled by :ref:`framework adapter ` (Deprecated). - - - The *Director* is the central node of the federation. This component starts an *Aggregator* for each experiment, broadcasts the experiment archive to connected collaborator nodes, and provides updates on the status. - - The *Envoy* runs on collaborator nodes and is always connected to the *Director*. When the *Director* starts an experiment, the *Envoy* starts the *Collaborator* to train the global model. - -These components stay available to distribute several experiments in the federation. - -.. _openfl_ll_components_director: - -Director -------- - -The Director is a long-lived entity and is the central node of the federation. It accepts connections from: - - - Frontend clients (data scientists using :ref:`interactive_python_api`) (Deprecated) - - Envoys, if their Shard Descriptors are compliant with the same data interface - -The Director supports concurrent frontend connections. -While the Director may take in several experiments, the experiments are executed in series. - -When an experiment is reported, the Director starts an Aggregator and sends the experiment data to the involved Envoys. -While an experiment is running, the Director oversees the Aggregator and delivers updates on the status of -the experiment, which includes trained model snapshots and metrics by request. - - -.. _openfl_ll_components_envoy: - -Envoy ----- - -The Envoy is a long-lived entity that runs on collaborator nodes connected to the Director. - -Every Envoy is matched to one `shard descriptor `_ -in order to run. When the Director starts an experiment, the Envoy accepts the experiment workspace, -prepares the environment, and starts a Collaborator. (Note: this approach is deprecated.) - -The Envoy is also responsible for sending heartbeat messages to the Director. These messages may also include information -regarding collaborator machine resource utilization. Refer to :ref:`device monitor plugin ` for details. - - -Static Diagram -============== - -.. figure:: ../../images/director_workflow.svg diff --git a/docs/get_started/examples.rst b/docs/get_started/examples.rst index dcb172d233..6d51e42fe6 100644 --- a/docs/get_started/examples.rst +++ b/docs/get_started/examples.rst @@ -24,33 +24,6 @@ See :ref:`running_the_task_runner` :ref:`running_the_task_runner` -------------------------- -Python Native API (Deprecated) -------------------------- -Intended for quick simulation purposes - -See :ref:`python_native_pytorch_mnist` - -.. toctree:: - :hidden: - :maxdepth: 1 - - examples/python_native_pytorch_mnist - - ----------------------------- -Interactive API (Deprecated) ----------------------------- -Set up long-lived components to run many experiments - -See :ref:`interactive_tensorflow_mnist` - -..
toctree:: - :hidden: - :maxdepth: 1 - - examples/interactive_tensorflow_mnist - --------------------------------- Workflow Interface (Experimental) --------------------------------- diff --git a/docs/get_started/examples/interactive_tensorflow_mnist.rst b/docs/get_started/examples/interactive_tensorflow_mnist.rst deleted file mode 100644 index 191b4c6aca..0000000000 --- a/docs/get_started/examples/interactive_tensorflow_mnist.rst +++ /dev/null @@ -1,374 +0,0 @@ -.. # Copyright (C) 2020-2023 Intel Corporation -.. # SPDX-License-Identifier: Apache-2.0 - -.. _interactive_tensorflow_mnist: - -Interactive API (Deprecated): MNIST Classification Tutorial =========================================================== - -In this tutorial, we will set up a federation and train a basic TensorFlow model on the MNIST dataset using the interactive API. -See `full tutorial `_. - -**About the dataset** - -It is a dataset of 60,000 small square 28x28 pixel grayscale images of handwritten single digits -between 0 and 9. More info at `wiki `_. - -.. note:: - - This tutorial will be run without TLS and will be done locally as a simulation - ------------------------------------ -Step 0: Installation ------------------------------------ -- If you haven't done so already, create a virtual environment, upgrade pip and install OpenFL (See :ref:`installation`) - ------------------------------------ -Step 1: Set up environment ------------------------------------ -Split your terminal into three (one for the director, one for the envoy, and one for the experiment) and activate the virtual environment created in Step 0 - -.. code-block:: shell - - $ source venv/bin/activate - -Clone the OpenFL repository: - -.. code-block:: shell - - $ git clone https://github.com/securefederatedai/openfl.git - - -Navigate to the tutorial: - -.. code-block:: shell - - $ cd openfl/openfl-tutorials/interactive_api/Tensorflow_MNIST - ------------------------------------ -Step 2: Setting up Director ------------------------------------ -In the first terminal, run the director: - -.. code-block:: shell - - $ cd director - $ ./start_director.sh - ------------------------------------ -Step 3: Setting up Envoy ------------------------------------ -In the second terminal, run the envoy: - -.. code-block:: shell - - $ cd envoy - $ ./start_envoy.sh env_one envoy_config_one.yaml - -Optional: Run a second envoy in an additional terminal: - -- Ensure steps 0 and 1 are complete for this terminal as well. - -- Run the second envoy: - -.. code-block:: shell - - $ cd envoy - $ ./start_envoy.sh env_two envoy_config_two.yaml - ------------------------------------ -Step 4: Run the federation ------------------------------------ -In the third terminal (or fourth terminal, if you chose to run two envoys) run the `Tensorflow_MNIST.ipynb` Jupyter Notebook: - -.. code-block:: shell - - $ cd workspace - $ jupyter lab Tensorflow_MNIST.ipynb - - -**Notebook walkthrough:** - -Contents of this notebook can be found `here `_. - -Install additional dependencies if not already installed - -.. code-block:: shell - - $ pip install tensorflow==2.8 - -Import: - -.. code-block:: python - - import tensorflow as tf - print('TensorFlow', tf.__version__) - -Connect to the Federation - -Be sure to start the Director and Envoy (Steps 2 and 3) before proceeding with this cell. - -This cell connects this notebook to the Federation. -..
code-block:: python - - from openfl.interface.interactive_api.federation import Federation - - # please use the same identifier that was used in the signed certificate - client_id = 'api' - cert_dir = 'cert' - director_node_fqdn = 'localhost' - director_port = 50051 - - # Run with TLS disabled (trusted environment) - - # Create a Federation - federation = Federation( - client_id=client_id, - director_node_fqdn=director_node_fqdn, - director_port=director_port, - tls=False - ) - -Query Datasets from Shard Registry - -.. code-block:: python - - shard_registry = federation.get_shard_registry() - shard_registry - -.. code-block:: python - - # First, request a dummy_shard_desc that holds information about the federated dataset - dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10) - dummy_shard_dataset = dummy_shard_desc.get_dataset('train') - sample, target = dummy_shard_dataset[0] - f"Sample shape: {sample.shape}, target shape: {target.shape}" - -Describe the FL experiment - -.. code-block:: python - - from openfl.interface.interactive_api.experiment import TaskInterface - from openfl.interface.interactive_api.experiment import ModelInterface - from openfl.interface.interactive_api.experiment import FLExperiment - -Register model - -.. code-block:: python - - # Define model - model = tf.keras.Sequential([ - tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), - tf.keras.layers.MaxPooling2D((2, 2)), - tf.keras.layers.BatchNormalization(), - tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)), - tf.keras.layers.MaxPooling2D((2, 2)), - tf.keras.layers.BatchNormalization(), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(10, activation=None), - ], name='simplecnn') - model.summary() - - # Define optimizer - optimizer = tf.optimizers.Adam(learning_rate=1e-3) - - # Loss and metrics. These will be used later. - loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) - train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy() - val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy() - - # Create ModelInterface - framework_adapter = 'openfl.plugins.frameworks_adapters.keras_adapter.FrameworkAdapterPlugin' - MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter) - -Register dataset - -.. code-block:: python - - import numpy as np - from tensorflow.keras.utils import Sequence - - from openfl.interface.interactive_api.experiment import DataInterface - - - class DataGenerator(Sequence): - - def __init__(self, shard_descriptor, batch_size): - self.shard_descriptor = shard_descriptor - self.batch_size = batch_size - self.indices = np.arange(len(shard_descriptor)) - self.on_epoch_end() - - def __len__(self): - return len(self.indices) // self.batch_size - - def __getitem__(self, index): - index = self.indices[index * self.batch_size:(index + 1) * self.batch_size] - batch = [self.indices[k] for k in index] - - X, y = self.shard_descriptor[batch] - return X, y - - def on_epoch_end(self): - np.random.shuffle(self.indices) - - - class MnistFedDataset(DataInterface): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - - @property - def shard_descriptor(self): - return self._shard_descriptor - - @shard_descriptor.setter - def shard_descriptor(self, shard_descriptor): - """ - Describe per-collaborator procedures or sharding. - - This method will be called during a collaborator initialization. - Local shard_descriptor will be set by Envoy.
- """ - self._shard_descriptor = shard_descriptor - - self.train_set = shard_descriptor.get_dataset('train') - self.valid_set = shard_descriptor.get_dataset('val') - - def __getitem__(self, index): - return self.shard_descriptor[index] - - def __len__(self): - return len(self.shard_descriptor) - - def get_train_loader(self): - """ - Output of this method will be provided to tasks with optimizer in contract - """ - if self.kwargs['train_bs']: - batch_size = self.kwargs['train_bs'] - else: - batch_size = 32 - return DataGenerator(self.train_set, batch_size=batch_size) - - def get_valid_loader(self): - """ - Output of this method will be provided to tasks without optimizer in contract - """ - if self.kwargs['valid_bs']: - batch_size = self.kwargs['valid_bs'] - else: - batch_size = 32 - - return DataGenerator(self.valid_set, batch_size=batch_size) - - def get_train_data_size(self): - """ - Information for aggregation - """ - - return len(self.train_set) - - def get_valid_data_size(self): - """ - Information for aggregation - """ - return len(self.valid_set) - -Create Mnist federated dataset - -.. code-block:: python - - fed_dataset = MnistFedDataset(train_bs=64, valid_bs=512) - -Define and register FL tasks - -.. code-block:: python - - import time - - TI = TaskInterface() - - # from openfl.interface.aggregation_functions import AdagradAdaptiveAggregation # Uncomment this lines to use - # agg_fn = AdagradAdaptiveAggregation(model_interface=MI, learning_rate=0.4) # Adaptive Federated Optimization - # @TI.set_aggregation_function(agg_fn) # alghorithm! - # # See details in the: - # # https://arxiv.org/abs/2003.00295 - - @TI.register_fl_task(model='model', data_loader='train_dataset', device='device', optimizer='optimizer') - def train(model, train_dataset, optimizer, device, loss_fn=loss_fn, warmup=False): - start_time = time.time() - - # Iterate over the batches of the dataset. - for step, (x_batch_train, y_batch_train) in enumerate(train_dataset): - with tf.GradientTape() as tape: - logits = model(x_batch_train, training=True) - loss_value = loss_fn(y_batch_train, logits) - grads = tape.gradient(loss_value, model.trainable_weights) - optimizer.apply_gradients(zip(grads, model.trainable_weights)) - - # Update training metric. - train_acc_metric.update_state(y_batch_train, logits) - - # Log every 200 batches. - if step % 200 == 0: - print( - "Training loss (for one batch) at step %d: %.4f" - % (step, float(loss_value)) - ) - print("Seen so far: %d samples" % ((step + 1) * 64)) - if warmup: - break - - # Display metrics at the end of each epoch. - train_acc = train_acc_metric.result() - print("Training acc over epoch: %.4f" % (float(train_acc),)) - - # Reset training metrics at the end of each epoch - train_acc_metric.reset_states() - - - return {'train_acc': train_acc,} - - - @TI.register_fl_task(model='model', data_loader='val_dataset', device='device') - def validate(model, val_dataset, device): - # Run a validation loop at the end of each epoch. - for x_batch_val, y_batch_val in val_dataset: - val_logits = model(x_batch_val, training=False) - # Update val metrics - val_acc_metric.update_state(y_batch_val, val_logits) - val_acc = val_acc_metric.result() - val_acc_metric.reset_states() - print("Validation acc: %.4f" % (float(val_acc),)) - - return {'validation_accuracy': val_acc,} - -Time to start a federated learning experiment - -.. 
code-block:: python - - # create an experiment in the federation - experiment_name = 'mnist_experiment' - fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name, serializer_plugin='openfl.plugins.interface_serializer.keras_seri - -.. code-block:: python - - # print the default federated learning plan - import openfl.native as fx - print(fx.get_plan(fl_plan=fl_experiment.plan)) - -.. code-block:: python - - # The following command zips the workspace and python requirements to be transferred to collaborator nodes - fl_experiment.start(model_provider=MI, - task_keeper=TI, - data_loader=fed_dataset, - rounds_to_train=5, - opt_treatment='CONTINUE_GLOBAL', - override_config={'aggregator.settings.db_store_rounds': 1, 'compression_pipeline.template': 'openfl.pipelines.KCPip - -.. code-block:: python - - fl_experiment.stream_metrics() diff --git a/docs/openfl.component.rst b/docs/openfl.component.rst index 7098477c8c..df105bf10d 100644 --- a/docs/openfl.component.rst +++ b/docs/openfl.component.rst @@ -13,7 +13,6 @@ aggregator assigner collaborator - director - envoy + straggler_handling_functions .. TODO(MasterSkepticista) Shrink API namespace diff --git a/docs/openfl.interface.rst b/docs/openfl.interface.rst index 1542f7a0df..576806e74f 100644 --- a/docs/openfl.interface.rst +++ b/docs/openfl.interface.rst @@ -15,8 +15,6 @@ cli_helper cli collaborator - director - envoy experimental model pki diff --git a/openfl-tutorials/README.md b/openfl-tutorials/README.md index 4a6db1d00b..1a39c2373c 100644 --- a/openfl-tutorials/README.md +++ b/openfl-tutorials/README.md @@ -1,6 +1,42 @@ -# **Welcome to the OpenFL Tutorials!** -## The **Director Workflow** tutorials can be found in the "interactive_api" folder. All other tutorials are for the **Aggregator Workflow**. +# Tutorials -
+These tutorials cover a range of frameworks, models, and datasets to help you get started with OpenFL. This directory provides notebooks for the [Workflow API](https://openfl.readthedocs.io/en/latest/about/features_index/workflowinterface.html), one of two ways to run Federated Learning experiments in OpenFL. -### The OpenFL tutorials cover a variety of frameworks, models, and datasets making them a great way to get started with OpenFL! +> [!NOTE] +> If you are looking for an enterprise-ready API with support for Trusted Execution Environments (TEEs), refer to the [TaskRunner API](https://openfl.readthedocs.io/en/latest/about/features_index/taskrunner.html), and the corresponding [quickstart](https://openfl.readthedocs.io/en/latest/tutorials/taskrunner.html) guide. + +## Getting Started + +Install OpenFL following the [installation guide](https://openfl.readthedocs.io/en/latest/installation.html) and activate workflow API. + +```bash +fx experimental activate +``` + +### Beginner + +- [MNIST](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/101_MNIST.ipynb): An introductory guide to Workflow Interface for federated learning with a small PyTorch CNN model on the MNIST dataset. +- [Aggregator Validation](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/102_Aggregator_Validation.ipynb): Shows how to perform validation on the aggregator after training using OpenFL Workflow Interface. +- [Cyclic Institutional Incremental Learning](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/103_Cyclic_Institutional_Incremental_Learning.ipynb): Demonstrates cyclic institutional incremental learning using OpenFL Workflow Interface. +- [Keras MNIST with CPU](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/104_Keras_MNIST_with_CPU.ipynb): Trains a CNN on MNIST using Keras on CPU with OpenFL Workflow Interface. +- [Keras MNIST with GPU](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/104_Keras_MNIST_with_GPU.ipynb): Uses Keras to train a CNN on MNIST with GPU support via OpenFL Workflow Interface. +- [MNIST XPU](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/104_MNIST_XPU.ipynb): Trains a CNN on MNIST using Intel Data Center GPU Max Series with OpenFL Workflow Interface. +- [Numpy Linear Regression](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/105_Numpy_Linear_Regression_Workflow.ipynb): Implements linear regression with MSE loss using Numpy and OpenFL Workflow Interface. +- [Scikit Learn Linear Regression](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/106_Scikit_Learn_Linear_Regression_Workflow.ipynb): Trains a linear regression model with Ridge regularization using scikit-learn and OpenFL Workflow Interface. +- [Exclusive GPUs with Ray](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/201_Exclusive_GPUs_with_Ray.ipynb): Utilizes Ray Backend for exclusive GPU allocation in federated learning with OpenFL. +- [MNIST Watermarking](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb): Demonstrates embedding watermark in a DL model trained on MNIST in a federated learning setup. 
+- [FedProx with Synthetic non-IID](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/401_FedProx_with_Synthetic_nonIID.ipynb): Compares the FedProx and FedAvg algorithms on a synthetic non-IID dataset using the OpenFL Workflow Interface. +- [MNIST Aggregator Validation Ray Watermarking](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/402_MNIST_Aggregator_Validation_Ray_Watermarking.ipynb): Combines aggregator validation, watermarking, and multi-GPU training using the Ray Backend in OpenFL. +- [Federated FedProx PyTorch MNIST Workflow Tutorial](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/403_Federated_FedProx_PyTorch_MNIST_Workflow_Tutorial.ipynb): Implements the FedProx algorithm for distributed training on MNIST using PyTorch and the Workflow API. +- [Keras MNIST with FedProx](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/404_Keras_MNIST_with_FedProx.ipynb): Trains a TensorFlow Keras model on MNIST using the OpenFL Workflow Interface with FedProx optimization. +- [Workspace Creation from JupyterNotebook](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb): Demonstrates converting a Jupyter Notebook Federated Learning experiment into an OpenFL workspace. + +### Advanced + +Refer to the respective README files for detailed information on executing these tutorials. + +- [CrowdGuard](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/CrowdGuard): Implementing CrowdGuard to mitigate backdoor attacks in Federated Learning. +- [Federated Runtime](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/FederatedRuntime): Demonstrates the use of FederatedRuntime for taking the Workflow API from simulation to deployment. +- [Differential Privacy](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/Global_DP): Differential Privacy in Federated Learning. +- [LLM Fine-tuning](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/LLM): Fine-tuning a Large Language Model (LLM) using the OpenFL Workflow Interface. +- [Privacy Meter](https://github.com/securefederatedai/openfl/tree/develop/openfl-tutorials/experimental/workflow/Privacy_Meter): Demonstrates integration and use of the ML Privacy Meter library with OpenFL to audit privacy loss in federated learning models. \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/README.md b/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/README.md deleted file mode 100644 index a77ea83073..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/README.md +++ /dev/null @@ -1,106 +0,0 @@ -# Federated FLAX CIFAR-10 CNN Tutorial - -### 1. About dataset - -The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. - -Define the param below in the envoy.yaml config to shard the dataset across participants/envoys: -- rank_worldsize - -### 2. About model - -A simple multi-layer CNN is used, with XLA-compiled and autograd-based parameter updates. -The definition is provided in the notebook. - -### 3. Notebook Overview - -1. 
Class `CustomTrainState` - Subclasses `flax.training.TrainState` - - Variable `opt_vars` to keep track of generic optimizer variables. - - Method `update_state` to update the OpenFL `ModelInterface` registered state with the new_state returned within the `TaskInterface` registered training loop. - -2. Method `create_train_state`: Creates a new `TrainState` by encapsulating model layer definitions, random model parameters, and optax optimizer state. - -3. Method `apply_model` (`@jax.jit` decorated function): It takes a TrainState, images, and labels as parameters. It computes and returns the gradients, loss, and accuracy. These gradients are applied to a given state in the `update_model` method (`@jax.jit` decorated function) and a new TrainState instance is returned. - -### 4. How to run this tutorial (without TLS and locally as a simulation): - -0. Pre-requisites: - - - Nvidia Driver >= 495.29.05 - - CUDA >= 11.1.105 - - cuDNN >= 8 - - Activate a virtual environment (Python 3.8.10) and install packages from requirements.txt - - Set the variable `DEFAULT_DEVICE` to `'CPU'` or `'GPU'` in `start_envoy.sh` and the notebook to control the execution platform. - -```sh -cd Flax_CNN_CIFAR -pip install -r requirements.txt -``` - -1. Run director: - -```sh -cd director -./start_director.sh -``` - -2. Run envoy: - -```sh -cd envoy -./start_envoy.sh "envoy_identifier" envoy_config.yaml -``` - -Optional: start a second envoy: - -- Copy the `envoy` folder to another place and follow the same process as above: - -```sh -./start_envoy.sh "envoy_identifier_2" envoy_config_2.yaml -``` - -3. Run the `FLAX_CIFAR10_CNN.ipynb` Jupyter notebook: - -```sh -cd workspace -jupyter lab FLAX_CIFAR10_CNN.ipynb -``` - -4. Visualization: - -``` -tensorboard --logdir logs/ -``` - - -### 5. Known issues - -1. #### CUDA_ERROR_OUT_OF_MEMORY Exception - JAX XLA pre-allocates 90% of the GPU memory at start - -- Set `XLA_PYTHON_CLIENT_PREALLOCATE=false` to start with a small memory footprint: -``` -%env XLA_PYTHON_CLIENT_PREALLOCATE=false -``` -OR - -- Use the flag below to restrict max GPU allocation to 50%: -``` -%env XLA_PYTHON_CLIENT_MEM_FRACTION=.5 -``` - - -2. #### TensorFlow pre-allocates 90% of the GPU memory (potential OOM errors). - -- Set `TF_FORCE_GPU_ALLOW_GROWTH=true` to start with a small memory footprint: -``` -%env TF_FORCE_GPU_ALLOW_GROWTH=true -``` - -3. #### DNN library not found error - -- Make sure the jaxlib (CUDA version), Nvidia driver, CUDA, and cuDNN versions are mutually compatible as per the documentation. 
-- Reference: - - CUDA and cuDNN Compatibility Matrix: https://docs.nvidia.com/deeplearning/cudnn/support-matrix/index.html - - Official JAX Compatible CUDA Releases: https://storage.googleapis.com/jax-releases/jax_cuda_releases.html diff --git a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/director/director_config.yaml deleted file mode 100644 index 9259c98386..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/director/director_config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50055 - sample_shape: ['32', '32', '3'] # [[shape], channel] - target_shape: ['1'] - envoy_health_check_period: 5 # in seconds diff --git a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/envoy/cifar10_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/envoy/cifar10_shard_descriptor.py deleted file mode 100644 index 0fc2b59061..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/envoy/cifar10_shard_descriptor.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""CIFAR10 Shard Descriptor (using `TFDS` API)""" -import jax.numpy as jnp -import logging -import tensorflow as tf -import tensorflow_datasets as tfds - -from typing import List, Tuple -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class CIFAR10ShardDescriptor(ShardDescriptor): - """ - CIFAR10 Shard Descriptor - - This example is based on `tfds` data loader. - Note that the ingestion of any model/task requires an iterable dataloader. - Hence, it is possible to utilize these pipelines without explicit need of a - new interface. - """ - - def __init__( - self, - rank_worldsize: str = '1, 1', - **kwargs - ) -> None: - """Download/Prepare CIFAR10 dataset""" - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - # Load dataset - train_ds, valid_ds = self._download_and_prepare_dataset(self.rank, self.worldsize) - - # Set attributes - self._sample_shape = train_ds['image'].shape[1:] - self._target_shape = tf.expand_dims(train_ds['label'], -1).shape[1:] - - self.splits = { - 'train': train_ds, - 'valid': valid_ds - } - - def _download_and_prepare_dataset(self, rank: int, worldsize: int) -> Tuple[dict]: - """ - Download, Cache CIFAR10 and prepare `tfds` builder. - - Provide `rank` and `worldsize` to virtually split dataset across shards - uniquely for each client for simulation purposes. 
- - Returns: - Tuple (train_dict, test_dict) of dictionary with JAX DeviceArray (image and label) - dict['image'] -> DeviceArray float32 - dict['label'] -> DeviceArray int32 - {'image' : DeviceArray(...), 'label' : DeviceArray(...)} - - """ - - dataset_builder = tfds.builder('cifar10') - dataset_builder.download_and_prepare() - - datasets = dataset_builder.as_dataset() - - train_shard_size = int(len(datasets['train']) / worldsize) - test_shard_size = int(len(datasets['test']) / worldsize) - - self.train_segment = f'train[{train_shard_size * (rank - 1)}:{train_shard_size * rank}]' - self.test_segment = f'test[{test_shard_size * (rank - 1)}:{test_shard_size * rank}]' - train_dataset = dataset_builder.as_dataset(split=self.train_segment, batch_size=-1) - test_dataset = dataset_builder.as_dataset(split=self.test_segment, batch_size=-1) - train_ds = tfds.as_numpy(train_dataset) - test_ds = tfds.as_numpy(test_dataset) - - train_ds['image'] = jnp.float32(train_ds['image']) / 255. - test_ds['image'] = jnp.float32(test_ds['image']) / 255. - train_ds['label'] = jnp.int32(train_ds['label']) - test_ds['label'] = jnp.int32(test_ds['label']) - - return train_ds, test_ds - - def get_shard_dataset_types(self) -> List[str]: - """Get available split names""" - return list(self.splits) - - def get_split(self, name: str) -> tf.data.Dataset: - """Return a shard dataset by type.""" - if name not in self.splits: - raise Exception(f'Split name `{name}` not found.' - f' Expected one of {list(self.splits.keys())}') - return self.splits[name] - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - return list(map(str, self._sample_shape)) - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - return list(map(str, self._target_shape)) - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - n_train = len(self.splits['train']['label']) - n_test = len(self.splits['valid']['label']) - return (f'CIFAR10 dataset, Shard Segments {self.train_segment}/{self.test_segment}, ' - f'rank/world {self.rank}/{self.worldsize}.' 
- f'\n num_samples [Train/Valid]: [{n_train}/{n_test}]') diff --git a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/envoy/envoy_config_1.yaml b/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/envoy/envoy_config_1.yaml deleted file mode 100644 index 6856d89aaf..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/envoy/envoy_config_1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: cifar10_shard_descriptor.CIFAR10ShardDescriptor - params: - rank_worldsize: 1, 2 \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/envoy/start_envoy.sh deleted file mode 100755 index 2e18424c42..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/envoy/start_envoy.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -DEFAULT_DEVICE='CPU' - -if [[ $DEFAULT_DEVICE == 'CPU' ]] -then - export JAX_PLATFORMS="cpu" # Force XLA to use CPU - export CUDA_VISIBLE_DEVICES='-1' # Force TF to use CPU -else - export XLA_PYTHON_CLIENT_PREALLOCATE=false - export TF_FORCE_GPU_ALLOW_GROWTH=true -fi - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50055 diff --git a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/requirements.txt b/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/requirements.txt deleted file mode 100644 index c81a69fea8..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ ---find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html -jax -jaxlib -tensorflow==2.13 -tensorflow-datasets==4.6.0 diff --git a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/workspace/FLAX_CIFAR10_CNN.ipynb b/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/workspace/FLAX_CIFAR10_CNN.ipynb deleted file mode 100644 index e59c8f2630..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Flax_CNN_CIFAR/workspace/FLAX_CIFAR10_CNN.ipynb +++ /dev/null @@ -1,599 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated FLAX/JAX CIFAR10 Tutorial\n", - "Using `TFDS` API as a data loader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1329f2e6", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install ml_collections flax -q" - ] - }, - { - "cell_type": "markdown", - "id": "e0d30942", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "markdown", - "id": "bba06315-142b-4339-92a7-f2fa38fb79f1", - "metadata": {}, - "source": [ - "`TF_FORCE_GPU_ALLOW_GROWTH=true` - Starts out allocating very little memory, and as the program gets run and more GPU memory is needed, the GPU memory region is extended for the TensorFlow process.\n", - "\n", - "`XLA_PYTHON_CLIENT_PREALLOCATE=false` - This disables the preallocation behavior. JAX will instead allocate GPU memory as needed, potentially decreasing the overall memory usage. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58ce140e", - "metadata": {}, - "outputs": [], - "source": [ - "DEFAULT_DEVICE='cpu'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56eae004-fce0-442e-8c0d-659c7db77f4e", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "if DEFAULT_DEVICE == 'cpu':\n", - " os.environ['JAX_PLATFORMS']='cpu' # Force XLA to use CPU\n", - " os.environ['CUDA_VISIBLE_DEVICES']='-1' # Force TF to use CPU\n", - "elif DEFAULT_DEVICE == 'GPU':\n", - " os.environ['XLA_PYTHON_CLIENT_PREALLOCATE']='false'\n", - " os.environ['TF_FORCE_GPU_ALLOW_GROWTH']='true'\n", - " os.environ['TF_ENABLE_ONEDNN_OPTS']='0' # Disable oneDNN custom operations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0833dfc9", - "metadata": {}, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "print('TensorFlow', tf.__version__)\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "60763633-919a-41c1-b755-50f3bb3baf3d", - "metadata": {}, - "outputs": [], - "source": [ - "from flax import linen as nn\n", - "from flax.metrics import tensorboard\n", - "from flax.training import train_state\n", - "import jax\n", - "import jax.numpy as jnp\n", - "import logging\n", - "import ml_collections\n", - "import optax\n", - "import tensorflow_datasets as tfds\n", - "from tensorflow.keras.utils import Progbar\n", - "from dataclasses import field\n" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation\n", - "\n", - "Start `Director` and `Envoy` before proceeding with this cell. \n", - "\n", - "This cell connects this notebook to the Federation." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'api'\n", - "cert_dir = 'cert'\n", - "director_node_fqdn = 'localhost'\n", - "director_port = 50055\n", - "\n", - "# Create a Federation\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port, \n", - " tls=False\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6efe22a8", - "metadata": {}, - "source": [ - "## Query Datasets from Shard Registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4670d19e-b0f5-472d-9794-ddb3cefbb3d7", - "metadata": {}, - "outputs": [], - "source": [ - "def get_config():\n", - " \"\"\"Get the default hyperparameter configuration.\"\"\"\n", - " config = ml_collections.ConfigDict()\n", - " config.learning_rate = 0.01\n", - " config.momentum = 0.9\n", - " config.batch_size = 128\n", - " config.num_epochs = 10\n", - " config.rounds_to_train = 3\n", - " return config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fd3faf78-2f8a-4fc9-aca0-450d700e625c", - "metadata": {}, - "outputs": [], - "source": [ - "config = get_config()" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": { - "tags": [] - }, - "source": [ - "## Describing FL experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface\n", - "from openfl.interface.interactive_api.experiment import ModelInterface\n", - "from openfl.interface.interactive_api.experiment import FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "3b468ae1", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06545bbb", - "metadata": {}, - "outputs": [], - "source": [ - "# Define model\n", - "class CNN(nn.Module):\n", - " \"\"\"A simple CNN model.\"\"\"\n", - " \n", - " @nn.compact\n", - " def __call__(self, x):\n", - " x = nn.Conv(features=32, kernel_size=(3, 3))(x)\n", - " x = nn.relu(x)\n", - " x = nn.avg_pool(x, window_shape=(2, 2), strides=(2, 2))\n", - " x = nn.Conv(features=64, kernel_size=(3, 3))(x)\n", - " x = nn.relu(x)\n", - " x = nn.avg_pool(x, window_shape=(2, 2), strides=(2, 2))\n", - " x = nn.Conv(features=128, kernel_size=(3, 3))(x)\n", - " x = nn.relu(x)\n", - " x = nn.avg_pool(x, window_shape=(2, 2), strides=(2, 2))\n", - " x = x.reshape((x.shape[0], -1)) # flatten\n", - " x = nn.Dense(features=256)(x)\n", - " x = nn.relu(x)\n", - " x = 
nn.Dense(features=10)(x)\n", - " return x\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b3aef5d-2828-489c-b645-dacf8e1ee440", - "metadata": {}, - "outputs": [], - "source": [ - "class CustomTrainState(train_state.TrainState):\n", - " \"\"\" Subclass `train_state.Trainstate` and `update_state` method\n", - " to allow update of `model parameters` and `optimizer state` \n", - " during `training` loop execution\n", - " \"\"\"\n", - " opt_vars : list = field(default_factory=list)\n", - " \n", - " def update_state(self, new_state: train_state.TrainState) -> None:\n", - " ''' \n", - " Update the model states, used during evaluation/inference.\n", - " \n", - " Parameters\n", - " ----------\n", - " new_state : train_state.TrainState\n", - " Updated state with applied gradients.\n", - " update the `state` variable used to initialize ModelInterface\n", - " with the `new_state` parameters\n", - "\n", - " Returns\n", - " -------\n", - " None\n", - " '''\n", - " # Update Params\n", - " self.params.update(new_state.params)\n", - " \n", - " # Update Optimizer States\n", - " for var in self.opt_vars:\n", - " opt_var_dict = getattr(self.opt_state[0], var)\n", - " new_opt_var_dict = getattr(new_state.opt_state[0], var)\n", - " opt_var_dict.update(new_opt_var_dict)\n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2786ae49-6293-4596-bbc2-0b791905d900", - "metadata": {}, - "outputs": [], - "source": [ - "def _get_opt_vars(x):\n", - " return False if x.startswith('_') or x in ['index', 'count'] else True\n", - "\n", - "def create_train_state(rng, config):\n", - " \"\"\"Creates initial `TrainState`.\"\"\"\n", - " cnn = CNN()\n", - " params = cnn.init(rng, jnp.ones([1, 32, 32, 3]))['params'].unfreeze() # Random Parameters\n", - " tx = optax.sgd(config.learning_rate, config.momentum) # Optimizer\n", - " optvars = list(filter(_get_opt_vars, dir(tx.init(params)[0])))\n", - " initial_model_state = CustomTrainState.create(apply_fn=cnn.apply, params=params, tx=tx, opt_vars=optvars)\n", - " return initial_model_state" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0abc9a1a-a606-4cef-ba51-c4618f924bec", - "metadata": {}, - "outputs": [], - "source": [ - "# PRNG - Pseudo Random Number Generator Seed\n", - "rng = jax.random.PRNGKey(0)\n", - "rng, init_rng = jax.random.split(rng)\n", - "\n", - "# Initialize parameters and optimizers \n", - "# Encapsulate within TrainState class and apply gradients in an easy way\n", - "state = create_train_state(init_rng, config)\n", - "\n", - "# Create ModelInterface - Register the state\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.flax_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=state, optimizer=None, framework_plugin=framework_adapter)" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": { - "tags": [] - }, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8c9eb50", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import DataInterface\n", - "\n", - "class CIFAR10FedDataset(DataInterface):\n", - "\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or 
sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " \n", - " # shard_descriptor.get_split(...) returns a tf.data.Dataset\n", - " # Check cifar10_shard_descriptor.py for details\n", - " self.train_set = shard_descriptor.get_split('train')\n", - " self.valid_set = shard_descriptor.get_split('valid')\n", - "\n", - " def get_train_loader(self):\n", - " \"\"\"Output of this method will be provided to tasks with optimizer in contract\"\"\"\n", - " return self.train_set\n", - " # bs = self.kwargs.get('train_bs', 32)\n", - " # return self.train_set.batch(bs)\n", - "\n", - " def get_valid_loader(self):\n", - " \"\"\"Output of this method will be provided to tasks without optimizer in contract\"\"\"\n", - " return self.valid_set\n", - " # bs = self.kwargs.get('valid_bs', 32)\n", - " # return self.valid_set.batch(bs)\n", - " \n", - " def get_train_data_size(self) -> int:\n", - " \"\"\"Information for aggregation\"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self) -> int:\n", - " \"\"\"Information for aggregation\"\"\"\n", - " return len(self.valid_set)" - ] - }, - { - "cell_type": "markdown", - "id": "b0dfb459", - "metadata": {}, - "source": [ - "### Create CIFAR10 federated dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4af5c4c2", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = CIFAR10FedDataset()" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56e62caa-187e-4525-a167-a1b9d7f1435e", - "metadata": {}, - "outputs": [], - "source": [ - "@jax.jit\n", - "def apply_model(state, images, labels):\n", - " \"\"\"Computes gradients, loss and accuracy for a single batch.\"\"\"\n", - "\n", - " def loss_fn(params):\n", - " logits = state.apply_fn({'params': params}, images)\n", - " one_hot = jax.nn.one_hot(labels, 10) # 10 - Total number of classes for a given dataset\n", - " loss = jnp.mean(optax.softmax_cross_entropy(logits=logits, labels=one_hot))\n", - " return loss, logits\n", - "\n", - " grad_fn = jax.value_and_grad(loss_fn, has_aux=True)\n", - " (loss, logits), grads = grad_fn(state.params)\n", - " accuracy = jnp.mean(jnp.argmax(logits, -1) == labels)\n", - " return grads, loss, accuracy\n", - "\n", - "\n", - "@jax.jit\n", - "def update_model(state, grads):\n", - " \"\"\"Return an immutable and updated state with applied gradients\"\"\"\n", - " return state.apply_gradients(grads=grads)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "44a2dac6-53d7-4711-9368-e4310e9c1f48", - "metadata": {}, - "outputs": [], - "source": [ - "def train_epoch(state, train_ds, batch_size, rng):\n", - " \"\"\"Train for a single epoch.\"\"\"\n", - " train_ds_size = len(train_ds['image'])\n", - " steps_per_epoch = train_ds_size // batch_size\n", - " pbar = Progbar(steps_per_epoch)\n", - " \n", - " # Randomize the batch selection.\n", - " # Permute the dataset index selection\n", - " perms = jax.random.permutation(rng, train_ds_size)\n", - " perms = perms[:steps_per_epoch * batch_size] # skip incomplete batch\n", - " perms = perms.reshape((steps_per_epoch, batch_size))\n", - "\n", - " epoch_loss = []\n", - " epoch_accuracy = []\n", - " step = 1\n", - " for perm in perms:\n", - " batch_images = 
train_ds['image'][perm, ...] # Same as [perm, :, :, :]\n", - "        batch_labels = train_ds['label'][perm, ...]\n", - "        # apply_model -> Forward pass through the layers with the given model `state` as a parameter\n", - "        grads, loss, accuracy = apply_model(state, batch_images, batch_labels)\n", - "        # Apply gradients and get the updated `state`\n", - "        # jitted methods are stateless!\n", - "        state = update_model(state, grads)\n", - "        epoch_loss.append(loss)\n", - "        epoch_accuracy.append(accuracy)\n", - "        pbar.update(step, values={'epoch loss': loss, 'epoch accuracy': accuracy}.items())\n", - "        step = step + 1\n", - "    \n", - "    train_loss = jnp.array(epoch_loss).mean().item()\n", - "    train_accuracy = jnp.array(epoch_accuracy).mean().item()\n", - "    return state, train_loss, train_accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9649385", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "@TI.register_fl_task(model='state', data_loader='dataset', optimizer='optimizer', device='device') \n", - "def train(state, dataset, optimizer, device, loss_fn=None, warmup=False):\n", - "    new_state, train_loss, train_accuracy = train_epoch(state, dataset, config.batch_size, init_rng)\n", - "    state.update_state(new_state) # Update `model` parameters registered in ModelInterface with the `new_state` parameters.\n", - "    return {'train_acc': train_accuracy,}\n", - "\n", - "@TI.register_fl_task(model='state', data_loader='dataset', device='device')\n", - "def validate(state, dataset, device):\n", - "    _, val_loss, val_accuracy = apply_model(state, dataset['image'], dataset['label'])\n", - "    # print(\"Validation accuracy: %.4f\" % (float(val_accuracy),))\n", - "    return {'validation_accuracy': val_accuracy,}" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Start federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experiment in federation\n", - "experiment_name = 'cifar10_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and Python requirements to be transferred to collaborator nodes\n", - "\n", - "fl_experiment.start(model_provider=MI,\n", - "                    task_keeper=TI,\n", - "                    data_loader=fed_dataset,\n", - "                    rounds_to_train=config.rounds_to_train,\n", - "                    opt_treatment='CONTINUE_GLOBAL',\n", - "                    device_assignment_policy='CUDA_PREFERRED')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "74eeb27d-d637-4041-8281-b82ed6bb22f0", - "metadata": {}, - "outputs": [], - "source": [ - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8573e808-13cb-4da9-bed0-f36ed7724378", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "vscode": { - "interpreter": { - "hash": 
"b06d1b655d4d5fbe90b113436047673f65880809b3fc5b52db1ddd0c49488bf8" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/README.md b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/README.md deleted file mode 100644 index 79cf6da947..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/README.md +++ /dev/null @@ -1,269 +0,0 @@ -# PyTorch_Kvasir_UNet - -## **Habana Tutorials** -#### The name of the file/example that contain HPU adaptations start with "HPU". -For example: PyTorch_Kvasir_UNet.ipynb placed under workspace folder contains the required HPU adaptations. - - All the execution steps mention in last section (**V. How to run this tutorial**) remain same for HPU examples but as pre-requisite it needs some additional environment setup and Habana supported package installations which is explained below from **section I to V**. - - **Note:** By default these experiments utilize 1 HPU device - -
- - ## **I. Intel Developer Cloud Setup** -This example was tested on the Intel Developer Cloud utilizing a Gaudi2 instance. - -To access the Gaudi2 instances on the Intel Developer Cloud, follow the instructions [here](https://developer.habana.ai/intel-developer-cloud/) - -The Gaudi instance in the Intel Developer Cloud comes with the SynapseAI SW Stack for Gaudi2 preinstalled, so you can skip sections **II.** and **III.** - -Furthermore, our testing was done using a Habana-based Docker container built from the Dockerfile base discussed below: - -Let's create a Dockerfile with the following content and name it Dockerfile_Habana: - -``` - -FROM vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest - -ENV HABANA_VISIBLE_DEVICES=all -ENV OMPI_MCA_btl_vader_single_copy_mechanism=none - -ENV DEBIAN_FRONTEND="noninteractive" TZ=Etc/UTC -RUN apt-get update && apt-get install -y tzdata bash-completion \ - python3-pip openssh-server vim git iputils-ping net-tools curl bc gawk \ - && rm -rf /var/lib/apt/lists/* - - -RUN pip install numpy \ - && pip install jupyterlab \ - && pip install matplotlib \ - && pip install openfl - - -RUN git clone https://github.com/securefederatedai/openfl.git /root/openfl - -WORKDIR /root - -``` - -This base container comes with the HPU PyTorch packages already installed. Hence, you can skip section **IV.** below. - -Build the above container and then launch it using: - -``` -export GAUDI_DOCKER_IMAGE="gaudi-docker-ubuntu20.04-openfl" - -docker build -t ${GAUDI_DOCKER_IMAGE} -f Dockerfile_Habana . -docker run --net host -id --name openfl_gaudi_run ${GAUDI_DOCKER_IMAGE} bash -``` - -Then access the container bash shell using: - -``` -docker exec -it openfl_gaudi_run bash - -``` - -Once inside the container, ensure the openfl repo is cloned; otherwise, clone it using: - -``` -git clone https://github.com/securefederatedai/openfl.git -``` - -Then check whether the openfl package is installed: - -``` -pip list | grep openfl - -``` - -If not, install it using: - -``` -pip install openfl -``` - -Then follow the instructions in section **V. HPU Adaptations For PyTorch Examples** below. - -
- - ## **II. AWS DL1 Instance Setup** - - This example was tested on an AWS EC2 instance created by following the instructions [here](https://docs.habana.ai/en/latest/AWS_EC2_DL1_and_PyTorch_Quick_Start/AWS_EC2_DL1_and_PyTorch_Quick_Start.html). - - Test setup: Habana 1.7 and Ubuntu 20.04. - -
- - ## **III. Set Up SynapseAI SW Stack** - - - To perform an installation of the full driver and SynapseAI software stack independently on the EC2 instance, run the following commands: - - ``` - wget -nv https://vault.habana.ai/artifactory/gaudi-installer/latest/habanalabs-installer.sh -chmod +x habanalabs-installer.sh -./habanalabs-installer.sh install --type base -``` - **NOTE:** Habanalabs requires Python 3.8; this version is hardcoded in [habanalabs-installer.sh](https://vault.habana.ai/ui/native/gaudi-installer/latest/habanalabs-installer.sh) - -You can refer to the [Habana docs](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#set-up-synapseai-sw-stack) and the [GitHub repository](https://github.com/HabanaAI/Setup_and_Install) for detailed instructions. - -
- - ## **IV. HPU PyTorch Installation** - - For this example, make sure to install the PyTorch packages provided by Habana. These packages are optimized for the Habana Gaudi HPU. Installing public PyTorch packages is not supported. - Habana PyTorch packages consist of: - - **torch** - PyTorch framework package with Habana support -- **habana-torch-plugin** - Libraries and modules needed to execute PyTorch on single-card, single-node, and multi-node setups. -- **habana-torch-dataloader** - Habana multi-threaded dataloader package. -- **torchvision** - Torchvision package compiled in the torch environment. No Habana-specific changes in this package. - Run the following command to install the above Habana PyTorch environment: - - ``` - ./habanalabs-installer.sh install --type pytorch --venv - ``` - - The `--venv` flag installs the relevant framework inside a virtual environment. To activate the virtual environment, run the following: - - - ``` - cd $HOME/habanalabs-venv - source ./bin/activate - ``` - -The default virtual environment folder is `$HOME/habanalabs-venv`. To override the default, run the following command: - - ``` - export HABANALABS_VIRTUAL_DIR=/path/to/dir - ``` - - You can refer to the [Habana docs](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#set-up-synapseai-sw-stack) and the [GitHub repository](https://github.com/HabanaAI/Setup_and_Install) for detailed instructions. - -
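After activating the environment, it can help to verify that the Habana PyTorch stack is importable before moving on. The snippet below is a minimal sanity check (an illustrative sketch, not part of the tutorial's own code); it assumes the Habana-provided `torch` and `habana-torch-plugin` packages installed above are active in the current virtual environment, and it will only pass on a machine with Gaudi hardware and drivers set up.

```python
# Minimal sanity check for the Habana PyTorch environment (illustrative only).
# Assumes the Habana-provided torch and habana-torch-plugin packages are installed.
import torch
import habana_frameworks.torch.core as htcore  # registers the 'hpu' device (see section V)

print(torch.__version__)        # the Habana-built torch version
x = torch.ones(2, 2).to("hpu")  # raises an error if the HPU stack is not usable
print(x.device)                 # expected: hpu:0
```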
- - ## **V. HPU Adaptations For PyTorch Examples** - -The following code additions are required in the workspace notebook to run a model on Habana. The steps below cover the Eager and Lazy modes of execution. - -### 1. Target the Gaudi HPU device: - -``` -device = torch.device("hpu") -``` -### 2. Move the model to the device: - -**There is a dependency in the order of execution (moving the model to the HPU and initializing the optimizer). The workaround is to execute this step before initializing any optimizers.** - -``` -model.to(device) -``` -### 3. Import the Habana Torch Library: - -``` -import habana_frameworks.torch.core as htcore -``` -### 4. Enable Lazy execution mode by setting the environment variable shown below. -Do not set this environment variable if you want to execute your code in Eager mode: - -``` -os.environ["PT_HPU_LAZY_MODE"] = "1" -``` -### 5. In Lazy mode, execution is triggered wherever data is read back to the host from the Habana device. -For example, execution is triggered if you are running a topology and getting the loss value into the host from the device with `loss.item()`. Adding a `mark_step()` in the code is another way to trigger execution: - -``` -htcore.mark_step() -``` - -The placement of `mark_step()` is required at the following points in a training script: - -* Right after `optimizer.step()` to cleanly demarcate training iterations, -* Between `loss.backward()` and `optimizer.step()` if the optimizer being used is a Habana custom optimizer. - -Refer to [Getting Started with PyTorch](https://www.intel.com/content/www/us/en/developer/articles/technical/get-started-habana-gaudi-deep-learning-training.html#articleparagraph_cop_711677074) for a detailed explanation and the PyTorch Habana architecture. A sample example can be found [here](https://docs.habana.ai/en/latest/PyTorch/Getting_Started_with_PyTorch_and_Gaudi/Getting_Started_with_PyTorch.html) - -
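Putting steps 1 to 5 together, one Lazy-mode training iteration might look like the sketch below. This is an illustrative sketch rather than code from the tutorial notebook: the `nn.Linear` model, the MSE loss, and the random tensors are stand-ins, and it assumes a Gaudi machine with the Habana PyTorch stack installed. The `mark_step()` placement follows the rules above.

```python
import os
os.environ["PT_HPU_LAZY_MODE"] = "1"   # step 4: enable Lazy mode (omit for Eager mode)

import torch
import torch.nn as nn
import habana_frameworks.torch.core as htcore  # step 3: import the Habana Torch library

device = torch.device("hpu")           # step 1: target the Gaudi HPU device
model = nn.Linear(10, 1)               # stand-in model for illustration
model.to(device)                       # step 2: move the model BEFORE creating the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.MSELoss()

x = torch.randn(32, 10).to(device)     # stand-in batch
y = torch.randn(32, 1).to(device)

for _ in range(3):                     # a few illustrative iterations
    optimizer.zero_grad()
    loss = loss_fn(model(x), y)
    loss.backward()
    # htcore.mark_step() would go here if a Habana custom optimizer were used
    optimizer.step()
    htcore.mark_step()                 # step 5: demarcate the end of the training iteration
    print(loss.item())                 # reading the loss back to the host also triggers execution
```

To run the same sketch in Eager mode, simply omit the `PT_HPU_LAZY_MODE` setting; the rest of the loop is unchanged.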
- -## **VI. How to run this tutorial (without TLS and locally as a simulation):** -
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
- -### 1. Split your terminal into three (one terminal for the director, one for the envoy, and one for the experiment) -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/HPU/PyTorch_Kvasir_UNet - ``` - -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r sd_requirements.txt -``` - - If you have GPUs: -```sh -./start_envoy.sh env_one envoy_config.yaml -``` - - If you have no GPUs, use: -```sh -./start_envoy.sh env_one envoy_config_no_gpu.yaml -``` - - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Repeat the step 4 instructions above, but change the name "env_one" to "env_two" (or another name of your choice). - -
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab --allow-root PyTorch_Kvasir_UNet.ipynb -``` - -When running on a remote host inside a Docker container, as is the case for Gaudi2, you need to port-forward JupyterLab to your local host. On your local terminal, run: - -```sh -ssh -NL 8888:127.0.0.1:8888 gaudi2_host -``` -- A Jupyter Server URL will appear in your terminal. In your local browser, proceed to that link. Once the webpage loads, click on the PyTorch_Kvasir_UNet.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/director/director_config.yaml deleted file mode 100644 index 860e0436f5..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/director/director_config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50050 - sample_shape: ['300', '400', '3'] - target_shape: ['300', '400'] - envoy_health_check_period: 5 # in seconds diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/director/start_director.sh deleted file mode 100644 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/director/start_director_with_tls.sh deleted file mode 100644 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/envoy_config_no_gpu.yaml b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/envoy_config_no_gpu.yaml deleted file mode 100644 index 10086275eb..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/envoy_config_no_gpu.yaml +++ /dev/null @@ -1,11 +0,0 @@ -params: - -optional_plugin_components: {} - -shard_descriptor: - template: kvasir_shard_descriptor.KvasirShardDescriptor - params: - data_folder: kvasir_data - rank_worldsize: 1,10 - enforce_image_hw: '300,400' - \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor.py deleted file mode 100644 index b50c2038a2..0000000000 --- 
a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Kvasir shard descriptor.""" - -import os -from pathlib import Path - -import numpy as np -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor -from openfl.utilities import validate_file_hash - - -class KvasirShardDataset(ShardDataset): - """Kvasir Shard dataset class.""" - - def __init__(self, dataset_dir: Path, rank=1, worldsize=1, enforce_image_hw=None): - """Initialize KvasirShardDataset.""" - self.rank = rank - self.worldsize = worldsize - self.dataset_dir = dataset_dir - self.enforce_image_hw = enforce_image_hw - self.images_path = self.dataset_dir / 'segmented-images' / 'images' - self.masks_path = self.dataset_dir / 'segmented-images' / 'masks' - - self.images_names = [ - img_name - for img_name in sorted(os.listdir(self.images_path)) - if len(img_name) > 3 and img_name[-3:] == 'jpg' - ] - # Sharding - self.images_names = self.images_names[self.rank - 1::self.worldsize] - - def __getitem__(self, index): - """Return a item by the index.""" - name = self.images_names[index] - # Reading data - img = Image.open(self.images_path / name) - mask = Image.open(self.masks_path / name) - if self.enforce_image_hw is not None: - # If we need to resize data - # PIL accepts (w,h) tuple, not (h,w) - img = img.resize(self.enforce_image_hw[::-1]) - mask = mask.resize(self.enforce_image_hw[::-1]) - img = np.asarray(img) - mask = np.asarray(mask) - assert img.shape[2] == 3 - - return img, mask[:, :, 0].astype(np.uint8) - - def __len__(self): - """Return the len of the dataset.""" - return len(self.images_names) - - -class KvasirShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__(self, data_folder: str = 'kvasir_data', - rank_worldsize: str = '1,1', - enforce_image_hw: str = None) -> None: - """Initialize KvasirShardDescriptor.""" - super().__init__() - # Settings for sharding the dataset - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.data_folder = Path.cwd() / data_folder - self.download_data(self.data_folder) - - # Settings for resizing data - self.enforce_image_hw = None - if enforce_image_hw is not None: - self.enforce_image_hw = tuple(int(size) for size in enforce_image_hw.split(',')) - - # Calculating data and target shapes - ds = self.get_dataset() - sample, target = ds[0] - self._sample_shape = [str(dim) for dim in sample.shape] - self._target_shape = [str(dim) for dim in target.shape] - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - return KvasirShardDataset( - dataset_dir=self.data_folder, - rank=self.rank, - worldsize=self.worldsize, - enforce_image_hw=self.enforce_image_hw - ) - - @staticmethod - def download_data(data_folder): - """Download data.""" - zip_file_path = data_folder / 'kvasir.zip' - os.makedirs(data_folder, exist_ok=True) - os.system( - 'wget -nc' - " 'https://datasets.simula.no/downloads/" - "hyper-kvasir/hyper-kvasir-segmented-images.zip'" - f' -O {zip_file_path.relative_to(Path.cwd())}' - ) - zip_sha384 = ('66cd659d0e8afd8c83408174' - '1ade2b75dada8d4648b816f2533c8748b1658efa3d49e205415d4116faade2c5810e241e') - validate_file_hash(zip_file_path, zip_sha384) - os.system(f'unzip -n 
{zip_file_path.relative_to(Path.cwd())}' - f' -d {data_folder.relative_to(Path.cwd())}') - - @property - def sample_shape(self): - """Return the sample shape info.""" - return self._sample_shape - - @property - def target_shape(self): - """Return the target shape info.""" - return self._target_shape - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Kvasir dataset, shard number {self.rank} ' - f'out of {self.worldsize}') - - -if __name__ == '__main__': - from openfl.interface.cli import setup_logging - - setup_logging() - - data_folder = 'data' - rank_worldsize = '1,100' - enforce_image_hw = '529,622' - - kvasir_sd = KvasirShardDescriptor( - data_folder, - rank_worldsize=rank_worldsize, - enforce_image_hw=enforce_image_hw) - - print(kvasir_sd.dataset_description) - print(kvasir_sd.sample_shape, kvasir_sd.target_shape) - - from openfl.component.envoy.envoy import Envoy - - shard_name = 'one' - director_host = 'localhost' - director_port = 50051 - - keeper = Envoy( - shard_name=shard_name, - director_host=director_host, - director_port=director_port, - shard_descriptor=kvasir_sd, - tls=True, - root_certificate='./cert/root_ca.crt', - private_key='./cert/one.key', - certificate='./cert/one.crt', - ) - - keeper.start() diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor_with_data_splitter.py b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor_with_data_splitter.py deleted file mode 100644 index 4a35d1179c..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor_with_data_splitter.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Kvasir shard descriptor.""" - - -import os -from pathlib import Path - -import numpy as np -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor -from openfl.utilities import validate_file_hash -from openfl.utilities.data_splitters import RandomNumPyDataSplitter - - -class KvasirShardDataset(ShardDataset): - """Kvasir Shard dataset class.""" - - def __init__(self, dataset_dir: Path, rank=1, worldsize=1, enforce_image_hw=None): - """Initialize KvasirShardDataset.""" - self.rank = rank - self.worldsize = worldsize - self.dataset_dir = dataset_dir - self.enforce_image_hw = enforce_image_hw - - self.images_path = self.dataset_dir / 'segmented-images' / 'images' - self.masks_path = self.dataset_dir / 'segmented-images' / 'masks' - - self.images_names = [ - img_name - for img_name in sorted(os.listdir(self.images_path)) - if len(img_name) > 3 and img_name[-3:] == 'jpg' - ] - # Sharding - data_splitter = RandomNumPyDataSplitter() - shard_idx = data_splitter.split(self.images_names, self.worldsize)[self.rank] - self.images_names = [self.images_names[i] for i in shard_idx] - - def __getitem__(self, index): - """Return a item by the index.""" - name = self.images_names[index] - # Reading data - img = Image.open(self.images_path / name) - mask = Image.open(self.masks_path / name) - if self.enforce_image_hw is not None: - # If we need to resize data - # PIL accepts (w,h) tuple, not (h,w) - img = img.resize(self.enforce_image_hw[::-1]) - mask = mask.resize(self.enforce_image_hw[::-1]) - img = np.asarray(img) - mask = np.asarray(mask) - assert img.shape[2] == 3 - - 
return img, mask[:, :, 0].astype(np.uint8) - - def __len__(self): - """Return the len of the dataset.""" - return len(self.images_names) - - -class KvasirShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__(self, data_folder: str = 'kvasir_data', - rank_worldsize: str = '1,1', - enforce_image_hw: str = None) -> None: - """Initialize KvasirShardDescriptor.""" - super().__init__() - # Settings for sharding the dataset - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.data_folder = Path.cwd() / data_folder - self.download_data(self.data_folder) - - # Settings for resizing data - self.enforce_image_hw = None - if enforce_image_hw is not None: - self.enforce_image_hw = tuple(int(size) for size in enforce_image_hw.split(',')) - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - return KvasirShardDataset( - dataset_dir=self.data_folder, - rank=self.rank, - worldsize=self.worldsize, - enforce_image_hw=self.enforce_image_hw - ) - - @staticmethod - def download_data(data_folder): - """Download data.""" - zip_file_path = data_folder / 'kvasir.zip' - os.makedirs(data_folder, exist_ok=True) - os.system('wget -nc' - " 'https://datasets.simula.no/downloads/hyper-kvasir/" - "hyper-kvasir-segmented-images.zip'" - f' -O {zip_file_path.relative_to(Path.cwd())}') - zip_sha384 = ('66cd659d0e8afd8c83408174' - '1ade2b75dada8d4648b816f2533c8748b1658efa3d49e205415d4116faade2c5810e241e') - validate_file_hash(zip_file_path, zip_sha384) - os.system(f'unzip -n {zip_file_path.relative_to(Path.cwd())}' - f' -d {data_folder.relative_to(Path.cwd())}') - - @property - def sample_shape(self): - """Return the sample shape info.""" - return ['300', '400', '3'] - - @property - def target_shape(self): - """Return the target shape info.""" - return ['300', '400'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return f'Kvasir dataset, shard number {self.rank} out of {self.worldsize}' diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/sd_requirements.txt b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/sd_requirements.txt deleted file mode 100644 index 0d33a9eee5..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/sd_requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -numpy -pillow diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/start_envoy.sh deleted file mode 100644 index 913486f560..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config_no_gpu.yaml -dh localhost -dp 50050 diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/start_envoy_with_tls.sh deleted file mode 100644 index 97e3f4d893..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -DIRECTOR_FQDN=$2 - -fx envoy start -n "$ENVOY_NAME" --envoy-config-path envoy_config.yaml -dh "$DIRECTOR_FQDN" -dp 50050 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc 
cert/"$ENVOY_NAME".crt diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/workspace/PyTorch_Kvasir_UNet.ipynb b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/workspace/PyTorch_Kvasir_UNet.ipynb deleted file mode 100644 index 99a0f0afc3..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/workspace/PyTorch_Kvasir_UNet.ipynb +++ /dev/null @@ -1,635 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "liquid-jacket", - "metadata": {}, - "source": [ - "# Federated Kvasir with Director example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "alike-sharing", - "metadata": {}, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "#!pip install torchvision==0.8.1\n", - "import habana_frameworks.torch.core as htcore" - ] - }, - { - "cell_type": "markdown", - "id": "16986f22", - "metadata": {}, - "source": [ - "# Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4485ac79", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'frontend'\n", - "director_node_fqdn = 'localhost'\n", - "director_port = 50050\n", - "\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS their must provide CA root chain, and signed key pair to the federation interface\n", - "# cert_chain = 'cert/root_ca.crt'\n", - "# API_certificate = 'cert/frontend.crt'\n", - "# API_private_key = 'cert/frontend.key'\n", - "\n", - "# federation = Federation(\n", - "# client_id=client_id,\n", - "# director_node_fqdn=director_node_fqdn,\n", - "# director_port=director_port,\n", - "# tls=True,\n", - "# cert_chain=cert_chain,\n", - "# api_cert=api_certificate,\n", - "# api_private_key=api_private_key\n", - "# )\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port,\n", - " tls=False\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e35802d5", - "metadata": {}, - "outputs": [], - "source": [ - "# import time\n", - "# while True:\n", - "# shard_registry = federation.get_shard_registry()\n", - "# print(shard_registry)\n", - "# time.sleep(5)\n", - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67ae50de", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "920216d3", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "id": "obvious-tyler", - "metadata": {}, - 
"source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "rubber-address", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "sustainable-public", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "markdown", - "id": "unlike-texas", - "metadata": {}, - "source": [ - "We extract User dataset class implementation.\n", - "Is it convinient?\n", - "What if the dataset is not a class?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64f37dcf", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import PIL\n", - "import numpy as np\n", - "from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler\n", - "from torchvision import transforms as tsf\n", - "\n", - "\n", - "class KvasirShardDataset(Dataset):\n", - " \n", - " def __init__(self, dataset):\n", - " self._dataset = dataset\n", - " \n", - " # Prepare transforms\n", - " self.img_trans = tsf.Compose([\n", - " tsf.ToPILImage(),\n", - " tsf.Resize((332, 332)),\n", - " tsf.ToTensor(),\n", - " tsf.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])\n", - " self.mask_trans = tsf.Compose([\n", - " tsf.ToPILImage(),\n", - " tsf.Resize((332, 332), interpolation=PIL.Image.NEAREST),\n", - " tsf.ToTensor()])\n", - " \n", - " def __getitem__(self, index):\n", - " img, mask = self._dataset[index]\n", - " img = self.img_trans(img).numpy()\n", - " mask = self.mask_trans(mask).numpy()\n", - " return img, mask\n", - " \n", - " def __len__(self):\n", - " return len(self._dataset)\n", - "\n", - " \n", - "\n", - "# Now you can implement you data loaders using dummy_shard_desc\n", - "class KvasirSD(DataInterface):\n", - "\n", - " def __init__(self, validation_fraction=1/8, **kwargs):\n", - " super().__init__(**kwargs)\n", - " \n", - " self.validation_fraction = validation_fraction\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " self._shard_dataset = KvasirShardDataset(shard_descriptor.get_dataset('train'))\n", - " \n", - " validation_size = max(1, int(len(self._shard_dataset) * self.validation_fraction))\n", - " \n", - " self.train_indeces = np.arange(len(self._shard_dataset) - validation_size)\n", - " self.val_indeces = np.arange(len(self._shard_dataset) - validation_size, len(self._shard_dataset))\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " train_sampler = SubsetRandomSampler(self.train_indeces)\n", - " return DataLoader(\n", - " self._shard_dataset,\n", - " num_workers=8,\n", - " batch_size=self.kwargs['train_bs'],\n", - " sampler=train_sampler\n", - " )\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " val_sampler = 
SubsetRandomSampler(self.val_indeces)\n", - " return DataLoader(\n", - " self._shard_dataset,\n", - " num_workers=8,\n", - " batch_size=self.kwargs['valid_bs'],\n", - " sampler=val_sampler\n", - " )\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_indeces)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.val_indeces)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8df35f5", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = KvasirSD(train_bs=4, valid_bs=8)\n", - "fed_dataset.shard_descriptor = dummy_shard_desc\n", - "for i, (sample, target) in enumerate(fed_dataset.get_train_loader()):\n", - " print(sample.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "caring-distinction", - "metadata": {}, - "source": [ - "### Describe a model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "visible-victor", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "foreign-gospel", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "UNet model definition\n", - "\"\"\"\n", - "from layers import soft_dice_coef, soft_dice_loss, DoubleConv, Down, Up\n", - "\n", - "\n", - "class UNet(nn.Module):\n", - " def __init__(self, n_channels=3, n_classes=1):\n", - " super().__init__()\n", - " self.inc = DoubleConv(n_channels, 64)\n", - " self.down1 = Down(64, 128)\n", - " self.down2 = Down(128, 256)\n", - " self.down3 = Down(256, 512)\n", - " self.up1 = Up(512, 256)\n", - " self.up2 = Up(256, 128)\n", - " self.up3 = Up(128, 64)\n", - " self.outc = nn.Conv2d(64, n_classes, 1)\n", - "\n", - " def forward(self, x):\n", - " x1 = self.inc(x)\n", - " x2 = self.down1(x1)\n", - " x3 = self.down2(x2)\n", - " x4 = self.down3(x3)\n", - " x = self.up1(x4, x3)\n", - " x = self.up2(x, x2)\n", - " x = self.up3(x, x1)\n", - " x = self.outc(x)\n", - " x = torch.sigmoid(x)\n", - " return x\n", - " \n", - "model_unet = UNet()\n", - "\n", - "# NOTE: This is a workaround to move model to HPU before initializing any optimizers \n", - "model_unet.to('hpu')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "greater-activation", - "metadata": {}, - "outputs": [], - "source": [ - "optimizer_adam = optim.Adam(model_unet.parameters(), lr=1e-4)" - ] - }, - { - "cell_type": "markdown", - "id": "caroline-passion", - "metadata": {}, - "source": [ - "#### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "handled-teens", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model_unet, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model_unet)" - ] - }, - { - "cell_type": "markdown", - "id": "portuguese-groove", - "metadata": {}, - "source": [ - "### Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "increasing-builder", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "import torch\n", - "\n", - "import tqdm\n", - "from 
openfl.interface.aggregation_functions import Median\n", - "\n", - "# The Interactive API supports registering functions defined in the main module or imported.\n", - "def function_defined_in_notebook(some_parameter):\n", - "    print(f'Also I accept a parameter and it is {some_parameter}')\n", - "\n", - "# The Interactive API supports overriding the aggregation function\n", - "aggregation_function = Median()\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@TI.add_kwargs(**{'some_parameter': 42, 'device':'hpu'})\n", - "@TI.register_fl_task(model='unet_model', data_loader='train_loader', \\\n", - "                     device='device', optimizer='optimizer')     \n", - "@TI.set_aggregation_function(aggregation_function)\n", - "def train(unet_model, train_loader, optimizer, device, loss_fn=soft_dice_loss, some_parameter=None):\n", - "\n", - "    print(f'\\n\\n TASK TRAIN GOT DEVICE {device}\\n\\n')\n", - "    \n", - "    function_defined_in_notebook(some_parameter)\n", - "    \n", - "    train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - "    \n", - "    unet_model.train()\n", - "    unet_model.to(device)\n", - "\n", - "    losses = []\n", - "\n", - "    for data, target in train_loader:\n", - "        data, target = torch.tensor(data).to(device), torch.tensor(\n", - "            target).to(device, dtype=torch.float32)\n", - "        optimizer.zero_grad()\n", - "        output = unet_model(data)\n", - "        loss = loss_fn(output=output, target=target)\n", - "        loss.backward()\n", - "        # API call to trigger execution\n", - "        htcore.mark_step()\n", - "        optimizer.step()\n", - "        # API call to trigger execution\n", - "        htcore.mark_step()\n", - "        losses.append(loss.detach().cpu().numpy())\n", - "        \n", - "    return {'train_loss': np.mean(losses),}\n", - "\n", - "@TI.add_kwargs(**{'device':'hpu'})\n", - "@TI.register_fl_task(model='unet_model', data_loader='val_loader', device='device')     \n", - "def validate(unet_model, val_loader, device):\n", - "    print(f'\\n\\n TASK VALIDATE GOT DEVICE {device}\\n\\n')\n", - "    \n", - "    unet_model.eval()\n", - "    unet_model.to(device)\n", - "    \n", - "    val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - "\n", - "    val_score = 0\n", - "    total_samples = 0\n", - "\n", - "    with torch.no_grad():\n", - "        for data, target in val_loader:\n", - "            samples = target.shape[0]\n", - "            total_samples += samples\n", - "            data, target = torch.tensor(data).to(device), \\\n", - "                torch.tensor(target).to(device, dtype=torch.int64)\n", - "            output = unet_model(data)\n", - "            # API call to trigger execution\n", - "            htcore.mark_step()\n", - "            val = soft_dice_coef(output, target)\n", - "            val_score += val.sum().cpu().numpy()\n", - "            \n", - "    return {'dice_coef': val_score / total_samples,}" - ] - }, - { - "cell_type": "markdown", - "id": "derived-bride", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "mature-renewal", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experiment in the federation\n", - "experiment_name = 'kvasir_test_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "lightweight-causing", - "metadata": {}, - "outputs": [], - "source": [ - "# If I use autoreload I get a pickling error\n", - "\n", - "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n", - "fl_experiment.start(model_provider=MI, \n", - "                    
data_loader=fed_dataset,\n", - "                    rounds_to_train=2,\n", - "                    opt_treatment='CONTINUE_GLOBAL')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f1543a36", - "metadata": {}, - "outputs": [], - "source": [ - "# If the user wants to stop the IPython session, then reconnect and check how the experiment is going\n", - "# fl_experiment.restore_experiment_state(MI)\n", - "\n", - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "markdown", - "id": "8c30b301", - "metadata": {}, - "source": [ - "## Now we validate the best model!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55acff59", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Now we validate the best model!\\n\")\n", - "best_model = fl_experiment.get_best_model()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9479fb7f", - "metadata": {}, - "outputs": [], - "source": [ - "# We remove experiment data from the director\n", - "fl_experiment.remove_experiment_data()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "75c8aeab", - "metadata": {}, - "outputs": [], - "source": [ - "best_model.inc.conv[0].weight\n", - "# model_unet.inc.conv[0].weight" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2acb7e6", - "metadata": {}, - "outputs": [], - "source": [ - "# Validating initial model\n", - "validate(initial_model, fed_dataset.get_valid_loader(), 'hpu')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c12ca93f", - "metadata": {}, - "outputs": [], - "source": [ - "# Validating trained model\n", - "validate(best_model, fed_dataset.get_valid_loader(), 'hpu')" - ] - }, - { - "cell_type": "markdown", - "id": "1e6734f6", - "metadata": {}, - "source": [ - "## We can tune model further!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3940e75e", - "metadata": {}, - "outputs": [], - "source": [ - "MI = ModelInterface(model=best_model, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "fl_experiment.start(model_provider=MI, task_keeper=TI, data_loader=fed_dataset, rounds_to_train=2, \\\n", - "                    opt_treatment='CONTINUE_GLOBAL')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a9490ed9", - "metadata": {}, - "outputs": [], - "source": [ - "# After the federation execution completes for the first time, the gRPC connection is lost. 
Adding this API keeps the connection live between \n", - "# director and envoy for the 2 nd round of federation \n", - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bd786d2", - "metadata": {}, - "outputs": [], - "source": [ - "best_model = fl_experiment.get_best_model()\n", - "# Validating trained model\n", - "validate(best_model, fed_dataset.get_valid_loader(), 'hpu')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/workspace/layers.py b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/workspace/layers.py deleted file mode 100644 index 12d913c15e..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_Kvasir_UNet/workspace/layers.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Layers for Unet model.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -def soft_dice_loss(output, target): - """Calculate loss.""" - num = target.size(0) - m1 = output.view(num, -1) - m2 = target.view(num, -1) - intersection = m1 * m2 - score = 2.0 * (intersection.sum(1) + 1) / (m1.sum(1) + m2.sum(1) + 1) - score = 1 - score.sum() / num - return score - - -def soft_dice_coef(output, target): - """Calculate soft DICE coefficient.""" - num = target.size(0) - m1 = output.view(num, -1) - m2 = target.view(num, -1) - intersection = m1 * m2 - score = 2.0 * (intersection.sum(1) + 1) / (m1.sum(1) + m2.sum(1) + 1) - return score.sum() - - -class DoubleConv(nn.Module): - """Pytorch double conv class.""" - - def __init__(self, in_ch, out_ch): - """Initialize layer.""" - super(DoubleConv, self).__init__() - self.in_ch = in_ch - self.out_ch = out_ch - self.conv = nn.Sequential( - nn.Conv2d(in_ch, out_ch, 3, padding=1), - nn.BatchNorm2d(out_ch), - nn.ReLU(inplace=True), - nn.Conv2d(out_ch, out_ch, 3, padding=1), - nn.BatchNorm2d(out_ch), - nn.ReLU(inplace=True), - ) - - def forward(self, x): - """Do forward pass.""" - x = self.conv(x) - return x - - -class Down(nn.Module): - """Pytorch nn module subclass.""" - - def __init__(self, in_ch, out_ch): - """Initialize layer.""" - super(Down, self).__init__() - self.mpconv = nn.Sequential( - nn.MaxPool2d(2), - DoubleConv(in_ch, out_ch) - ) - - def forward(self, x): - """Do forward pass.""" - x = self.mpconv(x) - return x - - -class Up(nn.Module): - """Pytorch nn module subclass.""" - - def __init__(self, in_ch, out_ch, bilinear=False): - """Initialize layer.""" - super(Up, self).__init__() - self.in_ch = in_ch - self.out_ch = out_ch - if bilinear: - self.up = nn.Upsample( - scale_factor=2, - mode='bilinear', - align_corners=True - ) - else: - self.up = nn.ConvTranspose2d(in_ch, in_ch // 2, 2, stride=2) - self.conv = DoubleConv(in_ch, out_ch) - - def forward(self, x1, x2): - """Do forward pass.""" - x1 = self.up(x1) - diff_y = x2.size()[2] - x1.size()[2] - diff_x = x2.size()[3] - x1.size()[3] - - x1 = F.pad( - x1, - (diff_x // 2, diff_x - diff_x // 2, diff_y // 2, diff_y - diff_y // 2) - ) - - x = torch.cat([x2, x1], 
dim=1) - x = self.conv(x) - return x diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/README.md b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/README.md deleted file mode 100644 index 461b7759d7..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/README.md +++ /dev/null @@ -1,271 +0,0 @@ -# MedMNIST 2D Classification Tutorial - -![MedMNISTv2_overview](https://raw.githubusercontent.com/MedMNIST/MedMNIST/main/assets/medmnistv2.jpg) - -For more details, please refer to the original paper: -**MedMNIST v2: A Large-Scale Lightweight Benchmark for 2D and 3D Biomedical Image Classification** ([arXiv](https://arxiv.org/abs/2110.14795)), and [PyPI](https://pypi.org/project/medmnist/). - - -## **Habana Tutorials** - -## **About model and experiments** - -We use a simple convolutional neural network and settings coming from [the experiments](https://github.com/MedMNIST/experiments) repository. -
- -#### The names of the files/examples that contain HPU adaptations start with "HPU". -For example: HPU_PyTorch_MedMNIST_2D.ipynb, placed under the workspace folder, contains the required HPU adaptations. - - All the execution steps mentioned in the last section (**V. How to run this tutorial**) remain the same for HPU examples, but as a prerequisite they need some additional environment setup and Habana-supported package installations, which are explained below in **sections I to IV**. - - **Note:** By default these experiments utilize only 1 HPU device. - 
- - - ## **I. Intel Developer Cloud Setup** -This example was tested on the Intel Developer Cloud utilizing a Gaudi2 instance. - -For accessing the Gaudi2 instances on the Intel Developer Cloud, follow the instructions [here](https://developer.habana.ai/intel-developer-cloud/). - -The Gaudi instance in the Intel Developer Cloud comes with the SynapseAI SW Stack for Gaudi2 installed, so you can skip sections **II.** and **III.** - -Furthermore, our testing was done using the Habana-based Docker container built from the Dockerfile discussed below: - -Let's create a Dockerfile with the following content and name it Dockerfile_Habana: - -``` - -FROM vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest - -ENV HABANA_VISIBLE_DEVICES=all -ENV OMPI_MCA_btl_vader_single_copy_mechanism=none - -ENV DEBIAN_FRONTEND="noninteractive" TZ=Etc/UTC -RUN apt-get update && apt-get install -y tzdata bash-completion \ -    #RUN apt update && apt-get install -y tzdata bash-completion \ -    python3-pip openssh-server vim git iputils-ping net-tools curl bc gawk \ -    && rm -rf /var/lib/apt/lists/* - - -RUN pip install numpy \ -    && pip install jupyterlab \ -    && pip install matplotlib \ -    && pip install openfl - - -RUN git clone https://github.com/securefederatedai/openfl.git /root/openfl - -WORKDIR /root - -``` - -This base container comes with HPU Pytorch packages already installed. Hence you can skip section **IV.** below. - -Build the above container and then launch it using: - -``` -export GAUDI_DOCKER_IMAGE="gaudi-docker-ubuntu20.04-openfl" - -docker build -t ${GAUDI_DOCKER_IMAGE} -f Dockerfile_Habana . -docker run --net host -id --name openfl_gaudi_run ${GAUDI_DOCKER_IMAGE} bash -``` - -Then access the container bash shell using: - -``` -docker exec -it openfl_gaudi_run bash - -``` - -Once inside the container, ensure the openfl repo is cloned; - -otherwise, clone it using: - -``` -git clone https://github.com/securefederatedai/openfl.git -``` - -Then check if the openfl package is installed: - -``` -pip list | grep openfl - -``` - -If not, install it using: - -``` -pip install openfl -``` - -Then follow the instructions in section **V. HPU Adaptations For PyTorch Examples** below. - 
- - ## **II. AWS DL1 Instance Setup** - - This example was tested on an AWS EC2 instance created by following the instructions mentioned [here](https://docs.habana.ai/en/latest/AWS_EC2_DL1_and_PyTorch_Quick_Start/AWS_EC2_DL1_and_PyTorch_Quick_Start.html). - - Test setup: Habana 1.7 and Ubuntu 20.04 - 
- - ## **III. Set Up SynapseAI SW Stack** - - - To perform an installation of the full driver and SynapseAI software stack independently on the EC2 instance, run the following command: - - ``` - wget -nv https://vault.habana.ai/artifactory/gaudi-installer/latest/habanalabs-installer.sh -chmod +x habanalabs-installer.sh -./habanalabs-installer.sh install --type base -``` - **NOTE:** Habana Labs requires Python 3.8; the version is hardcoded in [habanalabs-installer.sh](https://vault.habana.ai/ui/native/gaudi-installer/latest/habanalabs-installer.sh). - -You can refer to the [Habana docs](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#set-up-synapseai-sw-stack) and the [GitHub repository](https://github.com/HabanaAI/Setup_and_Install) for detailed instructions. - 
- - ## **IV. HPU Pytorch Installation** - - For this example, make sure to install the PyTorch packages provided by Habana. These packages are optimized for the Habana Gaudi HPU. Installing public PyTorch packages is not supported. - Habana PyTorch packages consist of: - - **torch** - PyTorch framework package with Habana support - -- **habana-torch-plugin** - Libraries and modules needed to execute PyTorch on single-card, single-node, and multi-node setups. - -- **habana-torch-dataloader** - Habana multi-threaded dataloader package. - -- **torchvision** - Torchvision package compiled in the torch environment. No Habana-specific changes in this package. - - Run the following command to install the above Habana PyTorch environment: - - ``` - ./habanalabs-installer.sh install --type pytorch --venv - ``` - - The --venv flag installs the relevant framework inside the virtual environment. To activate the virtual environment, perform the following: - - - ``` - cd $HOME/habanalabs-venv - source ./bin/activate - ``` - -The default virtual environment folder is $HOME/habanalabs-venv. To override the default, run the following command: - - ``` - export HABANALABS_VIRTUAL_DIR=/path/to/dir - ``` - - You can refer to the [Habana docs](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#set-up-synapseai-sw-stack) and the [GitHub repository](https://github.com/HabanaAI/Setup_and_Install) for detailed instructions. - 
- - ## **V. HPU Adaptations For PyTorch Examples** - -The following set of code additions is required in the workspace notebook to run a model on Habana. The following steps cover the Eager and Lazy modes of execution. - -### 1. Target the Gaudi HPU device: - -``` -device = torch.device("hpu") -``` -### 2. Move the model to the device: - -**There is a dependency in the order of execution (moving the model to the HPU and initializing the optimizer). The workaround is to execute this step before initializing any optimizers.** - -``` -model.to(device) -``` -### 3. Import the Habana Torch Library: - -``` -import habana_frameworks.torch.core as htcore -``` -### 4. Enable Lazy execution mode by setting the environment variable shown below. -Do not set this environment variable if you want to execute your code in Eager mode: - -``` -os.environ["PT_HPU_LAZY_MODE"] = "1" -``` -### 5. In Lazy mode, execution is triggered wherever data is read back to the host from the Habana device. -For example, execution is triggered if you are running a topology and getting the loss value onto the host from the device with `loss.item()`. Adding a `mark_step()` in the code is another way to trigger execution: - -``` -htcore.mark_step() -``` - -The placement of `mark_step()` is required at the following points in a training script: - -* Right after `optimizer.step()` to cleanly demarcate training iterations, -* Between `loss.backward` and `optimizer.step()` if the optimizer being used is a Habana custom optimizer. - -A minimal sketch combining these steps is shown below. Refer to [getting started with PyTorch](https://www.intel.com/content/www/us/en/developer/articles/technical/get-started-habana-gaudi-deep-learning-training.html#articleparagraph_cop_711677074) for a detailed explanation of the PyTorch Habana architecture. A sample example can be found [here](https://docs.habana.ai/en/latest/PyTorch/Getting_Started_with_PyTorch_and_Gaudi/Getting_Started_with_PyTorch.html). - 
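Pulling the five steps above together, here is a minimal sketch of a Lazy-mode training loop as used in these tutorials. It is illustrative only: `model`, `data_loader`, and `loss_fn` stand in for the objects defined in your own workspace notebook, and the optimizer choice is arbitrary.

```
import os
os.environ["PT_HPU_LAZY_MODE"] = "1"            # step 4: Lazy mode (omit for Eager mode)

import torch
import habana_frameworks.torch.core as htcore   # step 3: Habana Torch library

device = torch.device("hpu")                    # step 1: target the Gaudi HPU device

model.to(device)                                # step 2: move the model BEFORE creating the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

model.train()
for data, target in data_loader:
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    loss = loss_fn(model(data), target)
    loss.backward()
    htcore.mark_step()                          # step 5: required here for Habana custom optimizers
    optimizer.step()
    htcore.mark_step()                          # step 5: demarcate the end of the training iteration
```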
- -## **VI. How to run this tutorial (without TLS and locally as a simulation):** -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/HPU/PyTorch_MedMNIST_2D - ``` -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r requirements.txt -./start_envoy.sh env_one envoy_config.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config.yaml -``` -
- -### 5. In the third terminal (or fourth terminal, if you chose to do two envoys) run the Jupyter Notebook: - -```sh -cd workspace -jupyter lab --allow-root HPU_Pytorch_MedMNIST_2D.ipynb -``` - -When running on a remote host inside a Docker container, as in the case of Gaudi2, you need to port forward JupyterLab to your local host. On your local terminal, run: - -```sh -ssh -NL 8888:127.0.0.1:8888 gaudi2_host -``` - -- A Jupyter Server URL will appear in your terminal. In your local browser, proceed to that link. Once the webpage loads, click on the HPU_Pytorch_MedMNIST_2D.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/director/director_config.yaml deleted file mode 100644 index f7d3847170..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['28', '28', '3'] - target_shape: ['1','1'] diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/director/start_director.sh deleted file mode 100644 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/envoy_config.yaml deleted file mode 100644 index 22ab4c3039..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/envoy_config.yaml +++ /dev/null @@ -1,10 +0,0 @@ -params: - -optional_plugin_components: {} - -shard_descriptor: - template: medmnist_shard_descriptor.MedMNISTShardDescriptor - params: - rank_worldsize: 1, 1 - datapath: data/. 
- dataname: bloodmnist diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/medmnist_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/medmnist_shard_descriptor.py deleted file mode 100644 index 6ad2298e61..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/medmnist_shard_descriptor.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""MedMNIST Shard Descriptor.""" - -import logging -import os -from typing import Any, List, Tuple -from medmnist.info import INFO, HOMEPAGE - -import numpy as np - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class MedMNISTShardDataset(ShardDataset): - """MedMNIST Shard dataset class.""" - - def __init__(self, x, y, data_type: str = 'train', rank: int = 1, worldsize: int = 1) -> None: - """Initialize MedMNISTDataset.""" - self.data_type = data_type - self.rank = rank - self.worldsize = worldsize - self.x = x[self.rank - 1::self.worldsize] - self.y = y[self.rank - 1::self.worldsize] - - def __getitem__(self, index: int) -> Tuple[Any, Any]: - """Return an item by the index.""" - return self.x[index], self.y[index] - - def __len__(self) -> int: - """Return the len of the dataset.""" - return len(self.x) - - -class MedMNISTShardDescriptor(ShardDescriptor): - """MedMNIST Shard descriptor class.""" - - def __init__( - self, - rank_worldsize: str = '1, 1', - datapath: str = '', - dataname: str = 'bloodmnist', - **kwargs - ) -> None: - """Initialize MedMNISTShardDescriptor.""" - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.datapath = datapath - self.dataset_name = dataname - self.info = INFO[self.dataset_name] - - (x_train, y_train), (x_test, y_test) = self.load_data() - self.data_by_type = { - 'train': (x_train, y_train), - 'val': (x_test, y_test) - } - - def get_shard_dataset_types(self) -> List[str]: - """Get available shard dataset types.""" - return list(self.data_by_type) - - def get_dataset(self, dataset_type='train') -> MedMNISTShardDataset: - """Return a shard dataset by type.""" - if dataset_type not in self.data_by_type: - raise Exception(f'Wrong dataset type: {dataset_type}') - return MedMNISTShardDataset( - *self.data_by_type[dataset_type], - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - return ['28', '28', '3'] - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - return ['1', '1'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'MedMNIST dataset, shard number {self.rank}' - f' out of {self.worldsize}') - - @staticmethod - def download_data(datapath: str = 'data/', - dataname: str = 'bloodmnist', - info: dict = {}) -> None: - - logger.info(f"{datapath}\n{dataname}\n{info}") - try: - from torchvision.datasets.utils import download_url - download_url(url=info["url"], - root=datapath, - filename=dataname, - md5=info["MD5"]) - except Exception: - raise RuntimeError('Something went wrong when downloading! ' - + 'Go to the homepage to download manually. 
' - + HOMEPAGE) - - def load_data(self) -> Tuple[Tuple[Any, Any], Tuple[Any, Any]]: - """Download prepared dataset.""" - - dataname = self.dataset_name + '.npz' - dataset = os.path.join(self.datapath, dataname) - - if not os.path.isfile(dataset): - logger.info(f"Dataset {dataname} not found at:{self.datapath}.\n\tDownloading...") - MedMNISTShardDescriptor.download_data(self.datapath, dataname, self.info) - logger.info("DONE!") - - data = np.load(dataset) - - x_train = data["train_images"] - x_test = data["test_images"] - - y_train = data["train_labels"] - y_test = data["test_labels"] - logger.info('MedMNIST data was loaded!') - return (x_train, y_train), (x_test, y_test) diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/requirements.txt deleted file mode 100644 index 363c0d69f9..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -medmnist -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/start_envoy.sh deleted file mode 100644 index cdd84e7fb6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/workspace/HPU_Pytorch_MedMNIST_2D.ipynb b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/workspace/HPU_Pytorch_MedMNIST_2D.ipynb deleted file mode 100644 index cbd9520df8..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_MedMNIST_2D/workspace/HPU_Pytorch_MedMNIST_2D.ipynb +++ /dev/null @@ -1,588 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated MedMNIST2D " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5504ab79", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install medmnist" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0570122", - "metadata": {}, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "import tqdm\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from torch.utils.data import Dataset, DataLoader\n", - "from torchvision import transforms as T\n", - "import torch.nn.functional as F\n", - "import habana_frameworks.torch.core as htcore\n", - "\n", - "import medmnist" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "22ba64da", - "metadata": {}, - "outputs": [], - "source": [ - "from medmnist import INFO, Evaluator\n", - "\n", - "## Change dataflag here to reflect the ones defined in the envoy_conifg_xxx.yaml\n", - "dataname = 'bloodmnist'\n" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - 
"outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'api'\n", - "director_node_fqdn = 'localhost'\n", - "director_port=50051\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port, \n", - " tls=False\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Describing FL experimen" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "9b3081a6", - "metadata": {}, - "source": [ - "## Load MedMNIST INFO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e0377d3a", - "metadata": {}, - "outputs": [], - "source": [ - "num_epochs = 3\n", - "TRAIN_BS, VALID_BS = 64, 128\n", - "\n", - "lr = 0.001\n", - "gamma=0.1\n", - "milestones = [0.5 * num_epochs, 0.75 * num_epochs]\n", - "\n", - "info = INFO[dataname]\n", - "task = info['task']\n", - "n_channels = info['n_channels']\n", - "n_classes = len(info['label'])" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0dc457e", - "metadata": {}, - "outputs": [], - "source": [ - "## Data transformations\n", - "data_transform = T.Compose([T.ToTensor(), \n", - " T.Normalize(mean=[.5], std=[.5])]\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "09ba2f64", - "metadata": {}, - "outputs": [], - "source": [ - "from PIL import Image\n", - "\n", - "class TransformedDataset(Dataset):\n", - " \"\"\"Image Person ReID Dataset.\"\"\"\n", - "\n", - "\n", - " def __init__(self, dataset, transform=None, target_transform=None):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - " self.transform = transform\n", - " self.target_transform = target_transform\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " \n", - " img, label = self.dataset[index]\n", - " \n", - " if self.target_transform:\n", - " label = self.target_transform(label) \n", - " else:\n", - " label = label.astype(int)\n", - " \n", - " if self.transform:\n", - " img = Image.fromarray(img)\n", - " img = self.transform(img)\n", - " 
else:\n", - " base_transform = T.PILToTensor()\n", - " img = Image.fromarray(img)\n", - " img = base_transform(img) \n", - "\n", - " return img, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db2d563e", - "metadata": {}, - "outputs": [], - "source": [ - "class MedMnistFedDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " self.kwargs = kwargs\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - "\n", - " self.train_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('train'),\n", - " transform=data_transform\n", - " ) \n", - " \n", - " self.valid_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('val'),\n", - " transform=data_transform\n", - " )\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(\n", - " self.train_set, num_workers=8, batch_size=self.kwargs['train_bs'], shuffle=True)\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(self.valid_set, num_workers=8, batch_size=self.kwargs['valid_bs'])\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "b0dfb459", - "metadata": {}, - "source": [ - "### Create Mnist federated dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4af5c4c2", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MedMnistFedDataset(train_bs=TRAIN_BS, valid_bs=VALID_BS)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f63908e", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset.shard_descriptor = dummy_shard_desc\n", - "for i, (sample, target) in enumerate(fed_dataset.get_train_loader()):\n", - " print(sample.shape, target.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "075d1d6c", - "metadata": {}, - "source": [ - "### Describe the model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8477a001", - "metadata": {}, - "outputs": [], - "source": [ - "# define a simple CNN model\n", - "class Net(nn.Module):\n", - " def __init__(self, in_channels, num_classes):\n", - " super(Net, self).__init__()\n", - "\n", - " self.layer1 = nn.Sequential(\n", - " nn.Conv2d(in_channels, 16, kernel_size=3),\n", - " nn.BatchNorm2d(16),\n", - " nn.ReLU())\n", - "\n", - " self.layer2 = nn.Sequential(\n", - " nn.Conv2d(16, 16, kernel_size=3),\n", - " nn.BatchNorm2d(16),\n", - " nn.ReLU(),\n", - " nn.MaxPool2d(kernel_size=2, stride=2))\n", - "\n", - " self.layer3 = nn.Sequential(\n", - " nn.Conv2d(16, 64, kernel_size=3),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU())\n", - " \n", - " 
self.layer4 = nn.Sequential(\n", - " nn.Conv2d(64, 64, kernel_size=3),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU())\n", - "\n", - " self.layer5 = nn.Sequential(\n", - " nn.Conv2d(64, 64, kernel_size=3, padding=1),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU(),\n", - " nn.MaxPool2d(kernel_size=2, stride=2))\n", - "\n", - " self.fc = nn.Sequential(\n", - " nn.Linear(64 * 4 * 4, 128),\n", - " nn.ReLU(),\n", - " nn.Linear(128, 128),\n", - " nn.ReLU(),\n", - " nn.Linear(128, num_classes))\n", - "\n", - " def forward(self, x):\n", - " x = self.layer1(x)\n", - " x = self.layer2(x)\n", - " x = self.layer3(x)\n", - " x = self.layer4(x)\n", - " x = self.layer5(x)\n", - " x = x.view(x.size(0), -1)\n", - " x = self.fc(x)\n", - " return x\n", - "\n", - "# NOTE: This is a workaround to move model to HPU before initializing any optimizers \n", - "device = torch.device('hpu')\n", - "model = Net(in_channels=n_channels, num_classes=n_classes).to(device)\n", - " \n", - "# define optimizer \n", - "optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2154486", - "metadata": {}, - "outputs": [], - "source": [ - "print(model)" - ] - }, - { - "cell_type": "markdown", - "id": "8d1c78ee", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "59831bcd", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4ff463bd", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@TI.register_fl_task(model='model', data_loader='train_loader',\n", - " device='device', optimizer='optimizer')\n", - "def train(model, train_loader, device, optimizer):\n", - " # This task is designed to run on remote nodes equipped with HPU hence device name is hardcoded here.\n", - " device = 'hpu'\n", - " total_loss = []\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " model.train()\n", - " model.to(device)\n", - " \n", - " #bloodmnist is multi-class dataset and hence CrossEntropyLoss() is choosen\n", - " criterion = nn.CrossEntropyLoss()\n", - " \n", - " for inputs, targets in train_loader:\n", - " \n", - " optimizer.zero_grad()\n", - " outputs = model(inputs.to(device))\n", - "\n", - " targets = torch.squeeze(targets, 1).long().to(device)\n", - " loss = criterion(outputs, targets)\n", - " \n", - " total_loss.append(loss.item())\n", - " \n", - " loss.backward()\n", - " htcore.mark_step()\n", - " optimizer.step()\n", - " htcore.mark_step()\n", - "\n", - " return {'train_loss': np.mean(total_loss),}\n", - "\n", - "\n", - "@TI.register_fl_task(model='model', data_loader='val_loader', device='device')\n", - "def validate(model, val_loader, device):\n", - " # This task is designed to run on remote nodes equipped with HPU hence device name is hardcoded here.\n", - " device = 'hpu'\n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - " 
model.eval()\n", - " model.to(device)\n", - "\n", - " val_score = 0\n", - " total_samples = 0\n", - " total_loss = []\n", - " y_score = torch.tensor([]).to(device)\n", - " \n", - " #bloodmnist is multi-class dataset and hence CrossEntropyLoss() is choosen\n", - " criterion = nn.CrossEntropyLoss()\n", - "\n", - " with torch.no_grad():\n", - " for inputs, targets in val_loader:\n", - " outputs = model(inputs.to(device))\n", - " \n", - " targets = torch.squeeze(targets, 1).long().to(device)\n", - " loss = criterion(outputs, targets) \n", - "\n", - " total_loss.append(loss.item())\n", - " total_samples += targets.shape[0]\n", - " \n", - " # API call to trigger execution\n", - " htcore.mark_step()\n", - " \n", - " pred = outputs.argmax(dim=1)\n", - " val_score += pred.eq(targets).sum().cpu().numpy()\n", - " \n", - " acc = val_score / total_samples \n", - " test_loss = sum(total_loss) / len(total_loss)\n", - "\n", - " return {'acc': acc, \n", - " 'test_loss': test_loss,\n", - " }\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = 'medmnist_exp'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n", - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=3,\n", - " opt_treatment='RESET')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01fa7cea", - "metadata": {}, - "outputs": [], - "source": [ - "# If user want to stop IPython session, then reconnect and check how experiment is going\n", - "# fl_experiment.restore_experiment_state(model_interface)\n", - "\n", - "fl_experiment.stream_metrics(tensorboard_logs=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "vscode": { - "interpreter": { - "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/README.md b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/README.md deleted file mode 100644 index 48f8284383..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/README.md +++ /dev/null @@ -1,265 +0,0 @@ -# PyTorch TinyImageNet - -## **Habana Tutorials** -#### The name of the file/example that contain HPU adaptations start with "HPU". -For example: HPU_pytorch_tinyimagenet.ipynb placed under workspace folder contains the required HPU adaptations. - - All the execution steps mention in last section (**V. 
How to run this tutorial**) remain the same for HPU examples, but as a prerequisite they need some additional environment setup and Habana-supported package installations, which are explained below in **sections I to IV**. - -**Note:** By default these experiments utilize only 1 HPU device. - 
- - - ## **I. Intel Developer Cloud Setup** -This example was tested on the Intel Developer Cloud utilizing a Gaudi2 instance. - -For accessing the Gaudi2 instances on the Intel Developer Cloud, follow the instructions [here](https://developer.habana.ai/intel-developer-cloud/). - -The Gaudi instance in the Intel Developer Cloud comes with the SynapseAI SW Stack for Gaudi2 installed, so you can skip sections **II.** and **III.** - -Furthermore, our testing was done using the Habana-based Docker container built from the Dockerfile discussed below: - -Let's create a Dockerfile with the following content and name it Dockerfile_Habana: - -``` - -FROM vault.habana.ai/gaudi-docker/1.10.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1/latest - -ENV HABANA_VISIBLE_DEVICES=all -ENV OMPI_MCA_btl_vader_single_copy_mechanism=none - -ENV DEBIAN_FRONTEND="noninteractive" TZ=Etc/UTC -RUN apt-get update && apt-get install -y tzdata bash-completion \ -    #RUN apt update && apt-get install -y tzdata bash-completion \ -    python3-pip openssh-server vim git iputils-ping net-tools curl bc gawk \ -    && rm -rf /var/lib/apt/lists/* - - -RUN pip install numpy \ -    && pip install jupyterlab \ -    && pip install matplotlib \ -    && pip install openfl - - -RUN git clone https://github.com/securefederatedai/openfl.git /root/openfl - -WORKDIR /root - -``` - -This base container comes with HPU Pytorch packages already installed. Hence you can skip section **IV.** below. - -Build the above container and then launch it using: - -``` -export GAUDI_DOCKER_IMAGE="gaudi-docker-ubuntu20.04-openfl" - -docker build -t ${GAUDI_DOCKER_IMAGE} -f Dockerfile_Habana . -docker run --net host -id --name openfl_gaudi_run ${GAUDI_DOCKER_IMAGE} bash -``` - -Then access the container bash shell using: - -``` -docker exec -it openfl_gaudi_run bash - -``` - -Once inside the container, ensure the openfl repo is cloned; - -otherwise, clone it using: - -``` -git clone https://github.com/securefederatedai/openfl.git -``` - -Then check if the openfl package is installed: - -``` -pip list | grep openfl - -``` - -If not, install it using: - -``` -pip install openfl -``` - -Then follow the instructions in section **V. HPU Adaptations For PyTorch Examples** below. - 
- - ## **II. AWS DL1 Instance Setup** - - This example was tested on an AWS EC2 instance created by following the instructions mentioned [here](https://docs.habana.ai/en/latest/AWS_EC2_DL1_and_PyTorch_Quick_Start/AWS_EC2_DL1_and_PyTorch_Quick_Start.html). - - Test setup: Habana 1.7 and Ubuntu 20.04 - 
- - ## **III. Set Up SynapseAI SW Stack** - - - To perform an installation of the full driver and SynapseAI software stack independently on the EC2 instance, run the following command: - - ``` - wget -nv https://vault.habana.ai/artifactory/gaudi-installer/latest/habanalabs-installer.sh -chmod +x habanalabs-installer.sh -./habanalabs-installer.sh install --type base -``` - **NOTE:** Habana Labs requires Python 3.8; the version is hardcoded in [habanalabs-installer.sh](https://vault.habana.ai/ui/native/gaudi-installer/latest/habanalabs-installer.sh). - -You can refer to the [Habana docs](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#set-up-synapseai-sw-stack) and the [GitHub repository](https://github.com/HabanaAI/Setup_and_Install) for detailed instructions. - 
- - ## **IV. HPU Pytorch Installation** - - For this example, make sure to install the PyTorch packages provided by Habana. These packages are optimized for the Habana Gaudi HPU. Installing public PyTorch packages is not supported. - Habana PyTorch packages consist of: - - **torch** - PyTorch framework package with Habana support - -- **habana-torch-plugin** - Libraries and modules needed to execute PyTorch on single-card, single-node, and multi-node setups. - -- **habana-torch-dataloader** - Habana multi-threaded dataloader package. - -- **torchvision** - Torchvision package compiled in the torch environment. No Habana-specific changes in this package. - - Run the following command to install the above Habana PyTorch environment: - - ``` - ./habanalabs-installer.sh install --type pytorch --venv - ``` - - The --venv flag installs the relevant framework inside the virtual environment. To activate the virtual environment, perform the following: - - - ``` - cd $HOME/habanalabs-venv - source ./bin/activate - ``` - -The default virtual environment folder is $HOME/habanalabs-venv. To override the default, run the following command: - - ``` - export HABANALABS_VIRTUAL_DIR=/path/to/dir - ``` - - You can refer to the [Habana docs](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#set-up-synapseai-sw-stack) and the [GitHub repository](https://github.com/HabanaAI/Setup_and_Install) for detailed instructions. - 
- - ## **V. HPU Adaptations For PyTorch Examples** - -The following set of code additions is required in the workspace notebook to run a model on Habana. The following steps cover the Eager and Lazy modes of execution. - -### 1. Target the Gaudi HPU device: - -``` -device = torch.device("hpu") -``` -### 2. Move the model to the device: - -**There is a dependency in the order of execution (moving the model to the HPU and initializing the optimizer). The workaround is to execute this step before initializing any optimizers.** - -``` -model.to(device) -``` -### 3. Import the Habana Torch Library: - -``` -import habana_frameworks.torch.core as htcore -``` -### 4. Enable Lazy execution mode by setting the environment variable shown below. -Do not set this environment variable if you want to execute your code in Eager mode: - -``` -os.environ["PT_HPU_LAZY_MODE"] = "1" -``` -### 5. In Lazy mode, execution is triggered wherever data is read back to the host from the Habana device. -For example, execution is triggered if you are running a topology and getting the loss value onto the host from the device with `loss.item()`. Adding a `mark_step()` in the code is another way to trigger execution: - -``` -htcore.mark_step() -``` - -The placement of `mark_step()` is required at the following points in a training script: - -* Right after `optimizer.step()` to cleanly demarcate training iterations, -* Between `loss.backward` and `optimizer.step()` if the optimizer being used is a Habana custom optimizer. - -A minimal validation-side sketch is shown below. Refer to [getting started with PyTorch](https://www.intel.com/content/www/us/en/developer/articles/technical/get-started-habana-gaudi-deep-learning-training.html#articleparagraph_cop_711677074) for a detailed explanation of the PyTorch Habana architecture. A sample example can be found [here](https://docs.habana.ai/en/latest/PyTorch/Getting_Started_with_PyTorch_and_Gaudi/Getting_Started_with_PyTorch.html). - 
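The training-side placement of `mark_step()` is covered by the rules above; as a complement, here is a minimal validation-side sketch. It is illustrative only: `model` and `val_loader` stand in for the objects defined in your own workspace notebook, and the final `.cpu()` read demonstrates the implicit execution trigger described in step 5.

```
import torch
import habana_frameworks.torch.core as htcore

device = torch.device("hpu")
model.eval()
model.to(device)

correct = 0
with torch.no_grad():
    for data, target in val_loader:
        output = model(data.to(device))
        htcore.mark_step()          # explicit trigger: run the queued forward pass
        pred = output.argmax(dim=1)
        # reading the result back to the host also triggers execution in Lazy mode
        correct += pred.eq(target.to(device)).sum().cpu().item()
```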
- - -## **VI. How to run this tutorial (without TLS and locally as a simulation):** -
- -### 0. If you haven't done so already, install OpenFL in the virtual environment created during Habana setup, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
- -### 1. Split your terminal into three (one for the director, one for the envoy, and one for the experiment) - -
- -### 2. Do the following in each terminal: - - Activate the same virtual environment as in section **III**. - - If you are in a network environment with a proxy, ensure the proxy environment variables are set in each of your terminals (see the example after this step). - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/HPU/PyTorch_TinyImageNet - ``` - -
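For example, proxy variables might be set like this (illustrative values only; substitute your own, and keep `localhost` in `no_proxy` so the director and envoy can still talk locally):

```sh
export http_proxy=http://proxy.example.com:8080
export https_proxy=http://proxy.example.com:8080
export no_proxy=localhost,127.0.0.1
```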
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
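For reference, `start_director.sh` is a thin wrapper over the `fx` CLI and simply runs:

```sh
fx director start --disable-tls -c director_config.yaml
```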
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r requirements.txt -./start_envoy.sh env_one envoy_config_1.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config_2.yaml -``` - -
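Likewise, `start_envoy.sh` simply forwards its two arguments (the envoy name and the envoy config file) to the `fx` CLI, pointing the envoy at the local director:

```sh
fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50051
```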
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab --allow-root hpu_pytorch_tinyimagenet.ipynb -``` -When running on a remote host inside a Docker container, as is the case for Gaudi2, you need to port-forward Jupyter Lab to your local host. On your local terminal, run the following port-forwarding command: - -```sh -ssh -NL 8888:127.0.0.1:8888 gaudi2_host -``` - -- A Jupyter Server URL will appear in your terminal. In your local browser, open that link. Once the webpage loads, click on the hpu_pytorch_tinyimagenet.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment finishes, the director terminal will display a message that the experiment completed successfully. -
- diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/director/director_config.yaml deleted file mode 100644 index 3fc4137943..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['64', '64', '3'] - target_shape: ['64', '64'] diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/director/start_director.sh deleted file mode 100644 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/envoy_config_1.yaml b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/envoy_config_1.yaml deleted file mode 100644 index cb04056976..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/envoy_config_1.yaml +++ /dev/null @@ -1,10 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: tinyimagenet_shard_descriptor.TinyImageNetShardDescriptor - params: - data_folder: tinyimagenet_data - rank_worldsize: 1,1 diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/requirements.txt deleted file mode 100644 index 7f361a8e94..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -Pillow==10.3.0 diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/start_envoy.sh deleted file mode 100644 index cdd84e7fb6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/tinyimagenet_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/tinyimagenet_shard_descriptor.py deleted file mode 100644 index 6cdfbadc86..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/envoy/tinyimagenet_shard_descriptor.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""TinyImageNet Shard Descriptor.""" - -import glob -import logging -import os -import shutil -from pathlib import Path -from typing import Tuple - -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class TinyImageNetDataset(ShardDataset): - """TinyImageNet shard dataset class.""" - - 
NUM_IMAGES_PER_CLASS = 500 - - def __init__(self, data_folder: Path, data_type='train', rank=1, worldsize=1): - """Initialize TinyImageNetDataset.""" - self.data_type = data_type - self._common_data_folder = data_folder - self._data_folder = os.path.join(data_folder, data_type) - self.labels = {} # fname - label number mapping - self.image_paths = sorted( - glob.iglob( - os.path.join(self._data_folder, '**', '*.JPEG'), - recursive=True - ) - )[rank - 1::worldsize] - with open(os.path.join(self._common_data_folder, 'wnids.txt'), 'r') as fp: - self.label_texts = sorted([text.strip() for text in fp.readlines()]) - self.label_text_to_number = {text: i for i, text in enumerate(self.label_texts)} - self.fill_labels() - - def __len__(self) -> int: - """Return the len of the shard dataset.""" - return len(self.image_paths) - - def __getitem__(self, index: int) -> Tuple['Image', int]: - """Return an item by the index.""" - file_path = self.image_paths[index] - label = self.labels[os.path.basename(file_path)] - return self.read_image(file_path), label - - def read_image(self, path: Path) -> Image: - """Read the image.""" - img = Image.open(path) - return img - - def fill_labels(self) -> None: - """Fill labels.""" - if self.data_type == 'train': - for label_text, i in self.label_text_to_number.items(): - for cnt in range(self.NUM_IMAGES_PER_CLASS): - self.labels[f'{label_text}_{cnt}.JPEG'] = i - elif self.data_type == 'val': - with open(os.path.join(self._data_folder, 'val_annotations.txt'), 'r') as fp: - for line in fp.readlines(): - terms = line.split('\t') - file_name, label_text = terms[0], terms[1] - self.labels[file_name] = self.label_text_to_number[label_text] - - -class TinyImageNetShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__( - self, - data_folder: str = 'data', - rank_worldsize: str = '1,1', - **kwargs - ): - """Initialize TinyImageNetShardDescriptor.""" - self.common_data_folder = Path.cwd() / data_folder - self.data_folder = Path.cwd() / data_folder / 'tiny-imagenet-200' - self.download_data() - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - def download_data(self): - """Download prepared shard dataset.""" - zip_file_path = self.common_data_folder / 'tiny-imagenet-200.zip' - os.makedirs(self.common_data_folder, exist_ok=True) - os.system(f'wget --no-clobber http://cs231n.stanford.edu/tiny-imagenet-200.zip' - f' -O {zip_file_path}') - shutil.unpack_archive(str(zip_file_path), str(self.common_data_folder)) - - def get_dataset(self, dataset_type): - """Return a shard dataset by type.""" - return TinyImageNetDataset( - data_folder=self.data_folder, - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self): - """Return the sample shape info.""" - return ['64', '64', '3'] - - @property - def target_shape(self): - """Return the target shape info.""" - return ['64', '64'] - - @property - def dataset_description(self) -> str: - """Return the shard dataset description.""" - return (f'TinyImageNetDataset dataset, shard number {self.rank}' - f' out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/workspace/hpu_pytorch_tinyimagenet.ipynb b/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/workspace/hpu_pytorch_tinyimagenet.ipynb deleted file mode 100644 index 4cc40fc5cf..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/HPU/PyTorch_TinyImageNet/workspace/hpu_pytorch_tinyimagenet.ipynb +++ 
/dev/null @@ -1,490 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated PyTorch TinyImageNet Tutorial" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "billion-drunk", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import glob\n", - "\n", - "from PIL import Image\n", - "\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment\n", - "from copy import deepcopy\n", - "import torchvision\n", - "from torchvision import transforms as T\n", - "from torch.utils.data import Dataset\n", - "from torch.utils.data import DataLoader\n", - "import tqdm\n", - "\n", - "import habana_frameworks.torch.core as htcore\n", - "\n", - "torch.manual_seed(0)\n", - "np.random.seed(0)" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'api'\n", - "cert_dir = 'cert'\n", - "director_node_fqdn = 'localhost'\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS their must provide CA root chain, and signed key pair to the federation interface\n", - "# cert_chain = f'{cert_dir}/root_ca.crt'\n", - "# api_certificate = f'{cert_dir}/{client_id}.crt'\n", - "# api_private_key = f'{cert_dir}/{client_id}.key'\n", - "\n", - "# federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051',\n", - "# cert_chain=cert_chain, api_cert=api_certificate, api_private_key=api_private_key)\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051', tls=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1abebd90", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "print(sample.shape)\n", - "print(target.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register 
dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7dda1680", - "metadata": {}, - "outputs": [], - "source": [ - "normalize = T.Normalize(\n", - " mean=[0.485, 0.456, 0.406],\n", - " std=[0.229, 0.224, 0.225]\n", - ")\n", - "\n", - "augmentation = T.RandomApply(\n", - " [T.RandomHorizontalFlip(),\n", - " T.RandomRotation(10),\n", - " T.RandomResizedCrop(64)], \n", - " p=.8\n", - ")\n", - "\n", - "training_transform = T.Compose(\n", - " [T.Lambda(lambda x: x.convert(\"RGB\")),\n", - " T.ToTensor(),\n", - " augmentation,\n", - " normalize]\n", - ")\n", - "\n", - "valid_transform = T.Compose(\n", - " [T.Lambda(lambda x: x.convert(\"RGB\")),\n", - " T.ToTensor(),\n", - " normalize]\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0314d5bf", - "metadata": {}, - "outputs": [], - "source": [ - "class TransformedDataset(Dataset):\n", - " \"\"\"Image Person ReID Dataset.\"\"\"\n", - "\n", - " def __init__(self, dataset, transform=None, target_transform=None):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - " self.transform = transform\n", - " self.target_transform = target_transform\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " img, label = self.dataset[index]\n", - " label = self.target_transform(label) if self.target_transform else label\n", - " img = self.transform(img) if self.transform else img\n", - " return img, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01369e3b", - "metadata": {}, - "outputs": [], - "source": [ - "class TinyImageNetDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " self.kwargs = kwargs\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " \n", - " self.train_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('train'),\n", - " transform=training_transform\n", - " )\n", - " self.valid_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('val'),\n", - " transform=valid_transform\n", - " )\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " generator=torch.Generator()\n", - " generator.manual_seed(0)\n", - " return DataLoader(\n", - " self.train_set, batch_size=self.kwargs['train_bs'], shuffle=True, generator=generator\n", - " )\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(self.valid_set, batch_size=self.kwargs['valid_bs'])\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"4a6cedef", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = TinyImageNetDataset(train_bs=64, valid_bs=64)" - ] - }, - { - "cell_type": "markdown", - "id": "74cac654", - "metadata": {}, - "source": [ - "### Describe the model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43e25fe3", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "MobileNetV2 model\n", - "\"\"\"\n", - "\n", - "class Net(nn.Module):\n", - " def __init__(self):\n", - " torch.manual_seed(0)\n", - " super(Net, self).__init__()\n", - " self.model = torchvision.models.mobilenet_v2(pretrained=True)\n", - " self.model.requires_grad_(False)\n", - " self.model.classifier[1] = torch.nn.Linear(in_features=1280, \\\n", - " out_features=200, bias=True)\n", - "\n", - " def forward(self, x):\n", - " x = self.model.forward(x)\n", - " return x\n", - "\n", - "model_net = Net()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79021778", - "metadata": {}, - "outputs": [], - "source": [ - "# NOTE: This is a workaround to move model to HPU before initializing any optimizers \n", - "model_net.to('hpu')\n", - "params_to_update = []\n", - "for param in model_net.parameters():\n", - " if param.requires_grad == True:\n", - " params_to_update.append(param)\n", - " \n", - "optimizer_adam = optim.Adam(params_to_update, lr=1e-4)\n", - "\n", - "def cross_entropy(output, target):\n", - " \"\"\"Binary cross-entropy metric\n", - " \"\"\"\n", - " return F.cross_entropy(input=output,target=target)" - ] - }, - { - "cell_type": "markdown", - "id": "f097cdc5", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06a8cca8", - "metadata": {}, - "outputs": [], - "source": [ - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "model_interface = ModelInterface(model=model_net, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model_net)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9649385", - "metadata": {}, - "outputs": [], - "source": [ - "task_interface = TaskInterface()\n", - "\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@task_interface.add_kwargs(**{ 'device':'hpu'})\n", - "@task_interface.register_fl_task(model='net_model', data_loader='train_loader', \\\n", - " device='device', optimizer='optimizer') \n", - "def train(net_model, train_loader, optimizer, device, loss_fn=cross_entropy, some_parameter=None):\n", - " torch.manual_seed(0)\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " net_model.train()\n", - " net_model.to(device)\n", - "\n", - " losses = []\n", - "\n", - " for data, target in train_loader:\n", - " data, target = torch.tensor(data).to(device), torch.tensor(\n", - " target).to(device)\n", - " optimizer.zero_grad()\n", - " output = net_model(data)\n", - " loss = loss_fn(output=output, target=target)\n", - " loss.backward()\n", - " # API call to trigger execution\n", - " htcore.mark_step()\n", - " optimizer.step()\n", - " # API call to trigger execution\n", - " htcore.mark_step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - " \n", - " return {'train_loss': np.mean(losses),}\n", - "\n", - 
"@task_interface.add_kwargs(**{'device':'hpu'})\n", - "@task_interface.register_fl_task(model='net_model', data_loader='val_loader', device='device') \n", - "def validate(net_model, val_loader, device):\n", - " torch.manual_seed(0)\n", - " net_model.eval()\n", - " net_model.to(device)\n", - " \n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - " val_score = 0\n", - " total_samples = 0\n", - "\n", - " with torch.no_grad():\n", - " for data, target in val_loader:\n", - " samples = target.shape[0]\n", - " total_samples += samples\n", - " data, target = torch.tensor(data).to(device), \\\n", - " torch.tensor(target).to(device, dtype=torch.int64)\n", - " output = net_model(data)\n", - " # API call to trigger execution\n", - " htcore.mark_step()\n", - " pred = output.argmax(dim=1,keepdim=True)\n", - " val_score += pred.eq(target).sum().cpu().numpy()\n", - " \n", - " return {'acc': val_score / total_samples,}" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = 'tinyimagenet_test_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n", - "fl_experiment.start(\n", - " model_provider=model_interface, \n", - " task_keeper=task_interface,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=5,\n", - " opt_treatment='CONTINUE_GLOBAL'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83edd88f", - "metadata": {}, - "outputs": [], - "source": [ - "# If user want to stop IPython session, then reconnect and check how experiment is going\n", - "# fl_experiment.restore_experiment_state(model_interface)\n", - "\n", - "fl_experiment.stream_metrics(tensorboard_logs=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/README.md b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/README.md deleted file mode 100644 index 7db9d0c988..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/README.md +++ /dev/null @@ -1,122 +0,0 @@ -# MXNet Facial Keypoints Detection tutorial ---- -**Note:** - -Please pay attention that this task uses the dataset from Kaggle. To get the dataset you -will need a Kaggle account and accept "Facial Keypoints Detection" [competition rules](https://www.kaggle.com/c/facial-keypoints-detection/rules). - ---- - -This tutorial shows how to use any other framework, different from already supported PyTorch and TensorFlow, together with OpenFl. 
- -## Installation of Kaggle API credentials - -**Before you start, please make sure that you have installed sd_requirements.txt in your virtual -environment on the envoy machine.** - -To use the [Kaggle API](https://github.com/Kaggle/kaggle-api), sign up for -a [Kaggle account](https://www.kaggle.com). Then go to the `'Account'` tab of your user -profile `(https://www.kaggle.com//account)` and select `'Create API Token'`. This will -trigger the download of `kaggle.json`, a file containing your API credentials. Place this file in -the location `~/.kaggle/kaggle.json`. - -For your security, ensure that other users of your computer do not have read access to your -credentials. On Unix-based systems you can do this with the following command: - -`chmod 600 ~/.kaggle/kaggle.json` - -If you need a proxy, add a `"proxy"` entry to `kaggle.json`. It should look like -this: `{"username":"your_username","key":"token", "proxy": "ip_addr:port"}` - -*Information about the Kaggle API settings has been taken from the kaggle-api [readme](https://github.com/Kaggle/kaggle-api).* - -*A useful [link](https://github.com/Kaggle/kaggle-api/issues/6) for problems with proxy settings.* - -### 1. About the dataset - -All information about the dataset can be found -at this [link](https://www.kaggle.com/c/facial-keypoints-detection/data). - -### 2. Adding support for a third-party framework - -You need to write your own adapter class based on the `FrameworkAdapterPluginInterface` [class](https://github.com/securefederatedai/openfl/blob/develop/openfl/plugins/frameworks_adapters/framework_adapter_interface.py). This class should contain at least two methods: - - - `get_tensor_dict(model, optimizer=None)` - extracts a tensor dict from a model and optionally[^1] an optimizer. The resulting tensors must be converted to **dict{str: numpy.array}** for forwarding and aggregation. - - - `set_tensor_dict(model, tensor_dict, optimizer=None, device=None)` - sets aggregated numpy arrays into the model, or into the model and optimizer. To do so, it receives the `tensor_dict` variable as **dict{str: numpy.array}** and should convert it into tensors suitable for your model (and optimizer). After that, it must load the prepared parameters into the model/optimizer. - - Your adapter should be placed in the workspace directory. When you create the `ModelInterface` object in the `'***.ipynb'` notebook, pass the name of your adapter in the input parameter `framework_plugin`. Example: - ```py - framework_adapter = 'mxnet_adapter.FrameworkAdapterPlugin' - - MI = ModelInterface(model=model, optimizer=optimizer, - framework_plugin=framework_adapter) -``` - -[^1]: Whether or not to forward the optimizer parameters is set in the `start` method (FLExperiment [class](https://github.com/securefederatedai/openfl/blob/develop/openfl/interface/interactive_api/experiment.py) object, parameter `opt_treatment`). - -### Run experiment - -1. Create a folder for each `envoy`. -2. Put a relevant envoy_config in each of the n folders (n is the number of envoys you would like - to use; in this tutorial there are two of them, but you may use any number of envoys) and copy the - other files from the `envoy` folder there as well. -3. Modify each `envoy` accordingly: - - - In `start_envoy.sh`, change env_one to env_two (or any unique `envoy` names you like) - - - Put a relevant envoy_config `envoy_config_one.yaml` or `envoy_config_two.yaml` (or any other - config file name consistent with the configuration file that is called in `start_envoy.sh`). -4. 
Make sure that you have installed the requirements for each `envoy` in your virtual - environment: `pip install -r sd_requirements.txt` -5. Run the `director`: - ```sh - cd director_folder - ./start_director.sh - ``` - -6. Run the `envoys`: - ```sh - cd envoy_folder - ./start_envoy.sh env_one envoy_config_one.yaml - ``` - If the Kaggle API settings are - correct, the dataset download will start. If this is not the first `envoy` launch, - the dataset will be re-downloaded only if some part of the data is missing. - -7. Run the [MXNet_landmarks.ipynb](workspace/MXNet_landmarks.ipynb) notebook using - Jupyter Lab in a prepared virtual environment. For more information about preparing the virtual - environment, see **[Preparation virtual environment](#preparation-virtual-environment)**. - - * Install the [MXNet 1.9.0](https://pypi.org/project/mxnet/1.9.0/) framework with CPU or GPU (preferred) support and [verify](https://mxnet.apache.org/versions/1.4.1/install/validate_mxnet.html) it: - ```bash - pip install mxnet-cuXXX==1.9.0 - ``` - - * Run jupyter-lab: - ```bash - cd workspace - jupyter-lab - ``` - -### Preparation virtual environment - -* Create a virtual environment - -```sh - python3 -m venv venv -``` - -* To activate the virtual environment - -```sh - source venv/bin/activate -``` - -* To deactivate the virtual environment - -```sh - deactivate -``` diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/director/director_config.yaml deleted file mode 100644 index 26bda0f6c6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['96', '96'] - target_shape: ['1'] diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/director/start_director.sh deleted file mode 100755 index 5b5c4b3847..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/envoy_config_one.yaml b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/envoy_config_one.yaml deleted file mode 100644 index 2a9aa7fcf4..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/envoy_config_one.yaml +++ /dev/null @@ -1,12 +0,0 @@ -params: - cuda_devices: [0] - -optional_plugin_components: - cuda_device_monitor: - template: openfl.plugins.processing_units_monitor.pynvml_monitor.PynvmlCUDADeviceMonitor - settings: [] - -shard_descriptor: - template: landmark_shard_descriptor.LandmarkShardDescriptor - params: - rank_worldsize: 1, 2 diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/envoy_config_two.yaml b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/envoy_config_two.yaml deleted file mode 100644 index 5b10abf145..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/envoy_config_two.yaml +++ /dev/null @@ -1,12 +0,0 @@ -params: - cuda_devices: [1] - -optional_plugin_components: - cuda_device_monitor: - template: openfl.plugins.processing_units_monitor.pynvml_monitor.PynvmlCUDADeviceMonitor - settings: [] - -shard_descriptor: - 
template: landmark_shard_descriptor.LandmarkShardDescriptor - params: - rank_worldsize: 2, 2 diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/landmark_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/landmark_shard_descriptor.py deleted file mode 100644 index 25440b48b4..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/landmark_shard_descriptor.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Landmarks Shard Descriptor.""" - -import json -import shutil -from hashlib import md5 -from logging import getLogger -from pathlib import Path -from random import shuffle -from typing import Dict -from typing import List -from zipfile import ZipFile - -import numpy as np -import pandas as pd -from kaggle.api.kaggle_api_extended import KaggleApi - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = getLogger(__name__) - - -class LandmarkShardDataset(ShardDataset): - """Landmark Shard dataset class.""" - - def __init__(self, dataset_dir: Path, - rank: int = 1, worldsize: int = 1) -> None: - """Initialize LandmarkShardDataset.""" - self.rank = rank - self.worldsize = worldsize - self.dataset_dir = dataset_dir - self.img_names = list(self.dataset_dir.glob('img_*.npy')) - - # Sharding - self.img_names = self.img_names[self.rank - 1::self.worldsize] - # Shuffling the results dataset after choose half pictures of each class - shuffle(self.img_names) - - def __getitem__(self, index) -> np.ndarray: - """Return a item by the index.""" - # Get name key points file - # f.e. image name: 'img_123.npy, corresponding name of the key points: 'keypoints_123.npy' - kp_name = str(self.img_names[index]).replace('img', 'keypoints') - return np.load(self.img_names[index]), np.load(self.dataset_dir / kp_name) - - def __len__(self) -> int: - """Return the len of the dataset.""" - return len(self.img_names) - - -class LandmarkShardDescriptor(ShardDescriptor): - """Landmark Shard descriptor class.""" - - def __init__(self, data_folder: str = 'data', - rank_worldsize: str = '1, 1', - **kwargs) -> None: - """Initialize LandmarkShardDescriptor.""" - super().__init__() - # Settings for sharding the dataset - self.rank, self.worldsize = map(int, rank_worldsize.split(',')) - - self.data_folder = Path.cwd() / data_folder - self.download_data() - - # Calculating data and target shapes - ds = self.get_dataset() - sample, target = ds[0] - self._sample_shape = [str(dim) for dim in sample.shape] - self._target_shape = [str(len(target.shape))] - - if self._target_shape[0] != '1': - raise ValueError('Target has a wrong shape') - - def process_data(self, name_csv_file) -> None: - """Process data from csv to numpy format and save it in the same folder.""" - data_df = pd.read_csv(self.data_folder / name_csv_file) - data_df.fillna(method='ffill', inplace=True) - keypoints = data_df.drop('Image', axis=1) - cur_folder = self.data_folder.relative_to(Path.cwd()) - - for i in range(data_df.shape[0]): - img = data_df['Image'][i].split(' ') - img = np.array(['0' if x == '' else x for x in img], dtype='float32').reshape(96, 96) - np.save(str(cur_folder / f'img_{i}.npy'), img) - y = np.array(keypoints.iloc[i, :], dtype='float32') - np.save(str(cur_folder / f'keypoints_{i}.npy'), y) - - def download_data(self) -> None: - """Download dataset from Kaggle.""" - if 
self.is_dataset_complete(): - return - - self.data_folder.mkdir(parents=True, exist_ok=True) - - logger.info('Your dataset is absent or damaged. Downloading ... ') - api = KaggleApi() - api.authenticate() - - if Path('data').exists(): - shutil.rmtree('data') - - api.competition_download_file( - 'facial-keypoints-detection', - 'training.zip', path=self.data_folder - ) - - with ZipFile(self.data_folder / 'training.zip', 'r') as zipobj: - zipobj.extractall(self.data_folder) - - (self.data_folder / 'training.zip').unlink() - - self.process_data('training.csv') - (self.data_folder / 'training.csv').unlink() - self.save_all_md5() - - def get_dataset(self, dataset_type='train') -> LandmarkShardDataset: - """Return a shard dataset by type.""" - return LandmarkShardDataset( - dataset_dir=self.data_folder, - rank=self.rank, - worldsize=self.worldsize - ) - - def calc_all_md5(self) -> Dict[str, str]: - """Calculate hash of all dataset.""" - md5_dict = {} - for root in self.data_folder.glob('*.npy'): - md5_calc = md5(usedforsecurity=False) - rel_file = root.relative_to(self.data_folder) - - with open(self.data_folder / rel_file, 'rb') as f: - for chunk in iter(lambda: f.read(4096), b''): - md5_calc.update(chunk) - md5_dict[str(rel_file)] = md5_calc.hexdigest() - return md5_dict - - def save_all_md5(self) -> None: - """Save dataset hash.""" - all_md5 = self.calc_all_md5() - with open(self.data_folder / 'dataset.json', 'w', encoding='utf-8') as f: - json.dump(all_md5, f) - - def is_dataset_complete(self) -> bool: - """Check dataset integrity.""" - dataset_md5_path = self.data_folder / 'dataset.json' - if dataset_md5_path.exists(): - with open(dataset_md5_path, 'r', encoding='utf-8') as f: - old_md5 = json.load(f) - new_md5 = self.calc_all_md5() - return new_md5 == old_md5 - return False - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - return self._sample_shape - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - return self._target_shape - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Dogs and Cats dataset, shard number {self.rank} ' - f'out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/sd_requirements.txt b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/sd_requirements.txt deleted file mode 100644 index 011e3f5f56..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/sd_requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -kaggle -pynvml diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/start_envoy.sh deleted file mode 100755 index 72a15413ed..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -SHARD_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$SHARD_CONF" -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/workspace/MXNet_landmarks.ipynb b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/workspace/MXNet_landmarks.ipynb deleted file mode 100644 index 023990209e..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/workspace/MXNet_landmarks.ipynb +++ /dev/null @@ -1,583 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": 
"26fdd9ed", - "metadata": {}, - "source": [ - "# Federated MXNex Landmarks Tutorial\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd7fe23e", - "metadata": {}, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = \"api\"\n", - "cert_dir = \"cert\"\n", - "director_node_fqdn = \"localhost\"\n", - "# 1) Run with API layer - Director mTLS\n", - "# If the user wants to enable mTLS their must provide CA root chain, and signed key pair to the federation interface\n", - "# cert_chain = f'{cert_dir}/root_ca.crt'\n", - "# api_certificate = f'{cert_dir}/{client_id}.crt'\n", - "# api_private_key = f'{cert_dir}/{client_id}.key'\n", - "\n", - "# federation = Federation(client_id=client_id,\n", - "# director_node_fqdn=director_node_fqdn,\n", - "# director_port='50051',\n", - "# cert_chain=cert_chain,\n", - "# api_cert=api_certificate,\n", - "# api_private_key=api_private_key)\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=\"50051\",\n", - " tls=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "21d89d2c", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset\n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset(\"train\")\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Describing FL experimen" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import (\n", - " DataInterface,\n", - " FLExperiment,\n", - " ModelInterface,\n", - " TaskInterface,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "feee0dff", - "metadata": {}, - "outputs": [], - "source": [ - "import mxnet as mx\n", - "import numpy as np\n", - "import pandas as pd\n", - "import tqdm\n", - "from matplotlib import pyplot as plt\n", - "from mxnet.gluon import data as gdata\n", - "from mxnet.gluon import loss as gloss\n", - "from mxnet.gluon import nn" - ] - }, - { - 
"cell_type": "markdown", - "id": "3b468ae1", - "metadata": {}, - "source": [ - "### Describe a model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d3ce192b", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "MXNet model definition\n", - "\"\"\"\n", - "model = nn.Sequential()\n", - "model.add(\n", - " nn.Conv2D(channels=64, kernel_size=3, padding=1, activation=\"relu\"),\n", - " nn.BatchNorm(),\n", - " nn.MaxPool2D(),\n", - " nn.Conv2D(channels=128, kernel_size=3, padding=1, activation=\"relu\"),\n", - " nn.BatchNorm(),\n", - " nn.MaxPool2D(),\n", - " nn.Conv2D(channels=256, kernel_size=3, padding=1, activation=\"relu\"),\n", - " nn.BatchNorm(),\n", - " nn.MaxPool2D(),\n", - " nn.Flatten(),\n", - " nn.Dense(64),\n", - " nn.Activation(\"relu\"),\n", - " nn.Dropout(rate=0.005),\n", - " nn.Dense(30),\n", - ")\n", - "\n", - "model.initialize(force_reinit=True, ctx=None, init=mx.init.Xavier())\n", - "model(\n", - " mx.nd.ones((1, 1, 96, 96), ctx=None)\n", - ") # first forward pass for weight initialization" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20c39cce", - "metadata": {}, - "outputs": [], - "source": [ - "# optimizer\n", - "optimizer = mx.optimizer.Adam(learning_rate=0.001)\n", - "trainer = mx.gluon.Trainer(model.collect_params(), optimizer=optimizer)\n", - "# loss function\n", - "loss_fn = gloss.L2Loss()" - ] - }, - { - "cell_type": "markdown", - "id": "88b9dbf6", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d73f5518", - "metadata": {}, - "outputs": [], - "source": [ - "framework_adapter = \"mxnet_adapter.FrameworkAdapterPlugin\"\n", - "\n", - "MI = ModelInterface(model=model, optimizer=trainer, framework_plugin=framework_adapter)" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8c9eb50", - "metadata": {}, - "outputs": [], - "source": [ - "class LandmarkShardDataset(gdata.Dataset):\n", - " def __init__(self, dataset):\n", - " self._dataset = dataset\n", - "\n", - " def __len__(self):\n", - " self.filelength = len(self._dataset)\n", - " return self.filelength\n", - "\n", - " def __getitem__(self, idx):\n", - " return self._dataset[idx]\n", - "\n", - "\n", - "class LandmarkShardDescriptor(DataInterface):\n", - " def __init__(self, validation_fraction=1 / 5, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self.validation_fraction = validation_fraction\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " self._shard_dataset = LandmarkShardDataset(\n", - " shard_descriptor.get_dataset(\"train\")\n", - " )\n", - "\n", - " self.validation_size = max(\n", - " 1, int(len(self._shard_dataset) * self.validation_fraction)\n", - " )\n", - "\n", - " self.train_indexes = len(self._shard_dataset) - self.validation_size\n", - " self.val_indexes = [self.validation_size, self.train_indexes]\n", - "\n", - " def get_train_loader(self):\n", - " \"\"\"\n", - " Output of this method will 
be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " return gdata.DataLoader(\n", - " self._shard_dataset,\n", - " batch_size=self.kwargs[\"train_bs\"],\n", - " sampler=gdata.RandomSampler(self.train_indexes),\n", - " last_batch=\"keep\",\n", - " )\n", - "\n", - " def get_valid_loader(self):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return gdata.DataLoader(\n", - " self._shard_dataset,\n", - " batch_size=self.kwargs[\"valid_bs\"],\n", - " sampler=gdata.SequentialSampler(*self.val_indexes),\n", - " last_batch=\"keep\",\n", - " )\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return self.train_indexes\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return self.validation_size" - ] - }, - { - "cell_type": "markdown", - "id": "b0dfb459", - "metadata": {}, - "source": [ - "### Create Mnist federated dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4af5c4c2", - "metadata": {}, - "outputs": [], - "source": [ - "train_bs, valid_bs = 64, 64\n", - "fed_dataset = LandmarkShardDescriptor(train_bs=train_bs, valid_bs=valid_bs)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9649385", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "\n", - "@TI.register_fl_task(\n", - " model=\"model\",\n", - " data_loader=\"train_dataset\",\n", - " device=\"device\",\n", - " optimizer=\"optimizer\",\n", - " round_num=\"round_num\",\n", - ")\n", - "def train(model, train_dataset, optimizer, round_num, device, loss_fn=loss_fn):\n", - " device = (\n", - " mx.cpu()\n", - " if device.startswith(\"cpu\")\n", - " else mx.gpu(int(device.split(\":\")[1].strip()))\n", - " )\n", - "\n", - " print(\"train on:\", device)\n", - "\n", - " if round_num == 0:\n", - " optimizer._contexts = [device]\n", - "\n", - " train_dataset = tqdm.tqdm(train_dataset, desc=\"train\")\n", - " train_sum_l = 0\n", - " for X, y in train_dataset:\n", - " X, y = X.expand_dims(axis=1).as_in_context(device), y.as_in_context(device)\n", - " with mx.autograd.record():\n", - " pred = model(X)\n", - " l = loss_fn(pred, y).mean()\n", - " l.backward()\n", - " optimizer.step(train_bs)\n", - " train_sum_l += l.mean().asscalar()\n", - " train_loss = train_sum_l / len(train_dataset)\n", - " return {\n", - " \"train_mse\": train_loss,\n", - " }\n", - "\n", - "\n", - "@TI.register_fl_task(model=\"model\", data_loader=\"val_dataset\", device=\"device\")\n", - "def validate(model, val_dataset, device):\n", - " device = (\n", - " mx.cpu()\n", - " if device.startswith(\"cpu\")\n", - " else mx.gpu(int(device.split(\":\")[1].strip()))\n", - " )\n", - "\n", - " # Run a validation loop at the end of each epoch.\n", - " test_sum_l = 0\n", - " for X, y in val_dataset:\n", - " X, y = X.expand_dims(axis=1).as_in_context(device), y.as_in_context(device)\n", - " pred = model(X)\n", - " l = loss_fn(pred, y)\n", - " test_sum_l += l.mean().asscalar()\n", - " test_loss = test_sum_l / len(val_dataset)\n", - " return {\n", - " \"val_mse\": test_loss,\n", - " }" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = \"landmark_experiment\"\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n", - "fl_experiment.start(\n", - " model_provider=MI,\n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=10,\n", - " opt_treatment=\"CONTINUE_GLOBAL\",\n", - " device_assignment_policy=\"CUDA_PREFERRED\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01fa7cea", - "metadata": {}, - "outputs": [], - "source": [ - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "markdown", - "id": "e6055103", - "metadata": {}, - "source": [ - "## Let's have a look at the results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff804102", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from zipfile import ZipFile\n", - "\n", - "from kaggle.api.kaggle_api_extended import KaggleApi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "37dc7f56", - "metadata": {}, - "outputs": [], - "source": [ - "if not os.path.exists(\"./test\"):\n", - " api = KaggleApi()\n", - " api.authenticate()\n", - " api.competition_download_file(\"facial-keypoints-detection\", \"test.zip\")\n", - " with ZipFile(\"test.zip\", \"r\") as zipobj:\n", - " zipobj.extractall(\"./test\")\n", - " os.remove(\"test.zip\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "08fc3a7a", - "metadata": {}, - "outputs": [], - "source": [ - "last_model = fl_experiment.get_last_model()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13f6cfd7", - "metadata": {}, - "outputs": [], - "source": [ - "Test_Dir = \"./test/test.csv\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "796c8e37", - "metadata": {}, - "outputs": [], - "source": [ - "def get_data(path_to_csv_file):\n", - " data_df = pd.read_csv(path_to_csv_file)\n", - " data_df.fillna(method=\"ffill\", inplace=True)\n", - " labels = data_df.drop(\"Image\", axis=1)\n", - " imag, keypoints = [], []\n", - " for i in range(data_df.shape[0]):\n", - " img = data_df[\"Image\"][i].split(\" \")\n", - " img = [\"0\" if x == \"\" else x for x in img]\n", - " imag.append(img)\n", - " y = labels.iloc[i, :]\n", - " keypoints.append(y)\n", - "\n", - " X = np.array(imag, dtype=\"float\").reshape(-1, 96, 96)\n", - " y = np.array(keypoints, dtype=\"float\")\n", - "\n", - " return X, y" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ed1ce74", - "metadata": {}, - "outputs": [], - "source": [ - "test_imgs, _ = get_data(Test_Dir) # prepare test dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7cc6bed7", - "metadata": {}, - "outputs": [], - "source": [ - "fig = plt.figure(figsize=(10, 10))\n", - "for i in range(9):\n", - " ax = fig.add_subplot(3, 3, i + 1)\n", - " in_for_net = (\n", - " mx.nd.array([test_imgs[i + 1]]).expand_dims(axis=1).as_in_context(mx.cpu())\n", - " )\n", - " pred = last_model(in_for_net)[0].asnumpy().reshape(-1, 2)\n", - " ax.imshow(test_imgs[i + 1], cmap=\"gray\")\n", - " x_cords = pred[:, 0]\n", - " y_cords = 
pred[:, 1]\n", - " plt.scatter(x_cords, y_cords, label='Predicted keypoints')\n", - "plt.legend(bbox_to_anchor=(2.1, 3.4), prop={'size': 12})\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1ddc51e2", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/workspace/mxnet_adapter.py b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/workspace/mxnet_adapter.py deleted file mode 100644 index 6f34aa86e4..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/workspace/mxnet_adapter.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""MXNet Framework Adapter plugin.""" - -from pickle import dumps -from pickle import loads -from typing import Dict - -import mxnet as mx -import numpy as np -from mxnet import nd - -from openfl.plugins.frameworks_adapters.framework_adapter_interface import ( - FrameworkAdapterPluginInterface -) - - -class FrameworkAdapterPlugin(FrameworkAdapterPluginInterface): - """Framework adapter plugin class.""" - - def __init__(self) -> None: - """Initialize framework adapter.""" - - @staticmethod - def get_tensor_dict(model, optimizer=None) -> Dict[str, np.ndarray]: - """ - Extract tensor dict from a model and an optimizer. - - Returns: - dict {weight name: numpy ndarray} - """ - state = {} - if optimizer is not None: - state = _get_optimizer_state(optimizer) - - model_params = model.collect_params() - - for param_name, param_tensor in model_params.items(): - if isinstance(param_tensor.data(), mx.ndarray.ndarray.NDArray): - state[param_name] = param_tensor.list_data()[0].asnumpy() - - return state - - @staticmethod - def set_tensor_dict(model, tensor_dict: Dict[str, np.ndarray], - optimizer=None, device=None) -> None: - """ - Set tensor dict from a model and an optimizer. - - Given a dict {weight name: numpy ndarray} sets weights to - the model and optimizer objects inplace. - """ - if device is not None: - device = mx.cpu() if device.startswith('cpu') else ( - mx.gpu(int(device.split(':')[1].strip())) - ) - - if optimizer is not None: - _set_optimizer_state(optimizer, device, tensor_dict) - model.collect_params().reset_ctx(device) - - model_params = model.collect_params() - - for param_name in model_params: - model_params[param_name].set_data(nd.array(tensor_dict.pop(param_name), ctx=device)) - - -def _get_optimizer_state(optimizer): - """Return the optimizer state. - - Args: - optimizer - """ - states = loads(optimizer._updaters[0].get_states(dump_optimizer=False)) - result_states = {} - for state_key, state_tuple in states.items(): - for state_ind, state in enumerate(state_tuple): - result_states[f'opt_state__{state_key}__{state_ind}'] = state.asnumpy() - - return result_states - - -def _set_optimizer_state(optimizer, device, opt_state_dict): - """Set the optimizer state. 
- - Args: - optimizer: - device: - - """ - state_keys, max_numstates = set(), 0 - for key in opt_state_dict.keys(): - if not key.startswith('opt_state'): - continue - _, part1, part2 = key.split('__') - state_keys.add(int(part1)) - max_numstates = max(max_numstates, int(part2)) - - out_state = {} - for _ in range(len(state_keys)): - key = state_keys.pop() - state_vals = [] - for i in range(max_numstates + 1): - state_vals.append(nd.array(opt_state_dict.pop(f'opt_state__{key}__{i}'), ctx=device)) - out_state[key] = tuple(state_vals) - - optimizer._updaters[0].set_states(dumps(out_state)) diff --git a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/workspace/requirements.txt b/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/workspace/requirements.txt deleted file mode 100644 index ee4544d3f2..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/MXNet_landmarks/workspace/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -kaggle -matplotlib -numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/README.md deleted file mode 100644 index f5483f112a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/README.md +++ /dev/null @@ -1,84 +0,0 @@ -# Dogs vs. Cats tutorial based on [vit_pytorch](https://github.com/lucidrains/vit-pytorch) library - -***Note: Please note that this task uses the dataset from Kaggle. To get the dataset you -will need a Kaggle account and accept "Dogs vs. Cats" competition rules.*** - -Visual Transformers are gaining popularity among the Data Science community, so this tutorial is -intended to examine Visual Transformer behavior in a federated learning setup. - -## Installation of Kaggle API credentials - -**Before you start, please make sure that you have installed sd_requirements.txt in your virtual -environment on the envoy machine.** - -To use the [Kaggle API](https://github.com/Kaggle/kaggle-api), sign up for -a [Kaggle account](https://www.kaggle.com). Then go to the `'Account'` tab of your user -profile `(https://www.kaggle.com//account)` and select `'Create API Token'`. This will -trigger the download of `kaggle.json`, a file containing your API credentials. Place this file in -the location `~/.kaggle/kaggle.json` - -**Note: you will need to accept competition rules -at** https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/rules - -For your security, ensure that other users of your computer do not have read access to your -credentials. On Unix-based systems you can do this with the following command: - -`chmod 600 ~/.kaggle/kaggle.json` - -If you need a proxy, add a `"proxy"` entry to `kaggle.json`. It should look like -this: `{"username":"your_username","key":"token", "proxy": "ip_addr:port"}` - -*Information about the Kaggle API settings has been taken from the kaggle-api readme. For more information -visit:* https://github.com/Kaggle/kaggle-api - -*A useful link for problems with proxy settings:* https://github.com/Kaggle/kaggle-api/issues/6 - -### Data - -All information about the dataset can be found -at https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/overview - -### Run experiment - -1. Create a folder for each `envoy`. -2. 
Put a relevant envoy_config in each of the n folders (n is the number of envoys you would like - to use; this tutorial uses two of them, but you may use any number of envoys) and copy - other files from the `envoy` folder there as well. -3. Modify each `envoy` accordingly: - - - At `start_envoy.sh`, change env_one to env_two (or any unique `envoy` names you like) - - - Put a relevant envoy_config `envoy_config_one.yaml` or `envoy_config_two.yaml` (or any other - config file name consistent with the configuration file that is called in `start_envoy.sh`). -4. Make sure that you installed the requirements for each `envoy` in your virtual - environment: `pip install -r sd_requirements.txt` -5. Run the `director`: execute `start_director.sh` in the director folder -6. Run the `envoys`: execute `start_envoy.sh` in each envoy folder. If the Kaggle API settings are - correct, the dataset download will start. If this is not the first `envoy` launch, - then the dataset will be re-downloaded only if some part of the data is missing. -7. Run the [PyTorch_DogsCats_ViT.ipynb](workspace/PyTorch_DogsCats_ViT.ipynb) notebook using - Jupyter lab in a prepared virtual environment. For more information about preparing the virtual - environment, see [**Preparation virtual environment**](#preparation-virtual-environment). -8. Congratulations! You've started your federated learning of a Visual Transformer with OpenFL. - -### Preparation virtual environment - -* Create a virtual environment - -```sh - python3 -m venv venv -``` - -* To activate the virtual environment - -```sh - source venv/bin/activate -``` - -* To deactivate the virtual environment - -```sh - deactivate -``` \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/director/director_config.yaml deleted file mode 100644 index 607380c4c8..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['300', '300', '3'] - target_shape: ['1'] \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/director/start_director.sh deleted file mode 100644 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/dogs_cats_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/dogs_cats_shard_descriptor.py deleted file mode 100644 index b3cf212415..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/dogs_cats_shard_descriptor.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Cats and dogs shard descriptor.""" - -import json -import os -import shutil -from hashlib import md5 -from logging import getLogger -from pathlib import Path -from random import shuffle -from typing import Optional -from zipfile import ZipFile - -import numpy as np -from kaggle.api.kaggle_api_extended import KaggleApi -from PIL
import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = getLogger(__name__) - - -class DogsCatsShardDataset(ShardDataset): - """Dogs and cats Shard dataset class.""" - - def __init__(self, data_type: str, dataset_dir: Path, - rank: int = 1, worldsize: int = 1, enforce_image_hw=None): - """Initialize DogsCatsShardDataset.""" - self.rank = rank - self.worldsize = worldsize - self.dataset_dir = dataset_dir - self.enforce_image_hw = enforce_image_hw - self.img_path = self.dataset_dir / data_type - - self.img_names = [ - img.name - for img in sorted(self.img_path.iterdir()) - if img.suffix == '.jpg' - ] - - # Sharding - self.img_names = self.img_names[self.rank - 1::self.worldsize] - # Shuffling the results dataset after choose half pictures of each class - shuffle(self.img_names) - - def __getitem__(self, index): - """Return a item by the index.""" - name = self.img_names[index] - # Reading data - img = Image.open(self.img_path / name) - img_class = 1 if name[:3] == 'dog' else 0 - assert name[:3] in {'cat', 'dog'}, 'Wrong object classification' - - if self.enforce_image_hw is not None: - # If we need to resize data - # PIL accepts (w,h) tuple, not (h,w) - img = img.resize(self.enforce_image_hw[::-1]) - - img = np.asarray(img) - - assert img.shape[2] == 3 - - return img, np.asarray([img_class], dtype=np.uint8) - - def __len__(self): - """Return the len of the dataset.""" - return len(self.img_names) - - -class DogsCatsShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__(self, data_folder: str = 'data', - rank_worldsize: str = '1,3', - enforce_image_hw: Optional[str] = None) -> None: - """Initialize DogsCatsShardDescriptor.""" - super().__init__() - # Settings for sharding the dataset - self.rank, self.worldsize = map(int, rank_worldsize.split(',')) - - self.data_folder = Path.cwd() / data_folder - self.download_dataset() - - # Settings for resizing data - self.enforce_image_hw = None - if enforce_image_hw is not None: - self.enforce_image_hw = tuple(map(int, enforce_image_hw.split(','))) - - # Calculating data and target shapes - ds = self.get_dataset() - sample, target = ds[0] - self._sample_shape = [str(dim) for dim in sample.shape] - self._target_shape = [str(*target.shape)] - - assert self._target_shape[0] == '1', 'Target shape Error' - - def download_dataset(self): - """Download dataset from Kaggle.""" - if not os.path.exists(self.data_folder): - os.mkdir(self.data_folder) - - if not self.is_dataset_complete(): - logger.info('Your dataset is absent or damaged. Downloading ... 
') - api = KaggleApi() - api.authenticate() - - if os.path.exists('data/train'): - shutil.rmtree('data/train') - - api.competition_download_file( - 'dogs-vs-cats-redux-kernels-edition', - 'train.zip', path=self.data_folder - ) - - with ZipFile(self.data_folder / 'train.zip', 'r') as zipobj: - zipobj.extractall(self.data_folder) - - os.remove(self.data_folder / 'train.zip') - - self.save_all_md5() - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - return DogsCatsShardDataset( - data_type=dataset_type, - dataset_dir=self.data_folder, - rank=self.rank, - worldsize=self.worldsize, - enforce_image_hw=self.enforce_image_hw - ) - - def calc_all_md5(self): - """Calculate hash of all dataset.""" - md5_dict = {} - for root, _, files in os.walk(self.data_folder): - for file in files: - if file == 'dataset.json': - continue - md5_calc = md5(usedforsecurity=False) - rel_dir = os.path.relpath(root, self.data_folder) - rel_file = os.path.join(rel_dir, file) - - with open(self.data_folder / rel_file, 'rb') as f: - for chunk in iter(lambda: f.read(4096), b''): - md5_calc.update(chunk) - md5_dict[rel_file] = md5_calc.hexdigest() - return md5_dict - - def save_all_md5(self): - """Save dataset hash.""" - all_md5 = self.calc_all_md5() - with open(os.path.join(self.data_folder, 'dataset.json'), 'w', encoding='utf-8') as f: - json.dump(all_md5, f) - - def is_dataset_complete(self): - """Check dataset integrity.""" - new_md5 = self.calc_all_md5() - try: - with open(os.path.join(self.data_folder, 'dataset.json'), 'r', encoding='utf-8') as f: - old_md5 = json.load(f) - except FileNotFoundError: - return False - - return new_md5 == old_md5 - - @property - def sample_shape(self): - """Return the sample shape info.""" - return self._sample_shape - - @property - def target_shape(self): - """Return the target shape info.""" - return self._target_shape - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Dogs and Cats dataset, shard number {self.rank} ' - f'out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/envoy_config_one.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/envoy_config_one.yaml deleted file mode 100644 index 0e997c2edd..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/envoy_config_one.yaml +++ /dev/null @@ -1,14 +0,0 @@ -params: - cuda_devices: [0] - -optional_plugin_components: - cuda_device_monitor: - template: openfl.plugins.processing_units_monitor.pynvml_monitor.PynvmlCUDADeviceMonitor - settings: [] - -shard_descriptor: - template: dogs_cats_shard_descriptor.DogsCatsShardDescriptor - params: - data_folder: data - rank_worldsize: 1,2 - enforce_image_hw: '300,300' \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/envoy_config_two.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/envoy_config_two.yaml deleted file mode 100644 index afe42953c2..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/envoy_config_two.yaml +++ /dev/null @@ -1,14 +0,0 @@ -params: - cuda_devices: [1] - -optional_plugin_components: - cuda_device_monitor: - template: openfl.plugins.processing_units_monitor.pynvml_monitor.PynvmlCUDADeviceMonitor - settings: [] - -shard_descriptor: - template: dogs_cats_shard_descriptor.DogsCatsShardDescriptor - params: - data_folder: data - rank_worldsize: 
2,2 - enforce_image_hw: '300,300' \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/sd_requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/sd_requirements.txt deleted file mode 100644 index 0a122f85f0..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/sd_requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -kaggle -numpy -pillow -pynvml diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/start_envoy.sh deleted file mode 100644 index 51dcf26501..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config_one.yaml -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/workspace/PyTorch_DogsCats_ViT.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/workspace/PyTorch_DogsCats_ViT.ipynb deleted file mode 100644 index 2928a00ee9..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/workspace/PyTorch_DogsCats_ViT.ipynb +++ /dev/null @@ -1,558 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visual Transformer + OpenFL for Dogs & Cats classification" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import Libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import random\n", - "from copy import deepcopy\n", - "\n", - "from linformer import Linformer\n", - "\n", - "import numpy as np\n", - "\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler\n", - "from torch.optim.lr_scheduler import StepLR\n", - "\n", - "from torchvision import transforms\n", - "\n", - "import tqdm\n", - "\n", - "from vit_pytorch.efficient import ViT" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identifier that was used in the signed certificate\n", - "client_id = 'api'\n", - "director_node_fqdn = 'localhost'\n", - "director_port = 50051\n", - "\n", - "# 1) Run with API layer - Director mTLS\n", - "# If the user wants to enable mTLS, they must provide the CA root chain\n", - "# and a signed key pair to the federation interface\n", - "# cert_chain = 'cert/root_ca.crt'\n", - "# API_certificate = 'cert/frontend.crt'\n", - "# API_private_key = 'cert/frontend.key'\n", - "\n", - "# federation = Federation(\n", - "# client_id=client_id,\n", - "# director_node_fqdn=director_node_fqdn,\n", - "# director_port=director_port,\n", - "# tls=True,\n", - "# cert_chain=cert_chain,\n", - "# api_cert=api_certificate,\n", - "# api_private_key=api_private_key\n", - "# )\n", -
"\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port,\n", - " tls=False\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset\n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import DataInterface, FLExperiment, ModelInterface, TaskInterface" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Training settings\n", - "batch_size = 64\n", - "lr = 3e-5\n", - "seed = 42" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def seed_everything(seed):\n", - " random.seed(seed)\n", - " os.environ['PYTHONHASHSEED'] = str(seed)\n", - " np.random.seed(seed)\n", - " torch.manual_seed(seed)\n", - " torch.cuda.manual_seed(seed)\n", - " torch.cuda.manual_seed_all(seed)\n", - " torch.backends.cudnn.deterministic = True\n", - "\n", - "\n", - "seed_everything(seed)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class DogsCatsShardDataset(Dataset):\n", - " def __init__(self, dataset, transform_type=\"train\"):\n", - " self._dataset = dataset\n", - "\n", - " # Image Augumentation\n", - " if transform_type == \"train\":\n", - " self.transform = transforms.Compose(\n", - " [\n", - " transforms.ToPILImage(),\n", - " transforms.Resize((224, 224)),\n", - " transforms.RandomResizedCrop(224),\n", - " transforms.RandomHorizontalFlip(),\n", - " transforms.ToTensor(),\n", - " ]\n", - " )\n", - " elif transform_type == \"val\":\n", - " self.transform = transforms.Compose(\n", - " [\n", - " transforms.ToPILImage(),\n", - " transforms.Resize(256),\n", - " transforms.CenterCrop(224),\n", - " transforms.ToTensor(),\n", - " ]\n", - " )\n", - " elif transform_type == \"test\":\n", - " self.transform = transforms.Compose(\n", - " [\n", - " transforms.ToPILImage(),\n", - " transforms.Resize(256),\n", - " transforms.CenterCrop(224),\n", - " transforms.ToTensor(),\n", - " ]\n", - " )\n", - " else:\n", - " raise ValueError(\"Invalid transform type: {}\".format(transform_type))\n", - "\n", - " def __len__(self):\n", - " self.filelength = 
len(self._dataset)\n", - " return self.filelength\n", - "\n", - " def __getitem__(self, idx):\n", - " img, label = self._dataset[idx]\n", - " img_transformed = self.transform(img).numpy()\n", - " return img_transformed, label[0]\n", - "\n", - "\n", - "# Now you can implement your data loaders using dummy_shard_desc\n", - "class DogsCatsSD(DataInterface):\n", - "\n", - " def __init__(self, validation_fraction=1/5, **kwargs):\n", - " super().__init__(**kwargs)\n", - "\n", - " self.validation_fraction = validation_fraction\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " self._shard_dataset = DogsCatsShardDataset(shard_descriptor.get_dataset('train'))\n", - "\n", - " validation_size = max(1, int(len(self._shard_dataset) * self.validation_fraction))\n", - "\n", - " self.train_indexes = np.arange(len(self._shard_dataset) - validation_size)\n", - " self.val_indexes = np.arange(len(self._shard_dataset) - validation_size, len(self._shard_dataset))\n", - "\n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " train_sampler = SubsetRandomSampler(self.train_indexes)\n", - "\n", - " return DataLoader(\n", - " self._shard_dataset,\n", - " num_workers=8,\n", - " batch_size=self.kwargs['train_bs'],\n", - " sampler=train_sampler\n", - " )\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " val_sampler = SubsetRandomSampler(self.val_indexes)\n", - " return DataLoader(\n", - " self._shard_dataset,\n", - " num_workers=8,\n", - " batch_size=self.kwargs['valid_bs'],\n", - " sampler=val_sampler\n", - " )\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_indexes)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.val_indexes)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = DogsCatsSD(train_bs=batch_size, valid_bs=batch_size)\n", - "fed_dataset.shard_descriptor = dummy_shard_desc\n", - "for i, (sample, target) in enumerate(fed_dataset.get_train_loader()):\n", - " print(sample.shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe a model and optimizer" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Linformer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "efficient_transformer = Linformer(\n", - " dim=128,\n", - " seq_len=49 + 1, # 7x7 patches + 1 cls-token\n", - " depth=12,\n", - " heads=8,\n", - " k=64\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Visual Transformer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = ViT(\n", - " dim=128,\n", - " 
image_size=224,\n", - " patch_size=32,\n", - " num_classes=2,\n", - " transformer=efficient_transformer,\n", - " channels=3,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# loss function\n", - "criterion = nn.CrossEntropyLoss()\n", - "# optimizer\n", - "optimizer = optim.Adam(model.parameters(), lr=lr)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.aggregation_functions import Median\n", - "\n", - "\n", - "TI = TaskInterface()\n", - "\n", - "\n", - "# The Interactive API supports registering functions defined in the main module or imported.\n", - "def function_defined_in_notebook(some_parameter):\n", - " print(f'Also I accept a parameter and it is {some_parameter}')\n", - "\n", - "\n", - "# The Interactive API supports overriding of the aggregation function\n", - "aggregation_function = Median()\n", - "\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@TI.add_kwargs(**{'some_parameter': 42})\n", - "@TI.register_fl_task(model='model', data_loader='train_loader',\n", - " device='device', optimizer='optimizer', round_num='round_num')\n", - "@TI.set_aggregation_function(aggregation_function)\n", - "def train(model, train_loader, optimizer, round_num, device, loss_fn=criterion, some_parameter=None):\n", - " function_defined_in_notebook(some_parameter)\n", - " epoch_loss = 0\n", - " epoch_accuracy = 0\n", - "\n", - " # Be careful: the scheduler already makes a 'step()' at initialization, so: \n", - " # * if you have one epoch per round, DO NOT call 'scheduler.step()' at all.\n", - " # * if you have several epochs per round, call 'scheduler.step()' for all of them EXCEPT the last one.\n", - " scheduler = StepLR(optimizer, step_size=1, gamma=0.1, verbose=True, last_epoch=round_num-1)\n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " model.train()\n", - " model.to(device)\n", - "\n", - " for data, target in train_loader:\n", - " data, target = torch.tensor(data).to(device), torch.tensor(target).to(device, dtype=torch.long)\n", - " optimizer.zero_grad()\n", - " output = model(data)\n", - " loss = loss_fn(output, target)\n", - " loss.backward()\n", - " optimizer.step()\n", - "\n", - " acc = (output.argmax(dim=1) == target).float().mean()\n", - " epoch_accuracy += acc.cpu().numpy() / len(train_loader)\n", - " epoch_loss += loss.detach().cpu().numpy() / len(train_loader)\n", - "\n", - " return {'loss': epoch_loss, 'accuracy': epoch_accuracy}\n", - "\n", - "\n", - "@TI.register_fl_task(model='model', data_loader='val_loader', device='device')\n", - "def validate(model, val_loader, device):\n", - "\n", - " model.eval()\n", - " model.to(device)\n", - "\n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - "\n", - " with torch.no_grad():\n", - " epoch_val_accuracy = 0\n", - "
epoch_val_loss = 0\n", - " for data, target in val_loader:\n", - " data, target = torch.tensor(data).to(device), torch.tensor(target).to(device, dtype=torch.long)\n", - " val_output = model(data)\n", - " val_loss = criterion(val_output, target)\n", - "\n", - " acc = (val_output.argmax(dim=1) == target).float().mean()\n", - " epoch_val_accuracy += acc.cpu().numpy() / len(val_loader)\n", - " epoch_val_loss += val_loss.detach().cpu().numpy() / len(val_loader)\n", - "\n", - " return {'val_loss': epoch_val_loss, 'val_accuracy': epoch_val_accuracy}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create an experiment in the federation\n", - "experiment_name = 'ViT_DogsCats_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n", - "fl_experiment.start(model_provider=MI,\n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=5,\n", - " opt_treatment='CONTINUE_GLOBAL',\n", - " device_assignment_policy='CUDA_PREFERRED')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If the user wants to stop the IPython session, then reconnect and check how the experiment is going\n", - "# fl_experiment.restore_experiment_state(MI)\n", - "\n", - "fl_experiment.stream_metrics()" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "9967838c9b78b23db9544bb47605a6e8593c36ad0f41631a68de5734b7160f0f" - }, - "kernelspec": { - "display_name": "Python 3.8.9 64-bit ('openfl_Kvasir': venv)", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/workspace/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/workspace/requirements.txt deleted file mode 100644 index f4cef897ca..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_DogsCats_ViT/workspace/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -linformer==0.2.1 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -torch==2.3.1 -torchvision==0.18.1 -vit-pytorch==0.40.2 -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/README.md deleted file mode 100644 index ee57df63e9..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/README.md +++ /dev/null @@ -1,71 +0,0 @@ -# MedMNIST 2D Classification Using FedProx Optimizer Tutorial - -![MedMNISTv2_overview](https://raw.githubusercontent.com/MedMNIST/MedMNIST/main/assets/medmnistv2.jpg) - -For more details, please refer to the original paper: -**MedMNIST v2: A Large-Scale Lightweight Benchmark for 2D and 3D Biomedical Image
Classification** ([arXiv](https://arxiv.org/abs/2110.14795)), and [PyPI](https://pypi.org/project/medmnist/). - -This example differs from PyTorch_MedMNIST_2D in that it uses the FedProx Optimizer. For more information on FedProx see: -**Federated Optimization in Heterogeneous Networks** ([arXiv](https://arxiv.org/abs/1812.06127)). - -## I. About model and experiments - -We use a simple convolutional neural network and settings coming from [the experiments](https://github.com/MedMNIST/experiments) repository. -
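-
-Conceptually, FedProx keeps the usual federated averaging on the server and instead modifies each
-collaborator's local objective, adding a proximal term that penalizes drift from the current
-global weights on heterogeneous data. A minimal sketch of that idea (an illustration only, with an
-assumed mu=0.01; this is not the actual `openfl.utilities.optimizers.torch.fedprox`
-implementation used in the notebook):
-
-```python
-import torch
-
-def fedprox_local_loss(task_loss, model, global_params, mu=0.01):
-    """Add the FedProx proximal term (mu / 2) * ||w - w_global||^2 to the task loss."""
-    prox = sum(((w - wg.detach()) ** 2).sum()
-               for w, wg in zip(model.parameters(), global_params))
-    return task_loss + 0.5 * mu * prox
-```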
- -## II. How to run this tutorial (without TLS and locally as a simulation): -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). -
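-
-After installing, a quick sanity check that the environment is usable (assuming your OpenFL
-release exposes `openfl.__version__`, as recent versions do):
-
-```python
-# Verify that OpenFL is importable from the activated virtual environment.
-import openfl
-print(openfl.__version__)
-```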
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_FedProx_MedMNIST - ``` -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` -
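-
-Once the director is up, the workspace notebook connects to it over the insecure channel
-configured above (TLS disabled, localhost:50051), as in this excerpt from the notebook:
-
-```python
-# Connect to the running director; settings mirror director_config.yaml.
-from openfl.interface.interactive_api.federation import Federation
-
-federation = Federation(
-    client_id='api',
-    director_node_fqdn='localhost',
-    director_port=50051,
-    tls=False,
-)
-```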
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r requirements.txt -./start_envoy.sh env_one envoy_config.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config.yaml -``` -
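-
-Each envoy serves the shard selected by `rank_worldsize` in its config (the default `1, 1` here
-gives a single envoy the full dataset). The shard descriptor below stripes samples as
-`x[self.rank - 1::self.worldsize]`; a toy illustration of that slicing:
-
-```python
-# Hypothetical stand-in for the MedMNIST arrays, just to show the striping.
-samples = list(range(10))
-worldsize = 2
-print(samples[0::worldsize])  # rank 1 of 2 -> [0, 2, 4, 6, 8]
-print(samples[1::worldsize])  # rank 2 of 2 -> [1, 3, 5, 7, 9]
-```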
- -### 5. In the third terminal (or fourth terminal, if you chose to run two envoys) run the Jupyter Notebook: - -```sh -cd workspace -jupyter lab Pytorch_FedProx_MedMNIST_2D.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the Pytorch_FedProx_MedMNIST_2D.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment was finished successfully. diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/director/director_config.yaml deleted file mode 100644 index e51c9c8892..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/director/director_config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['28', '28', '3'] - target_shape: ['1','1'] - diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/envoy_config.yaml deleted file mode 100644 index 05ee5cec4c..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/envoy_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: medmnist_shard_descriptor.MedMNISTShardDescriptor - params: - rank_worldsize: 1, 1 - datapath: data/.
- dataname: bloodmnist diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/medmnist_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/medmnist_shard_descriptor.py deleted file mode 100644 index d5e639fed4..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/medmnist_shard_descriptor.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""MedMNIST Shard Descriptor.""" - -import logging -import os -from typing import Any, List, Tuple -from medmnist.info import INFO, HOMEPAGE - -import numpy as np - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class MedMNISTShardDataset(ShardDataset): - """MedMNIST Shard dataset class.""" - - def __init__(self, x, y, data_type: str = 'train', rank: int = 1, worldsize: int = 1) -> None: - """Initialize MedMNISTDataset.""" - self.data_type = data_type - self.rank = rank - self.worldsize = worldsize - self.x = x[self.rank - 1::self.worldsize] - self.y = y[self.rank - 1::self.worldsize] - - def __getitem__(self, index: int) -> Tuple[Any, Any]: - """Return an item by the index.""" - return self.x[index], self.y[index] - - def __len__(self) -> int: - """Return the len of the dataset.""" - return len(self.x) - - -class MedMNISTShardDescriptor(ShardDescriptor): - """MedMNIST Shard descriptor class.""" - - def __init__( - self, - rank_worldsize: str = '1, 1', - datapath: str = '', - dataname: str = 'bloodmnist', - **kwargs - ) -> None: - """Initialize MedMNISTShardDescriptor.""" - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.datapath = datapath - self.dataset_name = dataname - self.info = INFO[self.dataset_name] - - (x_train, y_train), (x_test, y_test) = self.load_data() - self.data_by_type = { - 'train': (x_train, y_train), - 'val': (x_test, y_test) - } - - def get_shard_dataset_types(self) -> List[str]: - """Get available shard dataset types.""" - return list(self.data_by_type) - - def get_dataset(self, dataset_type='train') -> MedMNISTShardDataset: - """Return a shard dataset by type.""" - if dataset_type not in self.data_by_type: - raise Exception(f'Wrong dataset type: {dataset_type}') - return MedMNISTShardDataset( - *self.data_by_type[dataset_type], - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - return ['28', '28', '3'] - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - return ['1', '1'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'MedMNIST dataset, shard number {self.rank}' - f' out of {self.worldsize}') - - @staticmethod - def download_data(datapath: str = 'data/', - dataname: str = 'bloodmnist', - info: dict = {}) -> None: - - logger.info(f"{datapath}\n{dataname}\n{info}") - try: - from torchvision.datasets.utils import download_url - download_url(url=info["url"], - root=datapath, - filename=dataname, - md5=info["MD5"]) - except Exception: - raise RuntimeError('Something went wrong when downloading! ' - + 'Go to the homepage to download manually. 
' + HOMEPAGE) - - def load_data(self) -> Tuple[Tuple[Any, Any], Tuple[Any, Any]]: - """Download prepared dataset.""" - - dataname = self.dataset_name + '.npz' - dataset = os.path.join(self.datapath, dataname) - - if not os.path.isfile(dataset): - logger.info(f"Dataset {dataname} not found at:{self.datapath}.\n\tDownloading...") - MedMNISTShardDescriptor.download_data(self.datapath, dataname, self.info) - logger.info("DONE!") - - data = np.load(dataset) - - x_train = data["train_images"] - x_test = data["test_images"] - - y_train = data["train_labels"] - y_test = data["test_labels"] - logger.info('MedMNIST data was loaded!') - return (x_train, y_train), (x_test, y_test) diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/requirements.txt deleted file mode 100644 index 363c0d69f9..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -medmnist -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/start_envoy.sh deleted file mode 100755 index cdd84e7fb6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/workspace/Pytorch_FedProx_MedMNIST_2D.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/workspace/Pytorch_FedProx_MedMNIST_2D.ipynb deleted file mode 100644 index 3bae96ef48..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_FedProx_MNIST/workspace/Pytorch_FedProx_MedMNIST_2D.ipynb +++ /dev/null @@ -1,628 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated MedMNIST2D Using the FedProx Optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5504ab79", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install medmnist" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0570122", - "metadata": {}, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "import tqdm\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from torch.utils.data import Dataset, DataLoader\n", - "from torchvision import transforms as T\n", - "import torch.nn.functional as F\n", - "\n", - "import medmnist\n", - "import openfl.utilities.optimizers.torch.fedprox as FP" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "22ba64da", - "metadata": {}, - "outputs": [], - "source": [ - "from medmnist import INFO, Evaluator\n", - "\n", - "## Change the dataflag here to reflect the one defined in the envoy_config_xxx.yaml\n", - "dataname = 'bloodmnist'\n" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id":
"d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'api'\n", - "director_node_fqdn = 'localhost'\n", - "director_port=50051\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port, \n", - " tls=False\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Describing FL experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "9b3081a6", - "metadata": {}, - "source": [ - "## Load MedMNIST INFO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e0377d3a", - "metadata": {}, - "outputs": [], - "source": [ - "num_epochs = 3\n", - "TRAIN_BS, VALID_BS = 64, 128\n", - "\n", - "lr = 0.001\n", - "gamma=0.1\n", - "milestones = [0.5 * num_epochs, 0.75 * num_epochs]\n", - "\n", - "info = INFO[dataname]\n", - "task = info['task']\n", - "n_channels = info['n_channels']\n", - "n_classes = len(info['label'])" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0dc457e", - "metadata": {}, - "outputs": [], - "source": [ - "## Data transformations\n", - "data_transform = T.Compose([T.ToTensor(), \n", - " T.Normalize(mean=[.5], std=[.5])]\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "09ba2f64", - "metadata": {}, - "outputs": [], - "source": [ - "from PIL import Image\n", - "\n", - "class TransformedDataset(Dataset):\n", - " \"\"\"Image Person ReID Dataset.\"\"\"\n", - "\n", - "\n", - " def __init__(self, dataset, transform=None, target_transform=None):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - " self.transform = transform\n", - " self.target_transform = target_transform\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " \n", - " img, label = self.dataset[index]\n", - " \n", - " if self.target_transform:\n", - " label = self.target_transform(label) \n", - " else:\n", - " label = label.astype(int)\n", - " \n", - " if self.transform:\n", - " img = Image.fromarray(img)\n", - " 
img = self.transform(img)\n", - " else:\n", - " base_transform = T.PILToTensor()\n", - " img = Image.fromarray(img)\n", - " img = base_transform(img) \n", - "\n", - " return img, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db2d563e", - "metadata": {}, - "outputs": [], - "source": [ - "class MedMnistFedDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " self.kwargs = kwargs\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - "\n", - " self.train_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('train'),\n", - " transform=data_transform\n", - " ) \n", - " \n", - " self.valid_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('val'),\n", - " transform=data_transform\n", - " )\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(\n", - " self.train_set, num_workers=8, batch_size=self.kwargs['train_bs'], shuffle=True)\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(self.valid_set, num_workers=8, batch_size=self.kwargs['valid_bs'])\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "b0dfb459", - "metadata": {}, - "source": [ - "### Create Mnist federated dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4af5c4c2", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MedMnistFedDataset(train_bs=TRAIN_BS, valid_bs=VALID_BS)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f63908e", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset.shard_descriptor = dummy_shard_desc\n", - "for i, (sample, target) in enumerate(fed_dataset.get_train_loader()):\n", - " print(sample.shape, target.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "075d1d6c", - "metadata": {}, - "source": [ - "### Describe the model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8477a001", - "metadata": {}, - "outputs": [], - "source": [ - "# define a simple CNN model\n", - "class Net(nn.Module):\n", - " def __init__(self, in_channels, num_classes):\n", - " super(Net, self).__init__()\n", - "\n", - " self.layer1 = nn.Sequential(\n", - " nn.Conv2d(in_channels, 16, kernel_size=3),\n", - " nn.BatchNorm2d(16),\n", - " nn.ReLU())\n", - "\n", - " self.layer2 = nn.Sequential(\n", - " nn.Conv2d(16, 16, kernel_size=3),\n", - " nn.BatchNorm2d(16),\n", - " nn.ReLU(),\n", - " nn.MaxPool2d(kernel_size=2, stride=2))\n", - "\n", - " self.layer3 = nn.Sequential(\n", - " nn.Conv2d(16, 64, kernel_size=3),\n", - " nn.BatchNorm2d(64),\n", - " 
nn.ReLU())\n", - " \n", - " self.layer4 = nn.Sequential(\n", - " nn.Conv2d(64, 64, kernel_size=3),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU())\n", - "\n", - " self.layer5 = nn.Sequential(\n", - " nn.Conv2d(64, 64, kernel_size=3, padding=1),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU(),\n", - " nn.MaxPool2d(kernel_size=2, stride=2))\n", - "\n", - " self.fc = nn.Sequential(\n", - " nn.Linear(64 * 4 * 4, 128),\n", - " nn.ReLU(),\n", - " nn.Linear(128, 128),\n", - " nn.ReLU(),\n", - " nn.Linear(128, num_classes))\n", - "\n", - " def forward(self, x):\n", - " x = self.layer1(x)\n", - " x = self.layer2(x)\n", - " x = self.layer3(x)\n", - " x = self.layer4(x)\n", - " x = self.layer5(x)\n", - " x = x.view(x.size(0), -1)\n", - " x = self.fc(x)\n", - " return x\n", - "\n", - "model = Net(in_channels=n_channels, num_classes=n_classes)\n", - " \n", - "# define loss function and optimizer\n", - "if task == \"multi-label, binary-class\":\n", - " criterion = nn.BCEWithLogitsLoss()\n", - "else:\n", - " criterion = nn.CrossEntropyLoss()\n", - " \n", - "optimizer = FP.FedProxOptimizer(params = model.parameters(), lr=lr, momentum=0.9)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2154486", - "metadata": {}, - "outputs": [], - "source": [ - "print(model)" - ] - }, - { - "cell_type": "markdown", - "id": "8d1c78ee", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "59831bcd", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4ff463bd", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "train_custom_params={'criterion':criterion,'task':task}\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@TI.add_kwargs(**train_custom_params)\n", - "@TI.register_fl_task(model='model', data_loader='train_loader',\n", - " device='device', optimizer='optimizer')\n", - "def train(model, train_loader, device, optimizer, criterion, task):\n", - " total_loss = []\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " model.train()\n", - " model.to(device)\n", - " \n", - " for inputs, targets in train_loader:\n", - " \n", - " optimizer.set_old_weights(list(model.parameters()))\n", - " optimizer.zero_grad()\n", - " \n", - " outputs = model(inputs.to(device))\n", - " \n", - " if task == 'multi-label, binary-class':\n", - " targets = targets.to(torch.float32).to(device)\n", - " loss = criterion(outputs, targets)\n", - " else:\n", - " targets = torch.squeeze(targets, 1).long().to(device)\n", - " loss = criterion(outputs, targets)\n", - " \n", - " total_loss.append(loss.item())\n", - " \n", - " loss.backward()\n", - " optimizer.step()\n", - "\n", - " return {'train_loss': np.mean(total_loss),}\n", - "\n", - "\n", - "val_custom_params={'criterion':criterion, \n", - " 'task':task}\n", - "\n", - "@TI.add_kwargs(**val_custom_params)\n", - "@TI.register_fl_task(model='model', data_loader='val_loader', device='device')\n", - "def 
validate(model, val_loader, device, criterion, task):\n", - "\n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - " model.eval()\n", - " model.to(device)\n", - "\n", - " val_score = 0\n", - " total_samples = 0\n", - " total_loss = []\n", - " y_score = torch.tensor([]).to(device)\n", - "\n", - " with torch.no_grad():\n", - " for inputs, targets in val_loader:\n", - " outputs = model(inputs.to(device))\n", - " \n", - " if task == 'multi-label, binary-class':\n", - " targets = targets.to(torch.float32).to(device)\n", - " loss = criterion(outputs, targets)\n", - " m = nn.Sigmoid()\n", - " outputs = m(outputs).to(device)\n", - " else:\n", - " targets = torch.squeeze(targets, 1).long().to(device)\n", - " loss = criterion(outputs, targets)\n", - " m = nn.Softmax(dim=1)\n", - " outputs = m(outputs).to(device)\n", - " targets = targets.float().resize_(len(targets), 1)\n", - "\n", - " total_loss.append(loss.item())\n", - " \n", - " total_samples += targets.shape[0]\n", - " pred = outputs.argmax(dim=1)\n", - " val_score += pred.eq(targets).sum().cpu().numpy()\n", - " \n", - " acc = val_score / total_samples \n", - " test_loss = sum(total_loss) / len(total_loss)\n", - "\n", - " return {'acc': acc,\n", - " 'test_loss': test_loss,\n", - " }" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experiment in the federation\n", - "experiment_name = 'medmnist_exp'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n", - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=3,\n", - " opt_treatment='RESET',\n", - " device_assignment_policy='CUDA_PREFERRED')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01fa7cea", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# If the user wants to stop the IPython session, then reconnect and check how the experiment is going\n", - "# fl_experiment.restore_experiment_state(model_interface)\n", - "\n", - "fl_experiment.stream_metrics(tensorboard_logs=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92940763", - "metadata": {}, - "outputs": [], - "source": [ - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1690ea49", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10d7d5a2", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "vscode": { - "interpreter": { - "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git
a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/README.md deleted file mode 100644 index 54ff74b76b..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/README.md +++ /dev/null @@ -1,66 +0,0 @@ -# PyTorch_Histology - -## **How to run this tutorial (without TLS and locally as a simulation):** -
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) - -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_Histology - ``` - -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r requirements.txt -./start_envoy.sh env_one envoy_config.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config.yaml -``` - -
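-
-The envoy config used here sets `rank_worldsize: 1,2`, i.e. this envoy takes rank 1 of a two-way
-split. The shard descriptor below first cuts the sample indices 80/20 into train/test
-(`TRAIN_SPLIT_RATIO = 0.8`) and then keeps every `worldsize`-th index for its rank; a toy
-illustration of that index arithmetic:
-
-```python
-import numpy as np
-
-# 10 hypothetical sample indices: 80/20 train/test split, then 2-way striping.
-idx = list(range(10))
-train_idx, test_idx = np.split(idx, [int(len(idx) * 0.8)])
-print(train_idx[0::2], test_idx[0::2])  # rank 1 -> [0 2 4 6] [8]
-print(train_idx[1::2], test_idx[1::2])  # rank 2 -> [1 3 5 7] [9]
-```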
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab pytorch_histology.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the pytorch_histology.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/director_config.yaml deleted file mode 100644 index 14f38682af..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['150', '150'] - target_shape: ['1'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/director_config_review_exp.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/director_config_review_exp.yaml deleted file mode 100644 index 4bbfccc6af..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/director_config_review_exp.yaml +++ /dev/null @@ -1,6 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['150', '150'] - target_shape: ['1'] - review_experiment: True diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/start_director_with_tls.sh deleted file mode 100755 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/.gitignore b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/.gitignore deleted file mode 100644 index ad818ec17f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/.gitignore +++ /dev/null @@ -1 +0,0 @@ -histology_data \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/envoy_config.yaml deleted file mode 100644 index b51c0dbe43..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/envoy_config.yaml +++ /dev/null @@ -1,10 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - 
-shard_descriptor: - template: histology_shard_descriptor.HistologyShardDescriptor - params: - data_folder: histology_data - rank_worldsize: 1,2 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/envoy_config_review_exp.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/envoy_config_review_exp.yaml deleted file mode 100644 index d8341b28b0..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/envoy_config_review_exp.yaml +++ /dev/null @@ -1,11 +0,0 @@ -params: - cuda_devices: [] - review_experiment: True - -optional_plugin_components: {} - -shard_descriptor: - template: histology_shard_descriptor.HistologyShardDescriptor - params: - data_folder: histology_data - rank_worldsize: 1,2 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/histology_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/histology_shard_descriptor.py deleted file mode 100644 index 7baed8c663..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/histology_shard_descriptor.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Histology Shard Descriptor.""" - -import logging -import os -from pathlib import Path -from typing import Tuple -from urllib.request import urlretrieve -from zipfile import ZipFile - -import numpy as np -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor -from openfl.utilities import tqdm_report_hook -from openfl.utilities import validate_file_hash - - -logger = logging.getLogger(__name__) - - -class HistologyShardDataset(ShardDataset): - """Histology shard dataset class.""" - - TRAIN_SPLIT_RATIO = 0.8 - - def __init__(self, data_folder: Path, data_type='train', rank=1, worldsize=1): - """Histology shard dataset class.""" - self.data_type = data_type - root = Path(data_folder) / 'Kather_texture_2016_image_tiles_5000' - classes = [d.name for d in root.iterdir() if d.is_dir()] - class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} - self.samples = [] - root = root.expanduser() - for target_class in sorted(class_to_idx.keys()): - class_index = class_to_idx[target_class] - target_dir = os.path.join(root, target_class) - for class_root, _, fnames in sorted(os.walk(target_dir, followlinks=True)): - for fname in sorted(fnames): - path = os.path.join(class_root, fname) - item = path, class_index - self.samples.append(item) - - idx_range = list(range(len(self.samples))) - idx_sep = int(len(idx_range) * HistologyShardDataset.TRAIN_SPLIT_RATIO) - train_idx, test_idx = np.split(idx_range, [idx_sep]) - if data_type == 'train': - self.idx = train_idx[rank - 1::worldsize] - else: - self.idx = test_idx[rank - 1::worldsize] - - def __len__(self) -> int: - """Return the len of the shard dataset.""" - return len(self.idx) - - def load_pil(self, path): - """Load image.""" - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') - - def __getitem__(self, index: int) -> Tuple['Image', int]: - """Return an item by the index.""" - path, target = self.samples[self.idx[index]] - sample = self.load_pil(path) - return sample, target - - -class HistologyShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - URL = ('https://zenodo.org/record/53169/files/Kather_' - 
'texture_2016_image_tiles_5000.zip?download=1') - FILENAME = 'Kather_texture_2016_image_tiles_5000.zip' - ZIP_SHA384 = ('7d86abe1d04e68b77c055820c2a4c582a1d25d2983e38ab724e' - 'ac75affce8b7cb2cbf5ba68848dcfd9d84005d87d6790') - DEFAULT_PATH = Path.home() / '.openfl' / 'data' - - def __init__( - self, - data_folder: Path = DEFAULT_PATH, - rank_worldsize: str = '1,1', - **kwargs - ): - """Initialize HistologyShardDescriptor.""" - self.data_folder = Path.cwd() / data_folder - self.download_data() - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - def download_data(self): - """Download prepared shard dataset.""" - os.makedirs(self.data_folder, exist_ok=True) - filepath = self.data_folder / HistologyShardDescriptor.FILENAME - if not filepath.exists(): - reporthook = tqdm_report_hook() - urlretrieve(HistologyShardDescriptor.URL, filepath, reporthook) # nosec - validate_file_hash(filepath, HistologyShardDescriptor.ZIP_SHA384) - with ZipFile(filepath, 'r') as f: - f.extractall(self.data_folder) - - def get_dataset(self, dataset_type): - """Return a shard dataset by type.""" - return HistologyShardDataset( - data_folder=self.data_folder, - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self): - """Return the sample shape info.""" - shape = self.get_dataset('train')[0][0].size - return [str(dim) for dim in shape] - - @property - def target_shape(self): - """Return the target shape info.""" - target = self.get_dataset('train')[0][1] - shape = np.array([target]).shape - return [str(dim) for dim in shape] - - @property - def dataset_description(self) -> str: - """Return the shard dataset description.""" - return (f'Histology dataset, shard number {self.rank}' - f' out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/requirements.txt deleted file mode 100644 index 069a960a3c..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy==1.22.2 -Pillow==10.3.0 -tqdm==4.66.3 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/start_envoy.sh deleted file mode 100755 index 1dfda52241..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config.yaml -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/start_envoy_with_tls.sh deleted file mode 100755 index 06b2916a4f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -DIRECTOR_FQDN=$2 - -fx envoy start -n "$ENVOY_NAME" --envoy-config-path envoy_config.yaml -dh "$DIRECTOR_FQDN" -dp 50051 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/workspace/pytorch_histology.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/workspace/pytorch_histology.ipynb deleted file mode 
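Note that the descriptor above pins the Zenodo archive to a SHA-384 digest via `validate_file_hash` before extracting it, so a corrupted or tampered download fails fast instead of silently producing a bad shard. A rough standard-library equivalent of that check (a sketch for illustration, not OpenFL's actual implementation):

```python
import hashlib
from pathlib import Path


def sha384_digest(path: Path, chunk_size: int = 1 << 20) -> str:
    """Hash the file in chunks so large archives never sit fully in memory."""
    digest = hashlib.sha384()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def check_archive(path: Path, expected_sha384: str) -> None:
    """Raise before extraction if the downloaded archive does not match."""
    actual = sha384_digest(path)
    if actual != expected_sha384:
        raise ValueError(f'Hash mismatch for {path}: got {actual}')
```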
100644 index cf0eb4655b..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology/workspace/pytorch_histology.ipynb +++ /dev/null @@ -1,531 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated PyTorch Histology Tutorial" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "895288d0", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install torchvision==0.8.2" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identifier that was used in the signed certificate\n", - "client_id = 'api'\n", - "cert_dir = 'cert'\n", - "director_node_fqdn = 'localhost'\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS, they must provide the CA root chain and a signed key pair to the federation interface\n", - "# cert_chain = f'{cert_dir}/root_ca.crt'\n", - "# api_certificate = f'{cert_dir}/{client_id}.crt'\n", - "# api_private_key = f'{cert_dir}/{client_id}.key'\n", - "\n", - "# federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051',\n", - "# cert_chain=cert_chain, api_cert=api_certificate, api_private_key=api_private_key)\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine the local fqdn automatically\n", - "federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051', tls=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1abebd90", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "sample, target = dummy_shard_desc.get_dataset('train')[0]" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7dda1680", - "metadata": {}, - "outputs": [], - "source": [ - "import torchvision\n", - "from torchvision import transforms as T\n", - "\n", - "normalize = T.Normalize(mean=[0.485, 0.456, 0.406],\n", - " std=[0.229, 0.224, 0.225])\n", - "\n", - "augmentation = T.RandomApply(\n", - "
[T.RandomHorizontalFlip(),\n", - " T.RandomRotation(10),\n", - " T.RandomResizedCrop(64)], \n", - " p=.8\n", - ")\n", - "\n", - "training_transform = T.ToTensor()\n", - "\n", - "valid_transform = T.ToTensor()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0314d5bf", - "metadata": {}, - "outputs": [], - "source": [ - "from torch.utils.data import Dataset\n", - "\n", - "\n", - "class TransformedDataset(Dataset):\n", - " \"\"\"Dataset wrapper that applies transforms to samples and targets.\"\"\"\n", - "\n", - " def __init__(self, dataset, transform=None, target_transform=None):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - " self.transform = transform\n", - " self.target_transform = target_transform\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " img, label = self.dataset[index]\n", - " label = self.target_transform(label) if self.target_transform else label\n", - " img = self.transform(img) if self.transform else img\n", - " return img, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01369e3b", - "metadata": {}, - "outputs": [], - "source": [ - "class HistologyDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " self.kwargs = kwargs\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " \n", - " self.train_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('train'),\n", - " transform=training_transform\n", - " )\n", - " self.valid_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('val'),\n", - " transform=valid_transform\n", - " )\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(\n", - " self.train_set, num_workers=8, batch_size=self.kwargs['train_bs'], shuffle=True\n", - " )\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(self.valid_set, num_workers=8, batch_size=self.kwargs['valid_bs'])\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4a6cedef", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = HistologyDataset(train_bs=64, valid_bs=64)" - ] - }, - { - "cell_type": "markdown", - "id": "74cac654", - "metadata": {}, - "source": [ - "### Describe the model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4949e16d", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import glob\n", - "from torch.utils.data import Dataset, DataLoader\n", - "from PIL import Image\n", - "\n", -
"import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43e25fe3", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "MobileNetV2 model\n", - "\"\"\"\n", - "\n", - "class Net(nn.Module):\n", - " def __init__(self):\n", - " super(Net, self).__init__()\n", - " conv_kwargs = {'kernel_size': 3, 'stride': 1, 'padding': 1}\n", - " self.conv1 = nn.Conv2d(3, 16, **conv_kwargs)\n", - " self.conv2 = nn.Conv2d(16, 32, **conv_kwargs)\n", - " self.conv3 = nn.Conv2d(32, 64, **conv_kwargs)\n", - " self.conv4 = nn.Conv2d(64, 128, **conv_kwargs)\n", - " self.conv5 = nn.Conv2d(128 + 32, 256, **conv_kwargs)\n", - " self.conv6 = nn.Conv2d(256, 512, **conv_kwargs)\n", - " self.conv7 = nn.Conv2d(512 + 128 + 32, 256, **conv_kwargs)\n", - " self.conv8 = nn.Conv2d(256, 512, **conv_kwargs)\n", - " self.fc1 = nn.Linear(1184 * 9 * 9, 128)\n", - " self.fc2 = nn.Linear(128, 8)\n", - "\n", - "\n", - " def forward(self, x):\n", - " x = F.relu(self.conv1(x))\n", - " x = F.relu(self.conv2(x))\n", - " maxpool = F.max_pool2d(x, 2, 2)\n", - "\n", - " x = F.relu(self.conv3(maxpool))\n", - " x = F.relu(self.conv4(x))\n", - " concat = torch.cat([maxpool, x], dim=1)\n", - " maxpool = F.max_pool2d(concat, 2, 2)\n", - "\n", - " x = F.relu(self.conv5(maxpool))\n", - " x = F.relu(self.conv6(x))\n", - " concat = torch.cat([maxpool, x], dim=1)\n", - " maxpool = F.max_pool2d(concat, 2, 2)\n", - "\n", - " x = F.relu(self.conv7(maxpool))\n", - " x = F.relu(self.conv8(x))\n", - " concat = torch.cat([maxpool, x], dim=1)\n", - " maxpool = F.max_pool2d(concat, 2, 2)\n", - "\n", - " x = maxpool.flatten(start_dim=1)\n", - " x = F.dropout(self.fc1(x), p=0.5)\n", - " x = self.fc2(x)\n", - " return x\n", - "\n", - "model_net = Net()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d279b4fb", - "metadata": {}, - "outputs": [], - "source": [ - "optimizer_adam = optim.Adam(model_net.parameters(), lr=1e-4)" - ] - }, - { - "cell_type": "markdown", - "id": "f097cdc5", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06a8cca8", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "model_interface = ModelInterface(model=model_net, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model_net)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9649385", - "metadata": {}, - "outputs": [], - "source": [ - "task_interface = TaskInterface()\n", - "import torch\n", - "\n", - "import tqdm\n", - "\n", - "# The Interactive API supports registering functions definied in main module or imported.\n", - "def function_defined_in_notebook(some_parameter):\n", - " print(f'Also I accept a parameter and it is {some_parameter}')\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@task_interface.add_kwargs(**{'some_parameter': 42})\n", - "@task_interface.register_fl_task(model='net_model', data_loader='train_loader', \\\n", - " device='device', optimizer='optimizer') \n", - "def train(net_model, 
train_loader, optimizer, device, loss_fn=F.cross_entropy, some_parameter=None):\n", - " device = torch.device('cuda')\n", - " if not torch.cuda.is_available():\n", - " device = 'cpu'\n", - " \n", - " function_defined_in_notebook(some_parameter)\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " net_model.train()\n", - " net_model.to(device)\n", - "\n", - " losses = []\n", - "\n", - " for data, target in train_loader:\n", - " data, target = torch.tensor(data).to(device), torch.tensor(\n", - " target).to(device) \n", - " optimizer.zero_grad()\n", - " output = net_model(data)\n", - " loss = loss_fn(output, target)\n", - " loss.backward()\n", - " optimizer.step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - " \n", - " return {'train_loss': np.mean(losses),}\n", - "\n", - "\n", - "@task_interface.register_fl_task(model='net_model', data_loader='val_loader', device='device') \n", - "def validate(net_model, val_loader, device):\n", - " device = torch.device('cuda')\n", - " if not torch.cuda.is_available():\n", - " device = 'cpu'\n", - " net_model.eval()\n", - " net_model.to(device)\n", - " \n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - " val_score = 0\n", - " total_samples = 0\n", - "\n", - " with torch.no_grad():\n", - " for data, target in val_loader:\n", - " samples = target.shape[0]\n", - " total_samples += samples\n", - " data, target = torch.tensor(data).to(device), \\\n", - " torch.tensor(target).to(device)\n", - " output = net_model(data)\n", - " pred = output.argmax(dim=1)\n", - " val_score += pred.eq(target).sum().cpu().numpy()\n", - " \n", - " return {'acc': val_score / total_samples,}" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experiment in the federation\n", - "experiment_name = 'histology_test_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n", - "fl_experiment.start(\n", - " model_provider=model_interface, \n", - " task_keeper=task_interface,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=5,\n", - " opt_treatment='CONTINUE_GLOBAL'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f3efc78f-57cd-42e0-9398-bdebd596fac5", - "metadata": {}, - "outputs": [], - "source": [ - "# Get status of the current experiment\n", - "fl_experiment.get_experiment_status()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "acting-immunology", - "metadata": {}, - "outputs": [], - "source": [ - "# If you want to stop the IPython session, you can later reconnect and check how the experiment is going\n", - "# fl_experiment.restore_experiment_state(model_interface)\n", - "\n", - "fl_experiment.stream_metrics(tensorboard_logs=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "432afab5-c44b-440a-9d54-1611fb48cf03", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": {
- "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/README.md deleted file mode 100644 index 1f3058ed3f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# PyTorch tutorial for FedCurv Federated Learning method on Histology dataset - -To show results on non-iid data distribution, this tutorial contains shard descriptor with custom data splitter where data is split log-normally. Federation consists of 8 envoys. - -Your Python environment must have OpenFL installed. - -1. Run Director instance: -``` -cd director -bash start_director.sh -``` - -2. In a separate terminal, execute: -``` -cd envoy -bash populate_envoys.sh # This creates all envoys folders in current directory -bash start_envoys.sh # This launches all envoy instances -``` - -3. In a separate terminal, launch a Jupyter Lab: -``` -cd workspace -jupyter lab -``` - -4. Open your browser at corresponding port and open `pytorch_histology.ipynb` from Jupyter web interface. - -5. Execute all cells in order. diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/director/director_config.yaml deleted file mode 100644 index 14f38682af..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['150', '150'] - target_shape: ['1'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/director/start_director_with_tls.sh deleted file mode 100755 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/.gitignore b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/.gitignore deleted file mode 100644 index ad818ec17f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/.gitignore +++ /dev/null @@ -1 +0,0 @@ -histology_data \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/envoy_config.yaml 
b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/envoy_config.yaml deleted file mode 100644 index 24dee186f7..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/envoy_config.yaml +++ /dev/null @@ -1,10 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: histology_shard_descriptor.HistologyShardDescriptor - params: - data_folder: histology_data - rank_worldsize: 1,8 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/histology_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/histology_shard_descriptor.py deleted file mode 100644 index 7e73f0eba0..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/histology_shard_descriptor.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Histology Shard Descriptor.""" - -import logging -import os -from pathlib import Path -from typing import Tuple -from urllib.request import urlretrieve -from zipfile import ZipFile - -import numpy as np -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor -from openfl.utilities import tqdm_report_hook -from openfl.utilities import validate_file_hash -from openfl.utilities.data_splitters.numpy import LogNormalNumPyDataSplitter - - -logger = logging.getLogger(__name__) - - -class HistologyShardDataset(ShardDataset): - """Histology shard dataset class.""" - - TRAIN_SPLIT_RATIO = 0.8 - - def __init__(self, data_folder: Path, data_type='train', rank=1, worldsize=1): - """Histology shard dataset class.""" - self.data_type = data_type - root = Path(data_folder) / 'Kather_texture_2016_image_tiles_5000' - classes = [d.name for d in root.iterdir() if d.is_dir()] - class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} - self.samples = [] - root = root.absolute() - for target_class in sorted(class_to_idx.keys()): - class_index = class_to_idx[target_class] - target_dir = root / target_class - for path in sorted(target_dir.glob('*')): - item = path, class_index - self.samples.append(item) - np.random.seed(0) - np.random.shuffle(self.samples) - idx_range = list(range(len(self.samples))) - idx_sep = int(len(idx_range) * HistologyShardDataset.TRAIN_SPLIT_RATIO) - train_idx, test_idx = np.split(idx_range, [idx_sep]) - data_splitter = LogNormalNumPyDataSplitter( - mu=0, - sigma=2, - num_classes=8, - classes_per_col=2, - min_samples_per_class=5) - if data_type == 'train': - labels = np.array(self.samples)[train_idx][:, 1].astype(int) - self.idx = data_splitter.split(labels, worldsize)[rank - 1] - else: - labels = np.array(self.samples)[test_idx][:, 1].astype(int) - self.idx = data_splitter.split(labels, worldsize)[rank - 1] - - def __len__(self) -> int: - """Return the len of the shard dataset.""" - return len(self.idx) - - def load_pil(self, path): - """Load image.""" - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') - - def __getitem__(self, index: int) -> Tuple['Image', int]: - """Return an item by the index.""" - path, target = self.samples[self.idx[index]] - sample = self.load_pil(path) - return sample, target - - -class HistologyShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - URL = ('https://zenodo.org/record/53169/files/Kather_' 
- 'texture_2016_image_tiles_5000.zip?download=1') - FILENAME = 'Kather_texture_2016_image_tiles_5000.zip' - ZIP_SHA384 = ('7d86abe1d04e68b77c055820c2a4c582a1d25d2983e38ab724e' - 'ac75affce8b7cb2cbf5ba68848dcfd9d84005d87d6790') - DEFAULT_PATH = Path('.') / 'data' - - def __init__( - self, - data_folder: Path = DEFAULT_PATH, - rank_worldsize: str = '1,1', - **kwargs - ): - """Initialize HistologyShardDescriptor.""" - self.data_folder = Path.cwd() / data_folder - self.download_data() - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - def download_data(self): - """Download prepared shard dataset.""" - os.makedirs(self.data_folder, exist_ok=True) - filepath = self.data_folder / HistologyShardDescriptor.FILENAME - if not filepath.exists(): - reporthook = tqdm_report_hook() - urlretrieve(HistologyShardDescriptor.URL, filepath, reporthook) # nosec - validate_file_hash(filepath, HistologyShardDescriptor.ZIP_SHA384) - with ZipFile(filepath, 'r') as f: - f.extractall(self.data_folder) - - def get_dataset(self, dataset_type): - """Return a shard dataset by type.""" - return HistologyShardDataset( - data_folder=self.data_folder, - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self): - """Return the sample shape info.""" - shape = self.get_dataset('train')[0][0].size - return [str(dim) for dim in shape] - - @property - def target_shape(self): - """Return the target shape info.""" - target = self.get_dataset('train')[0][1] - shape = np.array([target]).shape - return [str(dim) for dim in shape] - - @property - def dataset_description(self) -> str: - """Return the shard dataset description.""" - return (f'Histology dataset, shard number {self.rank}' - f' out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/populate_envoys.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/populate_envoys.sh deleted file mode 100644 index f3e46196cf..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/populate_envoys.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -DIRECTOR_HOST=${1:-'localhost'} -DIRECTOR_PORT=${2:-'50051'} -PYTHON=${3:-'python3.8'} - -for i in {1..8} -do - mkdir $i - cd $i - echo "shard_descriptor: - template: histology_shard_descriptor.HistologyShardDescriptor - params: - data_folder: histology_data - rank_worldsize: $i,8 -" > envoy_config.yaml - - eval ${PYTHON} '-m venv venv' - echo "source venv/bin/activate - pip install ../../../../.. # install OpenFL - pip install -r requirements.txt - fx envoy start -n env_$i --disable-tls --envoy-config-path envoy_config.yaml -dh ${DIRECTOR_HOST} -dp ${DIRECTOR_PORT} - " > start_envoy.sh - cp ../requirements.txt . - cp ../histology_shard_descriptor.py . - cd .. 
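The non-IID behavior promised in the FedCurv README comes from the `LogNormalNumPyDataSplitter` call in the shard descriptor above. A short usage sketch with synthetic labels standing in for the 8 histology classes (shard sizes will vary from run to run):

```python
import numpy as np

from openfl.utilities.data_splitters.numpy import LogNormalNumPyDataSplitter

# Synthetic stand-in for the histology labels (8 classes, ~4000 samples).
labels = np.random.randint(0, 8, size=4000)

splitter = LogNormalNumPyDataSplitter(
    mu=0,
    sigma=2,
    num_classes=8,
    classes_per_col=2,
    min_samples_per_class=5,
)

# One index array per envoy; the envoy with rank i trains on shards[i - 1].
shards = splitter.split(labels, 8)
print([len(shard) for shard in shards])  # deliberately uneven, non-IID shard sizes
```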
-done diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/requirements.txt deleted file mode 100644 index 069a960a3c..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy==1.22.2 -Pillow==10.3.0 -tqdm==4.66.3 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/start_envoy.sh deleted file mode 100755 index 1dfda52241..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config.yaml -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/start_envoy_with_tls.sh deleted file mode 100755 index 06b2916a4f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -DIRECTOR_FQDN=$2 - -fx envoy start -n "$ENVOY_NAME" --envoy-config-path envoy_config.yaml -dh "$DIRECTOR_FQDN" -dp 50051 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/start_envoys.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/start_envoys.sh deleted file mode 100644 index db526717f7..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/envoy/start_envoys.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -set -e - -cd 1 && bash start_envoy.sh & -cd 2 && bash start_envoy.sh & -cd 3 && bash start_envoy.sh & -cd 4 && bash start_envoy.sh & -cd 5 && bash start_envoy.sh & -cd 6 && bash start_envoy.sh & -cd 7 && bash start_envoy.sh & -cd 8 && bash start_envoy.sh diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/workspace/.gitignore b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/workspace/.gitignore deleted file mode 100644 index 98d8a5a630..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/workspace/.gitignore +++ /dev/null @@ -1 +0,0 @@ -logs diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/workspace/pytorch_histology.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/workspace/pytorch_histology.ipynb deleted file mode 100644 index 4a014c9102..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Histology_FedCurv/workspace/pytorch_histology.ipynb +++ /dev/null @@ -1,521 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated PyTorch Histology Tutorial" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "895288d0", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install torchvision==0.10.0" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identifier that was used in the signed certificate\n", - "client_id = 'api'\n", - "cert_dir = 'cert'\n", - "director_node_fqdn = 'localhost'\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS, they must provide the CA root chain and a signed key pair to the federation interface\n", - "# cert_chain = f'{cert_dir}/root_ca.crt'\n", - "# api_certificate = f'{cert_dir}/{client_id}.crt'\n", - "# api_private_key = f'{cert_dir}/{client_id}.key'\n", - "\n", - "# federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051',\n", - "# cert_chain=cert_chain, api_cert=api_certificate, api_private_key=api_private_key)\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine the local fqdn automatically\n", - "federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051', tls=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1abebd90", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "sample, target = dummy_shard_desc.get_dataset('train')[0]" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7dda1680", - "metadata": {}, - "outputs": [], - "source": [ - "import torchvision\n", - "from torchvision import transforms as T\n", - "\n", - "normalize = T.Normalize(mean=[0.485, 0.456, 0.406],\n", - " std=[0.229, 0.224, 0.225])\n", - "\n", - "augmentation = T.RandomApply(\n", - " [T.RandomHorizontalFlip(),\n", - " T.RandomRotation(10),\n", - " T.RandomResizedCrop(64)], \n", - " p=.8\n", - ")\n", - "\n", - "training_transform = T.ToTensor()\n", - "\n", - "valid_transform = T.ToTensor()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0314d5bf", - "metadata": {}, - "outputs": [], - "source": [ - "from torch.utils.data import Dataset\n", - "\n", - "\n", - "class TransformedDataset(Dataset):\n", - " \"\"\"Dataset wrapper that applies transforms to samples and targets.\"\"\"\n", - "\n", - " def __init__(self, dataset, transform=None, target_transform=None):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - "
self.transform = transform\n", - " self.target_transform = target_transform\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " img, label = self.dataset[index]\n", - " label = self.target_transform(label) if self.target_transform else label\n", - " img = self.transform(img) if self.transform else img\n", - " return img, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01369e3b", - "metadata": {}, - "outputs": [], - "source": [ - "class HistologyDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " self.kwargs = kwargs\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " \n", - " self.train_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('train'),\n", - " transform=training_transform\n", - " )\n", - " self.valid_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('val'),\n", - " transform=valid_transform\n", - " )\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(\n", - " self.train_set, num_workers=8, batch_size=self.kwargs['train_bs'], shuffle=True\n", - " )\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(self.valid_set, num_workers=8, batch_size=self.kwargs['valid_bs'])\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4a6cedef", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = HistologyDataset(train_bs=64, valid_bs=64)" - ] - }, - { - "cell_type": "markdown", - "id": "74cac654", - "metadata": {}, - "source": [ - "### Describe the model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4949e16d", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import glob\n", - "from torch.utils.data import Dataset, DataLoader\n", - "from PIL import Image\n", - "\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6158e87", - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(0)\n", - "torch.manual_seed(0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43e25fe3", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Custom CNN model\n", - "\"\"\"\n", - "\n", - "class Net(nn.Module):\n", - " def __init__(self):\n", - " super(Net, self).__init__()\n", - " conv_kwargs = {'kernel_size': 3, 'stride':
1, 'padding': 1}\n", - " self.conv1 = nn.Conv2d(3, 16, **conv_kwargs)\n", - " self.conv2 = nn.Conv2d(16, 32, **conv_kwargs)\n", - " self.conv3 = nn.Conv2d(32, 64, **conv_kwargs)\n", - " self.conv4 = nn.Conv2d(64, 128, **conv_kwargs)\n", - " self.conv5 = nn.Conv2d(128 + 32, 256, **conv_kwargs)\n", - " self.conv6 = nn.Conv2d(256, 512, **conv_kwargs)\n", - " self.conv7 = nn.Conv2d(512 + 128 + 32, 256, **conv_kwargs)\n", - " self.conv8 = nn.Conv2d(256, 512, **conv_kwargs)\n", - " self.fc1 = nn.Linear(1184 * 9 * 9, 128)\n", - " self.fc2 = nn.Linear(128, 8)\n", - "\n", - "\n", - " def forward(self, x):\n", - " torch.manual_seed(0)\n", - " x = F.relu(self.conv1(x))\n", - " x = F.relu(self.conv2(x))\n", - " maxpool = F.max_pool2d(x, 2, 2)\n", - "\n", - " x = F.relu(self.conv3(maxpool))\n", - " x = F.relu(self.conv4(x))\n", - " concat = torch.cat([maxpool, x], dim=1)\n", - " maxpool = F.max_pool2d(concat, 2, 2)\n", - "\n", - " x = F.relu(self.conv5(maxpool))\n", - " x = F.relu(self.conv6(x))\n", - " concat = torch.cat([maxpool, x], dim=1)\n", - " maxpool = F.max_pool2d(concat, 2, 2)\n", - "\n", - " x = F.relu(self.conv7(maxpool))\n", - " x = F.relu(self.conv8(x))\n", - " concat = torch.cat([maxpool, x], dim=1)\n", - " maxpool = F.max_pool2d(concat, 2, 2)\n", - "\n", - " x = maxpool.flatten(start_dim=1)\n", - " x = F.dropout(self.fc1(x), p=0.5)\n", - " x = self.fc2(x)\n", - " return x\n", - "\n", - "model_net = Net()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79021778", - "metadata": {}, - "outputs": [], - "source": [ - "optimizer_adam = optim.Adam(model_net.parameters(), lr=1e-4)" - ] - }, - { - "cell_type": "markdown", - "id": "f097cdc5", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06a8cca8", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "model_interface = ModelInterface(model=model_net, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model_net)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9649385", - "metadata": {}, - "outputs": [], - "source": [ - "task_interface = TaskInterface()\n", - "import torch\n", - "\n", - "from openfl.utilities.fedcurv.torch import FedCurv\n", - "from openfl.interface.aggregation_functions import FedCurvWeightedAverage\n", - "import tqdm\n", - "\n", - "fedcurv = FedCurv(model_interface.provide_model(), importance=1e7)\n", - "\n", - "@task_interface.register_fl_task(model='net_model', data_loader='train_loader', \\\n", - " device='device', optimizer='optimizer')\n", - "@task_interface.set_aggregation_function(FedCurvWeightedAverage())\n", - "def train(net_model, train_loader, optimizer, device, loss_fn=F.cross_entropy):\n", - " torch.manual_seed(0)\n", - " fedcurv.on_train_begin(net_model)\n", - " device = 'cpu'\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " net_model.train()\n", - " net_model.to(device)\n", - "\n", - " losses = []\n", - "\n", - " for data, target in train_loader:\n", - " data, target = torch.tensor(data).to(device), torch.tensor(\n", - " target).to(device) \n", - " optimizer.zero_grad()\n", - " output = 
net_model(data)\n", - " loss = loss_fn(output, target) + fedcurv.get_penalty(net_model)\n", - " loss.backward()\n", - " optimizer.step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - " \n", - " fedcurv.on_train_end(net_model, train_loader, device)\n", - " \n", - " return {'train_loss': np.mean(losses),}\n", - "\n", - "\n", - "@task_interface.register_fl_task(model='net_model', data_loader='val_loader', device='device') \n", - "def validate(net_model, val_loader, device):\n", - " device = torch.device('cpu')\n", - " net_model.eval()\n", - " net_model.to(device)\n", - " \n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - " val_score = 0\n", - " total_samples = 0\n", - "\n", - " with torch.no_grad():\n", - " for data, target in val_loader:\n", - " samples = target.shape[0]\n", - " total_samples += samples\n", - " data, target = torch.tensor(data).to(device), \\\n", - " torch.tensor(target).to(device, dtype=torch.int64)\n", - " output = net_model(data)\n", - " pred = output.argmax(dim=1)\n", - " val_score += pred.eq(target).sum().cpu().numpy()\n", - " \n", - " return {'acc': val_score / total_samples,}" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experiment in the federation\n", - "experiment_name = f'histology_test_experiment {fedcurv.importance=}'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n", - "fl_experiment.start(\n", - " model_provider=model_interface, \n", - " task_keeper=task_interface,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=5,\n", - " opt_treatment='CONTINUE_GLOBAL'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "acting-immunology", - "metadata": {}, - "outputs": [], - "source": [ - "# If you want to stop the IPython session, you can later reconnect and check how the experiment is going\n", - "# fl_experiment.restore_experiment_state(model_interface)\n", - "\n", - "fl_experiment.stream_metrics()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/README.md deleted file mode 100644 index ac1c94b95d..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/README.md +++ /dev/null @@ -1,63 +0,0 @@ -# Federated Hugging Face :hugs: Transformers tutorial for audio classification using PyTorch - -Transformers have been a driving force behind breakthrough developments in the audio and speech processing domain.
Recently, Hugging Face released version 4.3.0 of its state-of-the-art natural language processing library, Transformers, and extended its reach to speech recognition by adding Wav2Vec2, one of the leading automatic speech recognition models, from Facebook. - -### About model: Wav2Vec2 - -This tutorial uses the [Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2#wav2vec2forsequenceclassification) model, a speech model checkpoint from the [Model Hub](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads). The Wav2Vec2 model was proposed in [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477), which shows that learning powerful representations from speech audio alone, followed by fine-tuning on transcribed speech, can outperform the best semi-supervised methods while being conceptually simpler. In this tutorial we fine-tune this pretrained speech model for audio classification. - -### About dataset: Keyword spotting (KS) from SUPERB - -The keyword spotting subset of the [SUPERB](https://huggingface.co/datasets/superb) dataset is used. Keyword Spotting (KS) detects preregistered keywords by classifying utterances into a predefined set of words. The dataset consists of ten keyword classes, a class for silence, and an unknown class to cover false positives. The evaluation metric is accuracy (ACC). - -### Links - -* [Huggingface transformers on Github](https://github.com/huggingface/transformers) -* [Original Huggingface notebook audio classification example on Github](https://github.com/huggingface/notebooks/blob/master/examples/audio_classification.ipynb) - -### How to run this tutorial (without TLS and locally as a simulation): - -Using Hugging Face models requires you to set up a [cache directory](https://huggingface.co/transformers/v4.0.1/installation.html#caching-models), such as XDG_CACHE_HOME, on every node where the experiment is run. - -In addition to this, the Trainer class in Hugging Face Transformers is designed to use all available GPUs on a node. Hence, to avoid a [CUDA runtime error](https://forums.developer.nvidia.com/t/cuda-peer-resources-error-when-running-on-more-than-8-k80s-aws-p2-16xlarge/45351/5) on nodes that have more than 8 GPUs, set CUDA_VISIBLE_DEVICES to limit the number of GPUs participating in the experiment. - -Go to the example [folder](./) - -```sh -export PYTORCH_HUGGINGFACE_TRANSFORMERS_SUPERB=/openfl-tutorials/interactive_api/PyTorch_Huggingface_transformers_SUPERB -``` - -1. Run director: - -```sh -cd $PYTORCH_HUGGINGFACE_TRANSFORMERS_SUPERB/director -bash start_director.sh -``` - -2. Run envoy: - -```sh -cd $PYTORCH_HUGGINGFACE_TRANSFORMERS_SUPERB/envoy -pip install -r sd_requirements.txt -export XDG_CACHE_HOME= -CUDA_VISIBLE_DEVICES= bash start_envoy.sh -``` - -Optional: start a second envoy: - -- Copy `$PYTORCH_HUGGINGFACE_TRANSFORMERS_SUPERB/envoy` to another folder, change the config and envoy name in - start_envoy.sh and run from there: - -```sh -cd $PYTORCH_HUGGINGFACE_TRANSFORMERS_SUPERB/envoy_two -export XDG_CACHE_HOME= -CUDA_VISIBLE_DEVICES= bash start_envoy.sh -``` - -3.
Run the `PyTorch_Huggingface_transformers_SUPERB.ipynb` Jupyter notebook: - -```sh -cd $PYTORCH_HUGGINGFACE_TRANSFORMERS_SUPERB/workspace -export XDG_CACHE_HOME= -CUDA_VISIBLE_DEVICES= jupyter lab PyTorch_Huggingface_transformers_SUPERB.ipynb -``` diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/director/director_config.yaml deleted file mode 100644 index 95a214ac0c..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50050 - sample_shape: ['1'] - target_shape: ['1'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/envoy_config.yaml deleted file mode 100644 index f8d614108c..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/envoy_config.yaml +++ /dev/null @@ -1,12 +0,0 @@ -params: - cuda_devices: [0] - -optional_plugin_components: - cuda_device_monitor: - template: openfl.plugins.processing_units_monitor.pynvml_monitor.PynvmlCUDADeviceMonitor - settings: [] - -shard_descriptor: - template: superb_shard_descriptor.SuperbShardDescriptor - params: - rank_worldsize: 1, 1000 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/sd_requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/sd_requirements.txt deleted file mode 100644 index 244f7f5080..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/sd_requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -datasets==1.14 -librosa -numpy==1.22.0 -pynvml diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/start_envoy.sh deleted file mode 100755 index ae9b4c27a0..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config.yaml -dh localhost -dp 50050 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/superb_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/superb_shard_descriptor.py deleted file mode 100644 index 2b38bd85a3..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/envoy/superb_shard_descriptor.py +++ /dev/null @@ -1,101
+0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Superb Shard Descriptor.""" - -from typing import Any -from typing import Dict -from typing import List -from typing import Tuple - -from datasets import load_dataset -from datasets import load_metric - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - - -class SuperbShardDataset(ShardDataset): - """SUPERB Shard dataset class.""" - - def __init__(self, dataset, rank: int = 1, worldsize: int = 1) -> None: - """Initialize Superb shard Dataset.""" - self.rank = rank - self.worldsize = worldsize - self.dataset = dataset - self.x = self.dataset['audio'][self.rank - 1::self.worldsize] - self.y = self.dataset['label'][self.rank - 1::self.worldsize] - - def __getitem__(self, index: int) -> Tuple[Any, Any]: - """Return an item by the index.""" - return self.x[index]['array'], self.y[index] - - def __len__(self) -> int: - """Return the len of the dataset.""" - return len(self.x) - - -class SuperbShardDescriptor(ShardDescriptor): - """Superb Shard descriptor class.""" - - def __init__( - self, - rank_worldsize: str = '1, 1', - **kwargs - ) -> None: - """Initialize SuperbShardDescriptor.""" - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - print('rank and worldsize', self.rank, self.worldsize) - train_set, val_set, test_set = self.download_data() - self.data_by_type = { - 'train': train_set, - 'val': val_set, - 'test': test_set - } - - def get_shard_dataset_types(self) -> List[Dict[str, Any]]: - """Get available shard dataset types.""" - return list(self.data_by_type) - - def get_dataset(self, dataset_type: str = 'train') -> SuperbShardDataset: - """Return a shard dataset by type.""" - if dataset_type not in self.data_by_type: - raise Exception(f'Wrong dataset type: {dataset_type}') - return SuperbShardDataset( - self.data_by_type[dataset_type], - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - return ['1'] - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - return ['1'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Superb dataset, shard number {self.rank}' - f' out of {self.worldsize}') - - def download_data(self) -> Tuple[Tuple[Dict, List], Tuple[Dict, List], Tuple[Dict, List]]: - """Download dataset.""" - dataset = load_dataset('superb', 'ks') - metric = load_metric('accuracy') # noqa - - # Train data - train_set = dataset['train'] - - # Validation data - val_set = dataset['validation'] - - # Test data - test_set = dataset['test'] - - print('Superb data was loaded!') - return train_set, val_set, test_set diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/workspace/PyTorch_Huggingface_transformers_SUPERB.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/workspace/PyTorch_Huggingface_transformers_SUPERB.ipynb deleted file mode 100644 index 3ab32ef11b..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Huggingface_transformers_SUPERB/workspace/PyTorch_Huggingface_transformers_SUPERB.ipynb +++ /dev/null @@ -1,483 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "liquid-jacket", - "metadata": {}, - "source": [ - "# Federated Audio Classification tutorial with 🤗 
Transformers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "alike-sharing", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install \"datasets==1.14\" \"transformers==4.11.3\" \"librosa\" \"torch\" \"ipywidgets\" \"numpy==1.21.5\"" - ] - }, - { - "cell_type": "markdown", - "id": "16986f22", - "metadata": {}, - "source": [ - "# Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4485ac79", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "client_id = \"frontend\"\n", - "director_node_fqdn = \"localhost\"\n", - "director_port = 50050\n", - "\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port,\n", - " tls=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e35802d5", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67ae50de", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "markdown", - "id": "obvious-tyler", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "rubber-address", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import (\n", - " DataInterface,\n", - " FLExperiment,\n", - " ModelInterface,\n", - " TaskInterface,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "sustainable-public", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d9acb53", - "metadata": {}, - "outputs": [], - "source": [ - "import datasets\n", - "import numpy as np\n", - "import torch\n", - "from torch.utils.data import Dataset\n", - "from transformers import (\n", - " AutoFeatureExtractor,\n", - " AutoModelForAudioClassification,\n", - " Trainer,\n", - " TrainingArguments,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1eaecbb1", - "metadata": {}, - "outputs": [], - "source": [ - "model_checkpoint = \"facebook/wav2vec2-base\"\n", - "\n", - "labels = [\n", - " \"yes\",\n", - " \"no\",\n", - " \"up\",\n", - " \"down\",\n", - " \"left\",\n", - " \"right\",\n", - " \"on\",\n", - " \"off\",\n", - " \"stop\",\n", - " \"go\",\n", - " \"_silence_\",\n", - " \"_unknown_\",\n", - "]\n", - "\n", - "label2id, id2label = dict(), dict()\n", - "for i, label in enumerate(labels):\n", - " label2id[label] = str(i)\n", - " id2label[str(i)] = label" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "151fdff2", - "metadata": {}, - "outputs": [], - "source": [ - "feature_extractor = AutoFeatureExtractor.from_pretrained(model_checkpoint)\n", - "max_duration = 1.0\n", - "\n", - "\n", - "def preprocess_function(pre_processed_data):\n", - " audio_arrays = pre_processed_data\n", - " inputs = feature_extractor(\n", - " audio_arrays,\n", - " sampling_rate=feature_extractor.sampling_rate,\n", - " max_length=int(feature_extractor.sampling_rate * max_duration),\n", - " truncation=True,\n", - " )\n", - "\n", - " return inputs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64f37dcf", - "metadata": { - "tags": [] - }, - "outputs": 
[], - "source": [ - "class SuperbShardDataset(Dataset):\n", - " def __init__(self, dataset):\n", - " self._dataset = dataset\n", - "\n", - " def __getitem__(self, index):\n", - " x, y = self._dataset[index]\n", - " x = preprocess_function(x)\n", - " return {\"input_values\": x[\"input_values\"][0], \"labels\": y}\n", - "\n", - " def __len__(self):\n", - " return len(self._dataset)\n", - "\n", - "\n", - "class SuperbFedDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures for sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " self.train_set = SuperbShardDataset(\n", - " self._shard_descriptor.get_dataset(\"train\"),\n", - " )\n", - " self.valid_set = SuperbShardDataset(\n", - " self._shard_descriptor.get_dataset(\"val\"),\n", - " )\n", - " self.test_set = SuperbShardDataset(\n", - " self._shard_descriptor.get_dataset(\"test\"),\n", - " )\n", - "\n", - " def __getitem__(self, index):\n", - " return self.shard_descriptor[index]\n", - "\n", - " def __len__(self):\n", - " return len(self.shard_descriptor)\n", - "\n", - " def get_train_loader(self):\n", - " return self.train_set\n", - "\n", - " def get_valid_loader(self):\n", - " return self.valid_set\n", - "\n", - " def get_train_data_size(self):\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " return len(self.valid_set)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8df35f5", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = SuperbFedDataset()" - ] - }, - { - "cell_type": "markdown", - "id": "caring-distinction", - "metadata": {}, - "source": [ - "### Describe a model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "foreign-gospel", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Download the pretrained model and fine-tune it. 
For classification we use the AutoModelForAudioClassification class.\n", - "\"\"\"\n", - "\n", - "num_labels = len(id2label)\n", - "\n", - "model = AutoModelForAudioClassification.from_pretrained(\n", - " model_checkpoint,\n", - " num_labels=num_labels,\n", - " label2id=label2id,\n", - " id2label=id2label,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d5afa68-4bd3-43d8-a86d-d59b5cad94bd", - "metadata": {}, - "outputs": [], - "source": [ - "from transformers import AdamW\n", - "\n", - "params_to_update = []\n", - "for param in model.parameters():\n", - " if param.requires_grad == True:\n", - " params_to_update.append(param)\n", - "\n", - "optimizer = AdamW(params_to_update, lr=3e-5)" - ] - }, - { - "cell_type": "markdown", - "id": "caroline-passion", - "metadata": {}, - "source": [ - "#### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "handled-teens", - "metadata": {}, - "outputs": [], - "source": [ - "framework_adapter = (\n", - " \"openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin\"\n", - ")\n", - "MI = ModelInterface(\n", - " model=model, optimizer=optimizer, framework_plugin=framework_adapter\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "portuguese-groove", - "metadata": {}, - "source": [ - "### Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5d4ff313-a17f-4119-a4c7-afa898b0f304", - "metadata": {}, - "outputs": [], - "source": [ - "batch_size = 16\n", - "args = TrainingArguments(\n", - " \"finetuned_model\",\n", - " save_strategy=\"epoch\",\n", - " per_device_train_batch_size=batch_size,\n", - " per_device_eval_batch_size=batch_size,\n", - " num_train_epochs=1,\n", - " warmup_ratio=0.1,\n", - " logging_steps=10,\n", - " push_to_hub=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fd011594-f16a-4569-ae4e-26977e94b8c2", - "metadata": {}, - "outputs": [], - "source": [ - "from datasets import load_metric\n", - "\n", - "metric = load_metric(\"accuracy\")\n", - "\n", - "\n", - "def compute_metrics(eval_pred):\n", - " \"\"\"Computes accuracy on a batch of predictions\"\"\"\n", - " predictions = np.argmax(eval_pred.predictions, axis=1)\n", - " return metric.compute(predictions=predictions, references=eval_pred.label_ids)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "increasing-builder", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "import torch.nn as nn\n", - "import tqdm\n", - "\n", - "\n", - "@TI.register_fl_task(\n", - " model=\"model\", data_loader=\"train_loader\", device=\"device\", optimizer=\"optimizer\"\n", - ")\n", - "def train(model, train_loader, optimizer, device):\n", - "\n", - " print(f\"\\n\\n TASK TRAIN GOT DEVICE {device}\\n\\n\")\n", - "\n", - " trainer = Trainer(\n", - " model.to(device),\n", - " args,\n", - " train_dataset=train_loader,\n", - " tokenizer=feature_extractor,\n", - " optimizers=(optimizer, None),\n", - " compute_metrics=compute_metrics,\n", - " )\n", - " train_metrics = trainer.train()\n", - " return {\"train_loss\": train_metrics.metrics[\"train_loss\"]}\n", - "\n", - "\n", - "@TI.register_fl_task(model=\"model\", data_loader=\"val_loader\", device=\"device\")\n", - "def validate(model, val_loader, device):\n", - "\n", - " print(f\"\\n\\n TASK VALIDATE GOT DEVICE {device}\\n\\n\")\n", - "\n", - " trainer = Trainer(\n", - " model.to(device),\n", - " args,\n", - " 
eval_dataset=val_loader,\n", - " tokenizer=feature_extractor,\n", - " compute_metrics=compute_metrics,\n", - " )\n", - " eval_metrics = trainer.evaluate()\n", - " return {\"eval_accuracy\": eval_metrics[\"eval_accuracy\"]}" - ] - }, - { - "cell_type": "markdown", - "id": "derived-bride", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "mature-renewal", - "metadata": {}, - "outputs": [], - "source": [ - "experiment_name = \"HF_audio_test_experiment\"\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "lightweight-causing", - "metadata": {}, - "outputs": [], - "source": [ - "fl_experiment.start(\n", - " model_provider=MI,\n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=2,\n", - " opt_treatment=\"CONTINUE_GLOBAL\",\n", - " device_assignment_policy=\"CUDA_PREFERRED\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f1543a36", - "metadata": {}, - "outputs": [], - "source": [ - "fl_experiment.stream_metrics()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/README.md deleted file mode 100644 index 2b1b03bd25..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# PyTorch_Kvasir_UNet - -## **How to run this tutorial (without TLS and locally as a simulation):** -
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) - -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_Kvasir_UNet - ``` - -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
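Optionally, confirm the director is reachable before moving on. The check below simply mirrors the opening cells of the tutorial notebook (same `Federation` arguments; it assumes the director from step 3 is listening on `localhost:50050` with TLS disabled):

```python
# Optional sanity check, mirroring the notebook's opening cells:
# connect to the running director and list the envoys it knows about.
from openfl.interface.interactive_api.federation import Federation

federation = Federation(
    client_id='frontend',
    director_node_fqdn='localhost',
    director_port=50050,
    tls=False,
)

# Envoys that have registered with the director appear in the shard registry.
print(federation.get_shard_registry())
```

An empty registry just means no envoy has connected yet; that happens in step 4.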
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r sd_requirements.txt -``` - - If you have GPUs: -```sh -./start_envoy.sh env_one envoy_config.yaml -``` - - For no GPUs, use: -```sh -./start_envoy.sh env_one envoy_config_no_gpu.yaml -``` - - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Repeat step 4 instructions above but change "env_one" name to "env_two" (or another name of your choice). - -
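Each envoy config points its shard descriptor at the same dataset but a different `rank_worldsize` (the YAML files appear later in this diff). A minimal sketch of the strided slicing those shard descriptors apply, with a toy file list standing in for the Kvasir images:

```python
# rank/worldsize sharding as implemented by the shard descriptors in this diff:
# the envoy with a given rank keeps every worldsize-th sample, starting at rank - 1.
def shard(items, rank=1, worldsize=1):
    return items[rank - 1::worldsize]

names = [f'img_{i}.jpg' for i in range(10)]
print(shard(names, rank=1, worldsize=10))  # ['img_0.jpg']
print(shard(names, rank=2, worldsize=10))  # ['img_1.jpg']
```

This is why two envoys configured with different ranks out of the same worldsize see disjoint subsets of the data.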
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab PyTorch_Kvasir_UNet.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the PyTorch_Kvasir_UNet.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/director/director_config.yaml deleted file mode 100644 index 860e0436f5..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/director/director_config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50050 - sample_shape: ['300', '400', '3'] - target_shape: ['300', '400'] - envoy_health_check_period: 5 # in seconds diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/director/start_director_with_tls.sh deleted file mode 100755 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/envoy_config.yaml deleted file mode 100644 index aae095f4e7..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/envoy_config.yaml +++ /dev/null @@ -1,14 +0,0 @@ -params: - cuda_devices: [0,2] - -optional_plugin_components: - cuda_device_monitor: - template: openfl.plugins.processing_units_monitor.pynvml_monitor.PynvmlCUDADeviceMonitor - settings: [] - -shard_descriptor: - template: kvasir_shard_descriptor.KvasirShardDescriptor - params: - data_folder: kvasir_data - rank_worldsize: 1,10 - enforce_image_hw: '300,400' diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/envoy_config_no_gpu.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/envoy_config_no_gpu.yaml deleted file mode 100644 index 1c121e534a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/envoy_config_no_gpu.yaml +++ /dev/null @@ -1,12 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: kvasir_shard_descriptor.KvasirShardDescriptor - params: - 
data_folder: kvasir_data - rank_worldsize: 2,10 - enforce_image_hw: '300,400' - \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor.py deleted file mode 100644 index 2a83543250..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Kvasir shard descriptor.""" - -import os -from pathlib import Path - -import numpy as np -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor -from openfl.utilities import validate_file_hash - - -class KvasirShardDataset(ShardDataset): - """Kvasir Shard dataset class.""" - - def __init__(self, dataset_dir: Path, rank=1, worldsize=1, enforce_image_hw=None): - """Initialize KvasirShardDataset.""" - self.rank = rank - self.worldsize = worldsize - self.dataset_dir = dataset_dir - self.enforce_image_hw = enforce_image_hw - self.images_path = self.dataset_dir / 'segmented-images' / 'images' - self.masks_path = self.dataset_dir / 'segmented-images' / 'masks' - - self.images_names = [ - img_name - for img_name in sorted(os.listdir(self.images_path)) - if len(img_name) > 3 and img_name[-3:] == 'jpg' - ] - # Sharding - self.images_names = self.images_names[self.rank - 1::self.worldsize] - - def __getitem__(self, index): - """Return a item by the index.""" - name = self.images_names[index] - # Reading data - img = Image.open(self.images_path / name) - mask = Image.open(self.masks_path / name) - if self.enforce_image_hw is not None: - # If we need to resize data - # PIL accepts (w,h) tuple, not (h,w) - img = img.resize(self.enforce_image_hw[::-1]) - mask = mask.resize(self.enforce_image_hw[::-1]) - img = np.asarray(img) - mask = np.asarray(mask) - assert img.shape[2] == 3 - - return img, mask[:, :, 0].astype(np.uint8) - - def __len__(self): - """Return the len of the dataset.""" - return len(self.images_names) - - -class KvasirShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__(self, data_folder: str = 'kvasir_data', - rank_worldsize: str = '1,1', - enforce_image_hw: str = None) -> None: - """Initialize KvasirShardDescriptor.""" - super().__init__() - # Settings for sharding the dataset - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.data_folder = Path.cwd() / data_folder - self.download_data(self.data_folder) - - # Settings for resizing data - self.enforce_image_hw = None - if enforce_image_hw is not None: - self.enforce_image_hw = tuple(int(size) for size in enforce_image_hw.split(',')) - - # Calculating data and target shapes - ds = self.get_dataset() - sample, target = ds[0] - self._sample_shape = [str(dim) for dim in sample.shape] - self._target_shape = [str(dim) for dim in target.shape] - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - return KvasirShardDataset( - dataset_dir=self.data_folder, - rank=self.rank, - worldsize=self.worldsize, - enforce_image_hw=self.enforce_image_hw - ) - - @staticmethod - def download_data(data_folder): - """Download data.""" - zip_file_path = data_folder / 'kvasir.zip' - os.makedirs(data_folder, exist_ok=True) - os.system( - 'wget -nc' - " 
'https://datasets.simula.no/downloads/" - "hyper-kvasir/hyper-kvasir-segmented-images.zip'" - f' -O {zip_file_path.relative_to(Path.cwd())}' - ) - zip_sha384 = ('66cd659d0e8afd8c83408174' - '1ade2b75dada8d4648b816f2533c8748b1658efa3d49e205415d4116faade2c5810e241e') - validate_file_hash(zip_file_path, zip_sha384) - os.system(f'unzip -n {zip_file_path.relative_to(Path.cwd())}' - f' -d {data_folder.relative_to(Path.cwd())}') - - @property - def sample_shape(self): - """Return the sample shape info.""" - return self._sample_shape - - @property - def target_shape(self): - """Return the target shape info.""" - return self._target_shape - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Kvasir dataset, shard number {self.rank} ' - f'out of {self.worldsize}') - - -if __name__ == '__main__': - from openfl.interface.cli import setup_logging - - setup_logging() - - data_folder = 'data' - rank_worldsize = '1,100' - enforce_image_hw = '529,622' - - kvasir_sd = KvasirShardDescriptor( - data_folder, - rank_worldsize=rank_worldsize, - enforce_image_hw=enforce_image_hw) - - print(kvasir_sd.dataset_description) - print(kvasir_sd.sample_shape, kvasir_sd.target_shape) - - from openfl.component.envoy.envoy import Envoy - - shard_name = 'one' - director_host = 'localhost' - director_port = 50051 - - keeper = Envoy( - shard_name=shard_name, - director_host=director_host, - director_port=director_port, - shard_descriptor=kvasir_sd, - tls=True, - root_certificate='./cert/root_ca.crt', - private_key='./cert/one.key', - certificate='./cert/one.crt', - ) - - keeper.start() diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor_with_data_splitter.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor_with_data_splitter.py deleted file mode 100644 index 09bcde87c0..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor_with_data_splitter.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Kvasir shard descriptor.""" - - -import os -from pathlib import Path - -import numpy as np -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor -from openfl.utilities import validate_file_hash -from openfl.utilities.data_splitters import RandomNumPyDataSplitter - - -class KvasirShardDataset(ShardDataset): - """Kvasir Shard dataset class.""" - - def __init__(self, dataset_dir: Path, rank=1, worldsize=1, enforce_image_hw=None): - """Initialize KvasirShardDataset.""" - self.rank = rank - self.worldsize = worldsize - self.dataset_dir = dataset_dir - self.enforce_image_hw = enforce_image_hw - - self.images_path = self.dataset_dir / 'segmented-images' / 'images' - self.masks_path = self.dataset_dir / 'segmented-images' / 'masks' - - self.images_names = [ - img_name - for img_name in sorted(os.listdir(self.images_path)) - if len(img_name) > 3 and img_name[-3:] == 'jpg' - ] - # Sharding - data_splitter = RandomNumPyDataSplitter() - shard_idx = data_splitter.split(self.images_names, self.worldsize)[self.rank] - self.images_names = [self.images_names[i] for i in shard_idx] - - def __getitem__(self, index): - """Return a item by the index.""" - name = self.images_names[index] - # Reading data - img = Image.open(self.images_path / name) - mask = 
Image.open(self.masks_path / name) - if self.enforce_image_hw is not None: - # If we need to resize data - # PIL accepts (w,h) tuple, not (h,w) - img = img.resize(self.enforce_image_hw[::-1]) - mask = mask.resize(self.enforce_image_hw[::-1]) - img = np.asarray(img) - mask = np.asarray(mask) - assert img.shape[2] == 3 - - return img, mask[:, :, 0].astype(np.uint8) - - def __len__(self): - """Return the len of the dataset.""" - return len(self.images_names) - - -class KvasirShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__(self, data_folder: str = 'kvasir_data', - rank_worldsize: str = '1,1', - enforce_image_hw: str = None) -> None: - """Initialize KvasirShardDescriptor.""" - super().__init__() - # Settings for sharding the dataset - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.data_folder = Path.cwd() / data_folder - self.download_data(self.data_folder) - - # Settings for resizing data - self.enforce_image_hw = None - if enforce_image_hw is not None: - self.enforce_image_hw = tuple(int(size) for size in enforce_image_hw.split(',')) - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - return KvasirShardDataset( - dataset_dir=self.data_folder, - rank=self.rank, - worldsize=self.worldsize, - enforce_image_hw=self.enforce_image_hw - ) - - @staticmethod - def download_data(data_folder): - """Download data.""" - zip_file_path = data_folder / 'kvasir.zip' - os.makedirs(data_folder, exist_ok=True) - os.system('wget -nc' - " 'https://datasets.simula.no/downloads/hyper-kvasir/" - "hyper-kvasir-segmented-images.zip'" - f' -O {zip_file_path.relative_to(Path.cwd())}') - zip_sha384 = ('66cd659d0e8afd8c83408174' - '1ade2b75dada8d4648b816f2533c8748b1658efa3d49e205415d4116faade2c5810e241e') - validate_file_hash(zip_file_path, zip_sha384) - os.system(f'unzip -n {zip_file_path.relative_to(Path.cwd())}' - f' -d {data_folder.relative_to(Path.cwd())}') - - @property - def sample_shape(self): - """Return the sample shape info.""" - return ['300', '400', '3'] - - @property - def target_shape(self): - """Return the target shape info.""" - return ['300', '400'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return f'Kvasir dataset, shard number {self.rank} out of {self.worldsize}' diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/sd_requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/sd_requirements.txt deleted file mode 100644 index 772cf9f531..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/sd_requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy -pillow -pynvml diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/start_envoy.sh deleted file mode 100755 index ae9b4c27a0..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config.yaml -dh localhost -dp 50050 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/start_envoy_with_tls.sh deleted file mode 100755 index 97e3f4d893..0000000000 --- 
a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -DIRECTOR_FQDN=$2 - -fx envoy start -n "$ENVOY_NAME" --envoy-config-path envoy_config.yaml -dh "$DIRECTOR_FQDN" -dp 50050 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/workspace/PyTorch_Kvasir_UNet.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/workspace/PyTorch_Kvasir_UNet.ipynb deleted file mode 100644 index ed583e3194..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/workspace/PyTorch_Kvasir_UNet.ipynb +++ /dev/null @@ -1,610 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "liquid-jacket", - "metadata": {}, - "source": [ - "# Federated Kvasir with Director example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "alike-sharing", - "metadata": {}, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "!pip install torchvision==0.8.1" - ] - }, - { - "cell_type": "markdown", - "id": "16986f22", - "metadata": {}, - "source": [ - "# Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4485ac79", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identifier that was used in the signed certificate\n", - "client_id = 'frontend'\n", - "director_node_fqdn = 'localhost'\n", - "director_port = 50050\n", - "\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS, they must provide the CA root chain and a signed key pair to the federation interface\n", - "# cert_chain = 'cert/root_ca.crt'\n", - "# api_certificate = 'cert/frontend.crt'\n", - "# api_private_key = 'cert/frontend.key'\n", - "\n", - "# federation = Federation(\n", - "# client_id=client_id,\n", - "# director_node_fqdn=director_node_fqdn,\n", - "# director_port=director_port,\n", - "# tls=True,\n", - "# cert_chain=cert_chain,\n", - "# api_cert=api_certificate,\n", - "# api_private_key=api_private_key\n", - "# )\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine the local FQDN automatically\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port,\n", - " tls=False\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e35802d5", - "metadata": {}, - "outputs": [], - "source": [ - "# import time\n", - "# while True:\n", - "# shard_registry = federation.get_shard_registry()\n", - "# print(shard_registry)\n", - "# time.sleep(5)\n", - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67ae50de", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "920216d3", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - 
"dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "id": "obvious-tyler", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "rubber-address", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "sustainable-public", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "markdown", - "id": "unlike-texas", - "metadata": {}, - "source": [ - "We extract User dataset class implementation.\n", - "Is it convinient?\n", - "What if the dataset is not a class?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64f37dcf", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import PIL\n", - "import numpy as np\n", - "from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler\n", - "from torchvision import transforms as tsf\n", - "\n", - "\n", - "class KvasirShardDataset(Dataset):\n", - " \n", - " def __init__(self, dataset):\n", - " self._dataset = dataset\n", - " \n", - " # Prepare transforms\n", - " self.img_trans = tsf.Compose([\n", - " tsf.ToPILImage(),\n", - " tsf.Resize((332, 332)),\n", - " tsf.ToTensor(),\n", - " tsf.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])\n", - " self.mask_trans = tsf.Compose([\n", - " tsf.ToPILImage(),\n", - " tsf.Resize((332, 332), interpolation=PIL.Image.NEAREST),\n", - " tsf.ToTensor()])\n", - " \n", - " def __getitem__(self, index):\n", - " img, mask = self._dataset[index]\n", - " img = self.img_trans(img).numpy()\n", - " mask = self.mask_trans(mask).numpy()\n", - " return img, mask\n", - " \n", - " def __len__(self):\n", - " return len(self._dataset)\n", - "\n", - " \n", - "\n", - "# Now you can implement you data loaders using dummy_shard_desc\n", - "class KvasirSD(DataInterface):\n", - "\n", - " def __init__(self, validation_fraction=1/8, **kwargs):\n", - " super().__init__(**kwargs)\n", - " \n", - " self.validation_fraction = validation_fraction\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " self._shard_dataset = KvasirShardDataset(shard_descriptor.get_dataset('train'))\n", - " \n", - " validation_size = max(1, int(len(self._shard_dataset) * self.validation_fraction))\n", - " \n", - " self.train_indeces = np.arange(len(self._shard_dataset) - validation_size)\n", - " self.val_indeces = np.arange(len(self._shard_dataset) - validation_size, len(self._shard_dataset))\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " train_sampler = SubsetRandomSampler(self.train_indeces)\n", - " return DataLoader(\n", - " self._shard_dataset,\n", - " num_workers=8,\n", - " batch_size=self.kwargs['train_bs'],\n", - " 
sampler=train_sampler\n", - " )\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " val_sampler = SubsetRandomSampler(self.val_indeces)\n", - " return DataLoader(\n", - " self._shard_dataset,\n", - " num_workers=8,\n", - " batch_size=self.kwargs['valid_bs'],\n", - " sampler=val_sampler\n", - " )\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_indeces)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.val_indeces)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8df35f5", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = KvasirSD(train_bs=4, valid_bs=8)\n", - "fed_dataset.shard_descriptor = dummy_shard_desc\n", - "for i, (sample, target) in enumerate(fed_dataset.get_train_loader()):\n", - " print(sample.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "caring-distinction", - "metadata": {}, - "source": [ - "### Describe a model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "visible-victor", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "foreign-gospel", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "UNet model definition\n", - "\"\"\"\n", - "from layers import soft_dice_coef, soft_dice_loss, DoubleConv, Down, Up\n", - "\n", - "\n", - "class UNet(nn.Module):\n", - " def __init__(self, n_channels=3, n_classes=1):\n", - " super().__init__()\n", - " self.inc = DoubleConv(n_channels, 64)\n", - " self.down1 = Down(64, 128)\n", - " self.down2 = Down(128, 256)\n", - " self.down3 = Down(256, 512)\n", - " self.up1 = Up(512, 256)\n", - " self.up2 = Up(256, 128)\n", - " self.up3 = Up(128, 64)\n", - " self.outc = nn.Conv2d(64, n_classes, 1)\n", - "\n", - " def forward(self, x):\n", - " x1 = self.inc(x)\n", - " x2 = self.down1(x1)\n", - " x3 = self.down2(x2)\n", - " x4 = self.down3(x3)\n", - " x = self.up1(x4, x3)\n", - " x = self.up2(x, x2)\n", - " x = self.up3(x, x1)\n", - " x = self.outc(x)\n", - " x = torch.sigmoid(x)\n", - " return x\n", - " \n", - "model_unet = UNet()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "greater-activation", - "metadata": {}, - "outputs": [], - "source": [ - "optimizer_adam = optim.Adam(model_unet.parameters(), lr=1e-4)" - ] - }, - { - "cell_type": "markdown", - "id": "caroline-passion", - "metadata": {}, - "source": [ - "#### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "handled-teens", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model_unet, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model_unet)" - ] - }, - { - "cell_type": "markdown", - "id": "portuguese-groove", - "metadata": {}, - "source": [ - "### Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "increasing-builder", - "metadata": {}, - "outputs": [], - "source": [ - 
"TI = TaskInterface()\n", - "import torch\n", - "\n", - "import tqdm\n", - "from openfl.interface.aggregation_functions import Median\n", - "\n", - "# The Interactive API supports registering functions definied in main module or imported.\n", - "def function_defined_in_notebook(some_parameter):\n", - " print(f'Also I accept a parameter and it is {some_parameter}')\n", - "\n", - "#The Interactive API supports overriding of the aggregation function\n", - "aggregation_function = Median()\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@TI.add_kwargs(**{'some_parameter': 42})\n", - "@TI.register_fl_task(model='unet_model', data_loader='train_loader', \\\n", - " device='device', optimizer='optimizer') \n", - "@TI.set_aggregation_function(aggregation_function)\n", - "def train(unet_model, train_loader, optimizer, device, loss_fn=soft_dice_loss, some_parameter=None):\n", - " \n", - " \"\"\" \n", - " The following constructions, that may lead to resource race\n", - " is no longer needed:\n", - " \n", - " if not torch.cuda.is_available():\n", - " device = 'cpu'\n", - " else:\n", - " device = 'cuda'\n", - " \n", - " \"\"\"\n", - "\n", - " print(f'\\n\\n TASK TRAIN GOT DEVICE {device}\\n\\n')\n", - " \n", - " function_defined_in_notebook(some_parameter)\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " \n", - " unet_model.train()\n", - " unet_model.to(device)\n", - "\n", - " losses = []\n", - "\n", - " for data, target in train_loader:\n", - " data, target = torch.tensor(data).to(device), torch.tensor(\n", - " target).to(device, dtype=torch.float32)\n", - " optimizer.zero_grad()\n", - " output = unet_model(data)\n", - " loss = loss_fn(output=output, target=target)\n", - " loss.backward()\n", - " optimizer.step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - " \n", - " return {'train_loss': np.mean(losses),}\n", - "\n", - "\n", - "@TI.register_fl_task(model='unet_model', data_loader='val_loader', device='device') \n", - "def validate(unet_model, val_loader, device):\n", - " print(f'\\n\\n TASK VALIDATE GOT DEVICE {device}\\n\\n')\n", - " \n", - " unet_model.eval()\n", - " unet_model.to(device)\n", - " \n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - "\n", - " val_score = 0\n", - " total_samples = 0\n", - "\n", - " with torch.no_grad():\n", - " for data, target in val_loader:\n", - " samples = target.shape[0]\n", - " total_samples += samples\n", - " data, target = torch.tensor(data).to(device), \\\n", - " torch.tensor(target).to(device, dtype=torch.int64)\n", - " output = unet_model(data)\n", - " val = soft_dice_coef(output, target)\n", - " val_score += val.sum().cpu().numpy()\n", - " \n", - " return {'dice_coef': val_score / total_samples,}" - ] - }, - { - "cell_type": "markdown", - "id": "derived-bride", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "mature-renewal", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = 'kvasir_test_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "lightweight-causing", - "metadata": {}, - "outputs": [], - "source": [ - "# If I use autoreload I got a pickling error\n", - "\n", - "# The following command zips the workspace and python requirements to be transfered to collaborator 
nodes\n", - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=2,\n", - " opt_treatment='CONTINUE_GLOBAL',\n", - " device_assignment_policy='CUDA_PREFERRED')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f1543a36", - "metadata": {}, - "outputs": [], - "source": [ - "# If user want to stop IPython session, then reconnect and check how experiment is going \n", - "# fl_experiment.restore_experiment_state(MI)\n", - "\n", - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "markdown", - "id": "8c30b301", - "metadata": {}, - "source": [ - "## Now we validate the best model!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55acff59", - "metadata": {}, - "outputs": [], - "source": [ - "best_model = fl_experiment.get_best_model()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9479fb7f", - "metadata": {}, - "outputs": [], - "source": [ - "# We remove exremove_experiment_datamove_experiment_datamove_experiment_datariment data from director\n", - "fl_experiment.remove_experiment_data()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "75c8aeab", - "metadata": {}, - "outputs": [], - "source": [ - "best_model.inc.conv[0].weight\n", - "# model_unet.inc.conv[0].weight" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2acb7e6", - "metadata": {}, - "outputs": [], - "source": [ - "# Validating initial model\n", - "validate(initial_model, fed_dataset.get_valid_loader(), 'cpu')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c12ca93f", - "metadata": {}, - "outputs": [], - "source": [ - "# Validating trained model\n", - "validate(best_model, fed_dataset.get_valid_loader(), 'cpu')" - ] - }, - { - "cell_type": "markdown", - "id": "1e6734f6", - "metadata": {}, - "source": [ - "## We can tune model further!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3940e75e", - "metadata": {}, - "outputs": [], - "source": [ - "MI = ModelInterface(model=best_model, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "fl_experiment.start(model_provider=MI, task_keeper=TI, data_loader=fed_dataset, rounds_to_train=4, \\\n", - " opt_treatment='CONTINUE_GLOBAL')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bd786d2", - "metadata": {}, - "outputs": [], - "source": [ - "best_model = fl_experiment.get_best_model()\n", - "# Validating trained model\n", - "validate(best_model, fed_dataset.get_valid_loader(), 'cpu')" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/workspace/layers.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/workspace/layers.py deleted file mode 100644 index 12d913c15e..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Kvasir_UNet/workspace/layers.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Layers for Unet model.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -def soft_dice_loss(output, target): - """Calculate loss.""" - num = target.size(0) - m1 = output.view(num, -1) - m2 = target.view(num, -1) - intersection = m1 * m2 - score = 2.0 * (intersection.sum(1) + 1) / (m1.sum(1) + m2.sum(1) + 1) - score = 1 - score.sum() / num - return score - - -def soft_dice_coef(output, target): - """Calculate soft DICE coefficient.""" - num = target.size(0) - m1 = output.view(num, -1) - m2 = target.view(num, -1) - intersection = m1 * m2 - score = 2.0 * (intersection.sum(1) + 1) / (m1.sum(1) + m2.sum(1) + 1) - return score.sum() - - -class DoubleConv(nn.Module): - """Pytorch double conv class.""" - - def __init__(self, in_ch, out_ch): - """Initialize layer.""" - super(DoubleConv, self).__init__() - self.in_ch = in_ch - self.out_ch = out_ch - self.conv = nn.Sequential( - nn.Conv2d(in_ch, out_ch, 3, padding=1), - nn.BatchNorm2d(out_ch), - nn.ReLU(inplace=True), - nn.Conv2d(out_ch, out_ch, 3, padding=1), - nn.BatchNorm2d(out_ch), - nn.ReLU(inplace=True), - ) - - def forward(self, x): - """Do forward pass.""" - x = self.conv(x) - return x - - -class Down(nn.Module): - """Pytorch nn module subclass.""" - - def __init__(self, in_ch, out_ch): - """Initialize layer.""" - super(Down, self).__init__() - self.mpconv = nn.Sequential( - nn.MaxPool2d(2), - DoubleConv(in_ch, out_ch) - ) - - def forward(self, x): - """Do forward pass.""" - x = self.mpconv(x) - return x - - -class Up(nn.Module): - """Pytorch nn module subclass.""" - - def __init__(self, in_ch, out_ch, bilinear=False): - """Initialize layer.""" - super(Up, self).__init__() - self.in_ch = in_ch - self.out_ch = out_ch - if bilinear: - self.up = nn.Upsample( - scale_factor=2, - mode='bilinear', - align_corners=True - ) - else: - self.up = nn.ConvTranspose2d(in_ch, in_ch // 2, 2, stride=2) - self.conv = DoubleConv(in_ch, out_ch) - - def forward(self, x1, x2): - """Do forward pass.""" - x1 = self.up(x1) - diff_y = x2.size()[2] - x1.size()[2] - diff_x = x2.size()[3] - x1.size()[3] - - x1 = F.pad( - x1, - (diff_x // 2, diff_x - diff_x // 2, diff_y // 2, diff_y - diff_y // 2) - ) - - x = torch.cat([x2, x1], dim=1) - x = self.conv(x) - return x diff --git 
a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/README.md deleted file mode 100755 index f5c53d8095..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/README.md +++ /dev/null @@ -1,129 +0,0 @@ -# PyTorch Lightning tutorial for a Generative Adversarial Network (GAN) on the MNIST dataset - -## **I. About the model: Generative Adversarial Networks (GANs)** - -[Generative Adversarial Networks](https://arxiv.org/abs/1406.2661), or GANs, were introduced to the -machine learning community by Ian J. Goodfellow in 2014. The idea is to generate real-looking -samples or images that resemble the training data. A GAN has three primary components: a generator -model for generating new data from random data (noise), a discriminator model for classifying -whether the generated data is real or fake, and the adversarial training that pits them against each -other. The fundamental nature of these dual networks is to outplay each other until the generator -starts generating real-looking samples that the discriminator fails to differentiate. - -
-
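The adversarial loop described above fits in a few lines of plain PyTorch. The following is an illustrative sketch only, not this tutorial's code (the tutorial builds the equivalent loop with PyTorch Lightning):

```python
# One adversarial round in miniature (illustrative only).
import torch
import torch.nn as nn

latent_dim = 100
G = nn.Sequential(nn.Linear(latent_dim, 784), nn.Tanh())  # generator: noise -> fake sample
D = nn.Sequential(nn.Linear(784, 1), nn.Sigmoid())        # discriminator: sample -> P(real)
opt_g = torch.optim.Adam(G.parameters(), lr=2e-4)
opt_d = torch.optim.Adam(D.parameters(), lr=2e-4)
bce = nn.BCELoss()

real = torch.rand(16, 784)  # stand-in for a batch of real images
z = torch.randn(16, latent_dim)

# Generator step: fool the discriminator into labeling fakes as real.
opt_g.zero_grad()
loss_g = bce(D(G(z)), torch.ones(16, 1))
loss_g.backward()
opt_g.step()

# Discriminator step: label real samples as real and (detached) fakes as fake.
opt_d.zero_grad()
loss_d = bce(D(real), torch.ones(16, 1)) + bce(D(G(z).detach()), torch.zeros(16, 1))
loss_d.backward()
opt_d.step()
```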
- -## **II. About the framework: PyTorch Lightning** - -[PyTorch Lightning](https://www.pytorchlightning.ai/) is a framework built on top of PyTorch that -allows models to be scaled without the boilerplate. - -
-
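A minimal, generic example of what "without the boilerplate" means in practice (not code from this tutorial): the module declares only the model, the loss, and the optimizer, while Lightning's `Trainer` supplies the training loop, device placement, and checkpointing.

```python
import torch
import torch.nn.functional as F
from pytorch_lightning import LightningModule

class LitClassifier(LightningModule):
    """Only model, loss, and optimizer are declared; Trainer owns the loop."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(28 * 28, 10)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.layer(x.view(x.size(0), -1))
        return F.cross_entropy(logits, y)  # backward() and optimizer.step() are handled by Lightning

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)

# Usage: pytorch_lightning.Trainer(max_epochs=1).fit(LitClassifier(), train_dataloader)
```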
- -## **III. About the dataset: MNIST** - -The [MNIST](http://yann.lecun.com/exdb/mnist/) database of handwritten digits has a -training set of 60,000 examples and a test set of 10,000 examples. It is a subset of a larger set -available from NIST. The digits have been size-normalized and centered in a fixed-size image. - -
-
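For reference, the shard descriptor later in this diff obtains the data through `torchvision`, which downloads and caches MNIST on first use:

```python
from torchvision import datasets

# Same call the MnistShardDescriptor below uses in download_data().
train_data = datasets.MNIST('data', train=True, download=True)
val_data = datasets.MNIST('data', train=False, download=True)
print(len(train_data), len(val_data))  # 60000 10000
```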
- -## **IV. Using multiple optimizers** - -The example uses two different optimizers: one for the discriminator and one for the generator. -A [plugin](workspace/plugin_for_multiple_optimizers.py) has been added to support multiple -optimizers with OpenFL. Note that this plugin is NOT required in order to use the PyTorch -Lightning framework with a single optimizer. - -
-
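The plugin itself lives in `workspace/plugin_for_multiple_optimizers.py` and is not reproduced in this hunk. On the Lightning side (the 1.x-style API this tutorial era uses), a two-optimizer module looks roughly like the sketch below; the linear `generator` and `discriminator` are placeholders, not the tutorial's real networks:

```python
import torch
import torch.nn.functional as F
from pytorch_lightning import LightningModule

class TwoOptimizerGAN(LightningModule):
    def __init__(self, latent_dim=100):
        super().__init__()
        self.latent_dim = latent_dim
        self.generator = torch.nn.Linear(latent_dim, 784)  # placeholder network
        self.discriminator = torch.nn.Linear(784, 1)       # placeholder network

    def training_step(self, batch, batch_idx, optimizer_idx):
        imgs, _ = batch
        imgs = imgs.view(imgs.size(0), -1)
        z = torch.randn(imgs.size(0), self.latent_dim, device=self.device)
        if optimizer_idx == 0:
            # Generator turn: make the discriminator score fakes as real.
            logits = self.discriminator(self.generator(z))
            return F.binary_cross_entropy_with_logits(logits, torch.ones_like(logits))
        # Discriminator turn: score real as real, detached fakes as fake.
        real = self.discriminator(imgs)
        fake = self.discriminator(self.generator(z).detach())
        return (F.binary_cross_entropy_with_logits(real, torch.ones_like(real))
                + F.binary_cross_entropy_with_logits(fake, torch.zeros_like(fake)))

    def configure_optimizers(self):
        # Returning two optimizers is what makes Lightning 1.x call training_step
        # with optimizer_idx, alternating between them.
        opt_g = torch.optim.Adam(self.generator.parameters(), lr=2e-4)
        opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=2e-4)
        return [opt_g, opt_d]
```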
- -## **V. Training the Generator and Discriminator models separately** - -Currently, the tutorial shows how to train both the generator and the discriminator models -in parallel. Individual models can be trained as well. To train only the generator, the flag -'train_gen_only' should be set to 1; to train only the discriminator, 'train_disc_only' should -be set to 1. - -
-
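The hunks below don't show where these flags are consumed, so the following is a hypothetical sketch of the gating they imply; the flag names come from the text above, while the index convention (0 = generator, 1 = discriminator) and the helper function are assumptions:

```python
train_gen_only = 0   # set to 1 to train only the generator (assumption: 0/1 flags)
train_disc_only = 0  # set to 1 to train only the discriminator

def optimizers_to_run():
    """Pick which optimizer indices participate this round (assumed: 0 = gen, 1 = disc)."""
    if train_gen_only:
        return [0]
    if train_disc_only:
        return [1]
    return [0, 1]  # default: adversarial training of both models

print(optimizers_to_run())  # [0, 1]
```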
- -## **VI. Links** - -* [Original GAN paper](https://arxiv.org/abs/1406.2661) -* [Original PyTorch Lightning code](https://pytorch-lightning.readthedocs.io/en/stable/notebooks/lightning_examples/basic-gan.html) - -
-
- -## **VII. How to run this tutorial (without TLS and locally as a simulation):** - -
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) - -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_Lightning_MNIST_GAN - ``` - -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r sd_requirements.txt -``` - - If you have GPUs: -```sh -./start_envoy.sh env_one envoy_config.yaml -``` - - For no GPUs, use: -```sh -./start_envoy.sh env_one envoy_config_no_gpu.yaml -``` - - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Repeat step 4 instructions above but change "env_one" name to "env_two" (or another name of your choice). - -
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab PyTorch_Lightning_GAN.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the PyTorch_Lightning_GAN.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/director/director_config.yaml deleted file mode 100644 index 91a33c3d2d..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50050 - sample_shape: ['28','28'] - target_shape: ['1'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/envoy_config.yaml deleted file mode 100644 index bcc1953180..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/envoy_config.yaml +++ /dev/null @@ -1,12 +0,0 @@ -params: - cuda_devices: [0,2] - -optional_plugin_components: - cuda_device_monitor: - template: openfl.plugins.processing_units_monitor.pynvml_monitor.PynvmlCUDADeviceMonitor - settings: [] - -shard_descriptor: - template: mnist_shard_descriptor.MnistShardDescriptor - params: - rank_worldsize: 1, 2 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/envoy_config_no_gpu.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/envoy_config_no_gpu.yaml deleted file mode 100644 index 0923271823..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/envoy_config_no_gpu.yaml +++ /dev/null @@ -1,7 +0,0 @@ -params: - cuda_devices: [] - -shard_descriptor: - template: mnist_shard_descriptor.MnistShardDescriptor - params: - rank_worldsize: 1,2 \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/mnist_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/mnist_shard_descriptor.py deleted file mode 100644 index bf298bb2c5..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/mnist_shard_descriptor.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Mnist Shard Descriptor.""" - -import logging -from typing import Any -from typing 
import Dict -from typing import List -from typing import Tuple - -from torchvision import datasets - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class MnistShardDataset(ShardDataset): - """Mnist Shard dataset class.""" - - def __init__(self, x, y, data_type, rank: int = 1, worldsize: int = 1) -> None: - """Initialize Mnist shard Dataset.""" - self.data_type = data_type - self.rank = rank - self.worldsize = worldsize - - self.x = x[self.rank - 1::self.worldsize] - self.y = y[self.rank - 1::self.worldsize] - - def __getitem__(self, index: int) -> Tuple[Any, Any]: - """Return an item by the index.""" - return self.x[index], self.y[index] - - def __len__(self) -> int: - """Return the len of the dataset.""" - return len(self.x) - - -class MnistShardDescriptor(ShardDescriptor): - """Mnist Shard descriptor class.""" - - def __init__( - self, - rank_worldsize: str = '1, 1', - **kwargs - ) -> None: - """Initialize MnistShardDescriptor.""" - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - (x_train, y_train), (x_val, y_val) = self.download_data() - self.data_by_type = { - 'train': (x_train, y_train), - 'val': (x_val, y_val) - } - - def get_shard_dataset_types(self) -> List[Dict[str, Any]]: - """Get available shard dataset types.""" - return list(self.data_by_type) - - def get_dataset(self, dataset_type: str = 'train') -> MnistShardDataset: - """Return a shard dataset by type.""" - if dataset_type not in self.data_by_type: - raise Exception(f'Wrong dataset type: {dataset_type}') - return MnistShardDataset( - *self.data_by_type[dataset_type], - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - return ['28', '28'] - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - return ['1'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Mnist dataset, shard number {self.rank}' - f' out of {self.worldsize}') - - def download_data(self) -> Tuple[Tuple[Any, Any], Tuple[Any, Any]]: - """Download prepared dataset.""" - train_data, val_data = ( - datasets.MNIST('data', train=train, download=True) - for train in (True, False) - ) - x_train, y_train = train_data.train_data, train_data.train_labels - x_val, y_val = val_data.test_data, val_data.test_labels - - print('Mnist data was loaded!') - return (x_train, y_train), (x_val, y_val) diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/sd_requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/sd_requirements.txt deleted file mode 100644 index 278e0f55c1..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/sd_requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -numpy -pillow -pynvml==11.4.1 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -torch==2.3.1 -torchvision==0.18.1 -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/start_envoy.sh deleted file mode 100755 index ae9b4c27a0..0000000000 --- 
a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config.yaml -dh localhost -dp 50050 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/workspace/PyTorch_Lightning_GAN.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/workspace/PyTorch_Lightning_GAN.ipynb deleted file mode 100644 index 95dbeb75ee..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/workspace/PyTorch_Lightning_GAN.ipynb +++ /dev/null @@ -1,699 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "liquid-jacket", - "metadata": {}, - "source": [ - "# Federated GAN tutorial with PyTorch Lightning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "alike-sharing", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install \"pytorch-lightning>=1.3\" \"torch==2.3.1\" \"torchvision==0.18.1\" \"torchmetrics>=0.3\" \"scikit-image\" \"matplotlib\"" - ] - }, - { - "cell_type": "markdown", - "id": "16986f22", - "metadata": {}, - "source": [ - "# Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4485ac79", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "client_id = \"frontend\"\n", - "director_node_fqdn = \"localhost\"\n", - "director_port = 50050\n", - "\n", - "#Run with TLS disabled (trusted environment)\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port,\n", - " tls=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e35802d5", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67ae50de", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "markdown", - "id": "obvious-tyler", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "rubber-address", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import (\n", - " DataInterface,\n", - " FLExperiment,\n", - " ModelInterface,\n", - " TaskInterface,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "sustainable-public", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d9acb53", - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "import os\n", - "import shutil\n", - "import PIL\n", - "from collections import OrderedDict\n", - "\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "import torchvision\n", - "import torchvision.transforms as transforms\n", - "from pytorch_lightning import LightningDataModule, LightningModule, Trainer\n", - "from torch.utils.data import DataLoader, Dataset, random_split\n", - "from torchvision.datasets import MNIST" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64f37dcf", - "metadata": {}, - "outputs": [], - "source": [ - 
"mnist_transform = transforms.Compose(\n", - " [\n", - " transforms.ToPILImage(),\n", - " transforms.Resize((28, 28)),\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.1307,), (0.3081,)),\n", - " ]\n", - ")\n", - "\n", - "\n", - "class MnistShardDataset(Dataset):\n", - " def __init__(self, x, y, transform=None):\n", - " self.x, self.y = x, y\n", - " self.transform = transform\n", - "\n", - " def __getitem__(self, index):\n", - " x, y = self.x[index], self.y[index]\n", - " x = self.transform(x).numpy()\n", - " y = y.numpy()\n", - " return x, y\n", - "\n", - " def __len__(self):\n", - " return len(self.x)\n", - "\n", - "\n", - "class MnistFedDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " self.train_set = MnistShardDataset(\n", - " self._shard_descriptor.get_dataset(\"train\")[:][0],\n", - " self._shard_descriptor.get_dataset(\"train\")[:][1],\n", - " transform=mnist_transform,\n", - " )\n", - " self.valid_set = MnistShardDataset(\n", - " self._shard_descriptor.get_dataset(\"val\")[:][0],\n", - " self._shard_descriptor.get_dataset(\"val\")[:][1],\n", - " transform=mnist_transform,\n", - " )\n", - "\n", - " def __getitem__(self, index):\n", - " return self.shard_descriptor[index]\n", - "\n", - " def __len__(self):\n", - " return len(self.shard_descriptor)\n", - "\n", - " def get_train_loader(self):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " if self.kwargs[\"train_bs\"]:\n", - " batch_size = self.kwargs[\"train_bs\"]\n", - " else:\n", - " batch_size = 256\n", - " return DataLoader(self.train_set, batch_size=batch_size, num_workers=4)\n", - "\n", - " def get_valid_loader(self):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " if self.kwargs[\"valid_bs\"]:\n", - " batch_size = self.kwargs[\"valid_bs\"]\n", - " else:\n", - " batch_size = 64\n", - " return DataLoader(self.valid_set, batch_size=batch_size)\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " train data size\n", - " \"\"\"\n", - "\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " val data size\n", - " \"\"\"\n", - " return len(self.valid_set)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8df35f5", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MnistFedDataset(train_bs=256, valid_bs=64)" - ] - }, - { - "cell_type": "markdown", - "id": "caring-distinction", - "metadata": {}, - "source": [ - "### Describe a model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "foreign-gospel", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Generator and discriminator model definition\n", - "\"\"\"\n", - "\n", - "\n", - "class Generator(nn.Module):\n", - " def __init__(self, latent_dim, img_shape):\n", - " super().__init__()\n", - " self.img_shape = img_shape\n", - "\n", - " def block(in_feat, 
out_feat, normalize=True):\n", - " layers = [nn.Linear(in_feat, out_feat)]\n", - " if normalize:\n", - " layers.append(nn.BatchNorm1d(out_feat, 0.8))\n", - " layers.append(nn.LeakyReLU(0.2, inplace=True))\n", - " return layers\n", - "\n", - " self.model = nn.Sequential(\n", - " *block(latent_dim, 128, normalize=False),\n", - " *block(128, 256),\n", - " *block(256, 512),\n", - " *block(512, 1024),\n", - " nn.Linear(1024, int(np.prod(img_shape))),\n", - " nn.Tanh(),\n", - " )\n", - "\n", - " def forward(self, z):\n", - " z = z.float()\n", - " img = self.model(z)\n", - " img = img.view(img.size(0), *self.img_shape)\n", - " return img" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "981b810c", - "metadata": {}, - "outputs": [], - "source": [ - "class Discriminator(nn.Module):\n", - " def __init__(self, img_shape):\n", - " super().__init__()\n", - "\n", - " self.model = nn.Sequential(\n", - " nn.Linear(int(np.prod(img_shape)), 512),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " nn.Linear(512, 256),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " nn.Linear(256, 1),\n", - " nn.Sigmoid(),\n", - " )\n", - "\n", - " def forward(self, img):\n", - " img_flat = img.view(img.size(0), -1)\n", - " img_flat = img_flat.float()\n", - " validity = self.model(img_flat)\n", - "\n", - " return validity" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6cc92e98", - "metadata": {}, - "outputs": [], - "source": [ - "class GAN(LightningModule):\n", - " def __init__(\n", - " self,\n", - " channels,\n", - " width,\n", - " height,\n", - " train_disc_only,\n", - " train_gen_only,\n", - " latent_dim: int = 100,\n", - " lr: float = 0.0002,\n", - " b1: float = 0.5,\n", - " b2: float = 0.999,\n", - " batch_size: int = 256,\n", - " **kwargs\n", - " ):\n", - " super().__init__()\n", - " self.save_hyperparameters()\n", - "\n", - " data_shape = (channels, width, height)\n", - " self.generator = Generator(\n", - " latent_dim=self.hparams.latent_dim, img_shape=data_shape\n", - " )\n", - " self.discriminator = Discriminator(img_shape=data_shape)\n", - "\n", - " self.validation_z = torch.randn(8, self.hparams.latent_dim)\n", - " self.example_input_array = torch.zeros(2, self.hparams.latent_dim)\n", - " self.train_disc_only = train_disc_only\n", - " self.train_gen_only = train_gen_only\n", - "\n", - " def forward(self, z):\n", - " return self.generator(z)\n", - "\n", - " def adversarial_loss(self, y_hat, y):\n", - " return F.binary_cross_entropy(y_hat, y)\n", - "\n", - " def training_step(self, batch, batch_idx, optimizer_idx):\n", - " imgs, _ = batch\n", - "\n", - " # sample noise\n", - " z = torch.randn(imgs.shape[0], self.hparams.latent_dim)\n", - " z = z.type_as(imgs)\n", - "\n", - " if optimizer_idx == 0 and self.train_disc_only == 0:\n", - " return self.train_generator(imgs, z, display_images=0)\n", - "\n", - " elif optimizer_idx == 1 and self.train_gen_only == 0:\n", - " return self.train_discriminator(imgs, z)\n", - "\n", - " def train_generator(self, imgs, z, display_images=0):\n", - " self.generated_imgs = self(z)\n", - " sample_imgs = self.generated_imgs[:10]\n", - " sample_imgs = np.reshape(sample_imgs.detach().cpu().numpy(), (10, 28, 28, 1))\n", - " \n", - " if display_images:\n", - " from skimage import data, io\n", - " from matplotlib import pyplot as plt\n", - " for img in sample_imgs:\n", - " io.imshow(img.reshape((28, 28)), cmap='gray_r')\n", - " plt.axis('off')\n", - " plt.show()\n", - "\n", - " valid = torch.ones(imgs.size(0), 1)\n", - " valid = 
valid.type_as(imgs).float()\n", - "\n", - " g_loss = self.adversarial_loss(self.discriminator(self(z)), valid)\n", - " tqdm_dict = {\"g_loss\": g_loss}\n", - " output = OrderedDict(\n", - " {\"loss\": g_loss, \"progress_bar\": tqdm_dict, \"log\": tqdm_dict}\n", - " )\n", - " self.log(name=\"Generator training loss\", value=g_loss, on_epoch=True)\n", - " return output\n", - "\n", - " def train_discriminator(self, imgs, z):\n", - " valid = torch.ones(imgs.size(0), 1)\n", - " valid = valid.type_as(imgs).float()\n", - "\n", - " real_loss = self.adversarial_loss(self.discriminator(imgs), valid)\n", - "\n", - " fake = torch.zeros(imgs.size(0), 1)\n", - " fake = fake.type_as(imgs).float()\n", - "\n", - " fake_loss = self.adversarial_loss(self.discriminator(self(z).detach()), fake)\n", - "\n", - " d_loss = (real_loss + fake_loss) / 2\n", - " tqdm_dict = {\"d_loss\": d_loss}\n", - " output = OrderedDict(\n", - " {\"loss\": d_loss, \"progress_bar\": tqdm_dict, \"log\": tqdm_dict}\n", - " )\n", - " self.log(name=\"Discriminator training loss\", value=d_loss, on_epoch=True)\n", - " return output\n", - "\n", - " def configure_optimizers(self):\n", - " lr = self.hparams.lr\n", - " b1 = self.hparams.b1\n", - " b2 = self.hparams.b2\n", - "\n", - " opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))\n", - " opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2))\n", - "\n", - " return [opt_g, opt_d]\n", - "\n", - " def validation_step(self, batch, batch_idx, optimizer_idx=1):\n", - " imgs, _ = batch\n", - "\n", - " valid = torch.ones(imgs.size(0), 1)\n", - " valid = valid.type_as(imgs).float()\n", - "\n", - " val_real_loss = self.adversarial_loss(self.discriminator(imgs), valid)\n", - " self.log(name=\"Discriminator val loss\", value=val_real_loss, on_epoch=True)\n", - " return {\"val_loss\": val_real_loss}\n", - "\n", - " def on_validation_epoch_end(self):\n", - " z = self.validation_z.type_as(self.generator.model[0].weight)\n", - "\n", - " sample_imgs = self(z)\n", - " grid = torchvision.utils.make_grid(sample_imgs)\n", - " self.logger.experiment.add_image(\"generated_images\", grid, self.current_epoch)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5719fa50", - "metadata": {}, - "outputs": [], - "source": [ - "from pytorch_lightning.callbacks import Callback\n", - "\n", - "\n", - "class MetricsCallback(Callback):\n", - " \"\"\"PyTorch Lightning metric callback.\"\"\"\n", - "\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.metrics = []\n", - "\n", - " def on_validation_epoch_end(self, trainer, pl_module):\n", - " met = copy.deepcopy(trainer.callback_metrics)\n", - " self.metrics.append(met)\n", - "\n", - " def __call__(self):\n", - " return self.metrics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "46692b08", - "metadata": {}, - "outputs": [], - "source": [ - "model = GAN(channels=1, width=28, height=28, train_disc_only=0, train_gen_only=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "greater-activation", - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = model.configure_optimizers()" - ] - }, - { - "cell_type": "markdown", - "id": "caroline-passion", - "metadata": {}, - "source": [ - "#### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "handled-teens", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "# Need this plugin only if multiple optimizers are used. Not required for PyTorch Lightning with a single optimizer.\n", - "framework_adapter = (\n", - " \"plugin_for_multiple_optimizers.FrameworkAdapterPluginforMultipleOpt\"\n", - ")\n", - "MI = ModelInterface(\n", - " model=model, optimizer=optimizer, framework_plugin=framework_adapter\n", - ")\n", - "\n", - "initial_model = deepcopy(model)" - ] - }, - { - "cell_type": "markdown", - "id": "portuguese-groove", - "metadata": {}, - "source": [ - "### Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "increasing-builder", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "import tqdm\n", - "\n", - "@TI.register_fl_task(\n", - " model=\"model\", data_loader=\"train_loader\", device=\"device\", optimizer=\"optimizer\"\n", - ")\n", - "def train(model, train_loader, optimizer, device, some_parameter=None):\n", - "\n", - " print(f\"\\n\\n TASK TRAIN GOT DEVICE {device}\\n\\n\")\n", - "\n", - " AVAIL_GPUS = 1 if \"cuda\" in device else 0\n", - "\n", - " trainer = Trainer(gpus=AVAIL_GPUS, max_epochs=1, callbacks=[MetricsCallback()])\n", - " trainer.fit(model=model, train_dataloaders=train_loader)\n", - " print(\"training logged metrics\", trainer.logged_metrics)\n", - "\n", - " if \"Discriminator training loss_epoch\" in trainer.logged_metrics:\n", - " train_loss = trainer.logged_metrics[\"Discriminator training loss_epoch\"]\n", - " else:\n", - " train_loss = trainer.logged_metrics[\"Generator training loss_epoch\"]\n", - " return {\"train_loss\": train_loss}\n", - "\n", - "\n", - "@TI.register_fl_task(model=\"model\", data_loader=\"val_loader\", device=\"device\")\n", - "def validate(model, val_loader, device):\n", - "\n", - " print(f\"\\n\\n TASK VALIDATE GOT DEVICE {device}\\n\\n\")\n", - "\n", - " model.eval()\n", - " model.to(device)\n", - "\n", - " AVAIL_GPUS = 1 if \"cuda\" in device else 0\n", - "\n", - " trainer = Trainer(gpus=AVAIL_GPUS, max_epochs=1, callbacks=[MetricsCallback()])\n", - "\n", - " trainer.validate(model=model, dataloaders=val_loader)\n", - " print(\"validation logged metrics\", trainer.logged_metrics)\n", - "\n", - " val_loss = trainer.logged_metrics[\"Discriminator val loss\"]\n", - "\n", - " return {\"val_loss\": val_loss}" - ] - }, - { - "cell_type": "markdown", - "id": "derived-bride", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "mature-renewal", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experiment in federation\n", - "experiment_name = \"PL_MNIST_test_experiment\"\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "lightweight-causing", - "metadata": {}, - "outputs": [], - "source": [ - "fl_experiment.start(\n", - " model_provider=MI,\n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=10,\n", - " opt_treatment=\"CONTINUE_GLOBAL\",\n", - " device_assignment_policy=\"CUDA_PREFERRED\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f1543a36", - "metadata": {}, - "outputs": [], - "source": [ - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "markdown", - "id": "f987d2c4", - "metadata": {}, - "source": [ - "## Check the images generated by the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fec7a708", - "metadata": {}, - "outputs": [], -
"source": [ - "!pip install -r ../envoy/sd_requirements.txt\n", - "import sys\n", - "\n", - "sys.path.insert(1, \"../envoy\")\n", - "from mnist_shard_descriptor import MnistShardDescriptor" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2bc77cd8", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MnistFedDataset(train_bs=256, valid_bs=64)\n", - "fed_dataset.shard_descriptor = MnistShardDescriptor(rank_worldsize=\"1,1\")\n", - "\n", - "last_model = fl_experiment.get_last_model()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5f821972", - "metadata": {}, - "outputs": [], - "source": [ - "val_imgs, _ = next(iter(fed_dataset.get_valid_loader()))\n", - "\n", - "z = torch.randn(val_imgs.shape[0], 100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9549d1ab", - "metadata": {}, - "outputs": [], - "source": [ - "last_model.train_generator(val_imgs, z, display_images=1)\n", - "pass" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/workspace/plugin_for_multiple_optimizers.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/workspace/plugin_for_multiple_optimizers.py deleted file mode 100644 index d47c13b36e..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Lightning_MNIST_GAN/workspace/plugin_for_multiple_optimizers.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Pytorch Framework Adapter plugin for multiple optimizers.""" - - -from openfl.plugins.frameworks_adapters.pytorch_adapter import _get_optimizer_state -from openfl.plugins.frameworks_adapters.pytorch_adapter import FrameworkAdapterPlugin -from openfl.plugins.frameworks_adapters.pytorch_adapter import to_cpu_numpy - - -class FrameworkAdapterPluginforMultipleOpt(FrameworkAdapterPlugin): - """Framework adapter plugin class for multiple optimizers.""" - - def __init__(self): - """Initialize framework adapter.""" - super().__init__() - - @staticmethod - def get_tensor_dict(model, optimizers=None): - """ - Extract tensor dict from a model and a list of optimizers. - - Returns: - dict {weight name: numpy ndarray} - """ - state = to_cpu_numpy(model.state_dict()) - if optimizers is not None: - for opt in optimizers: - if isinstance(opt, dict): - opt_state = _get_optimizer_state(opt['optimizer']) - else: - opt_state = _get_optimizer_state(opt) - - state = {**state, **opt_state} - - return state diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/README.md deleted file mode 100644 index 84c75e5369..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# PyTorch based Linear Regression Tutorial - -### 1. About dataset - -Generate a random regression problem using `make_regression` from sklearn.datasets with pre-defined parameters. 
 - -Define the below param in the envoy_config.yaml config to shard the dataset across participants/envoys. -- rank_worldsize - - -### 2. About model - -Simple Regression Model based on PyTorch. - - -### 3. How to run this tutorial (without TLS and locally as a simulation): - -1. Run director: - -```sh -cd director_folder -./start_director.sh -``` - -2. Run envoy: - -Step 1: Activate virtual environment and install packages -``` -cd envoy_folder -pip install -r requirements.txt -``` -Step 2: Start the envoy -```sh -./start_envoy.sh env_instance_1 envoy_config.yaml -``` - -Optional: start second envoy: - -- Copy `envoy_folder` to another place and follow the same process as above: - -```sh -./start_envoy.sh env_instance_2 envoy_config.yaml -``` - -3. Run `torch_linear_regression.ipynb` jupyter notebook: - -```sh -cd workspace -jupyter lab torch_linear_regression.ipynb -``` - -4. Visualization - -``` -tensorboard --logdir logs/ -``` diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/director/director_config.yaml deleted file mode 100644 index d22b4b7766..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/director/director_config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50050 - sample_shape: ['1'] # Modify this param if experimenting with `n_features` of shard_descriptor. - target_shape: ['1'] - envoy_health_check_period: 5 # in seconds diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml deleted file mode 100644 index 8f35387ca8..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml +++ /dev/null @@ -1,9 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: regression_shard_descriptor.RegressionShardDescriptor - params: - rank_worldsize: 1, 2 \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py deleted file mode 100644 index 467307f430..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (C) 2020-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Regression Shard Descriptor.""" - -from typing import List - -import numpy as np -import torch -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split - -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - - -class RegressionShardDescriptor(ShardDescriptor): - """Regression Shard descriptor class.""" - - def __init__(self, rank_worldsize: str = '1, 1', **kwargs) -> None: - """ - Initialize Regression Data Shard Descriptor. - - This Shard Descriptor generates random regression data with Gaussian-centered noise - using the make_regression method from sklearn.datasets. - Shards data across participants using rank and world size. - """ - - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - X_train, y_train, X_test, y_test = self.generate_data() - self.data_by_type = { - 'train': np.concatenate((X_train, y_train[:, None]), axis=1), - 'val': np.concatenate((X_test, y_test[:, None]), axis=1) - } - - def generate_data(self): - """Generate regression dataset with predefined params.""" - x, y = make_regression(n_samples=1000, n_features=1, noise=14, random_state=24) - X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=24) - self.data = np.concatenate((x, y[:, None]), axis=1) - return X_train, y_train, X_test, y_test - - def get_shard_dataset_types(self) -> List[str]: - """Get available shard dataset types.""" - return list(self.data_by_type) - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - if dataset_type not in self.data_by_type: - raise Exception(f'Incorrect dataset type: {dataset_type}') - - if dataset_type in ['train', 'val']: - return torch.tensor( - self.data_by_type[dataset_type][self.rank - 1::self.worldsize], - dtype=torch.float32 - ) - else: - raise ValueError - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - (*x, _) = self.data[0] - return [str(i) for i in np.array(x, ndmin=1).shape] - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - (*_, y) = self.data[0] - return [str(i) for i in np.array(y, ndmin=1).shape] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Regression dataset, shard number {self.rank}' - f' out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt deleted file mode 100644 index acfef16953..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability -numpy>=1.13.3 -openfl>=1.2.1 -scikit-learn>=0.24.1 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -torch>=1.13.1 -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh deleted file mode 100755 index 4da07821af..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt deleted file mode 100644 index fbeabbcfd9..0000000000 ---
a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -jupyterlab -numpy>=1.13.3 -openfl>=1.2.1 -scikit-learn>=0.24.1 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -torch>=1.13.1 -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb deleted file mode 100644 index d7ad3e310f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb +++ /dev/null @@ -1,388 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Torch Regression Example - Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "NUM_FEATURES = 1\n", - "LEARNING_RATE = 0.5" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Torch Definitions" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class LRModel(nn.Module):\n", - "\n", - " def __init__(self, in_features: int, out_features: int) -> None:\n", - " super().__init__()\n", - " self.fc = torch.nn.Linear(in_features, out_features)\n", - " \n", - " def forward(self, x: torch.Tensor) -> torch.Tensor:\n", - " return self.fc(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = LRModel(NUM_FEATURES, 1)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Loss function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "loss_fn = nn.MSELoss()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import copy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class LRDataset(DataInterface):\n", - " def __init__(self, train_bs: int = 1024, val_bs: int = 1024, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self._train_bs = train_bs\n", - " self._val_bs = 
val_bs\n", - " self._train_data = None\n", - " self._val_data = None\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " self._train_data = self._shard_descriptor.get_dataset('train')\n", - " self._val_data = self._shard_descriptor.get_dataset('val')\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " if self._train_data is None:\n", - " raise ValueError(\"train data is not set\")\n", - " return torch.utils.data.DataLoader(self._train_data, batch_size=self._train_bs, shuffle=True)\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " if self._val_data is None:\n", - " raise ValueError(\"validation data is not set\")\n", - " return torch.utils.data.DataLoader(self._val_data, batch_size=self._val_bs)\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " if self._train_data is None:\n", - " raise ValueError(\"train data is not set\")\n", - " return len(self._train_data)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " if self._val_data is None:\n", - " raise ValueError(\"validation data is not set\")\n", - " return len(self._val_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fl_dataset = LRDataset()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "model_interface = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = copy.deepcopy(model)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Register tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "task_interface = TaskInterface()\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@task_interface.add_kwargs(**{'loss_fn': loss_fn})\n", - "@task_interface.register_fl_task(model='model', data_loader='train_loader', device='device', optimizer='optimizer') \n", - "def train(model, train_loader, optimizer, device, loss_fn): \n", - " model.to(device)\n", - " model.train()\n", - "\n", - " losses = []\n", - " for data in train_loader:\n", - " data = data.to(device)\n", - " optimizer.zero_grad()\n", - " loss = loss_fn(model(data[:,:NUM_FEATURES]), data[:,NUM_FEATURES:])\n", - " loss.backward()\n", - " optimizer.step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - "\n", - " return {'train_mse': np.mean(losses)}\n", - "\n", - "\n", - 
"@task_interface.add_kwargs(**{'loss_fn': loss_fn})\n", - "@task_interface.register_fl_task(model='model', data_loader='val_loader', device='device') \n", - "def validate(model, val_loader, device, loss_fn):\n", - " model.to(device)\n", - " model.eval()\n", - " \n", - " losses = []\n", - " with torch.no_grad():\n", - " for data in val_loader:\n", - " data = data.to(device)\n", - " loss = loss_fn(model(data[:,:NUM_FEATURES]), data[:,NUM_FEATURES:])\n", - " losses.append(loss.detach().cpu().numpy())\n", - "\n", - " return {'val_mse': np.mean(losses)}" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.federation import Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'frontend'\n", - "director_node_fqdn = 'localhost'\n", - "director_port = 50050\n", - "\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port,\n", - " tls=False\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = 'torch_linear_regression_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n", - "fl_experiment.start(\n", - " model_provider=model_interface, \n", - " task_keeper=task_interface,\n", - " data_loader=fl_dataset,\n", - " rounds_to_train=10\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fl_experiment.stream_metrics()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "osh", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/README.md deleted file mode 100755 index 3e538db659..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/README.md +++ /dev/null @@ -1,105 +0,0 @@ -# Anomaly Detection with PatchSVDD for MVTec Dataset - -![MVTec AD objects](https://www.mvtec.com/fileadmin/Redaktion/mvtec.com/company/research/datasets/dataset_overview_large.png "MVTec AD objects") - -## **I. About the Dataset** - -MVTec AD is a dataset for benchmarking anomaly detection methods with a focus on industrial -inspection. It contains over 5000 high-resolution images divided into fifteen different object and -texture categories. 
Each class contains 60 to 390 normal train images (defect free) and 40 to 167 -test images (with various kinds of defects as well as images without defects). More info -at [MVTec dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad). For each object, the -data is divided into 3 folders - 'train' (containing defect free training images), 'test'( -containing test images, both good and bad), 'ground_truth' (containing the masks of defected -images). - -
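As a rough sketch of how this layout is typically enumerated (it mirrors the glob patterns used by `mvtec_shard_descriptor.py` later in this tutorial; `data_folder` and `obj` are the tutorial defaults, not fixed values):

```python
# Enumerate one object category of MVTec AD; test images under 'good'
# are defect-free, while all other test subfolders contain anomalies.
import os
from glob import glob

data_folder, obj = 'MVTec_data', 'bottle'
train_paths = sorted(glob(os.path.join(data_folder, f'{obj}/train/*/*.png')))
test_paths = sorted(glob(os.path.join(data_folder, f'{obj}/test/*/*.png')))
mask_paths = sorted(glob(os.path.join(data_folder, f'{obj}/ground_truth/*/*.png')))
anomalous = [p for p in test_paths
             if os.path.basename(os.path.dirname(p)) != 'good']
```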
-
-<br/> - -## **II. About the Model** - -Two neural networks are used: an encoder and a classifier. The encoder is composed of convolutional -layers only. The classifier is a two-layer MLP with 128 hidden units per layer, and the -input to the classifier is the difference between the features of the two patches. The activation -function for both networks is a LeakyReLU with α = 0.1. The encoder has a hierarchical structure. -The receptive field of the encoder is K = 64, and that of the embedded smaller encoder is K = 32. -Patch SVDD divides the images into patches of size K with stride S. The stride values -are S = 16 and S = 4 for the encoders with K = 64 and K = 32, respectively. -
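The snippet below is a minimal PyTorch sketch of the classifier just described, under one plausible reading of the layer description; it is not the exact tutorial code. `D = 64` matches the `D` argument in the tutorial notebook, and the 8-way output corresponds to the 8 relative patch positions used by the self-supervised position task.

```python
import torch
import torch.nn as nn

D = 64  # dimension of the encoder's patch embedding (notebook arg `D`)

classifier = nn.Sequential(
    nn.Linear(D, 128), nn.LeakyReLU(0.1),    # two hidden layers of
    nn.Linear(128, 128), nn.LeakyReLU(0.1),  # 128 units each
    nn.Linear(128, 8),                       # one of 8 relative positions
)

h1 = torch.randn(16, D)       # encoder features of the first patch
h2 = torch.randn(16, D)       # encoder features of the second patch
logits = classifier(h1 - h2)  # the classifier input is the feature subtraction
```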
-
- -## **III. Links** - -* [Original paper](https://arxiv.org/abs/2006.16067) -* [Original Github code](https://github.com/nuclearboy95/Anomaly-Detection-PatchSVDD-PyTorch/tree/934d6238e5e0ad511e2a0e7fc4f4899010e7d892) -* [MVTec ad dataset download link](https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094/mvtec_anomaly_detection.tar.xz) - -
-
- -## **IV. How to run this tutorial (without TLS and locally as a simulation):** - -
- -### **Note: An NVIDIA driver and GPUs are needed to run this tutorial unless configured otherwise.** - -
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) - -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD - ``` - -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r sd_requirements.txt -./start_envoy.sh env_one envoy_config.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Repeat step 4 instructions above but change "env_one" name to "env_two" (or another name of your choice). - -
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab PatchSVDD_with_Director.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the PatchSVDD_with_Director.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/director/director_config.yaml deleted file mode 100644 index abe05b045a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50050 - sample_shape: ['256', '256', '3'] - target_shape: ['256', '256'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director_with_tls.sh deleted file mode 100755 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/envoy_config.yaml deleted file mode 100644 index 02d38535e5..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/envoy_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -params: - cuda_devices: [0,2] - -optional_plugin_components: {} - -shard_descriptor: - template: mvtec_shard_descriptor.MVTecShardDescriptor - params: - data_folder: MVTec_data - rank_worldsize: 1,1 - obj: bottle \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/mvtec_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/mvtec_shard_descriptor.py deleted file mode 100644 index e1e7df9215..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/mvtec_shard_descriptor.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""MVTec shard descriptor.""" - -import os -from glob import glob -from pathlib import Path - -import numpy as np -from imageio import imread -from PIL import Image 
 - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - - -class MVTecShardDataset(ShardDataset): - """MVTec Shard dataset class.""" - - def __init__(self, images_path, - mask_path, labels, - rank=1, - worldsize=1): - """Initialize MVTecShardDataset.""" - self.rank = rank - self.worldsize = worldsize - self.images_path = images_path[self.rank - 1::self.worldsize] - self.mask_path = mask_path[self.rank - 1::self.worldsize] - self.labels = labels[self.rank - 1::self.worldsize] - - def __getitem__(self, index): - """Return an item by the index.""" - img = np.asarray(imread(self.images_path[index])) - if img.shape[-1] != 3: - img = self.gray2rgb(img) - - img = self.resize(img) - img = np.asarray(img) - label = self.labels[index] - if self.mask_path[index]: - mask = np.asarray(imread(self.mask_path[index])) - mask = self.resize(mask) - mask = np.asarray(mask) - else: - mask = np.zeros(img.shape)[:, :, 0] - return img, mask, label - - def __len__(self): - """Return the length of the dataset.""" - return len(self.images_path) - - def resize(self, image, shape=(256, 256)): - """Resize image.""" - return np.array(Image.fromarray(image).resize(shape)) - - def gray2rgb(self, images): - """Change image from gray to rgb.""" - tile_shape = tuple(np.ones(len(images.shape), dtype=int)) - tile_shape += (3,) - - images = np.tile(np.expand_dims(images, axis=-1), tile_shape) - return images - - -class MVTecShardDescriptor(ShardDescriptor): - """MVTec Shard descriptor class.""" - - def __init__(self, data_folder: str = 'MVTec_data', - rank_worldsize: str = '1,1', - obj: str = 'bottle'): - """Initialize MVTecShardDescriptor.""" - super().__init__() - - self.dataset_path = Path.cwd() / data_folder - self.download_data() - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - self.obj = obj - - # Calculating data and target shapes - ds = self.get_dataset() - sample, masks, target = ds[0] - self._sample_shape = [str(dim) for dim in sample.shape] - self._target_shape = [str(dim) for dim in target.shape] - - def download_data(self): - """Download data.""" - zip_file_path = self.dataset_path / 'mvtec_anomaly_detection.tar.xz' - if not Path(zip_file_path).exists(): - os.makedirs(self.dataset_path, exist_ok=True) - print('Downloading MVTec Dataset...this might take a while') - os.system('wget -nc' - " 'https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094/mvtec_anomaly_detection.tar.xz'" # noqa - f' -O {zip_file_path.relative_to(Path.cwd())}') - print('Downloaded MVTec dataset, untarring now') - os.system(f'tar -xvf {zip_file_path.relative_to(Path.cwd())}' - f' -C {self.dataset_path.relative_to(Path.cwd())}') - # change to write permissions - self.change_permissions(self.dataset_path, 0o764) - - def change_permissions(self, folder, code): - """Change permissions after data is downloaded.""" - for root, dirs, files in os.walk(folder): - for d in dirs: - os.chmod(os.path.join(root, d), code) - for f in files: - os.chmod(os.path.join(root, f), code) - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - # Train dataset - if dataset_type == 'train': - fpattern = os.path.join(self.dataset_path, f'{self.obj}/train/*/*.png') - fpaths = sorted(glob(fpattern)) - self.images_path = list(fpaths) - self.labels = np.zeros(len(fpaths), dtype=np.int32) - # Masks - self.mask_path = np.full(self.labels.shape, None) - # Test dataset - elif dataset_type == 'test': - fpattern = os.path.join(self.dataset_path, f'{self.obj}/test/*/*.png') - fpaths = sorted(glob(fpattern)) - fpaths_anom = list( - filter(lambda fpath: os.path.basename(os.path.dirname(fpath)) != 'good', fpaths)) - fpaths_good = list( - filter(lambda fpath: os.path.basename(os.path.dirname(fpath)) == 'good', fpaths)) - fpaths = fpaths_anom + fpaths_good - self.images_path = fpaths - self.labels = np.zeros(len(fpaths_anom) + len(fpaths_good), dtype=np.int32) - self.labels[:len(fpaths_anom)] = 1 # anomalies - # Masks - fpattern_mask = os.path.join(self.dataset_path, f'{self.obj}/ground_truth/*/*.png') - self.mask_path = sorted(glob(fpattern_mask)) + [None] * len(fpaths_good) - else: - raise Exception(f'Wrong dataset type: {dataset_type}. ' - f'Choose from the list: [train, test]') - - return MVTecShardDataset( - images_path=self.images_path, - mask_path=self.mask_path, - labels=self.labels, - rank=self.rank, - worldsize=self.worldsize, - ) - - @property - def sample_shape(self): - """Return the sample shape info.""" - return ['256', '256', '3'] - - @property - def target_shape(self): - """Return the target shape info.""" - return ['256', '256'] - - @property - def dataset_description(self) -> str: - """Return the shard dataset description.""" - return (f'MVTec dataset, shard number {self.rank}' - f' out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/sd_requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/sd_requirements.txt deleted file mode 100644 index a1e73bfe94..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/sd_requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -imageio -numpy -pillow diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy.sh deleted file mode 100755 index ae9b4c27a0..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config.yaml -dh localhost -dp 50050 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy_with_tls.sh deleted file mode 100755 index d4e09601bd..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -DIRECTOR_FQDN=$2 - -fx envoy start -n "$ENVOY_NAME" --envoy-config-path envoy_config.yaml -dh "$DIRECTOR_FQDN" -dp 50050 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/PatchSVDD_with_Director.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/PatchSVDD_with_Director.ipynb deleted file mode 100644 index 351934772a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/PatchSVDD_with_Director.ipynb +++ /dev/null @@ -1,1120 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "liquid-jacket", - "metadata": {}, - "source": [ - "# Federated PatchSVDD algorithm with Director example" - ] - },
 - { - "cell_type": "markdown", - "id": "83e6aae2", - "metadata": {}, - "source": [ - "# PatchSVDD algorithm\n", - "Anomaly detection involves making a binary decision as to whether an input image contains an anomaly, and anomaly segmentation aims to locate the anomaly on the pixel level. Patch SVDD extends the deep learning variant of support vector data description (SVDD, a long-standing algorithm used for anomaly detection) to a patch-based method trained with self-supervised learning. This extension enables anomaly segmentation and improves detection performance, measured in AUROC, on the MVTec AD dataset.\n", - "\n", - "![alt text](https://media.arxiv-vanity.com/render-output/5520416/x4.png \"Patch Level SVDD for Anomaly Detection\")\n", - "\n", - "* Original paper: https://arxiv.org/abs/2006.16067\n", - "* Original Github code: https://github.com/nuclearboy95/Anomaly-Detection-PatchSVDD-PyTorch/tree/934d6238e5e0ad511e2a0e7fc4f4899010e7d892\n", - "* MVTec AD dataset download link: https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094/mvtec_anomaly_detection.tar.xz" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "alike-sharing", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "!pip install torchvision==0.8.1 matplotlib numpy scikit-image scikit-learn torch tqdm Pillow imageio opencv-python ngt" - ] - }, - { - "cell_type": "markdown", - "id": "16986f22", - "metadata": {}, - "source": [ - "# Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4485ac79", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS they must provide the CA root chain and a signed key pair to the federation interface\n", - "# cert_chain = 'cert/root_ca.crt'\n", - "# API_certificate = 'cert/frontend.crt'\n", - "# API_private_key = 'cert/frontend.key'\n", - "\n", - "# federation = Federation(client_id='frontend', director_node_fqdn='localhost', director_port='50051',\n", - "# cert_chain=cert_chain, api_cert=API_certificate, api_private_key=API_private_key)\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(client_id='frontend', director_node_fqdn='localhost', director_port='50050', tls=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e35802d5", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67ae50de", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "markdown", - "id": "obvious-tyler", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "rubber-address", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": 
"markdown", - "id": "sustainable-public", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd2407cf", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "#Arguments\n", - "args = {\n", - "'obj' : 'bottle',\n", - "'lambda_value': '1e-3',\n", - "'D' : 64,\n", - "'lr' : '1e-4',\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8669bc63", - "metadata": {}, - "outputs": [], - "source": [ - "import argparse\n", - "import torch\n", - "from functools import reduce\n", - "from torch.utils.data import DataLoader,Dataset\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import math\n", - "import numpy as np\n", - "from PIL import Image\n", - "from imageio import imread\n", - "from glob import glob\n", - "from sklearn.metrics import roc_auc_score\n", - "import os, shutil\n", - "import _pickle as p\n", - "from contextlib import contextmanager\n", - "import PIL\n", - "from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler\n", - "from torchvision import transforms as tsf\n", - "from utils import to_device, task, DictionaryConcatDataset, crop_chw, cnn_output_size, crop_image_chw\n", - "from functools import reduce" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64f37dcf", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "def generate_coords(H, W, K):\n", - " h = np.random.randint(0, H - K + 1)\n", - " w = np.random.randint(0, W - K + 1)\n", - " return h, w\n", - "\n", - "\n", - "def generate_coords_position(H, W, K):\n", - " with task('P1'):\n", - " p1 = generate_coords(H, W, K)\n", - " h1, w1 = p1\n", - "\n", - " pos = np.random.randint(8)\n", - "\n", - " with task('P2'):\n", - " J = K // 4\n", - "\n", - " K3_4 = 3 * K // 4\n", - " h_dir, w_dir = pos_to_diff[pos]\n", - " h_del, w_del = np.random.randint(J, size=2)\n", - "\n", - " h_diff = h_dir * (h_del + K3_4)\n", - " w_diff = w_dir * (w_del + K3_4)\n", - "\n", - " h2 = h1 + h_diff\n", - " w2 = w1 + w_diff\n", - "\n", - " h2 = np.clip(h2, 0, H - K)\n", - " w2 = np.clip(w2, 0, W - K)\n", - "\n", - " p2 = (h2, w2)\n", - "\n", - " return p1, p2, pos\n", - "\n", - "\n", - "def generate_coords_svdd(H, W, K):\n", - " with task('P1'):\n", - " p1 = generate_coords(H, W, K)\n", - " h1, w1 = p1\n", - "\n", - " with task('P2'):\n", - " J = K // 32\n", - "\n", - " h_jit, w_jit = 0, 0\n", - "\n", - " while h_jit == 0 and w_jit == 0:\n", - " h_jit = np.random.randint(-J, J + 1)\n", - " w_jit = np.random.randint(-J, J + 1)\n", - "\n", - " h2 = h1 + h_jit\n", - " w2 = w1 + w_jit\n", - "\n", - " h2 = np.clip(h2, 0, H - K)\n", - " w2 = np.clip(w2, 0, W - K)\n", - "\n", - " p2 = (h2, w2)\n", - "\n", - " return p1, p2\n", - "\n", - "\n", - "pos_to_diff = {\n", - " 0: (-1, -1),\n", - " 1: (-1, 0),\n", - " 2: (-1, 1),\n", - " 3: (0, -1),\n", - " 4: (0, 1),\n", - " 5: (1, -1),\n", - " 6: (1, 0),\n", - " 7: (1, 1)\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "242e3109", - "metadata": {}, - "outputs": [], - "source": [ - "class SVDD_Dataset(Dataset):\n", - " def __init__(self, memmap, K=64, repeat=1):\n", - " super().__init__()\n", - " self.arr = np.asarray(memmap)\n", - " self.K = K\n", - " self.repeat = repeat\n", - " \n", - "\n", - " def __len__(self):\n", - " N = self.arr.shape[0]\n", - " return N * self.repeat\n", - "\n", - " def __getitem__(self, idx):\n", - " N = self.arr.shape[0]\n", - " K = self.K\n", - " n = idx 
% N\n", - "\n", - " p1, p2 = generate_coords_svdd(256, 256, K)\n", - "\n", - " image = self.arr[n]\n", - "\n", - " patch1 = crop_image_chw(image, p1, K)\n", - " patch2 = crop_image_chw(image, p2, K)\n", - "\n", - " return patch1, patch2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cea3ee13", - "metadata": {}, - "outputs": [], - "source": [ - "class PositionDataset(Dataset):\n", - " def __init__(self, x, K=64, repeat=1):\n", - " super(PositionDataset, self).__init__()\n", - " self.x = np.asarray(x)\n", - " self.K = K\n", - " self.repeat = repeat\n", - "\n", - " def __len__(self):\n", - " N = self.x.shape[0]\n", - " return N * self.repeat\n", - "\n", - " def __getitem__(self, idx):\n", - " N = self.x.shape[0]\n", - " K = self.K\n", - " n = idx % N\n", - "\n", - " image = self.x[n]\n", - " p1, p2, pos = generate_coords_position(256, 256, K)\n", - "\n", - " patch1 = crop_image_chw(image, p1, K).copy()\n", - " patch2 = crop_image_chw(image, p2, K).copy()\n", - "\n", - " # perturb RGB\n", - " rgbshift1 = np.random.normal(scale=0.02, size=(3, 1, 1))\n", - " rgbshift2 = np.random.normal(scale=0.02, size=(3, 1, 1))\n", - "\n", - " patch1 += rgbshift1\n", - " patch2 += rgbshift2\n", - "\n", - " # additive noise\n", - " noise1 = np.random.normal(scale=0.02, size=(3, K, K))\n", - " noise2 = np.random.normal(scale=0.02, size=(3, K, K))\n", - "\n", - " patch1 += noise1\n", - " patch2 += noise2\n", - "\n", - " return patch1, patch2, pos\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6efc53ee", - "metadata": {}, - "outputs": [], - "source": [ - "class PatchDataset_NCHW(Dataset):\n", - " def __init__(self, memmap, tfs=None, K=32, S=1):\n", - " super().__init__()\n", - " self.arr = memmap\n", - " self.tfs = tfs\n", - " self.S = S\n", - " self.K = K\n", - " self.N = self.arr.shape[0]\n", - " \n", - " def __len__(self):\n", - " return self.N * self.row_num * self.col_num\n", - "\n", - " @property\n", - " def row_num(self):\n", - " N, C, H, W = self.arr.shape\n", - " K = self.K\n", - " S = self.S\n", - " I = cnn_output_size(H, k=K, s=S)\n", - " return I\n", - "\n", - " @property\n", - " def col_num(self):\n", - " N, C, H, W = self.arr.shape\n", - " K = self.K\n", - " S = self.S\n", - " J = cnn_output_size(W, k=K, s=S)\n", - " return J\n", - "\n", - " def __getitem__(self, idx):\n", - " N = self.N\n", - " n, i, j = np.unravel_index(idx, (N, self.row_num, self.col_num))\n", - " K = self.K\n", - " S = self.S\n", - " image = self.arr[n]\n", - " patch = crop_chw(image, i, j, K, S)\n", - "\n", - " if self.tfs:\n", - " patch = self.tfs(patch)\n", - "\n", - " return patch, n, i, j\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fd781c21", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "ShardDataset class\n", - "\"\"\"\n", - "class MVTecShardDataset(Dataset):\n", - " \n", - " def __init__(self, dataset):\n", - " self._dataset = dataset\n", - " \n", - " def __getitem__(self, index):\n", - " img, mask, label = self._dataset[index]\n", - " return img, mask, label\n", - " \n", - " def __len__(self):\n", - " return len(self._dataset)\n", - " \n", - "class MVTecSD(DataInterface):\n", - "\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or 
sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " \n", - " self.train_set = MVTecShardDataset(shard_descriptor.get_dataset('train'))\n", - "\n", - " self.test_set = MVTecShardDataset(shard_descriptor.get_dataset('test')) \n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " train_x = np.stack([image for image, mask, label in self.train_set]).astype(np.float32)\n", - " mean = train_x.astype(np.float32).mean(axis=0)\n", - " train_x = (train_x.astype(np.float32) - mean) / 255\n", - " train_x = np.transpose(train_x, [0, 3, 1, 2])\n", - " \n", - " if self.kwargs['train_bs']:\n", - " batch_size = self.kwargs['train_bs']\n", - " else:\n", - " batch_size = 64\n", - " \n", - " loader = DataLoader(self.get_train_dataset_dict(train_x), batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)\n", - " return loader\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " # We need both train and test data for obtaining embeddings\n", - " train_x = np.stack([image for image, mask, label in self.train_set]).astype(np.float32)\n", - " mean = train_x.astype(np.float32).mean(axis=0)\n", - " train_x = (train_x.astype(np.float32) - mean) / 255\n", - " train_x = np.transpose(train_x, [0, 3, 1, 2])\n", - " \n", - " #getting val loader\n", - " test_x = np.stack([image for image, mask, label in self.test_set]).astype(np.float32)\n", - " mean = test_x.astype(np.float32).mean(axis=0)\n", - " test_x = (test_x.astype(np.float32) - mean) / 255\n", - " test_x = np.transpose(test_x, [0, 3, 1, 2])\n", - " \n", - " masks = np.stack([mask for image, mask, label in self.test_set]).astype(np.int32)\n", - " masks[masks <= 128] = 0\n", - " masks[masks > 128] = 255\n", - " labels = np.stack([label for image, mask, label in self.test_set]).astype(np.int32)\n", - "\n", - " return (train_x, test_x, masks, labels, mean)\n", - "\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - " \n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.test_set)\n", - " \n", - " def get_train_dataset_dict(self,inp_x):\n", - " rep = 100\n", - " datasets = dict()\n", - " datasets[f'pos_64'] = PositionDataset(inp_x, K=64, repeat=rep)\n", - " datasets[f'pos_32'] = PositionDataset(inp_x, K=32, repeat=rep)\n", - "\n", - " datasets[f'svdd_64'] = SVDD_Dataset(inp_x, K=64, repeat=rep)\n", - " datasets[f'svdd_32'] = SVDD_Dataset(inp_x, K=32, repeat=rep)\n", - " dataset = DictionaryConcatDataset(datasets)\n", - " return dataset\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8df35f5", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MVTecSD(train_bs=64, val_bs=64)" - ] - }, - { - "cell_type": "markdown", - "id": "8f786b8b", - "metadata": {}, - "source": [ - "### Describe a model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "visible-victor", - "metadata": {}, - "outputs": [], - "source": [ - "import torch.optim as optim\n", - "import torch.nn as nn\n", - "import torch\n", - 
"import torch.nn.functional as F\n", - "import math\n", - "from utils import makedirpath" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "138e1493", - "metadata": {}, - "outputs": [], - "source": [ - "class Encoder(nn.Module):\n", - " def __init__(self, K, D=64, bias=True):\n", - " super().__init__()\n", - "\n", - " self.conv1 = nn.Conv2d(3, 64, 5, 2, 0, bias=bias)\n", - " self.conv2 = nn.Conv2d(64, 64, 5, 2, 0, bias=bias)\n", - " self.conv3 = nn.Conv2d(64, 128, 5, 2, 0, bias=bias)\n", - " self.conv4 = nn.Conv2d(128, D, 5, 1, 0, bias=bias)\n", - "\n", - " self.K = K\n", - " self.D = D\n", - " self.bias = bias\n", - "\n", - " def forward(self, x):\n", - " h = self.conv1(x)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv2(h)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv3(h)\n", - "\n", - " if self.K == 64:\n", - " h = F.leaky_relu(h, 0.1)\n", - " h = self.conv4(h)\n", - "\n", - " h = torch.tanh(h)\n", - "\n", - " return h\n", - "\n", - "def forward_hier(x, emb_small, K):\n", - " K_2 = K // 2\n", - " n = x.size(0)\n", - " x1 = x[..., :K_2, :K_2]\n", - " x2 = x[..., :K_2, K_2:]\n", - " x3 = x[..., K_2:, :K_2]\n", - " x4 = x[..., K_2:, K_2:]\n", - " xx = torch.cat([x1, x2, x3, x4], dim=0)\n", - " hh = emb_small(xx)\n", - "\n", - " h1 = hh[:n]\n", - " h2 = hh[n: 2 * n]\n", - " h3 = hh[2 * n: 3 * n]\n", - " h4 = hh[3 * n:]\n", - "\n", - " h12 = torch.cat([h1, h2], dim=3)\n", - " h34 = torch.cat([h3, h4], dim=3)\n", - " h = torch.cat([h12, h34], dim=2)\n", - " return h\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "113f5a73", - "metadata": {}, - "outputs": [], - "source": [ - "class EncoderDeep(nn.Module):\n", - " def __init__(self, K, D=64, bias=True):\n", - " super().__init__()\n", - "\n", - " self.conv1 = nn.Conv2d(3, 32, 3, 2, 0, bias=bias)\n", - " self.conv2 = nn.Conv2d(32, 64, 3, 1, 0, bias=bias)\n", - " self.conv3 = nn.Conv2d(64, 128, 3, 1, 0, bias=bias)\n", - " self.conv4 = nn.Conv2d(128, 128, 3, 1, 0, bias=bias)\n", - " self.conv5 = nn.Conv2d(128, 64, 3, 1, 0, bias=bias)\n", - " self.conv6 = nn.Conv2d(64, 32, 3, 1, 0, bias=bias)\n", - " self.conv7 = nn.Conv2d(32, 32, 3, 1, 0, bias=bias)\n", - " self.conv8 = nn.Conv2d(32, D, 3, 1, 0, bias=bias)\n", - "\n", - " self.K = K\n", - " self.D = D\n", - "\n", - " def forward(self, x):\n", - " h = self.conv1(x)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv2(h)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv3(h)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv4(h)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv5(h)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv6(h)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv7(h)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv8(h)\n", - " h = torch.tanh(h)\n", - "\n", - " return h\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "70ccf01d", - "metadata": {}, - "outputs": [], - "source": [ - "class EncoderHier(nn.Module):\n", - " def __init__(self, K, D=64, bias=True):\n", - " super().__init__()\n", - "\n", - " if K > 64:\n", - " self.enc = EncoderHier(K // 2, D, bias=bias)\n", - "\n", - " elif K == 64:\n", - " self.enc = EncoderDeep(K // 2, D, bias=bias)\n", - "\n", - " else:\n", - " raise ValueError()\n", - "\n", - " self.conv1 = nn.Conv2d(D, 128, 2, 1, 0, bias=bias)\n", - " self.conv2 = nn.Conv2d(128, D, 1, 1, 0, bias=bias)\n", - "\n", - " self.K = K\n", - " self.D = D\n", - "\n", - " def 
forward(self, x):\n", - " h = forward_hier(x, self.enc, K=self.K)\n", - "\n", - " h = self.conv1(h)\n", - " h = F.leaky_relu(h, 0.1)\n", - "\n", - " h = self.conv2(h)\n", - " h = torch.tanh(h)\n", - "\n", - " return h\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6e7688de", - "metadata": {}, - "outputs": [], - "source": [ - "xent = nn.CrossEntropyLoss()\n", - "\n", - "class NormalizedLinear(nn.Module):\n", - " __constants__ = ['bias', 'in_features', 'out_features']\n", - "\n", - " def __init__(self, in_features, out_features, bias=True):\n", - " super(NormalizedLinear, self).__init__()\n", - " self.in_features = in_features\n", - " self.out_features = out_features\n", - " self.weight = nn.Parameter(torch.Tensor(out_features, in_features))\n", - " if bias:\n", - " self.bias = nn.Parameter(torch.Tensor(out_features))\n", - " else:\n", - " self.register_parameter('bias', None)\n", - " self.reset_parameters()\n", - "\n", - " def reset_parameters(self):\n", - " nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))\n", - " if self.bias is not None:\n", - " fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)\n", - " bound = 1 / math.sqrt(fan_in)\n", - " nn.init.uniform_(self.bias, -bound, bound)\n", - "\n", - " def forward(self, x):\n", - " with torch.no_grad():\n", - " w = self.weight / self.weight.data.norm(keepdim=True, dim=0)\n", - " return F.linear(x, w, self.bias)\n", - "\n", - " def extra_repr(self):\n", - " return 'in_features={}, out_features={}, bias={}'.format(\n", - " self.in_features, self.out_features, self.bias is not None\n", - " )\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "foreign-gospel", - "metadata": {}, - "outputs": [], - "source": [ - "class PositionClassifier(nn.Module):\n", - " def __init__(self, K, D, class_num=8):\n", - " super().__init__()\n", - " self.D = D\n", - "\n", - " self.fc1 = nn.Linear(D, 128)\n", - " self.act1 = nn.LeakyReLU(0.1)\n", - "\n", - " self.fc2 = nn.Linear(128, 128)\n", - " self.act2 = nn.LeakyReLU(0.1)\n", - "\n", - " self.fc3 = NormalizedLinear(128, class_num)\n", - " self.fc3.requires_grad_(False)\n", - "\n", - " self.K = K\n", - "\n", - " def forward(self, h1, h2):\n", - " h1 = h1.view(-1, self.D)\n", - " h2 = h2.view(-1, self.D)\n", - "\n", - " h = h1 - h2\n", - "\n", - " h = self.fc1(h)\n", - " h = self.act1(h)\n", - "\n", - " h = self.fc2(h)\n", - " h = self.act2(h)\n", - "\n", - " h = self.fc3(h)\n", - " return h\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db853fe0", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Model definition (ensembled)\n", - "\"\"\"\n", - "class MyEnsembledModel(nn.Module):\n", - " def __init__(self, enc, cls_64, cls_32):\n", - " super().__init__()\n", - " self._enc = enc\n", - " self._cls_64 = cls_64\n", - " self._cls_32 = cls_32\n", - " \n", - " def forward(self):\n", - " pass\n", - "\n", - "\n", - "enc = EncoderHier(64, args['D'])\n", - "cls_64 = PositionClassifier(64, args['D'])\n", - "cls_32 = PositionClassifier(32, args['D'])\n", - "\n", - "model = MyEnsembledModel(enc, cls_64, cls_32)\n", - "\n", - "params_to_update = []\n", - "for p in model.parameters():\n", - " if p.requires_grad:\n", - " params_to_update.append(p)\n", - "optimizer_adam = torch.optim.Adam(params=params_to_update , lr=float(args['lr']))\n" - ] - }, - { - "cell_type": "markdown", - "id": "caroline-passion", - "metadata": {}, - "source": [ - "#### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"id": "handled-teens", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model)" - ] - }, - { - "cell_type": "markdown", - "id": "portuguese-groove", - "metadata": {}, - "source": [ - "### Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "increasing-builder", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "import torch\n", - "import tqdm\n", - "from utils import cnn_output_size\n", - "from inspection import eval_embeddings_nn_multik\n", - "\n", - "# The Interactive API supports registering functions definied in main module or imported.\n", - "def function_defined_in_notebook(some_parameter):\n", - " print(f'Also I accept a parameter and it is {some_parameter}')\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@TI.add_kwargs(**{'some_parameter': 42})\n", - "@TI.register_fl_task(model='model', data_loader='train_loader', \\\n", - " device='device', optimizer='optimizer') \n", - "\n", - "def train(model, train_loader, optimizer, device, some_parameter=None):\n", - " print(f'\\n\\n TASK TRAIN GOT DEVICE {device}\\n\\n')\n", - " \n", - " function_defined_in_notebook(some_parameter)\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - "\n", - " model.train()\n", - " model.to(device)\n", - " losses = []\n", - "\n", - " for d in train_loader:\n", - " d = to_device(d, device, non_blocking=True)\n", - " optimizer.zero_grad()\n", - " loss_pos_64 = PositionClassifier_infer(model._cls_64, model._enc, d['pos_64'])\n", - " loss_pos_32 = PositionClassifier_infer(model._cls_32, model._enc.enc, d['pos_32'])\n", - " loss_svdd_64 = SVDD_Dataset_infer(model._enc, d['svdd_64'])\n", - " loss_svdd_32 = SVDD_Dataset_infer(model._enc.enc, d['svdd_32'])\n", - "\n", - " loss = loss_pos_64 + loss_pos_32 + float(args['lambda_value']) * (loss_svdd_64 + loss_svdd_32)\n", - " loss.backward()\n", - " optimizer.step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - " return {'train_loss': np.mean(losses),}\n", - " \n", - "\n", - "@TI.register_fl_task(model='model', data_loader='val_loader', device='device') \n", - "def validate(model, val_loader, device):\n", - " print(f'\\n\\n TASK VALIDATE GOT DEVICE {device}\\n\\n')\n", - " \n", - " model._enc.eval()\n", - " model._enc.to(device)\n", - " \n", - " x_tr, x_te, masks, labels, mean = val_loader\n", - "\n", - " embs64_tr = infer_(x_tr, model._enc, K=64, S=16, device=device)\n", - " embs64_te = infer_(x_te, model._enc, K=64, S=16, device=device)\n", - " embs32_tr = infer_(x_tr, model._enc.enc, K=32, S=4, device=device)\n", - " embs32_te = infer_(x_te, model._enc.enc, K=32, S=4, device=device)\n", - "\n", - " embs64 = embs64_tr, embs64_te\n", - " embs32 = embs32_tr, embs32_te\n", - "\n", - " results = eval_embeddings_nn_multik(args['obj'], embs64, embs32, masks, labels)\n", - " \n", - " maps = results['maps_mult']\n", - " obj = args['obj']\n", - "\n", - " print(\"| K64 | Det: {:.3f} Seg:{:.3f} BA: {:.3f}\".format(results['det_64'],results['seg_64'],results['bal_acc_64']))\n", - " print(\"| K32 | Det: {:.3f} Seg:{:.3f} BA: {:.3f}\".format(results['det_32'],results['seg_32'],results['bal_acc_32']))\n", - " print(\"| 
sum | Det: {:.3f} Seg:{:.3f} BA: {:.3f}\".format(results['det_sum'],results['seg_sum'],results['bal_acc_sum']))\n", - " print(\"| mult | Det: {:.3f} Seg:{:.3f} BA: {:.3f}\".format(results['det_mult'],results['seg_mult'],results['bal_acc_mult']))\n", - "\n", - " return {'detection_score_sum': results['det_sum'], 'segmentation_score_sum': results['seg_sum'], 'balanced_accuracy_score_sum': results['bal_acc_sum'], 'detection_score_mult': results['det_mult'], 'segmentation_score_mult': results['seg_mult'], 'balanced_accuracy_score_mult': results['bal_acc_mult']}\n", - "\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "17d53d96", - "metadata": {}, - "outputs": [], - "source": [ - "# Infer functions\n", - "def PositionClassifier_infer(c, enc, batch):\n", - " x1s, x2s, ys = batch\n", - " h1 = enc(x1s)\n", - " h2 = enc(x2s)\n", - " logits = c(h1, h2)\n", - " loss = xent(logits, ys)\n", - " return loss\n", - "\n", - "def SVDD_Dataset_infer(enc, batch):\n", - " x1s, x2s, = batch\n", - " h1s = enc(x1s)\n", - " h2s = enc(x2s)\n", - " diff = h1s - h2s\n", - " l2 = diff.norm(dim=1)\n", - " loss = l2.mean()\n", - " return loss\n", - "\n", - "def infer_(x, enc, K, S, device):\n", - " dataset = PatchDataset_NCHW(x, K=K, S=S)\n", - " loader = DataLoader(dataset, batch_size=64, shuffle=False, pin_memory=True)\n", - " embs = np.empty((dataset.N, dataset.row_num, dataset.col_num, args['D']), dtype=np.float32) # [-1, I, J, D]\n", - " enc = enc.eval()\n", - " with torch.no_grad():\n", - " for xs, ns, iis, js in loader:\n", - " xs = xs.to(device)\n", - " embedding = enc(xs)\n", - " embedding = embedding.detach().cpu().numpy()\n", - "\n", - " for embed, n, i, j in zip(embedding, ns, iis, js):\n", - " embs[n, i, j] = np.squeeze(embed)\n", - " return embs" - ] - }, - { - "cell_type": "markdown", - "id": "derived-bride", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "mature-renewal", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = 'MVTec_test_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "lightweight-causing", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# If I use autoreload I got a pickling error\n", - "\n", - "# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n", - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=10,\n", - " opt_treatment='CONTINUE_GLOBAL',\n", - " device_assignment_policy='CUDA_PREFERRED')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f1543a36", - "metadata": {}, - "outputs": [], - "source": [ - "# If user wants to stop IPython session, then reconnect and check how experiment is going \n", - "# fl_experiment.restore_experiment_state(MI)\n", - "\n", - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "markdown", - "id": "8c30b301", - "metadata": {}, - "source": [ - "## Now we validate the best model and print anomaly maps!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12186086", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -r ../envoy/sd_requirements.txt\n", - "import sys\n", - "sys.path.insert(1, '../envoy')\n", - "from mvtec_shard_descriptor import MVTecShardDescriptor\n", - "from inspection import measure_emb_nn, eval_embeddings_nn_maps\n", - "import matplotlib.pyplot as plt\n", - "from PIL import Image\n", - "from skimage.segmentation import mark_boundaries\n", - "from utils import makedirpath, distribute_scores\n", - "import pickle" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "439049e1", - "metadata": {}, - "outputs": [], - "source": [ - "def obtain_maps(model, val_loader, device):\n", - " print(f'\\n\\n OBTAIN MAPS GOT DEVICE {device}\\n\\n')\n", - " \n", - " model._enc.eval()\n", - " model._enc.to(device)\n", - " \n", - " x_tr, x_te, masks, labels, mean = val_loader\n", - "\n", - " embs64_tr = infer_(x_tr, model._enc, K=64, S=16, device=device)\n", - " embs64_te = infer_(x_te, model._enc, K=64, S=16, device=device)\n", - " embs32_tr = infer_(x_tr, model._enc.enc, K=32, S=4, device=device)\n", - " embs32_te = infer_(x_te, model._enc.enc, K=32, S=4, device=device)\n", - "\n", - " embs64 = embs64_tr, embs64_te\n", - " embs32 = embs32_tr, embs32_te\n", - "\n", - " maps = eval_embeddings_nn_maps(args['obj'], embs64, embs32, masks, labels) \n", - " print_anomaly_maps(args['obj'], maps, x_te, masks, mean)\n", - " \n", - "\n", - "def infer_(x, enc, K, S, device):\n", - " dataset = PatchDataset_NCHW(x, K=K, S=S)\n", - " loader = DataLoader(dataset, batch_size=64, shuffle=False, pin_memory=True)\n", - " embs = np.empty((dataset.N, dataset.row_num, dataset.col_num, args['D']), dtype=np.float32) # [-1, I, J, D]\n", - " enc = enc.eval()\n", - " with torch.no_grad():\n", - " for xs, ns, iis, js in loader:\n", - " xs = xs.to(device)\n", - " embedding = enc(xs)\n", - " embedding = embedding.detach().cpu().numpy()\n", - "\n", - " for embed, n, i, j in zip(embedding, ns, iis, js):\n", - " embs[n, i, j] = np.squeeze(embed)\n", - " return embs\n", - "\n", - "def print_anomaly_maps(obj, maps, images, masks, mean):\n", - " \"\"\"Print generated anomaly maps.\"\"\"\n", - " mshape = maps.shape[0]\n", - " images = np.transpose(images, [0, 3, 2, 1])\n", - " images = (images.astype(np.float32) * 255 + mean)\n", - "\n", - " for n in range(10):\n", - " fig, axes = plt.subplots(ncols=2)\n", - " fig.set_size_inches(6, 3)\n", - "\n", - " shape = (128, 128)\n", - " image = np.array(Image.fromarray((images[n] * 255).astype(np.uint8)).resize(shape[::-1]))\n", - " mask = np.array(Image.fromarray(masks[n]).resize(shape[::-1]))\n", - " image = mark_boundaries(image, mask, color=(1, 0, 0), mode='thick')\n", - "\n", - " axes[0].imshow(image)\n", - " axes[0].set_axis_off()\n", - "\n", - " axes[1].imshow(maps[n], vmax=maps[n].max(), cmap='Reds')\n", - " axes[1].set_axis_off()\n", - "\n", - " plt.tight_layout()\n", - " plt.show()\n", - " plt.close()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "52ba543f", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MVTecSD(train_bs=64, val_bs=64)\n", - "fed_dataset.shard_descriptor = MVTecShardDescriptor(obj=args['obj'], data_folder='MVTec_data',rank_worldsize='1,1')\n", - "\n", - "last_model = fl_experiment.get_last_model()\n", - "obtain_maps(last_model, fed_dataset.get_valid_loader(), 'cuda')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - 
"language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/data_transf.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/data_transf.py deleted file mode 100644 index 878dcb336a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/data_transf.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Data transform functions.""" - -import numpy as np -from sklearn.metrics import balanced_accuracy_score -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import roc_auc_score - - -def bilinears(images, shape) -> np.ndarray: - """Generate binlinears.""" - import cv2 - n = images.shape[0] - new_shape = (n,) + shape - ret = np.zeros(new_shape, dtype=images.dtype) - for i in range(n): - ret[i] = cv2.resize(images[i], dsize=shape[::-1], interpolation=cv2.INTER_LINEAR) - return ret - - -def bal_acc_score(obj, predictions, labels): - """Calculate balanced accuracy score.""" - precision, recall, thresholds = precision_recall_curve(labels.flatten(), predictions.flatten()) - f1_score = (2 * precision * recall) / (precision + recall) - threshold = thresholds[np.argmax(f1_score)] - prediction_result = predictions > threshold - ba_score = balanced_accuracy_score(labels, prediction_result) - return ba_score - - -def detection_auroc(obj, anomaly_scores, labels): - """Calculate detection auroc.""" - # 1: anomaly 0: normal - auroc = roc_auc_score(labels, anomaly_scores) - return auroc - - -def segmentation_auroc(obj, anomaly_maps, masks): - """Calculate segmentation auroc.""" - gt = masks - gt = gt.astype(np.int32) - gt[gt == 255] = 1 # 1: anomaly - - anomaly_maps = bilinears(anomaly_maps, (256, 256)) - auroc = roc_auc_score(gt.flatten(), anomaly_maps.flatten()) - return auroc diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/inspection.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/inspection.py deleted file mode 100644 index c8151596aa..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/inspection.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Inspection of images and patches.""" - -import os -import shutil - -import ngtpy -import numpy as np -from data_transf import bal_acc_score -from data_transf import detection_auroc -from data_transf import segmentation_auroc -from sklearn.neighbors import KDTree -from utils import distribute_scores - - -def search_nn(test_emb, train_emb_flat, nn=1, method='kdt'): - """Seach nearest neighbors.""" - if method == 'ngt': - return search_nn_ngt(test_emb, train_emb_flat, nn=nn) - - kdt = KDTree(train_emb_flat) - - ntest, i, j, d = test_emb.shape - closest_inds = np.empty((ntest, i, j, nn), dtype=np.int32) - l2_maps = np.empty((ntest, i, j, nn), dtype=np.float32) - - for n_ in range(ntest): - for i_ in range(i): - dists, inds = kdt.query(test_emb[n_, i_, :, :], return_distance=True, k=nn) - closest_inds[n_, i_, :, :] = inds[:, :] - l2_maps[n_, 
i_, :, :] = dists[:, :] - - return l2_maps, closest_inds - - -def search_nn_ngt(test_emb, train_emb_flat, nn=1): - """Search nearest neighbors.""" - ntest, i, j, d = test_emb.shape - closest_inds = np.empty((ntest, i, j, nn), dtype=np.int32) - l2_maps = np.empty((ntest, i, j, nn), dtype=np.float32) - - dpath = f'/tmp/{os.getpid()}' - ngtpy.create(dpath, d) - index = ngtpy.Index(dpath) - index.batch_insert(train_emb_flat) - - for n_ in range(ntest): - for i_ in range(i): - for j_ in range(j): - query = test_emb[n_, i_, j_, :] - results = index.search(query, nn) - inds = [result[0] for result in results] - - closest_inds[n_, i_, j_, :] = inds - vecs = np.asarray([index.get_object(inds[nn_]) for nn_ in range(nn)]) - dists = np.linalg.norm(query - vecs, axis=-1) - l2_maps[n_, i_, j_, :] = dists - shutil.rmtree(dpath) - - return l2_maps, closest_inds - - -def assess_anomaly_maps(obj, anomaly_maps, masks, labels): - """Assess anomaly maps.""" - auroc_seg = segmentation_auroc(obj, anomaly_maps, masks) - - anomaly_scores = anomaly_maps.max(axis=-1).max(axis=-1) - auroc_det = detection_auroc(obj, anomaly_scores, labels) - ba_score = bal_acc_score(obj, anomaly_scores, labels) - return auroc_det, auroc_seg, ba_score - - -def eval_embeddings_nn_multik(obj, embs64, embs32, masks, labels, nn=1): - """Evaluate embeddings.""" - emb_tr, emb_te = embs64 - maps_64 = measure_emb_nn(emb_te, emb_tr, method='kdt', nn=nn) - maps_64 = distribute_scores(maps_64, (256, 256), k=64, s=16) - det_64, seg_64, ba_64 = assess_anomaly_maps(obj, maps_64, masks, labels) - - emb_tr, emb_te = embs32 - maps_32 = measure_emb_nn(emb_te, emb_tr, method='ngt', nn=nn) - maps_32 = distribute_scores(maps_32, (256, 256), k=32, s=4) - det_32, seg_32, ba_32 = assess_anomaly_maps(obj, maps_32, masks, labels) - - maps_sum = maps_64 + maps_32 - det_sum, seg_sum, ba_sum = assess_anomaly_maps(obj, maps_sum, masks, labels) - - maps_mult = maps_64 * maps_32 - det_mult, seg_mult, ba_mult = assess_anomaly_maps(obj, maps_mult, masks, labels) - - return { - 'det_64': det_64, - 'seg_64': seg_64, - 'bal_acc_64': ba_64, - - 'det_32': det_32, - 'seg_32': seg_32, - 'bal_acc_32': ba_32, - - 'det_sum': det_sum, - 'seg_sum': seg_sum, - 'bal_acc_sum': ba_sum, - - 'det_mult': det_mult, - 'seg_mult': seg_mult, - 'bal_acc_mult': ba_mult, - - 'maps_64': maps_64, - 'maps_32': maps_32, - 'maps_sum': maps_sum, - 'maps_mult': maps_mult, - } - - -def eval_embeddings_nn_maps(obj, embs64, embs32, masks, labels, nn=1): - """Evaluate embeddings.""" - emb_tr, emb_te = embs64 - maps_64 = measure_emb_nn(emb_te, emb_tr, method='kdt', nn=nn) - maps_64 = distribute_scores(maps_64, (256, 256), k=64, s=16) - - emb_tr, emb_te = embs32 - maps_32 = measure_emb_nn(emb_te, emb_tr, method='ngt', nn=nn) - maps_32 = distribute_scores(maps_32, (256, 256), k=32, s=4) - - maps_mult = maps_64 * maps_32 - return maps_mult - - -def measure_emb_nn(emb_te, emb_tr, method='kdt', nn=1): - """Measure embeddings.""" - d = emb_tr.shape[-1] - train_emb_all = emb_tr.reshape(-1, d) - - l2_maps, _ = search_nn(emb_te, train_emb_all, method=method, nn=nn) - anomaly_maps = np.mean(l2_maps, axis=-1) - - return anomaly_maps diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/utils.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/utils.py deleted file mode 100644 index bd8569c8cc..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MVTec_PatchSVDD/workspace/utils.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (C) 
2021-2022 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""Utilities."""
-
-import os
-from contextlib import contextmanager
-
-import _pickle as p
-import numpy as np
-import torch
-from torch.utils.data import Dataset
-
-
-def to_device(obj, device, non_blocking=False):
-    """Copy a tensor, or a container of tensors, to the given device."""
-    if isinstance(obj, torch.Tensor):
-        return obj.to(device, non_blocking=non_blocking)
-
-    if isinstance(obj, dict):
-        return {k: to_device(v, device, non_blocking=non_blocking)
-                for k, v in obj.items()}
-
-    if isinstance(obj, list):
-        return [to_device(v, device, non_blocking=non_blocking)
-                for v in obj]
-
-    if isinstance(obj, tuple):
-        return tuple([to_device(v, device, non_blocking=non_blocking)
-                      for v in obj])
-
-
-@contextmanager
-def task(_):
-    """No-op context manager, used only to label blocks of code."""
-    yield
-
-
-class DictionaryConcatDataset(Dataset):
-    """Concatenate a dictionary of datasets."""
-
-    def __init__(self, d_of_datasets):
-        """Initialize."""
-        self.d_of_datasets = d_of_datasets
-        lengths = [len(d) for d in d_of_datasets.values()]
-        self._length = min(lengths)
-        self.keys = self.d_of_datasets.keys()
-        assert min(lengths) == max(lengths), 'Length of the datasets should be the same'
-
-    def __getitem__(self, idx):
-        """Get a dict with one item from each dataset."""
-        return {
-            key: self.d_of_datasets[key][idx]
-            for key in self.keys
-        }
-
-    def __len__(self):
-        """Get length."""
-        return self._length
-
-
-def crop_chw(image, i, j, k, s=1):
-    """Crop a k-by-k patch from a CHW image at grid position (i, j) with stride s."""
-    if s == 1:
-        h, w = i, j
-    else:
-        h = s * i
-        w = s * j
-    return image[:, h: h + k, w: w + k]
-
-
-def cnn_output_size(h, k, s=1, p=0) -> int:
-    """Output size of a convolution.
-
-    :param int h: input size
-    :param int k: filter size
-    :param int s: stride
-    :param int p: padding
-    :return: output size
-
-    """
-    return 1 + (h - k + 2 * p) // s
-
-
-def crop_image_chw(image, coord, k):
-    """Crop a k-by-k patch from a CHW image at pixel coordinate (h, w)."""
-    h, w = coord
-    return image[:, h: h + k, w: w + k]
-
-
-def load_binary(fpath, encoding='ASCII'):
-    """Load a pickled binary file."""
-    with open(fpath, 'rb') as f:
-        return p.load(f, encoding=encoding)
-
-
-def save_binary(d, fpath):
-    """Save an object to a pickled binary file."""
-    with open(fpath, 'wb') as f:
-        p.dump(d, f)
-
-
-def makedirpath(fpath: str):
-    """Create the parent directory of a file path if needed."""
-    dpath = os.path.dirname(fpath)
-    if dpath:
-        os.makedirs(dpath, exist_ok=True)
-
-
-def distribute_scores(score_masks, output_shape, k: int, s: int) -> np.ndarray:
-    """Distribute patch scores over full-size score masks for a batch."""
-    n_all = score_masks.shape[0]
-    results = [distribute_score(score_masks[n], output_shape, k, s) for n in range(n_all)]
-    return np.asarray(results)
-
-
-def distribute_score(score_mask, output_shape, k: int, s: int) -> np.ndarray:
-    """Spread each patch score over the k-by-k pixels it covers and average by coverage count."""
-    h, w = output_shape
-    mask = np.zeros([h, w], dtype=np.float32)
-    cnt = np.zeros([h, w], dtype=np.int32)
-
-    i, j = score_mask.shape[:2]
-    for i_ in range(i):
-        for j_ in range(j):
-            h_, w_ = i_ * s, j_ * s
-
-            mask[h_: h_ + k, w_: w_ + k] += score_mask[i_, j_]
-            cnt[h_: h_ + k, w_: w_ + k] += 1
-
-    cnt[cnt == 0] = 1
-
-    return mask / cnt
diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/README.md deleted file mode 100644 index 247a97910f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/README.md +++ /dev/null @@ -1,66 +0,0 @@
-# PyTorch_Market_Re-ID
-
-## **How to run this tutorial (without TLS and locally as a simulation):**
-
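Note: the steps below assume you are working from a local clone of the OpenFL repository (step 2 changes into a directory inside it). A minimal sketch, assuming the current upstream repository URL; use your fork or existing checkout if applicable:

```sh
# Clone OpenFL and enter the repository root (URL assumed)
git clone https://github.com/securefederatedai/openfl.git
cd openfl
```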
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
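For step 0, a minimal sketch of the commands, assuming Python 3 with the `venv` module and the `openfl` package on PyPI (the linked instructions are authoritative):

```sh
# Create and activate a virtual environment named "venv" (the name step 2 expects)
python3 -m venv venv
source venv/bin/activate

# Upgrade pip and install OpenFL
python -m pip install --upgrade pip
pip install openfl
```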
- -### 1. Split your terminal into three (one terminal for the director, one for the envoy, and one for the experiment) - -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure the proxy environment variables are set in each of your terminals (see the example after this list). - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_Market_Re-ID - ``` - -
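The proxy example referenced above, as a hedged sketch; the host, port, and exclusion list are placeholders for your environment:

```sh
# Placeholder proxy settings; replace with your site's actual values
export http_proxy=http://proxy.example.com:3128
export https_proxy=http://proxy.example.com:3128
export no_proxy=localhost,127.0.0.1
```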
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r requirements.txt -./start_envoy.sh env_one envoy_config_one.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config_two.yaml -``` - -
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab PyTorch_Market_Re-ID.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the PyTorch_Market_Re-ID.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/director/director_config.yaml deleted file mode 100644 index c3aa1d5851..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/director/director_config.yaml +++ /dev/null @@ -1,4 +0,0 @@ -settings: - listen_ip: localhost - sample_shape: ['64', '128', '3'] - target_shape: ['2'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/director/start_director_with_tls.sh deleted file mode 100755 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/envoy_config_one.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/envoy_config_one.yaml deleted file mode 100644 index 04afb456bc..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/envoy_config_one.yaml +++ /dev/null @@ -1,10 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: market_shard_descriptor.MarketShardDescriptor - params: - data_folder_name: Market-1501-v15.09.15 - rank_worldsize: 1,2 \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/envoy_config_two.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/envoy_config_two.yaml deleted file mode 100644 index 2da33042f4..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/envoy_config_two.yaml +++ /dev/null @@ -1,10 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: market_shard_descriptor.MarketShardDescriptor - params: - data_folder_name: Market-1501-v15.09.15 - rank_worldsize: 2,2 \ No newline at end of file diff --git 
a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/market_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/market_shard_descriptor.py deleted file mode 100644 index 463aa500d2..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/market_shard_descriptor.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Market shard descriptor.""" - -import logging -import re -import zipfile -from pathlib import Path -from typing import List - -import gdown -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class MarketShardDataset(ShardDataset): - """Market shard dataset.""" - - def __init__(self, dataset_dir: Path, dataset_type: str, rank=1, worldsize=1): - """Initialize MarketShardDataset.""" - self.dataset_dir = dataset_dir - self.dataset_type = dataset_type - self.rank = rank - self.worldsize = worldsize - - self.imgs_path = list(dataset_dir.glob('*.jpg'))[self.rank - 1::self.worldsize] - self.pattern = re.compile(r'([-\d]+)_c(\d)') - - def __len__(self): - """Length of shard.""" - return len(self.imgs_path) - - def __getitem__(self, index: int): - """Return an item by the index.""" - img_path = self.imgs_path[index] - pid, camid = map(int, self.pattern.search(img_path.name).groups()) - - img = Image.open(img_path) - return img, (pid, camid) - - -class MarketShardDescriptor(ShardDescriptor): - """ - Market1501 Shard descriptor class. - - Reference: - Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015. - URL: http://www.liangzheng.org/Project/project_reid.html - - Dataset statistics: - identities: 1501 (+1 for background) - images: 12936 (train) + 3368 (query) + 15913 (gallery) - """ - - def __init__(self, data_folder_name: str = 'Market-1501-v15.09.15', - rank_worldsize: str = '1,1') -> None: - """Initialize MarketShardDescriptor.""" - super().__init__() - - # Settings for sharding the dataset - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.data_folder_name = data_folder_name - self.dataset_dir = Path.cwd() / data_folder_name - self.download() - - self.path_by_type = { - 'train': self.dataset_dir / 'bounding_box_train', - 'query': self.dataset_dir / 'query', - 'gallery': self.dataset_dir / 'bounding_box_test' - } - self._check_before_run() - - def get_shard_dataset_types(self) -> List[str]: - """Get available shard dataset types.""" - return list(self.path_by_type) - - def get_dataset(self, dataset_type='train'): - """Return a dataset by type.""" - if dataset_type not in self.path_by_type: - raise Exception(f'Wrong dataset type: {dataset_type}.' 
- f'Choose from the list: {", ".join(self.path_by_type)}') - return MarketShardDataset( - dataset_dir=self.path_by_type[dataset_type], - dataset_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self): - """Return the sample shape info.""" - return ['64', '128', '3'] - - @property - def target_shape(self): - """Return the target shape info.""" - return ['2'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Market dataset, shard number {self.rank} ' - f'out of {self.worldsize}') - - def download(self): - """Download Market1501 dataset.""" - logger.info('Download Market1501 dataset.') - if self.dataset_dir.exists(): - return None - - logger.info('Try to download.') - output = f'{self.data_folder_name}.zip' - - if not Path(output).exists(): - url = 'https://drive.google.com/u/1/uc?id=0B8-rUzbwVRk0c054eEozWG9COHM' - gdown.download(url, output, quiet=False) - logger.info(f'{output} is downloaded.') - - with zipfile.ZipFile(output, 'r') as zip_ref: - zip_ref.extractall(Path.cwd()) - - Path(output).unlink() # remove zip - - def _check_before_run(self): - """Check if all files are available before going deeper.""" - if not self.dataset_dir.exists(): - raise RuntimeError(f'{self.dataset_dir} does not exist') - for dataset_path in self.path_by_type.values(): - if not dataset_path.exists(): - raise RuntimeError(f'{dataset_path} does not exist') diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/requirements.txt deleted file mode 100644 index 1c1bf94e85..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -gdown==3.13.0 -Pillow==10.3.0 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/start_envoy.sh deleted file mode 100755 index ff20adf16b..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls -dh localhost -dp 50051 -ec envoy_config_one.yaml diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/start_envoy_with_tls.sh deleted file mode 100755 index 9e10f43cef..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -DIRECTOR_FQDN=$2 -ENVOY_CONFIG=$3 - -fx envoy start -n "$ENVOY_NAME" -dh "$DIRECTOR_FQDN" -dp 50051 -ec "$ENVOY_CONFIG" -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/PyTorch_Market_Re-ID.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/PyTorch_Market_Re-ID.ipynb deleted file mode 100644 index 0a50d68895..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/PyTorch_Market_Re-ID.ipynb +++ /dev/null @@ -1,593 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "liquid-jacket", - "metadata": {}, - "source": [ - "# Federated Market with Director example" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "id": "db949008", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "id": "16986f22", - "metadata": {}, - "source": [ - "# Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4485ac79", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'frontend'\n", - "director_node_fqdn = 'localhost'\n", - "director_port = 50051\n", - "\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS their must provide CA root chain, and signed key pair to the federation interface\n", - "# cert_chain = 'cert/root_ca.crt'\n", - "# api_certificate = 'cert/frontend.crt'\n", - "# api_private_key = 'cert/frontend.key'\n", - "\n", - "# federation = Federation(\n", - "# client_id=client_id,\n", - "# director_node_fqdn=director_node_fqdn,\n", - "# director_port=director_port,\n", - "# tls=True,\n", - "# cert_chain=cert_chain,\n", - "# api_cert=api_certificate,\n", - "# api_private_key=api_private_key\n", - "# )\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port,\n", - " tls=False\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e35802d5", - "metadata": { - "pycharm": { - "is_executing": true - }, - "scrolled": true - }, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67ae50de", - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b42efc49", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "print(sample.shape)\n", - "print(target.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "obvious-tyler", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "rubber-address", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "sustainable-public", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64f37dcf", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "from torch.utils.data import DataLoader, Dataset\n", - 
"from torchvision.transforms import Compose, Normalize, RandomHorizontalFlip, Resize, ToTensor\n", - "\n", - "from tools import RandomIdentitySampler\n", - "import transforms as T\n", - "\n", - "\n", - "# Now you can implement you data loaders using dummy_shard_desc\n", - "class ImageDataset(Dataset):\n", - " \"\"\"Image Person ReID Dataset.\"\"\"\n", - "\n", - " def __init__(self, dataset, transform=None):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - " self.transform = transform\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " \"\"\"Get item from dataset.\"\"\"\n", - " img, (pid, camid) = self.dataset[index]\n", - " if self.transform is not None:\n", - " img = self.transform(img)\n", - " return img, (pid, camid)\n", - "\n", - "\n", - "class MarketFLDataloader(DataInterface):\n", - " \"\"\"Market Dataset.\"\"\"\n", - "\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - "\n", - " # Prepare transforms\n", - " self.transform_train = Compose([\n", - " T.ResizeRandomCropping(256, 128, p=0.5),\n", - " RandomHorizontalFlip(),\n", - " ToTensor(),\n", - " Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", - " T.RandomErasing(probability=0.5)\n", - " ])\n", - " self.transform_test = Compose([\n", - " Resize((265, 128)),\n", - " ToTensor(),\n", - " Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", - " ])\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - "\n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract.\n", - " \"\"\"\n", - " if self.kwargs['train_bs']:\n", - " batch_size = self.kwargs['train_bs']\n", - " else:\n", - " batch_size = 64\n", - "\n", - " self.train_ds = self.shard_descriptor.get_dataset('train')\n", - " return DataLoader(\n", - " # ImageDataset make transform\n", - " ImageDataset(self.train_ds, transform=self.transform_train),\n", - " sampler=RandomIdentitySampler(self.train_ds, num_instances=4),\n", - " batch_size=batch_size,\n", - " num_workers=4,\n", - " pin_memory=True,\n", - " drop_last=True\n", - " )\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract.\n", - " \"\"\"\n", - " if self.kwargs['valid_bs']:\n", - " batch_size = self.kwargs['valid_bs']\n", - " else:\n", - " batch_size = 512\n", - "\n", - " query_sd = self.shard_descriptor.get_dataset('query')\n", - " query_loader = DataLoader(\n", - " ImageDataset(query_sd, transform=self.transform_test),\n", - " batch_size=batch_size,\n", - " num_workers=4,\n", - " pin_memory=True,\n", - " drop_last=False,\n", - " shuffle=False\n", - " )\n", - "\n", - " self.gallery_sd = self.shard_descriptor.get_dataset('gallery')\n", - " gallery_loader = DataLoader(\n", - " ImageDataset(self.gallery_sd, transform=self.transform_test),\n", - " batch_size=batch_size,\n", - " num_workers=4,\n", - " pin_memory=True,\n", - " 
drop_last=False,\n", - " shuffle=False\n", - " )\n", - "\n", - " return query_loader, gallery_loader\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation.\n", - " \"\"\"\n", - " return len(self.train_ds)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation.\n", - " \"\"\"\n", - " return len(self.gallery_sd)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8cb6c73c", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MarketFLDataloader(train_bs=64, valid_bs=512)" - ] - }, - { - "cell_type": "markdown", - "id": "caring-distinction", - "metadata": {}, - "source": [ - "### Describe a model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "visible-victor", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "import torchvision" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "foreign-gospel", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "ResNet and Classifier definition\n", - "\"\"\"\n", - "\n", - "class ResNet50(nn.Module):\n", - " \"Pretrained ResNet50.\"\n", - "\n", - " def __init__(self, **kwargs):\n", - " super().__init__()\n", - " \n", - " self.classifier = NormalizedClassifier()\n", - "\n", - " resnet50 = torchvision.models.resnet50(pretrained=True)\n", - " resnet50.layer4[0].conv2.stride = (1, 1)\n", - " resnet50.layer4[0].downsample[0].stride = (1, 1)\n", - " self.base = nn.Sequential(*list(resnet50.children())[:-2])\n", - "\n", - " self.bn = nn.BatchNorm1d(2048)\n", - " nn.init.normal_(self.bn.weight.data, 1.0, 0.02)\n", - " nn.init.constant_(self.bn.bias.data, 0.0)\n", - "\n", - " def forward(self, x):\n", - " x = self.base(x)\n", - " x = nn.functional.avg_pool2d(x, x.size()[2:])\n", - " x = x.view(x.size(0), -1)\n", - " f = self.bn(x)\n", - "\n", - " return f\n", - "\n", - "\n", - "class NormalizedClassifier(nn.Module):\n", - " \"\"\"Classifier.\"\"\"\n", - "\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.weight = nn.Parameter(torch.Tensor(1501, 2048))\n", - " self.weight.data.uniform_(-1, 1).renorm_(2,0,1e-5).mul_(1e5)\n", - "\n", - " def forward(self, x):\n", - " w = self.weight\n", - "\n", - " x = nn.functional.normalize(x, p=2, dim=1)\n", - " w = nn.functional.normalize(w, p=2, dim=1)\n", - "\n", - " return nn.functional.linear(x, w)\n", - "\n", - "\n", - "resnet = ResNet50()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "greater-activation", - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [], - "source": [ - "parameters = list(resnet.parameters()) + list(resnet.classifier.parameters())\n", - "optimizer_adam = optim.Adam(parameters, lr=1e-4)" - ] - }, - { - "cell_type": "markdown", - "id": "caroline-passion", - "metadata": {}, - "source": [ - "#### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "handled-teens", - "metadata": {}, - "outputs": [], - "source": [ - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=resnet, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "# Save the initial model state\n", - "initial_model = deepcopy(resnet)" - ] - }, - { - "cell_type": "markdown", - "id": "portuguese-groove", - "metadata": {}, - "source": [ - "### Define and register FL tasks" - ] - }, 
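The next cell registers the training and validation tasks. The names passed to `@TI.register_fl_task(...)` form the task contract: they map the decorated function's parameters onto the objects each collaborator supplies at runtime (its model replica, local data loader, assigned device, and, for training tasks, the optimizer), and the dict the function returns is the set of named metrics reported back for aggregation. A minimal illustrative sketch of that shape (not the tutorial's actual task, which follows below):

```python
TI = TaskInterface()

@TI.register_fl_task(model='model', data_loader='train_loader',
                     device='device', optimizer='optimizer')
def train(model, train_loader, optimizer, device):
    # The collaborator binds its local objects to these parameters;
    # the returned dict of named metrics is what gets aggregated.
    return {'train_loss': 0.0}
```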
- { - "cell_type": "code", - "execution_count": null, - "id": "increasing-builder", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "from logging import getLogger\n", - "\n", - "import torch\n", - "import tqdm\n", - "\n", - "from losses import ArcFaceLoss, TripletLoss\n", - "from tools import AverageMeter, evaluate, extract_feature\n", - "\n", - "logger = getLogger(__name__)\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@TI.register_fl_task(model='model', data_loader='train_loader',\n", - " device='device', optimizer='optimizer')\n", - "def train(model, train_loader, optimizer, device):\n", - " device = torch.device('cuda')\n", - " \n", - " criterion_cla = ArcFaceLoss(scale=16., margin=0.1)\n", - " criterion_pair = TripletLoss(margin=0.3, distance='cosine')\n", - "\n", - " batch_cla_loss = AverageMeter()\n", - " batch_pair_loss = AverageMeter()\n", - " corrects = AverageMeter()\n", - " \n", - " model.train()\n", - " model.to(device)\n", - " model.classifier.train()\n", - " model.classifier.to(device)\n", - " \n", - " logger.info('==> Start training')\n", - " train_loader = tqdm.tqdm(train_loader, desc='train')\n", - "\n", - " for imgs, (pids, _) in train_loader:\n", - " imgs, pids = torch.tensor(imgs).to(device), torch.tensor(pids).to(device)\n", - " # Zero the parameter gradients\n", - " optimizer.zero_grad()\n", - " # Forward\n", - " features = model(imgs)\n", - " outputs = model.classifier(features)\n", - " _, preds = torch.max(outputs.data, 1)\n", - " # Compute loss\n", - " cla_loss = criterion_cla(outputs, pids)\n", - " pair_loss = criterion_pair(features, pids)\n", - " loss = cla_loss + pair_loss\n", - " # Backward + Optimize\n", - " loss.backward()\n", - " optimizer.step()\n", - " # statistics\n", - " corrects.update(torch.sum(preds == pids.data).float() / pids.size(0), pids.size(0))\n", - " batch_cla_loss.update(cla_loss.item(), pids.size(0))\n", - " batch_pair_loss.update(pair_loss.item(), pids.size(0))\n", - "\n", - " return {'ArcFaceLoss': batch_cla_loss.avg,\n", - " 'TripletLoss': batch_pair_loss.avg,\n", - " 'Accuracy': corrects.avg.cpu()}\n", - "\n", - "\n", - "@TI.register_fl_task(model='model', data_loader='val_loader', device='device')\n", - "def validate(model, val_loader, device):\n", - " queryloader, galleryloader = val_loader\n", - " device = torch.device('cuda')\n", - " \n", - " logger.info('==> Start validating')\n", - " model.eval()\n", - " model.to(device)\n", - " \n", - " # Extract features for query set\n", - " qf, q_pids, q_camids = extract_feature(model, queryloader)\n", - " logger.info(f'Extracted features for query set, obtained {qf.shape} matrix')\n", - " # Extract features for gallery set\n", - " gf, g_pids, g_camids = extract_feature(model, galleryloader)\n", - " logger.info(f'Extracted features for gallery set, obtained {gf.shape} matrix')\n", - " # Compute distance matrix between query and gallery\n", - " m, n = qf.size(0), gf.size(0)\n", - " distmat = torch.zeros((m,n))\n", - " # Cosine similarity\n", - " qf = nn.functional.normalize(qf, p=2, dim=1)\n", - " gf = nn.functional.normalize(gf, p=2, dim=1)\n", - " for i in range(m):\n", - " distmat[i] = - torch.mm(qf[i:i+1], gf.t())\n", - " distmat = distmat.numpy()\n", - "\n", - " cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids)\n", - " return {'top1': cmc[0], 'top5': cmc[4], 'top10': cmc[9], 'mAP': mAP}" - ] - }, - { - "cell_type": "markdown", - "id": "derived-bride", - "metadata": {}, - "source": [ - "## Time to 
start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "mature-renewal", - "metadata": {}, - "outputs": [], - "source": [ - "# Create an experiment in the federation\n", - "experiment_name = 'market_test_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "lightweight-causing", - "metadata": { - "pycharm": { - "is_executing": true - }, - "scrolled": false - }, - "outputs": [], - "source": [ - "# Note: using autoreload here may cause a pickling error\n", - "\n", - "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n", - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=3,\n", - " opt_treatment='RESET')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bfc4f89c", - "metadata": { - "pycharm": { - "is_executing": true, - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "# If you want to stop the IPython session, then reconnect and check how the experiment is going\n", - "# fl_experiment.restore_experiment_state(MI)\n", - "\n", - "fl_experiment.stream_metrics(tensorboard_logs=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/losses.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/losses.py deleted file mode 100644 index be266a4e13..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/losses.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Compute ArcFace loss and Triplet loss.""" - -import math - -import torch -import torch.nn.functional as F -from torch import nn - - -class ArcFaceLoss(nn.Module): - """ArcFace loss.""" - - def __init__(self, margin=0.1, scale=16, easy_margin=False): - """Initialize ArcFace loss.""" - super(ArcFaceLoss, self).__init__() - self.m = margin - self.s = scale - self.easy_margin = easy_margin - - def forward(self, pred, target): - """Compute forward.""" - # make a one-hot index - index = pred.data * 0.0 # size = (B, Classnum) - index.scatter_(1, target.data.view(-1, 1), 1) - index = index.bool() - - cos_m = math.cos(self.m) - sin_m = math.sin(self.m) - cos_t = pred[index] - sin_t = torch.sqrt(1.0 - cos_t * cos_t) - cos_t_add_m = cos_t * cos_m - sin_t * sin_m - - cond_v = cos_t - math.cos(math.pi - self.m) - cond = F.relu(cond_v) - keep = cos_t - math.sin(math.pi - self.m) * self.m - - cos_t_add_m = torch.where(cond.bool(), cos_t_add_m, keep) - - output = pred * 1.0 # size = (B, Classnum) - output[index] = cos_t_add_m - output = self.s * output - - return F.cross_entropy(output, target) - - -class TripletLoss(nn.Module): - """ - Triplet loss with hard positive/negative mining. - - Reference: - Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
- - Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py. - - Args: - margin (float): margin for triplet. - distance (str): distance for triplet. - """ - - def __init__(self, margin=0.3, distance='cosine'): - """Initialize Triplet loss.""" - super(TripletLoss, self).__init__() - - self.distance = distance - self.margin = margin - self.ranking_loss = nn.MarginRankingLoss(margin=margin) - - def forward(self, inputs, targets): - """ - Compute forward. - - Args: - inputs: feature matrix with shape (batch_size, feat_dim) - targets: ground truth labels with shape (batch_size,) - """ - n = inputs.size(0) - - # Compute pairwise distance (negative cosine similarity) - inputs = F.normalize(inputs, p=2, dim=1) - dist = - torch.mm(inputs, inputs.t()) - - # For each anchor, find the hardest positive and negative - mask = targets.expand(n, n).eq(targets.expand(n, n).t()) - dist_ap, dist_an = [], [] - for i in range(n): - dist_ap.append(dist[i][mask[i]].max().unsqueeze(0)) - dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0)) - dist_ap = torch.cat(dist_ap) - dist_an = torch.cat(dist_an) - - # Compute ranking hinge loss - y = torch.ones_like(dist_an) - loss = self.ranking_loss(dist_an, dist_ap, y) - - return loss diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/requirements.txt deleted file mode 100644 index 62c8356720..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch==2.3.1 -torchvision==0.18.1 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/tools.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/tools.py deleted file mode 100644 index 75c43741f1..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/tools.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Tools for metric computation and Dataloader.""" - -import copy -import random -from collections import defaultdict -from logging import getLogger - -import numpy as np -import torch -from torch.utils.data.sampler import Sampler - -logger = getLogger(__name__) - - -class AverageMeter: - """ - Computes and stores the average and current value.
- - Code imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 - """ - - def __init__(self): - """Initialize Average Meter.""" - self.reset() - - def reset(self): - """Reset values.""" - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - """Update values.""" - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - -def compute_ap_cmc(index, good_index, junk_index): - """Compute validation metrics.""" - ap = 0 - cmc = np.zeros(len(index)) - - # remove junk_index - mask = np.in1d(index, junk_index, invert=True) - index = index[mask] - - # find good_index index - ngood = len(good_index) - mask = np.in1d(index, good_index) - rows_good = np.argwhere(mask) - rows_good = rows_good.flatten() - - cmc[rows_good[0]:] = 1.0 - for i in range(ngood): - d_recall = 1.0 / ngood - precision = (i + 1) * 1.0 / (rows_good[i] + 1) - ap = ap + d_recall * precision - - return ap, cmc - - -def evaluate(distmat, q_pids, g_pids, q_camids, g_camids): - """Evaluate model.""" - num_q, num_g = distmat.shape - index = np.argsort(distmat, axis=1) # from small to large - - num_no_gt = 0 # num of query imgs without groundtruth - num_r1 = 0 - cmc = np.zeros(len(g_pids)) - ap = 0 - - for i in range(num_q): - # groundtruth index - query_index = np.argwhere(g_pids == q_pids[i]) - camera_index = np.argwhere(g_camids == q_camids[i]) - good_index = np.setdiff1d(query_index, camera_index, assume_unique=True) - if good_index.size == 0: - num_no_gt += 1 - continue - # remove gallery samples that have the same pid and camid with query - junk_index = np.intersect1d(query_index, camera_index) - - ap_tmp, cmc_tmp = compute_ap_cmc(index[i], good_index, junk_index) - if cmc_tmp[0] == 1: - num_r1 += 1 - cmc = cmc + cmc_tmp - ap += ap_tmp - - if num_no_gt > 0: - logger.error(f'{num_no_gt} query imgs do not have groundtruth.') - - cmc = cmc / (num_q - num_no_gt) - mean_ap = ap / (num_q - num_no_gt) - - return cmc, mean_ap - - -@torch.no_grad() -def extract_feature(model, dataloader): - """Extract features for validation.""" - features, pids, camids = [], [], [] - for imgs, (batch_pids, batch_camids) in dataloader: - flip_imgs = fliplr(imgs) - imgs, flip_imgs = imgs.cuda(), flip_imgs.cuda() - batch_features = model(imgs).data - batch_features_flip = model(flip_imgs).data - batch_features += batch_features_flip - - features.append(batch_features) - pids.append(batch_pids) - camids.append(batch_camids) - features = torch.cat(features, 0) - pids = torch.cat(pids, 0).numpy() - camids = torch.cat(camids, 0).numpy() - - return features, pids, camids - - -def fliplr(img): - """Flip horizontal.""" - inv_idx = torch.arange(img.size(3) - 1, -1, -1).long() # N x C x H x W - img_flip = img.index_select(3, inv_idx) - - return img_flip - - -class RandomIdentitySampler(Sampler): - """ - Random Sampler. - - Randomly sample N identities, then for each identity, - randomly sample K instances, therefore batch size is N*K. - - Args: - - data_source (Dataset): dataset to sample from. - - num_instances (int): number of instances per identity. 
- """ - - def __init__(self, data_source, num_instances=4): - """Initialize Sampler.""" - self.data_source = data_source - self.num_instances = num_instances - self.index_dic = defaultdict(list) - for index, (_, (pid, _)) in enumerate(data_source): - self.index_dic[pid].append(index) - self.pids = list(self.index_dic.keys()) - self.num_identities = len(self.pids) - - # compute number of examples in an epoch - self.length = 0 - for pid in self.pids: - idxs = self.index_dic[pid] - num = len(idxs) - if num < self.num_instances: - num = self.num_instances - self.length += num - num % self.num_instances - - def __iter__(self): - """Iterate over Sampler.""" - list_container = [] - - for pid in self.pids: - idxs = copy.deepcopy(self.index_dic[pid]) - if len(idxs) < self.num_instances: - idxs = np.random.choice(idxs, size=self.num_instances, replace=True) - random.shuffle(idxs) - batch_idxs = [] - for idx in idxs: - batch_idxs.append(idx) - if len(batch_idxs) == self.num_instances: - list_container.append(batch_idxs) - batch_idxs = [] - - random.shuffle(list_container) - - ret = [] - for batch_idxs in list_container: - ret.extend(batch_idxs) - - return iter(ret) - - def __len__(self): - """Return number of examples in an epoch.""" - return self.length diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/transforms.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/transforms.py deleted file mode 100644 index 5ffa435ecb..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_Market_Re-ID/workspace/transforms.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Image transform tools.""" - -import math -import random - -from PIL import Image - - -class ResizeRandomCropping: - """ - With a probability, first increase image size to (1 + 1/8), and then perform random crop. - - Args: - height (int): target height. - width (int): target width. - p (float): probability of performing this transformation. Default: 0.5. - """ - - def __init__(self, height, width, p=0.5, interpolation=Image.BILINEAR): - """Initialize cropping.""" - self.height = height - self.width = width - self.p = p - self.interpolation = interpolation - - def __call__(self, img): - """ - Call of cropping. - - Args: - img (PIL Image): Image to be cropped. - Returns: - PIL Image: Cropped image. - """ - if random.uniform(0, 1) >= self.p: - return img.resize((self.width, self.height), self.interpolation) - - new_width, new_height = int(round(self.width * 1.125)), int(round(self.height * 1.125)) - resized_img = img.resize((new_width, new_height), self.interpolation) - x_maxrange = new_width - self.width - y_maxrange = new_height - self.height - x1 = int(round(random.uniform(0, x_maxrange))) - y1 = int(round(random.uniform(0, y_maxrange))) - cropped_img = resized_img.crop((x1, y1, x1 + self.width, y1 + self.height)) - - return cropped_img - - -class RandomErasing: - """ - Randomly selects a rectangle region in an image and erases its pixels. - - 'Random Erasing Data Augmentation' by Zhong et al. - See https://arxiv.org/pdf/1708.04896.pdf - - Args: - probability: The probability that the Random Erasing operation will be performed. - sl: Minimum proportion of erased area against input image. - sh: Maximum proportion of erased area against input image. - r1: Minimum aspect ratio of erased area. - mean: Erasing value. 
- """ - - def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=None): - """Initialize Erasing.""" - if not mean: - mean = [0.4914, 0.4822, 0.4465] - - self.probability = probability - self.mean = mean - self.sl = sl - self.sh = sh - self.r1 = r1 - - def __call__(self, img): - """Call of Erasing.""" - if random.uniform(0, 1) >= self.probability: - return img - - for _attempt in range(100): - area = img.size()[1] * img.size()[2] - - target_area = random.uniform(self.sl, self.sh) * area - aspect_ratio = random.uniform(self.r1, 1 / self.r1) - - h = int(round(math.sqrt(target_area * aspect_ratio))) - w = int(round(math.sqrt(target_area / aspect_ratio))) - - if w < img.size()[2] and h < img.size()[1]: - x1 = random.randint(0, img.size()[1] - h) - y1 = random.randint(0, img.size()[2] - w) - if img.size()[0] == 3: - img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] - img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] - img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] - else: - img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] - return img - - return img diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/README.md deleted file mode 100644 index 40afb0bfda..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# MedMNIST 2D Classification Tutorial - -![MedMNISTv2_overview](https://raw.githubusercontent.com/MedMNIST/MedMNIST/main/assets/medmnistv2.jpg) - -For more details, please refer to the original paper: -**MedMNIST v2: A Large-Scale Lightweight Benchmark for 2D and 3D Biomedical Image Classification** ([arXiv](https://arxiv.org/abs/2110.14795)), and [PyPI](https://pypi.org/project/medmnist/). - - -## I. About model and experiments - -We use a simple convolutional neural network and settings coming from [the experiments](https://github.com/MedMNIST/experiments) repository. -
- -## II. How to run this tutorial (without TLS and locally as a simulation): -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_MedMNIST_2D - ``` -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r requirements.txt -./start_envoy.sh env_one envoy_config.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config.yaml -``` -
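Each envoy serves one shard of the dataset: the `rank_worldsize` parameter in `envoy_config.yaml` controls which samples an envoy sees via the strided slicing in `medmnist_shard_descriptor.py` (shown later in this diff). A small illustration of that slicing:

```python
import numpy as np

# The shard descriptor takes x[rank - 1::worldsize], so two envoys
# with worldsize 2 split the data into even/odd-indexed samples.
x = np.arange(10)
worldsize = 2
for rank in (1, 2):
    print(f"envoy rank {rank}: {x[rank - 1::worldsize]}")
# envoy rank 1: [0 2 4 6 8]
# envoy rank 2: [1 3 5 7 9]
```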
- -### 5. In the third terminal (or fourth terminal, if you chose to do two envoys) run the Jupyter Notebook: - -```sh -cd workspace -jupyter lab Pytorch_MedMNIST_2D.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the Pytorch_MedMNIST_2D.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment finished successfully. - \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/director/director_config.yaml deleted file mode 100644 index f7d3847170..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['28', '28', '3'] - target_shape: ['1','1'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/envoy_config.yaml deleted file mode 100644 index 05ee5cec4c..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/envoy_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: medmnist_shard_descriptor.MedMNISTShardDescriptor - params: - rank_worldsize: 1, 1 - datapath: data/.
- dataname: bloodmnist diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/medmnist_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/medmnist_shard_descriptor.py deleted file mode 100644 index d5e639fed4..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/medmnist_shard_descriptor.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""MedMNIST Shard Descriptor.""" - -import logging -import os -from typing import Any, List, Tuple -from medmnist.info import INFO, HOMEPAGE - -import numpy as np - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class MedMNISTShardDataset(ShardDataset): - """MedMNIST Shard dataset class.""" - - def __init__(self, x, y, data_type: str = 'train', rank: int = 1, worldsize: int = 1) -> None: - """Initialize MedMNISTDataset.""" - self.data_type = data_type - self.rank = rank - self.worldsize = worldsize - self.x = x[self.rank - 1::self.worldsize] - self.y = y[self.rank - 1::self.worldsize] - - def __getitem__(self, index: int) -> Tuple[Any, Any]: - """Return an item by the index.""" - return self.x[index], self.y[index] - - def __len__(self) -> int: - """Return the len of the dataset.""" - return len(self.x) - - -class MedMNISTShardDescriptor(ShardDescriptor): - """MedMNIST Shard descriptor class.""" - - def __init__( - self, - rank_worldsize: str = '1, 1', - datapath: str = '', - dataname: str = 'bloodmnist', - **kwargs - ) -> None: - """Initialize MedMNISTShardDescriptor.""" - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.datapath = datapath - self.dataset_name = dataname - self.info = INFO[self.dataset_name] - - (x_train, y_train), (x_test, y_test) = self.load_data() - self.data_by_type = { - 'train': (x_train, y_train), - 'val': (x_test, y_test) - } - - def get_shard_dataset_types(self) -> List[str]: - """Get available shard dataset types.""" - return list(self.data_by_type) - - def get_dataset(self, dataset_type='train') -> MedMNISTShardDataset: - """Return a shard dataset by type.""" - if dataset_type not in self.data_by_type: - raise Exception(f'Wrong dataset type: {dataset_type}') - return MedMNISTShardDataset( - *self.data_by_type[dataset_type], - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - return ['28', '28', '3'] - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - return ['1', '1'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'MedMNIST dataset, shard number {self.rank}' - f' out of {self.worldsize}') - - @staticmethod - def download_data(datapath: str = 'data/', - dataname: str = 'bloodmnist', - info: dict = {}) -> None: - - logger.info(f"{datapath}\n{dataname}\n{info}") - try: - from torchvision.datasets.utils import download_url - download_url(url=info["url"], - root=datapath, - filename=dataname, - md5=info["MD5"]) - except Exception: - raise RuntimeError('Something went wrong when downloading! ' - + 'Go to the homepage to download manually. 
' + HOMEPAGE) - - def load_data(self) -> Tuple[Tuple[Any, Any], Tuple[Any, Any]]: - """Download prepared dataset.""" - - dataname = self.dataset_name + '.npz' - dataset = os.path.join(self.datapath, dataname) - - if not os.path.isfile(dataset): - logger.info(f"Dataset {dataname} not found at: {self.datapath}.\n\tDownloading...") - MedMNISTShardDescriptor.download_data(self.datapath, dataname, self.info) - logger.info("DONE!") - - data = np.load(dataset) - - x_train = data["train_images"] - x_test = data["test_images"] - - y_train = data["train_labels"] - y_test = data["test_labels"] - logger.info('MedMNIST data was loaded!') - return (x_train, y_train), (x_test, y_test) diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/requirements.txt deleted file mode 100644 index 363c0d69f9..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -medmnist -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/start_envoy.sh deleted file mode 100755 index cdd84e7fb6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/workspace/Pytorch_MedMNIST_2D.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/workspace/Pytorch_MedMNIST_2D.ipynb deleted file mode 100644 index 4cdcd36d43..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_2D/workspace/Pytorch_MedMNIST_2D.ipynb +++ /dev/null @@ -1,614 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated MedMNIST2D " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5504ab79", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install medmnist" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0570122", - "metadata": {}, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "import tqdm\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from torch.utils.data import Dataset, DataLoader\n", - "from torchvision import transforms as T\n", - "import torch.nn.functional as F\n", - "\n", - "import medmnist" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "22ba64da", - "metadata": {}, - "outputs": [], - "source": [ - "from medmnist import INFO, Evaluator\n", - "\n", - "## Change the data flag here to reflect the one defined in the envoy_config_xxx.yaml\n", - "dataname = 'bloodmnist'\n" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from
openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identifier that was used in the signed certificate\n", - "client_id = 'api'\n", - "director_node_fqdn = 'localhost'\n", - "director_port=50051\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port, \n", - " tls=False\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Describing the FL experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "9b3081a6", - "metadata": {}, - "source": [ - "## Load MedMNIST INFO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e0377d3a", - "metadata": {}, - "outputs": [], - "source": [ - "num_epochs = 3\n", - "TRAIN_BS, VALID_BS = 64, 128\n", - "\n", - "lr = 0.001\n", - "gamma=0.1\n", - "milestones = [0.5 * num_epochs, 0.75 * num_epochs]\n", - "\n", - "info = INFO[dataname]\n", - "task = info['task']\n", - "n_channels = info['n_channels']\n", - "n_classes = len(info['label'])" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0dc457e", - "metadata": {}, - "outputs": [], - "source": [ - "## Data transformations\n", - "data_transform = T.Compose([T.ToTensor(), \n", - " T.Normalize(mean=[.5], std=[.5])]\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "09ba2f64", - "metadata": {}, - "outputs": [], - "source": [ - "from PIL import Image\n", - "\n", - "class TransformedDataset(Dataset):\n", - " \"\"\"MedMNIST 2D dataset with transforms.\"\"\"\n", - "\n", - "\n", - " def __init__(self, dataset, transform=None, target_transform=None):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - " self.transform = transform\n", - " self.target_transform = target_transform\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " \n", - " img, label = self.dataset[index]\n", - " \n", - " if self.target_transform:\n", - " label = self.target_transform(label) \n", - " else:\n", - " label = label.astype(int)\n", - " \n", - " if self.transform:\n", - " img = Image.fromarray(img)\n", - " img = self.transform(img)\n", - " else:\n", - " base_transform = T.PILToTensor()\n", - " img = 
Image.fromarray(img)\n", - " img = base_transform(img) \n", - "\n", - " return img, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db2d563e", - "metadata": {}, - "outputs": [], - "source": [ - "class MedMnistFedDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " self.kwargs = kwargs\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - "\n", - " self.train_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('train'),\n", - " transform=data_transform\n", - " ) \n", - " \n", - " self.valid_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('val'),\n", - " transform=data_transform\n", - " )\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(\n", - " self.train_set, num_workers=8, batch_size=self.kwargs['train_bs'], shuffle=True)\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(self.valid_set, num_workers=8, batch_size=self.kwargs['valid_bs'])\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "b0dfb459", - "metadata": {}, - "source": [ - "### Create Mnist federated dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4af5c4c2", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MedMnistFedDataset(train_bs=TRAIN_BS, valid_bs=VALID_BS)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f63908e", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset.shard_descriptor = dummy_shard_desc\n", - "for i, (sample, target) in enumerate(fed_dataset.get_train_loader()):\n", - " print(sample.shape, target.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "075d1d6c", - "metadata": {}, - "source": [ - "### Describe the model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8477a001", - "metadata": {}, - "outputs": [], - "source": [ - "# define a simple CNN model\n", - "class Net(nn.Module):\n", - " def __init__(self, in_channels, num_classes):\n", - " super(Net, self).__init__()\n", - "\n", - " self.layer1 = nn.Sequential(\n", - " nn.Conv2d(in_channels, 16, kernel_size=3),\n", - " nn.BatchNorm2d(16),\n", - " nn.ReLU())\n", - "\n", - " self.layer2 = nn.Sequential(\n", - " nn.Conv2d(16, 16, kernel_size=3),\n", - " nn.BatchNorm2d(16),\n", - " nn.ReLU(),\n", - " nn.MaxPool2d(kernel_size=2, stride=2))\n", - "\n", - " self.layer3 = nn.Sequential(\n", - " nn.Conv2d(16, 64, kernel_size=3),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU())\n", - " \n", - " self.layer4 = nn.Sequential(\n", - " nn.Conv2d(64, 64, 
kernel_size=3),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU())\n", - "\n", - " self.layer5 = nn.Sequential(\n", - " nn.Conv2d(64, 64, kernel_size=3, padding=1),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU(),\n", - " nn.MaxPool2d(kernel_size=2, stride=2))\n", - "\n", - " self.fc = nn.Sequential(\n", - " nn.Linear(64 * 4 * 4, 128),\n", - " nn.ReLU(),\n", - " nn.Linear(128, 128),\n", - " nn.ReLU(),\n", - " nn.Linear(128, num_classes))\n", - "\n", - " def forward(self, x):\n", - " x = self.layer1(x)\n", - " x = self.layer2(x)\n", - " x = self.layer3(x)\n", - " x = self.layer4(x)\n", - " x = self.layer5(x)\n", - " x = x.view(x.size(0), -1)\n", - " x = self.fc(x)\n", - " return x\n", - "\n", - "model = Net(in_channels=n_channels, num_classes=n_classes)\n", - " \n", - "# define loss function and optimizer\n", - "if task == \"multi-label, binary-class\":\n", - " criterion = nn.BCEWithLogitsLoss()\n", - "else:\n", - " criterion = nn.CrossEntropyLoss()\n", - " \n", - "optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2154486", - "metadata": {}, - "outputs": [], - "source": [ - "print(model)" - ] - }, - { - "cell_type": "markdown", - "id": "8d1c78ee", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "59831bcd", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4ff463bd", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "train_custom_params={'criterion':criterion,'task':task}\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@TI.add_kwargs(**train_custom_params)\n", - "@TI.register_fl_task(model='model', data_loader='train_loader',\n", - " device='device', optimizer='optimizer')\n", - "def train(model, train_loader, device, optimizer, criterion, task):\n", - " total_loss = []\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " model.train()\n", - " model.to(device)\n", - " \n", - " for inputs, targets in train_loader:\n", - " \n", - " optimizer.zero_grad()\n", - " outputs = model(inputs.to(device))\n", - " \n", - " if task == 'multi-label, binary-class':\n", - " targets = targets.to(torch.float32).to(device)\n", - " loss = criterion(outputs, targets)\n", - " else:\n", - " targets = torch.squeeze(targets, 1).long().to(device)\n", - " loss = criterion(outputs, targets)\n", - " \n", - " total_loss.append(loss.item())\n", - " \n", - " loss.backward()\n", - " optimizer.step()\n", - "\n", - " return {'train_loss': np.mean(total_loss),}\n", - "\n", - "\n", - "\n", - "val_custom_params={'criterion':criterion, \n", - " 'task':task}\n", - "\n", - "@TI.add_kwargs(**val_custom_params)\n", - "@TI.register_fl_task(model='model', data_loader='val_loader', device='device')\n", - "def validate(model, val_loader, device, criterion, task):\n", - "\n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - " model.eval()\n", - " 
model.to(device)\n", - "\n", - " val_score = 0\n", - " total_samples = 0\n", - " total_loss = []\n", - " y_score = torch.tensor([]).to(device)\n", - "\n", - " with torch.no_grad():\n", - " for inputs, targets in val_loader:\n", - " outputs = model(inputs.to(device))\n", - " \n", - " if task == 'multi-label, binary-class':\n", - " targets = targets.to(torch.float32).to(device)\n", - " loss = criterion(outputs, targets)\n", - " m = nn.Sigmoid()\n", - " outputs = m(outputs).to(device)\n", - " else:\n", - " targets = torch.squeeze(targets, 1).long().to(device)\n", - " loss = criterion(outputs, targets)\n", - " m = nn.Softmax(dim=1)\n", - " outputs = m(outputs).to(device)\n", - " targets = targets.float().resize_(len(targets), 1)\n", - "\n", - " total_loss.append(loss.item())\n", - " \n", - " total_samples += targets.shape[0]\n", - " pred = outputs.argmax(dim=1)\n", - " val_score += pred.eq(targets).sum().cpu().numpy()\n", - " \n", - " acc = val_score / total_samples \n", - " test_loss = sum(total_loss) / len(total_loss)\n", - "\n", - " return {'acc': acc,\n", - " 'test_loss': test_loss,\n", - " }" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# Create an experiment in the federation\n", - "experiment_name = 'medmnist_exp'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n", - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=3,\n", - " opt_treatment='RESET',\n", - " device_assignment_policy='CUDA_PREFERRED')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01fa7cea", - "metadata": {}, - "outputs": [], - "source": [ - "# If you want to stop the IPython session, then reconnect and check how the experiment is going\n", - "# fl_experiment.restore_experiment_state(MI)\n", - "\n", - "fl_experiment.stream_metrics(tensorboard_logs=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92940763", - "metadata": {}, - "outputs": [], - "source": [ - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1690ea49", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.8.10 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "vscode": { - "interpreter": { - "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/README.md deleted file mode 100644 index 6c5c516c62..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/README.md +++ /dev/null @@ -1,78 +0,0 @@ -# MedMNIST 3D
Classification Tutorial - -![MedMNISTv2_overview](https://raw.githubusercontent.com/MedMNIST/MedMNIST/main/assets/medmnistv2.jpg) - -For more details, please refer to the original paper: -**MedMNIST v2: A Large-Scale Lightweight Benchmark for 2D and 3D Biomedical Image Classification** ([arXiv](https://arxiv.org/abs/2110.14795)) -or visit the MedMNIST [PyPI](https://pypi.org/project/medmnist/). - -## **I. About model and experiments** - -We use a simple convolutional neural network and settings coming from [the experiments](https://github.com/MedMNIST/experiments) repository. -
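Because the MedMNIST 3D volumes are 28x28x28, the notebook's 2D architectures have to be adapted for volumetric input; the envoy requirements pin the ACSConv package for this purpose. A minimal sketch of the conversion idea, assuming the `acsconv` package API (the exact conversion step used later in the notebook may differ):

```python
# Reusing a 2D CNN on 3D volumes via ACS (axial-coronal-sagittal) convolutions.
import torch
from acsconv.converters import ACSConverter
from torchvision.models import resnet18

model_2d = resnet18(num_classes=2)  # number of output classes is arbitrary for this sketch
model_3d = ACSConverter(model_2d)   # swaps 2D convolutions, pooling, and norms for 3D-capable versions
x = torch.randn(2, 3, 28, 28, 28)   # (batch, channels, depth, height, width)
print(model_3d(x).shape)            # torch.Size([2, 2])
```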
-
- -### **II. How to run this tutorial (without TLS and locally as a simulation):** -
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) - -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_MedMNIST_3D - ``` - -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r requirements.txt -./start_envoy.sh env_one envoy_config.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config.yaml -``` - -
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab Pytorch_MedMNIST_3D.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the Pytorch_MedMNIST_3D.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/director/director_config.yaml deleted file mode 100644 index 14c8152587..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['28', '28', '28'] - target_shape: ['1','1'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/envoy_config.yaml deleted file mode 100644 index 7eea1a6f2d..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/envoy_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: medmnist_shard_descriptor.MedMNISTShardDescriptor - params: - rank_worldsize: 1, 1 - datapath: data/. 
- dataname: synapsemnist3d diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/medmnist_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/medmnist_shard_descriptor.py deleted file mode 100644 index 5f490a6944..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/medmnist_shard_descriptor.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""MedMNIST Shard Descriptor.""" - -import logging -import os -from typing import Any, List, Tuple -from medmnist.info import INFO, HOMEPAGE - -import numpy as np - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class MedMNISTShardDataset(ShardDataset): - """MedMNIST Shard dataset class.""" - - def __init__(self, x, y, data_type: str = 'train', rank: int = 1, worldsize: int = 1) -> None: - """Initialize MedMNISTDataset.""" - self.data_type = data_type - self.rank = rank - self.worldsize = worldsize - self.x = x[self.rank - 1::self.worldsize] - self.y = y[self.rank - 1::self.worldsize] - - def __getitem__(self, index: int) -> Tuple[Any, Any]: - """Return an item by the index.""" - return self.x[index], self.y[index] - - def __len__(self) -> int: - """Return the len of the dataset.""" - return len(self.x) - - -class MedMNISTShardDescriptor(ShardDescriptor): - """MedMNIST Shard descriptor class.""" - - def __init__( - self, - rank_worldsize: str = '1, 1', - datapath: str = '', - dataname: str = 'bloodmnist', - **kwargs - ) -> None: - """Initialize MedMNISTShardDescriptor.""" - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.datapath = datapath - self.dataset_name = dataname - self.info = INFO[self.dataset_name] - - (x_train, y_train), (x_test, y_test) = self.load_data() - self.data_by_type = { - 'train': (x_train, y_train), - 'val': (x_test, y_test) - } - - def get_shard_dataset_types(self) -> List[str]: - """Get available shard dataset types.""" - return list(self.data_by_type) - - def get_dataset(self, dataset_type='train') -> MedMNISTShardDataset: - """Return a shard dataset by type.""" - if dataset_type not in self.data_by_type: - raise Exception(f'Wrong dataset type: {dataset_type}') - return MedMNISTShardDataset( - *self.data_by_type[dataset_type], - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - return ['28', '28', '28'] - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - return ['1', '1'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'MedMNIST dataset, shard number {self.rank}' - f' out of {self.worldsize}') - - @staticmethod - def download_data(datapath: str = 'data/', - dataname: str = 'bloodmnist', - info: dict = {}) -> None: - - logger.info(f"{datapath}\n{dataname}\n{info}") - try: - from torchvision.datasets.utils import download_url - download_url(url=info["url"], - root=datapath, - filename=dataname, - md5=info["MD5"]) - except Exception: - raise RuntimeError('Something went wrong when downloading! ' - + 'Go to the homepage to download manually. 
' + HOMEPAGE) - - def load_data(self) -> Tuple[Tuple[Any, Any], Tuple[Any, Any]]: - """Download prepared dataset.""" - - dataname = self.dataset_name + '.npz' - dataset = os.path.join(self.datapath, dataname) - - if not os.path.isfile(dataset): - logger.info(f"Dataset {dataname} not found at: {self.datapath}.\n\tDownloading...") - MedMNISTShardDescriptor.download_data(self.datapath, dataname, self.info) - logger.info("DONE!") - - data = np.load(dataset) - - x_train = data["train_images"] - x_test = data["test_images"] - - y_train = data["train_labels"] - y_test = data["test_labels"] - logger.info('MedMNIST data was loaded!') - return (x_train, y_train), (x_test, y_test) diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/requirements.txt deleted file mode 100644 index 824405557e..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -ACSConv -medmnist -protobuf>=3.20.2 # not directly required, pinned by Snyk to avoid a vulnerability -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/start_envoy.sh deleted file mode 100755 index cdd84e7fb6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/Pytorch_MedMNIST_3D.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/Pytorch_MedMNIST_3D.ipynb deleted file mode 100644 index a400b6faa7..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/Pytorch_MedMNIST_3D.ipynb +++ /dev/null @@ -1,739 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated MedMNIST3D " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f9d99476", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install medmnist\n", - "!pip install ACSConv" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0570122", - "metadata": {}, - "outputs": [], - "source": [ - "# Install dependencies if not already installed\n", - "import tqdm\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from torch.utils.data import Dataset, DataLoader\n", - "from torchvision import transforms as T\n", - "import torch.nn.functional as F\n", - "\n", - "import medmnist" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identifier that was used in the signed certificate\n", - "client_id = 'api'\n", - "director_node_fqdn = 'localhost'\n", -
"director_port=50051\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port, \n", - " tls=False\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Describing FL experimen" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "9b3081a6", - "metadata": {}, - "source": [ - "## Load MedMNIST INFO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e0377d3a", - "metadata": {}, - "outputs": [], - "source": [ - "from medmnist import INFO, Evaluator\n", - "\n", - "data_flag = 'synapsemnist3d'\n", - "num_epochs = 1\n", - "batch_size = 128\n", - "\n", - "lr = 0.001\n", - "\n", - "info = INFO[data_flag]\n", - "task = info['task']\n", - "n_channels = info['n_channels']\n", - "n_classes = len(info['label'])\n", - "\n", - "print(info)" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0dc457e", - "metadata": {}, - "outputs": [], - "source": [ - "from wspace_utils.utils import Transform3D, model_to_syncbn\n", - "shape_transform = False\n", - "\n", - "train_transform = Transform3D(mul='random') if shape_transform else Transform3D()\n", - "eval_transform = Transform3D(mul='0.5') if shape_transform else Transform3D()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "09ba2f64", - "metadata": {}, - "outputs": [], - "source": [ - "from PIL import Image\n", - "\n", - "class TransformedDataset(Dataset):\n", - " \"\"\"Image Person ReID Dataset.\"\"\"\n", - "\n", - "\n", - " def __init__(self, dataset, transform=None, target_transform=None, as_rgb=False):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - " self.transform = transform\n", - " self.target_transform = target_transform\n", - " self.as_rgb = as_rgb\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " \n", - " img, label = self.dataset[index]\n", - " \n", - " if self.target_transform:\n", - " label = self.target_transform(label) \n", - " else:\n", - " label = label.astype(int)\n", - " \n", - " img = np.stack([img/255.]*(3 if self.as_rgb else 1), axis=0) \n", - " if self.transform is not None:\n", - " img = self.transform(img)\n", - "\n", - " if 
self.target_transform is not None:\n", - " target = self.target_transform(target)\n", - "\n", - " return img, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db2d563e", - "metadata": {}, - "outputs": [], - "source": [ - "class MedMnistFedDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " self.kwargs = kwargs\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - "\n", - " self.train_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('train'),\n", - " transform=train_transform, as_rgb=False\n", - " ) \n", - " \n", - " self.valid_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('val'),\n", - " transform=eval_transform, as_rgb=False\n", - " )\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(\n", - " self.train_set, num_workers=8, batch_size=self.kwargs['train_bs'], shuffle=True)\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(self.valid_set, num_workers=8, batch_size=self.kwargs['valid_bs'])\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "b0dfb459", - "metadata": {}, - "source": [ - "### Create Mnist federated dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4af5c4c2", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MedMnistFedDataset(train_bs=64, valid_bs=512)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f63908e", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset.shard_descriptor = dummy_shard_desc\n", - "for i, (sample, target) in enumerate(fed_dataset.get_train_loader()):\n", - " sample = (np.array(sample))\n", - " print(sample.shape, target.shape)\n", - " \n", - "print(f\"dtype = {sample.dtype}, type = {type(sample)}\")" - ] - }, - { - "cell_type": "markdown", - "id": "f005760c", - "metadata": {}, - "source": [ - "## Describe the model and optimizer" - ] - }, - { - "cell_type": "markdown", - "id": "12e343b3", - "metadata": {}, - "source": [ - "## ResNet (compute intensive)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d47f74d2", - "metadata": {}, - "outputs": [], - "source": [ - "class BasicBlock(nn.Module):\n", - " expansion = 1\n", - "\n", - " def __init__(self, in_planes, planes, stride=1):\n", - " super(BasicBlock, self).__init__()\n", - " self.conv1 = nn.Conv2d(\n", - " in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)\n", - " self.bn1 = nn.BatchNorm2d(planes)\n", - " \n", - " self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,\n", - " stride=1, 
padding=1, bias=False)\n", - " self.bn2 = nn.BatchNorm2d(planes)\n", - "\n", - " self.shortcut = nn.Sequential()\n", - " if stride != 1 or in_planes != self.expansion*planes:\n", - " self.shortcut = nn.Sequential(\n", - " nn.Conv2d(in_planes, self.expansion*planes,\n", - " kernel_size=1, stride=stride, bias=False),\n", - " nn.BatchNorm2d(self.expansion*planes)\n", - " \n", - " )\n", - "\n", - " def forward(self, x):\n", - " out = F.relu(self.bn1(self.conv1(x)))\n", - " out = self.bn2(self.conv2(out))\n", - " out += self.shortcut(x)\n", - " out = F.relu(out)\n", - " return out\n", - "\n", - "\n", - "class Bottleneck(nn.Module):\n", - " expansion = 4\n", - "\n", - " def __init__(self, in_planes, planes, stride=1):\n", - " super(Bottleneck, self).__init__()\n", - " self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)\n", - " self.bn1 = nn.BatchNorm2d(planes)\n", - " self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,\n", - " stride=stride, padding=1, bias=False)\n", - " self.bn2 = nn.BatchNorm2d(planes)\n", - " self.conv3 = nn.Conv2d(planes, self.expansion *\n", - " planes, kernel_size=1, bias=False)\n", - " self.bn3 = nn.BatchNorm2d(self.expansion*planes)\n", - "\n", - " self.shortcut = nn.Sequential()\n", - " if stride != 1 or in_planes != self.expansion*planes:\n", - " self.shortcut = nn.Sequential(\n", - " nn.Conv2d(in_planes, self.expansion*planes,\n", - " kernel_size=1, stride=stride, bias=False),\n", - " nn.BatchNorm2d(self.expansion*planes)\n", - " )\n", - "\n", - " def forward(self, x):\n", - " out = F.relu(self.bn1(self.conv1(x)))\n", - " out = F.relu(self.bn2(self.conv2(out)))\n", - " out = self.bn3(self.conv3(out))\n", - " out += self.shortcut(x)\n", - " out = F.relu(out)\n", - " return out\n", - "\n", - "\n", - "class ResNet(nn.Module):\n", - " def __init__(self, block, num_blocks, in_channels=1, num_classes=2):\n", - " super(ResNet, self).__init__()\n", - " self.in_planes = 64\n", - "\n", - " self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3,\n", - " stride=1, padding=1, bias=False)\n", - " self.bn1 = nn.BatchNorm2d(64)\n", - " self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)\n", - " self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)\n", - " self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)\n", - " self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)\n", - " self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n", - " self.linear = nn.Linear(512 * block.expansion, num_classes)\n", - "\n", - " def _make_layer(self, block, planes, num_blocks, stride):\n", - " strides = [stride] + [1]*(num_blocks-1)\n", - " layers = []\n", - " for stride in strides:\n", - " layers.append(block(self.in_planes, planes, stride))\n", - " self.in_planes = planes * block.expansion\n", - " return nn.Sequential(*layers)\n", - "\n", - " def forward(self, x):\n", - " out = F.relu(self.bn1(self.conv1(x)))\n", - " out = self.layer1(out)\n", - " out = self.layer2(out)\n", - " out = self.layer3(out)\n", - " out = self.layer4(out)\n", - " out = self.avgpool(out)\n", - " out = out.view(out.size(0), -1)\n", - " out = self.linear(out)\n", - " return out\n", - "\n", - "\n", - "def ResNet18(in_channels, num_classes):\n", - " return ResNet(BasicBlock, [2, 2, 2, 2], in_channels=in_channels, num_classes=num_classes)\n", - "\n", - "\n", - "def ResNet50(in_channels, num_classes):\n", - " return ResNet(Bottleneck, [3, 4, 6, 3], in_channels=in_channels, num_classes=num_classes)" - ] - }, - { - "cell_type": "markdown", - "id": "073ae903", - 
"metadata": {}, - "source": [ - "## Simple 3D Net - Demo example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7423423a", - "metadata": {}, - "outputs": [], - "source": [ - "# define a simple CNN model\n", - "class Net(nn.Module):\n", - " def __init__(self, in_channels, num_classes):\n", - " super(Net, self).__init__()\n", - "\n", - " self.layer1 = nn.Sequential(\n", - " nn.Conv2d(in_channels, 16, kernel_size=3),\n", - " nn.BatchNorm2d(16),\n", - " nn.ReLU())\n", - "\n", - " self.layer2 = nn.Sequential(\n", - " nn.Conv2d(16, 16, kernel_size=3),\n", - " nn.BatchNorm2d(16),\n", - " nn.ReLU(),\n", - " nn.MaxPool2d(kernel_size=2, stride=2))\n", - "\n", - " self.layer3 = nn.Sequential(\n", - " nn.Conv2d(16, 64, kernel_size=3),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU())\n", - " \n", - " self.layer4 = nn.Sequential(\n", - " nn.Conv2d(64, 64, kernel_size=3),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU())\n", - "\n", - " self.layer5 = nn.Sequential(\n", - " nn.Conv2d(64, 64, kernel_size=3, padding=1),\n", - " nn.BatchNorm2d(64),\n", - " nn.ReLU(),\n", - " nn.MaxPool2d(kernel_size=2, stride=2))\n", - "\n", - " self.fc = nn.Sequential(\n", - " nn.Linear(64 * 4 * 4 * 4, 128),\n", - " nn.ReLU(),\n", - " nn.Linear(128, 128),\n", - " nn.ReLU(),\n", - " nn.Linear(128, num_classes))\n", - "\n", - " def forward(self, x):\n", - " x = self.layer1(x)\n", - " x = self.layer2(x)\n", - " x = self.layer3(x)\n", - " x = self.layer4(x)\n", - " x = self.layer5(x)\n", - " x = x.view(x.size(0), -1)\n", - " x = self.fc(x)\n", - " return x" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15e0294b", - "metadata": {}, - "outputs": [], - "source": [ - "from acsconv.converters import ACSConverter, Conv3dConverter, Conv2_5dConverter\n", - "\n", - "#model = ResNet18(in_channels=n_channels, num_classes=n_classes)\n", - "model = Net(in_channels=n_channels, num_classes=n_classes)\n", - "model = model_to_syncbn(Conv3dConverter(model, i3d_repeat_axis=None))\n", - "\n", - "criterion = nn.CrossEntropyLoss()\n", - "optimizer = torch.optim.Adam(model.parameters(), lr=lr)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2154486", - "metadata": {}, - "outputs": [], - "source": [ - "print(model)" - ] - }, - { - "cell_type": "markdown", - "id": "8d1c78ee", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "59831bcd", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model.model, optimizer=optimizer, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model.model)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06a84be2", - "metadata": {}, - "outputs": [], - "source": [ - "from medmnist.evaluator import getACC" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4ff463bd", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "train_custom_params={'criterion':criterion,'task':task}\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@TI.add_kwargs(**train_custom_params)\n", - "@TI.register_fl_task(model='model', 
data_loader='train_loader',\n", - "                     device='device', optimizer='optimizer')\n", - "def train(model, train_loader, device, optimizer, criterion, task):\n", - "    total_loss = []\n", - "\n", - "    train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - "    model.train()\n", - "    model.to(device)\n", - "    for inputs, targets in train_loader:\n", - "        inputs, targets = inputs.to(device), targets.to(device)\n", - "\n", - "        optimizer.zero_grad()\n", - "        outputs = model(inputs)\n", - "\n", - "        if task == 'multi-label, binary-class':\n", - "            targets = targets.to(torch.float32).to(device)\n", - "            loss = criterion(outputs, targets)\n", - "        else:\n", - "            targets = torch.squeeze(targets, 1).long().to(device)\n", - "            loss = criterion(outputs, targets)\n", - "\n", - "        total_loss.append(loss.item())\n", - "\n", - "        loss.backward()\n", - "        optimizer.step()\n", - "\n", - "    return {'train_loss': np.mean(total_loss)}\n", - "\n", - "\n", - "val_custom_params={'criterion':criterion,'task':task, 'acc_fn':getACC}\n", - "\n", - "@TI.add_kwargs(**val_custom_params)\n", - "@TI.register_fl_task(model='model', data_loader='val_loader', device='device')\n", - "def validate(model, val_loader, device, criterion, task, acc_fn):\n", - "\n", - "    val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - "    model.eval()\n", - "    model.to(device)\n", - "\n", - "    total_loss = []\n", - "    y_true = torch.tensor([]).to(device)\n", - "    y_score = torch.tensor([]).to(device)\n", - "\n", - "    with torch.no_grad():\n", - "        for inputs, targets in val_loader:\n", - "            outputs = model(inputs.to(device))\n", - "\n", - "            targets = torch.squeeze(targets, 1).long().to(device)\n", - "            loss = criterion(outputs, targets)\n", - "            m = nn.Softmax(dim=1)\n", - "            outputs = m(outputs).to(device)\n", - "            targets = targets.float().reshape(len(targets), 1)\n", - "\n", - "            total_loss.append(loss.item())\n", - "\n", - "            # Accumulate labels and scores over all batches, so accuracy is computed on the full set\n", - "            y_true = torch.cat((y_true, targets), 0)\n", - "            y_score = torch.cat((y_score, outputs), 0)\n", - "\n", - "        y_true = y_true.cpu().numpy()\n", - "        y_score = y_score.detach().cpu().numpy()\n", - "        acc = acc_fn(y_true, y_score, task)\n", - "\n", - "        test_loss = sum(total_loss) / len(total_loss)\n", - "\n", - "        return {'acc': acc,\n", - "                'test_loss': test_loss,\n", - "                }" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# Create an experiment in the federation\n", - "experiment_name = 'medmnist_exp'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n", - "fl_experiment.start(model_provider=MI,\n", - "                    task_keeper=TI,\n", - "                    data_loader=fed_dataset,\n", - "                    rounds_to_train=3,\n", - "                    opt_treatment='RESET',\n", - "                    device_assignment_policy='CUDA_PREFERRED')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01fa7cea", - "metadata": {}, - "outputs": [], - "source": [ - "# If the user stops the IPython session, they can later reconnect and check how the experiment is going\n", - "# fl_experiment.restore_experiment_state(model_interface)\n", - "\n", - "fl_experiment.stream_metrics(tensorboard_logs=False)" - ] - }, - { - "cell_type": "markdown", - "id": "ef600100", - "metadata": {}, - "source": [ - 
"### " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd975242", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "openfl", - "language": "python", - "name": "openfl" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/__init__.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/__init__.py deleted file mode 100644 index fbe97a05be..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from .utils import Transform3D -from .utils import model_to_syncbn - -__all__ = ["Transform3D", "model_to_syncbn"] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/batchnorm.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/batchnorm.py deleted file mode 100644 index 29ab79e81a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/batchnorm.py +++ /dev/null @@ -1,412 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -# -*- coding: utf-8 -*- -# File batchnorm.py -# Author Jiayuan Mao -# Email maojiayuan@gmail.com -# Date 27/01/2018 -# -# This file is part of Synchronized-BatchNorm-PyTorch. -# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -# Distributed under MIT License. - -import collections -import contextlib - -import torch -import torch.nn.functional as F - -from torch.nn.modules.batchnorm import _BatchNorm - -try: - from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast -except ImportError: - ReduceAddCoalesced = Broadcast = None - -try: - from jactorch.parallel.comm import SyncMaster - from jactorch.parallel.data_parallel import JacDataParallel as DataParallelWithCallback -except ImportError: - from .comm import SyncMaster - from .replicate import DataParallelWithCallback - -__all__ = [ - 'SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', 'SynchronizedBatchNorm3d', - 'patch_sync_batchnorm', 'convert_model' -] - - -def _sum_ft(tensor): - """sum over the first and last dimention""" - return tensor.sum(dim=0).sum(dim=-1) - - -def _unsqueeze_ft(tensor): - """add new dimensions at the front and the tail""" - return tensor.unsqueeze(0).unsqueeze(-1) - - -_ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size']) -_MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std']) - - -class _SynchronizedBatchNorm(_BatchNorm): - def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): - # TODO: remove assert - error_text = 'Can not use Synchronized Batch Normalization without CUDA support.' 
- assert ReduceAddCoalesced is not None, error_text - - super(_SynchronizedBatchNorm, self).__init__( - num_features, - eps=eps, - momentum=momentum, - affine=affine - ) - - self._sync_master = SyncMaster(self._data_parallel_master) - - self._is_parallel = False - self._parallel_id = None - self._slave_pipe = None - - def forward(self, input): - # If it is not parallel computation or is in evaluation mode, use PyTorch's implementation. - if not (self._is_parallel and self.training): - return F.batch_norm( - input, self.running_mean, self.running_var, self.weight, self.bias, - self.training, self.momentum, self.eps) - - # Resize the input to (B, C, -1). - input_shape = input.size() - input = input.view(input.size(0), self.num_features, -1) - - # Compute the sum and square-sum. - sum_size = input.size(0) * input.size(2) - input_sum = _sum_ft(input) - input_ssum = _sum_ft(input ** 2) - child_message = _ChildMessage(input_sum, input_ssum, sum_size) - - # Reduce-and-broadcast the statistics. - if self._parallel_id == 0: - mean, inv_std = self._sync_master.run_master(child_message) - else: - mean, inv_std = self._slave_pipe.run_slave(child_message) - - # Compute the output. - if self.affine: - # MJY:: Fuse the multiplication for speed. - output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft( - inv_std * self.weight - ) + _unsqueeze_ft(self.bias) - else: - output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std) - - # Reshape it. - return output.view(input_shape) - - def __data_parallel_replicate__(self, ctx, copy_id): - self._is_parallel = True - self._parallel_id = copy_id - - # parallel_id == 0 means master device. - if self._parallel_id == 0: - ctx.sync_master = self._sync_master - else: - self._slave_pipe = ctx.sync_master.register_slave(copy_id) - - def _data_parallel_master(self, intermediates): - """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" - - # Always using same "device order" makes the ReduceAdd operation faster. - # Thanks to:: Tete Xiao (http://tetexiao.com/) - intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) - - to_reduce = [i[1][:2] for i in intermediates] - to_reduce = [j for i in to_reduce for j in i] # flatten - target_gpus = [i[1].sum.get_device() for i in intermediates] - - sum_size = sum([i[1].sum_size for i in intermediates]) - sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) - mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) - - broadcasted = Broadcast.apply(target_gpus, mean, inv_std) - - outputs = [] - for i, rec in enumerate(intermediates): - outputs.append((rec[0], _MasterMessage(*broadcasted[i * 2:i * 2 + 2]))) - - return outputs - - def _compute_mean_std(self, sum_, ssum, size): - """Compute the mean and standard-deviation with sum and square-sum. This method - also maintains the moving average on the master device.""" - assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' 
- mean = sum_ / size - sumvar = ssum - sum_ * mean - unbias_var = sumvar / (size - 1) - bias_var = sumvar / size - - if hasattr(torch, 'no_grad'): - with torch.no_grad(): - self.running_mean = (1 - self.momentum) * self.running_mean - self.running_mean += self.momentum * mean.data - self.running_var = (1 - self.momentum) * self.running_var - self.running_var += self.momentum * unbias_var.data - else: - self.running_mean = (1 - self.momentum) * self.running_mean - self.running_mean += self.momentum * mean.data - self.running_var = (1 - self.momentum) * self.running_var - self.running_var += self.momentum * unbias_var.data - - return mean, bias_var.clamp(self.eps) ** -0.5 - - -class SynchronizedBatchNorm1d(_SynchronizedBatchNorm): - r"""Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a - mini-batch. - - .. math:: - - y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta - - This module differs from the built-in PyTorch BatchNorm1d as the mean and - standard-deviation are reduced across all devices during training. - - For example, when one uses `nn.DataParallel` to wrap the network during - training, PyTorch's implementation normalize the tensor on each device using - the statistics only on that device, which accelerated the computation and - is also easy to implement, but the statistics might be inaccurate. - Instead, in this synchronized version, the statistics will be computed - over all training samples distributed on multiple devices. - - Note that, for one-GPU or CPU-only case, this module behaves exactly same - as the built-in PyTorch implementation. - - The mean and standard-deviation are calculated per-dimension over - the mini-batches and gamma and beta are learnable parameter vectors - of size C (where C is the input size). - - During training, this layer keeps a running estimate of its computed mean - and variance. The running sum is kept with a default momentum of 0.1. - - During evaluation, this running mean/variance is used for normalization. - - Because the BatchNorm is done over the `C` dimension, computing statistics - on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm - - Args: - num_features: num_features from an expected input of size - `batch_size x num_features [x width]` - eps: a value added to the denominator for numerical stability. - Default: 1e-5 - momentum: the value used for the running_mean and running_var - computation. Default: 0.1 - affine: a boolean value that when set to ``True``, gives the layer learnable - affine parameters. Default: ``True`` - - Shape:: - - Input: :math:`(N, C)` or :math:`(N, C, L)` - - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) - - Examples: - >>> # With Learnable Parameters - >>> m = SynchronizedBatchNorm1d(100) - >>> # Without Learnable Parameters - >>> m = SynchronizedBatchNorm1d(100, affine=False) - >>> input = torch.autograd.Variable(torch.randn(20, 100)) - >>> output = m(input) - """ - - def _check_input_dim(self, input): - if input.dim() != 2 and input.dim() != 3: - raise ValueError('expected 2D or 3D input (got {}D input)' - .format(input.dim())) - super(SynchronizedBatchNorm1d, self)._check_input_dim(input) - - -class SynchronizedBatchNorm2d(_SynchronizedBatchNorm): - r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch - of 3d inputs - - .. 
math:: - - y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta - - This module differs from the built-in PyTorch BatchNorm2d as the mean and - standard-deviation are reduced across all devices during training. - - For example, when one uses `nn.DataParallel` to wrap the network during - training, PyTorch's implementation normalize the tensor on each device using - the statistics only on that device, which accelerated the computation and - is also easy to implement, but the statistics might be inaccurate. - Instead, in this synchronized version, the statistics will be computed - over all training samples distributed on multiple devices. - - Note that, for one-GPU or CPU-only case, this module behaves exactly same - as the built-in PyTorch implementation. - - The mean and standard-deviation are calculated per-dimension over - the mini-batches and gamma and beta are learnable parameter vectors - of size C (where C is the input size). - - During training, this layer keeps a running estimate of its computed mean - and variance. The running sum is kept with a default momentum of 0.1. - - During evaluation, this running mean/variance is used for normalization. - - Because the BatchNorm is done over the `C` dimension, computing statistics - on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm - - Args: - num_features: num_features from an expected input of - size batch_size x num_features x height x width - eps: a value added to the denominator for numerical stability. - Default: 1e-5 - momentum: the value used for the running_mean and running_var - computation. Default: 0.1 - affine: a boolean value that when set to ``True``, gives the layer learnable - affine parameters. Default: ``True`` - - Shape:: - - Input: :math:`(N, C, H, W)` - - Output: :math:`(N, C, H, W)` (same shape as input) - - Examples: - >>> # With Learnable Parameters - >>> m = SynchronizedBatchNorm2d(100) - >>> # Without Learnable Parameters - >>> m = SynchronizedBatchNorm2d(100, affine=False) - >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45)) - >>> output = m(input) - """ - - def _check_input_dim(self, input): - if input.dim() != 4: - raise ValueError('expected 4D input (got {}D input)' - .format(input.dim())) - super(SynchronizedBatchNorm2d, self)._check_input_dim(input) - - -class SynchronizedBatchNorm3d(_SynchronizedBatchNorm): - r"""Applies Batch Normalization over a 5d input that is seen as a mini-batch - of 4d inputs - - .. math:: - - y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta - - This module differs from the built-in PyTorch BatchNorm3d as the mean and - standard-deviation are reduced across all devices during training. - - For example, when one uses `nn.DataParallel` to wrap the network during - training, PyTorch's implementation normalize the tensor on each device using - the statistics only on that device, which accelerated the computation and - is also easy to implement, but the statistics might be inaccurate. - Instead, in this synchronized version, the statistics will be computed - over all training samples distributed on multiple devices. - - Note that, for one-GPU or CPU-only case, this module behaves exactly same - as the built-in PyTorch implementation. - - The mean and standard-deviation are calculated per-dimension over - the mini-batches and gamma and beta are learnable parameter vectors - of size C (where C is the input size). - - During training, this layer keeps a running estimate of its computed mean - and variance. 
The running sum is kept with a default momentum of 0.1. - - During evaluation, this running mean/variance is used for normalization. - - Because the BatchNorm is done over the `C` dimension, computing statistics - on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm - or Spatio-temporal BatchNorm - - Args: - num_features: num_features from an expected input of - size batch_size x num_features x depth x height x width - eps: a value added to the denominator for numerical stability. - Default: 1e-5 - momentum: the value used for the running_mean and running_var - computation. Default: 0.1 - affine: a boolean value that when set to ``True``, gives the layer learnable - affine parameters. Default: ``True`` - - Shape:: - - Input: :math:`(N, C, D, H, W)` - - Output: :math:`(N, C, D, H, W)` (same shape as input) - - Examples: - >>> # With Learnable Parameters - >>> m = SynchronizedBatchNorm3d(100) - >>> # Without Learnable Parameters - >>> m = SynchronizedBatchNorm3d(100, affine=False) - >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10)) - >>> output = m(input) - """ - - def _check_input_dim(self, input): - if input.dim() != 5: - raise ValueError('expected 5D input (got {}D input)' - .format(input.dim())) - super(SynchronizedBatchNorm3d, self)._check_input_dim(input) - - -@contextlib.contextmanager -def patch_sync_batchnorm(): - import torch.nn as nn - - backup = nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d - - nn.BatchNorm1d = SynchronizedBatchNorm1d - nn.BatchNorm2d = SynchronizedBatchNorm2d - nn.BatchNorm3d = SynchronizedBatchNorm3d - - yield - - nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d = backup - - -def convert_model(module): - """Traverse the input module and its child recursively - and replace all instance of torch.nn.modules.batchnorm.BatchNorm*N*d - to SynchronizedBatchNorm*N*d - - Args: - module: the input module needs to be convert to SyncBN model - - Examples: - >>> import torch.nn as nn - >>> import torchvision - >>> # m is a standard pytorch model - >>> m = torchvision.models.resnet18(True) - >>> m = nn.DataParallel(m) - >>> # after convert, m is using SyncBN - >>> m = convert_model(m) - """ - if isinstance(module, torch.nn.DataParallel): - mod = module.module - mod = convert_model(mod) - mod = DataParallelWithCallback(mod) - return mod - - mod = module - for pth_module, sync_module in zip([torch.nn.modules.batchnorm.BatchNorm1d, - torch.nn.modules.batchnorm.BatchNorm2d, - torch.nn.modules.batchnorm.BatchNorm3d], - [SynchronizedBatchNorm1d, - SynchronizedBatchNorm2d, - SynchronizedBatchNorm3d]): - if isinstance(module, pth_module): - mod = sync_module(module.num_features, module.eps, module.momentum, module.affine) - mod.running_mean = module.running_mean - mod.running_var = module.running_var - if module.affine: - mod.weight.data = module.weight.data.clone().detach() - mod.bias.data = module.bias.data.clone().detach() - - for name, child in module.named_children(): - mod.add_module(name, convert_model(child)) - - return mod diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/comm.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/comm.py deleted file mode 100644 index 1f5ceca1ca..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/comm.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# -*- coding: utf-8 -*- -# File 
comm.py -# Author  Jiayuan Mao -# Email   maojiayuan@gmail.com -# Date    27/01/2018 -# -# This file is part of Synchronized-BatchNorm-PyTorch. -# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -# Distributed under MIT License. - -import queue -import collections -import threading - -__all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] - - -class FutureResult(object): -    """A thread-safe future implementation. Used only as a one-to-one pipe.""" - -    def __init__(self): -        self._result = None -        self._lock = threading.Lock() -        self._cond = threading.Condition(self._lock) - -    def put(self, result): -        with self._lock: -            assert self._result is None, 'Previous result hasn\'t been fetched.' -            self._result = result -            self._cond.notify() - -    def get(self): -        with self._lock: -            if self._result is None: -                self._cond.wait() - -            res = self._result -            self._result = None -            return res - - -_MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) -_SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result']) - - -class SlavePipe(_SlavePipeBase): -    """Pipe for master-slave communication.""" - -    def run_slave(self, msg): -        self.queue.put((self.identifier, msg)) -        ret = self.result.get() -        self.queue.put(True) -        return ret - - -class SyncMaster(object): -    """An abstract `SyncMaster` object. - -    - During the replication, as the data parallel will trigger a callback on each module, -      all slave devices should call `register_slave(id)` and obtain a `SlavePipe` -      to communicate with the master. -    - During the forward pass, the master device invokes `run_master`; -      all messages from slave devices are collected and passed to a registered callback. -    - After receiving the messages, the master device should gather the information -      and determine the message to be passed back to each slave device. -    """ - -    def __init__(self, master_callback): -        """ - -        Args: -            master_callback: a callback to be invoked -            after having collected messages from slave devices. -        """ -        self._master_callback = master_callback -        self._queue = queue.Queue() -        self._registry = collections.OrderedDict() -        self._activated = False - -    def __getstate__(self): -        return {'master_callback': self._master_callback} - -    def __setstate__(self, state): -        self.__init__(state['master_callback']) - -    def register_slave(self, identifier): -        """ -        Register a slave device. - -        Args: -            identifier: an identifier, usually the device id. - -        Returns: a `SlavePipe` object which can be used to communicate with the master device. - -        """ -        if self._activated: -            assert self._queue.empty(), 'Queue is not clean before next initialization.' -            self._activated = False -            self._registry.clear() -        future = FutureResult() -        self._registry[identifier] = _MasterRegistry(future) -        return SlavePipe(identifier, self._queue, future) - -    def run_master(self, master_msg): -        """ -        Main entry for the master device in each forward pass. -        The messages are first collected from each device (including the master device), and then -        a callback is invoked to compute the message to be sent back to each device -        (including the master device). - -        Args: -            master_msg: the message that the master wants to send to itself. -            This will be placed as the first message when calling `master_callback`. -            For detailed usage, see `_SynchronizedBatchNorm` for an example. - -        Returns: the message to be sent back to the master device.
- - """ - self._activated = True - - intermediates = [(0, master_msg)] - for i in range(self.nr_slaves): - intermediates.append(self._queue.get()) - - results = self._master_callback(intermediates) - assert results[0][0] == 0, 'The first result should belongs to the master.' - - for i, res in results: - if i == 0: - continue - self._registry[i].result.put(res) - - for i in range(self.nr_slaves): - assert self._queue.get() is True - - return results[0][1] - - @property - def nr_slaves(self): - return len(self._registry) diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/replicate.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/replicate.py deleted file mode 100644 index 6edc8ab4d1..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/replicate.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# -*- coding: utf-8 -*- -# File replicate.py -# Author Jiayuan Mao -# Email maojiayuan@gmail.com -# Date 27/01/2018 -# -# This file is part of Synchronized-BatchNorm-PyTorch. -# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -# Distributed under MIT License. - -import functools - -from torch.nn.parallel.data_parallel import DataParallel - -__all__ = [ - 'CallbackContext', - 'execute_replication_callbacks', - 'DataParallelWithCallback', - 'patch_replication_callback' -] - - -class CallbackContext(object): - pass - - -def execute_replication_callbacks(modules): - """ - Execute an replication callback `__data_parallel_replicate__` - on each module created by original replication. - - The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` - - Note that, as all modules are isomorphism, we assign each sub-module with a context - (shared among multiple copies of this module on different devices). - Through this context, different copies can share some information. - - We guarantee that the callback on the master copy (the first copy) - will be called ahead of calling the callback of any slave copies. - """ - master_copy = modules[0] - nr_modules = len(list(master_copy.modules())) - ctxs = [CallbackContext() for _ in range(nr_modules)] - - for i, module in enumerate(modules): - for j, m in enumerate(module.modules()): - if hasattr(m, '__data_parallel_replicate__'): - m.__data_parallel_replicate__(ctxs[j], i) - - -class DataParallelWithCallback(DataParallel): - """ - Data Parallel with a replication callback. - - An replication callback `__data_parallel_replicate__` of each module - will be invoked after being created by original `replicate` function. - The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` - - Examples: - > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) - > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) - # sync_bn.__data_parallel_replicate__ will be invoked. - """ - - def replicate(self, module, device_ids): - modules = super(DataParallelWithCallback, self).replicate(module, device_ids) - execute_replication_callbacks(modules) - return modules - - -def patch_replication_callback(data_parallel): - """ - Monkey-patch an existing `DataParallel` object. Add the replication callback. - Useful when you have customized `DataParallel` implementation. 
- -    Examples: -        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) -        > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) -        > patch_replication_callback(sync_bn) -        # this is equivalent to -        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) -        > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) -    """ - -    assert isinstance(data_parallel, DataParallel) - -    old_replicate = data_parallel.replicate - -    @functools.wraps(old_replicate) -    def new_replicate(module, device_ids): -        modules = old_replicate(module, device_ids) -        execute_replication_callbacks(modules) -        return modules - -    data_parallel.replicate = new_replicate diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/utils.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/utils.py deleted file mode 100644 index 91ffa9c7f8..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_MedMNIST_3D/workspace/wspace_utils/utils.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import torch.nn as nn -import numpy as np -from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d  # noqa: F401  (used via the globals() lookup below) - - -class Transform3D: - -    def __init__(self, mul=None): -        self.mul = mul - -    def __call__(self, voxel): - -        if self.mul == '0.5': -            voxel = voxel * 0.5 -        elif self.mul == 'random': -            voxel = voxel * np.random.uniform() - -        return voxel.astype(np.float32) - - -def model_to_syncbn(model): -    preserve_state_dict = model.state_dict() -    _convert_module_from_bn_to_syncbn(model) -    model.load_state_dict(preserve_state_dict) -    return model - - -def _convert_module_from_bn_to_syncbn(module): -    for child_name, child in module.named_children(): -        if ( -            hasattr(nn, child.__class__.__name__) -            and 'batchnorm' in child.__class__.__name__.lower() -        ): -            TargetClass = globals()['Synchronized' + child.__class__.__name__]  # noqa: N806 -            arguments = TargetClass.__init__.__code__.co_varnames[1:TargetClass.__init__.__code__.co_argcount]  # constructor args only: skip `self`, exclude locals -            kwargs = {k: getattr(child, k) for k in arguments} -            setattr(module, child_name, TargetClass(**kwargs)) -        else: -            _convert_module_from_bn_to_syncbn(child) diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/README.md deleted file mode 100644 index 5e066babfb..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/README.md +++ /dev/null @@ -1,66 +0,0 @@ -# PyTorch_TinyImageNet - -## **How to run this tutorial (without TLS and locally as a simulation):** -
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
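-A minimal sketch of this step (the venv name `venv` matches the activation command used in step 2; choose the OpenFL version per the linked instructions): - -```sh -python3 -m venv venv -source venv/bin/activate -python -m pip install --upgrade pip -pip install openfl -``` -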
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) - -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_TinyImageNet - ``` - -
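-For reference, a typical proxy setup might look like the following; the hostname and ports are placeholders for your own proxy, and `localhost` is excluded so director/envoy traffic on port 50051 stays direct: - -```sh -export http_proxy=http://proxy.example.com:911 -export https_proxy=http://proxy.example.com:911 -export no_proxy=localhost,127.0.0.1 -``` -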
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
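-This runs the director without TLS. A TLS variant, `start_director_with_tls.sh`, is shipped next to it; assuming certificates for the director's FQDN have already been generated under `director/cert`, it would be started as (replace `$FQDN` with your actual FQDN): - -```sh -cd director -./start_director_with_tls.sh "$FQDN" -``` -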
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r requirements.txt -./start_envoy.sh env_one envoy_config.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config.yaml -``` - -
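-Note: with the shipped `envoy_config.yaml` both envoys load the full dataset (`rank_worldsize: 1,1`). To split the data instead, each envoy would need its own config with the same world size and a distinct rank, e.g. `1,2` for the first envoy and `2,2` for the second; a sketch of the second config: - -```yaml -shard_descriptor: -  template: tinyimagenet_shard_descriptor.TinyImageNetShardDescriptor -  params: -    data_folder: tinyimagenet_data -    rank_worldsize: 2,2  # every second image, starting from the second -``` -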
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab pytorch_tinyimagenet.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the pytorch_tinyimagenet.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/director/director_config.yaml deleted file mode 100644 index 3fc4137943..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['64', '64', '3'] - target_shape: ['64', '64'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/director/start_director_with_tls.sh deleted file mode 100755 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/envoy_config.yaml deleted file mode 100644 index cb04056976..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/envoy_config.yaml +++ /dev/null @@ -1,10 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: tinyimagenet_shard_descriptor.TinyImageNetShardDescriptor - params: - data_folder: tinyimagenet_data - rank_worldsize: 1,1 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/requirements.txt deleted file mode 100644 index 7f361a8e94..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -Pillow==10.3.0 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/start_envoy.sh deleted file mode 100755 index 1dd6591439..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/start_envoy.sh +++ 
/dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config.yaml -dh localhost -dp 50051 \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/start_envoy_with_tls.sh deleted file mode 100755 index 06b2916a4f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -DIRECTOR_FQDN=$2 - -fx envoy start -n "$ENVOY_NAME" --envoy-config-path envoy_config.yaml -dh "$DIRECTOR_FQDN" -dp 50051 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/tinyimagenet_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/tinyimagenet_shard_descriptor.py deleted file mode 100644 index 7101642a27..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/envoy/tinyimagenet_shard_descriptor.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""TinyImageNet Shard Descriptor.""" - -import glob -import logging -import os -import shutil -from pathlib import Path -from typing import Tuple - -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class TinyImageNetDataset(ShardDataset): - """TinyImageNet shard dataset class.""" - - NUM_IMAGES_PER_CLASS = 500 - - def __init__(self, data_folder: Path, data_type='train', rank=1, worldsize=1): - """Initialize TinyImageNetDataset.""" - self.data_type = data_type - self._common_data_folder = data_folder - self._data_folder = os.path.join(data_folder, data_type) - self.labels = {} # fname - label number mapping - self.image_paths = sorted( - glob.iglob( - os.path.join(self._data_folder, '**', '*.JPEG'), - recursive=True - ) - )[rank - 1::worldsize] - wnids_path = os.path.join(self._common_data_folder, 'wnids.txt') - with open(wnids_path, 'r', encoding='utf-8') as fp: - self.label_texts = sorted([text.strip() for text in fp.readlines()]) - self.label_text_to_number = {text: i for i, text in enumerate(self.label_texts)} - self.fill_labels() - - def __len__(self) -> int: - """Return the len of the shard dataset.""" - return len(self.image_paths) - - def __getitem__(self, index: int) -> Tuple['Image', int]: - """Return an item by the index.""" - file_path = self.image_paths[index] - label = self.labels[os.path.basename(file_path)] - return self.read_image(file_path), label - - def read_image(self, path: Path) -> Image: - """Read the image.""" - img = Image.open(path) - return img - - def fill_labels(self) -> None: - """Fill labels.""" - if self.data_type == 'train': - for label_text, i in self.label_text_to_number.items(): - for cnt in range(self.NUM_IMAGES_PER_CLASS): - self.labels[f'{label_text}_{cnt}.JPEG'] = i - elif self.data_type == 'val': - val_annotations_path = os.path.join(self._data_folder, 'val_annotations.txt') - with open(val_annotations_path, 'r', encoding='utf-8') as fp: - for line in fp.readlines(): - terms = line.split('\t') - file_name, label_text = terms[0], terms[1] - self.labels[file_name] = 
self.label_text_to_number[label_text] - - -class TinyImageNetShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__( - self, - data_folder: str = 'data', - rank_worldsize: str = '1,1', - **kwargs - ): - """Initialize TinyImageNetShardDescriptor.""" - self.common_data_folder = Path.cwd() / data_folder - self.data_folder = Path.cwd() / data_folder / 'tiny-imagenet-200' - self.download_data() - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - def download_data(self): - """Download prepared shard dataset.""" - zip_file_path = self.common_data_folder / 'tiny-imagenet-200.zip' - os.makedirs(self.common_data_folder, exist_ok=True) - os.system(f'wget --no-clobber http://cs231n.stanford.edu/tiny-imagenet-200.zip' - f' -O {zip_file_path}') - shutil.unpack_archive(str(zip_file_path), str(self.common_data_folder)) - - def get_dataset(self, dataset_type): - """Return a shard dataset by type.""" - return TinyImageNetDataset( - data_folder=self.data_folder, - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self): - """Return the sample shape info.""" - return ['64', '64', '3'] - - @property - def target_shape(self): - """Return the target shape info.""" - return ['64', '64'] - - @property - def dataset_description(self) -> str: - """Return the shard dataset description.""" - return (f'TinyImageNetDataset dataset, shard number {self.rank}' - f' out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/workspace/non-federated_case.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/workspace/non-federated_case.ipynb deleted file mode 100644 index 0d7a3294fa..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/workspace/non-federated_case.ipynb +++ /dev/null @@ -1,305 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "## Vanilla PyTorch training on TinyImageNet dataset" - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "This notebook is intended to show that fixing random seeds leads to the same result in both federated and non-federated cases." 
- ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "!pip install -r requirements.txt" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "from pathlib import Path\n", - "import os\n", - "import shutil\n", - "from torch.utils.data import Dataset\n", - "from torch.utils.data import DataLoader\n", - "from torch import nn\n", - "from torch import optim\n", - "import torch.nn.functional as F\n", - "import torch\n", - "import torchvision.transforms as T\n", - "import torchvision\n", - "import glob\n", - "import tqdm\n", - "from PIL import Image\n", - "import numpy as np" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "## Download data" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "common_data_folder = Path.cwd() / 'data'\n", - "zip_file_path = common_data_folder / 'tiny-imagenet-200.zip'\n", - "os.makedirs(common_data_folder, exist_ok=True)\n", - "os.system(f'wget --no-clobber http://cs231n.stanford.edu/tiny-imagenet-200.zip'\n", - " f' -O {zip_file_path}')\n", - "shutil.unpack_archive(str(zip_file_path), str(common_data_folder))" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "class TinyImageNetDataset(Dataset):\n", - " \"\"\"TinyImageNet shard dataset class.\"\"\"\n", - "\n", - " NUM_IMAGES_PER_CLASS = 500\n", - "\n", - " def __init__(self, data_folder: Path, data_type='train', transform=None):\n", - " \"\"\"Initialize TinyImageNetDataset.\"\"\"\n", - " self.data_type = data_type\n", - " self._common_data_folder = data_folder\n", - " self._data_folder = os.path.join(data_folder, data_type)\n", - " self.labels = {} # fname - label number mapping\n", - " self.image_paths = sorted(\n", - " glob.iglob(\n", - " os.path.join(self._data_folder, '**', '*.JPEG'),\n", - " recursive=True\n", - " )\n", - " )\n", - " with open(os.path.join(self._common_data_folder, 'wnids.txt'), 'r') as fp:\n", - " self.label_texts = sorted([text.strip() for text in fp.readlines()])\n", - " self.label_text_to_number = {text: i for i, text in enumerate(self.label_texts)}\n", - " self.fill_labels()\n", - " self.transform = transform\n", - "\n", - " def __len__(self) -> int:\n", - " \"\"\"Return the len of the shard dataset.\"\"\"\n", - " return len(self.image_paths)\n", - "\n", - " def __getitem__(self, index: int):\n", - " \"\"\"Return an item by the index.\"\"\"\n", - " file_path = self.image_paths[index]\n", - " sample = self.read_image(file_path)\n", - " if self.transform:\n", - " sample = self.transform(sample)\n", - " label = self.labels[os.path.basename(file_path)]\n", - " return sample, label\n", - "\n", - " def read_image(self, path: Path):\n", - " \"\"\"Read the image.\"\"\"\n", - " img = Image.open(path)\n", - " return img\n", - "\n", - " def fill_labels(self) -> None:\n", - " \"\"\"Fill labels.\"\"\"\n", - " if self.data_type == 'train':\n", - " for label_text, i in self.label_text_to_number.items():\n", - " for cnt in range(self.NUM_IMAGES_PER_CLASS):\n", - " self.labels[f'{label_text}_{cnt}.JPEG'] = i\n", - " elif self.data_type == 'val':\n", - " with open(os.path.join(self._data_folder, 'val_annotations.txt'), 'r') as fp:\n", - " for line in fp.readlines():\n", - " terms = line.split('\\t')\n", - " file_name, label_text = terms[0], terms[1]\n", - " self.labels[file_name] = self.label_text_to_number[label_text]" - ], - "outputs": [], - 
"metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "normalize = T.Normalize(\n", - " mean=[0.485, 0.456, 0.406],\n", - " std=[0.229, 0.224, 0.225]\n", - ")\n", - "\n", - "augmentation = T.RandomApply(\n", - " [T.RandomHorizontalFlip(),\n", - " T.RandomRotation(10),\n", - " T.RandomResizedCrop(64)], \n", - " p=.8\n", - ")\n", - "\n", - "training_transform = T.Compose(\n", - " [T.Lambda(lambda x: x.convert(\"RGB\")),\n", - " T.ToTensor(),\n", - " augmentation,\n", - " normalize]\n", - ")\n", - "\n", - "valid_transform = T.Compose(\n", - " [T.Lambda(lambda x: x.convert(\"RGB\")),\n", - " T.ToTensor(),\n", - " normalize]\n", - ")" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dense-commerce", - "metadata": {}, - "outputs": [], - "source": [ - "def get_train_loader():\n", - " generator=torch.Generator()\n", - " generator.manual_seed(0)\n", - " train_set = TinyImageNetDataset(common_data_folder / 'tiny-imagenet-200', transform=training_transform)\n", - " return DataLoader(train_set, batch_size=64, shuffle=True, generator=generator)\n", - "\n", - "def get_valid_loader():\n", - " valid_set = TinyImageNetDataset(common_data_folder / 'tiny-imagenet-200', data_type='val', transform=valid_transform)\n", - " return DataLoader(valid_set, batch_size=64)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Describe the model and optimizer" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "class Net(nn.Module):\n", - " def __init__(self):\n", - " torch.manual_seed(0)\n", - " super(Net, self).__init__()\n", - " self.model = torchvision.models.mobilenet_v2(pretrained=True)\n", - " self.model.requires_grad_(False)\n", - " self.model.classifier[1] = torch.nn.Linear(in_features=1280, \\\n", - " out_features=200, bias=True)\n", - "\n", - " def forward(self, x):\n", - " x = self.model.forward(x)\n", - " return x\n", - "\n", - "model = Net()" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "optimizer = optim.Adam([x for x in model.parameters() if x.requires_grad], lr=1e-4)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "loss_fn = F.cross_entropy" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "def train():\n", - " torch.manual_seed(0)\n", - " device='cpu'\n", - " \n", - " data_loader = tqdm.tqdm(get_train_loader(), desc=\"train\")\n", - " model.train()\n", - " model.to(device)\n", - "\n", - " losses = []\n", - "\n", - " for data, target in data_loader:\n", - " data, target = torch.tensor(data).to(device), torch.tensor(\n", - " target).to(device)\n", - " optimizer.zero_grad()\n", - " output = model(data)\n", - " loss = loss_fn(output, target)\n", - " loss.backward()\n", - " optimizer.step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - " \n", - " return {'train_loss': np.mean(losses),}\n", - "\n", - "def validate():\n", - " torch.manual_seed(0)\n", - " device = torch.device('cpu')\n", - " model.eval()\n", - " model.to(device)\n", - " \n", - " data_loader = tqdm.tqdm(get_valid_loader(), desc=\"validate\")\n", - " val_score = 0\n", - " total_samples = 0\n", - "\n", - " with torch.no_grad():\n", - " for data, target in data_loader:\n", - " samples = target.shape[0]\n", - " total_samples += samples\n", - " data, target = torch.tensor(data).to(device), \\\n", 
- " torch.tensor(target).to(device, dtype=torch.int64)\n", - " output = model(data)\n", - " pred = output.argmax(dim=1,keepdim=True)\n", - " val_score += pred.eq(target).sum().cpu().numpy()\n", - " \n", - " return {'acc': val_score / total_samples,}" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "for i in range(5):\n", - " if i == 0:\n", - " name, value = next(iter(validate().items()))\n", - " print(f'{name}: {value:f}')\n", - " \n", - " name, value = next(iter(train().items()))\n", - " print(f'{name}: {value:f}')\n", - " \n", - " name, value = next(iter(validate().items()))\n", - " print(f'{name}: {value:f}')" - ], - "outputs": [], - "metadata": {} - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/workspace/pytorch_tinyimagenet.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/workspace/pytorch_tinyimagenet.ipynb deleted file mode 100644 index bbfadfd086..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/workspace/pytorch_tinyimagenet.ipynb +++ /dev/null @@ -1,483 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated PyTorch TinyImageNet Tutorial" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "895288d0", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "billion-drunk", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import glob\n", - "\n", - "from PIL import Image\n", - "\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment\n", - "from copy import deepcopy\n", - "import torchvision\n", - "from torchvision import transforms as T\n", - "from torch.utils.data import Dataset\n", - "from torch.utils.data import DataLoader\n", - "import tqdm\n", - "\n", - "torch.manual_seed(0)\n", - "np.random.seed(0)" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'api'\n", - "cert_dir = 'cert'\n", - "director_node_fqdn = 'localhost'\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS their must provide CA root chain, and signed key pair to the federation interface\n", - "# cert_chain = f'{cert_dir}/root_ca.crt'\n", - "# api_certificate = f'{cert_dir}/{client_id}.crt'\n", - "# api_private_key = f'{cert_dir}/{client_id}.key'\n", - "\n", - "# federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051',\n", - "# cert_chain=cert_chain, api_cert=api_certificate, api_private_key=api_private_key)\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - 
"\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051', tls=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1abebd90", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "print(sample.shape)\n", - "print(target.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7dda1680", - "metadata": {}, - "outputs": [], - "source": [ - "normalize = T.Normalize(\n", - " mean=[0.485, 0.456, 0.406],\n", - " std=[0.229, 0.224, 0.225]\n", - ")\n", - "\n", - "augmentation = T.RandomApply(\n", - " [T.RandomHorizontalFlip(),\n", - " T.RandomRotation(10),\n", - " T.RandomResizedCrop(64)], \n", - " p=.8\n", - ")\n", - "\n", - "training_transform = T.Compose(\n", - " [T.Lambda(lambda x: x.convert(\"RGB\")),\n", - " T.ToTensor(),\n", - " augmentation,\n", - " normalize]\n", - ")\n", - "\n", - "valid_transform = T.Compose(\n", - " [T.Lambda(lambda x: x.convert(\"RGB\")),\n", - " T.ToTensor(),\n", - " normalize]\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0314d5bf", - "metadata": {}, - "outputs": [], - "source": [ - "class TransformedDataset(Dataset):\n", - " \"\"\"Image Person ReID Dataset.\"\"\"\n", - "\n", - " def __init__(self, dataset, transform=None, target_transform=None):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - " self.transform = transform\n", - " self.target_transform = target_transform\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " img, label = self.dataset[index]\n", - " label = self.target_transform(label) if self.target_transform else label\n", - " img = self.transform(img) if self.transform else img\n", - " return img, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01369e3b", - "metadata": {}, - "outputs": [], - "source": [ - "class TinyImageNetDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " self.kwargs = kwargs\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - 
" self._shard_descriptor = shard_descriptor\n", - " \n", - " self.train_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('train'),\n", - " transform=training_transform\n", - " )\n", - " self.valid_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('val'),\n", - " transform=valid_transform\n", - " )\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " generator=torch.Generator()\n", - " generator.manual_seed(0)\n", - " return DataLoader(\n", - " self.train_set, batch_size=self.kwargs['train_bs'], shuffle=True, generator=generator\n", - " )\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(self.valid_set, batch_size=self.kwargs['valid_bs'])\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4a6cedef", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = TinyImageNetDataset(train_bs=64, valid_bs=64)" - ] - }, - { - "cell_type": "markdown", - "id": "74cac654", - "metadata": {}, - "source": [ - "### Describe the model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43e25fe3", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "MobileNetV2 model\n", - "\"\"\"\n", - "\n", - "class Net(nn.Module):\n", - " def __init__(self):\n", - " torch.manual_seed(0)\n", - " super(Net, self).__init__()\n", - " self.model = torchvision.models.mobilenet_v2(pretrained=True)\n", - " self.model.requires_grad_(False)\n", - " self.model.classifier[1] = torch.nn.Linear(in_features=1280, \\\n", - " out_features=200, bias=True)\n", - "\n", - " def forward(self, x):\n", - " x = self.model.forward(x)\n", - " return x\n", - "\n", - "model_net = Net()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79021778", - "metadata": {}, - "outputs": [], - "source": [ - "params_to_update = []\n", - "for param in model_net.parameters():\n", - " if param.requires_grad == True:\n", - " params_to_update.append(param)\n", - " \n", - "optimizer_adam = optim.Adam(params_to_update, lr=1e-4)\n", - "\n", - "def cross_entropy(output, target):\n", - " \"\"\"Binary cross-entropy metric\n", - " \"\"\"\n", - " return F.cross_entropy(input=output,target=target)" - ] - }, - { - "cell_type": "markdown", - "id": "f097cdc5", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06a8cca8", - "metadata": {}, - "outputs": [], - "source": [ - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "model_interface = ModelInterface(model=model_net, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model_net)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9649385", - "metadata": {}, - 
"outputs": [], - "source": [ - "task_interface = TaskInterface()\n", - "\n", - "\n", - "# The Interactive API supports registering functions definied in main module or imported.\n", - "def function_defined_in_notebook(some_parameter):\n", - " print(f'Also I accept a parameter and it is {some_parameter}')\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@task_interface.add_kwargs(**{'some_parameter': 42})\n", - "@task_interface.register_fl_task(model='net_model', data_loader='train_loader', \\\n", - " device='device', optimizer='optimizer') \n", - "def train(net_model, train_loader, optimizer, device, loss_fn=cross_entropy, some_parameter=None):\n", - " torch.manual_seed(0)\n", - " device='cpu'\n", - " function_defined_in_notebook(some_parameter)\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " net_model.train()\n", - " net_model.to(device)\n", - "\n", - " losses = []\n", - "\n", - " for data, target in train_loader:\n", - " data, target = torch.tensor(data).to(device), torch.tensor(\n", - " target).to(device)\n", - " optimizer.zero_grad()\n", - " output = net_model(data)\n", - " loss = loss_fn(output=output, target=target)\n", - " loss.backward()\n", - " optimizer.step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - " \n", - " return {'train_loss': np.mean(losses),}\n", - "\n", - "\n", - "@task_interface.register_fl_task(model='net_model', data_loader='val_loader', device='device') \n", - "def validate(net_model, val_loader, device):\n", - " torch.manual_seed(0)\n", - " device = torch.device('cpu')\n", - " net_model.eval()\n", - " net_model.to(device)\n", - " \n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - " val_score = 0\n", - " total_samples = 0\n", - "\n", - " with torch.no_grad():\n", - " for data, target in val_loader:\n", - " samples = target.shape[0]\n", - " total_samples += samples\n", - " data, target = torch.tensor(data).to(device), \\\n", - " torch.tensor(target).to(device, dtype=torch.int64)\n", - " output = net_model(data)\n", - " pred = output.argmax(dim=1,keepdim=True)\n", - " val_score += pred.eq(target).sum().cpu().numpy()\n", - " \n", - " return {'acc': val_score / total_samples,}" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = 'tinyimagenet_test_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n", - "fl_experiment.start(\n", - " model_provider=model_interface, \n", - " task_keeper=task_interface,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=5,\n", - " opt_treatment='CONTINUE_GLOBAL'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83edd88f", - "metadata": {}, - "outputs": [], - "source": [ - "# If user want to stop IPython session, then reconnect and check how experiment is going\n", - "# fl_experiment.restore_experiment_state(model_interface)\n", - "\n", - "fl_experiment.stream_metrics(tensorboard_logs=False)" - ] - } - ], - "metadata": { - 
"language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/workspace/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/workspace/requirements.txt deleted file mode 100644 index bd0fcb51ad..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet/workspace/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -torch==2.3.1 -torchvision==0.18.1 -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/README.md b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/README.md deleted file mode 100644 index 277431c48c..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# PyTorch_TinyImageNet - -## **How to run this tutorial (without TLC and locally as a simulation):** -
- -Before we dive in, let's clarify some terms. XPU is a term coined by Intel to describe their line of computing devices, which includes CPUs, GPUs, FPGAs, and other accelerators. In this tutorial, we will be focusing on the Intel® Data Center GPU Max Series model, a GPU that is part of Intel's XPU lineup. - -### 0a. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). - -
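-Before continuing, you can sanity-check that PyTorch actually sees the GPU. The snippet below is only an illustrative check, assuming the Intel® Extension for PyTorch* (installed in step 0b below) is present; the `torch.xpu` namespace is registered once the extension is imported, and the exact API surface may vary between IPEX releases.
-
-```python
-import torch
-import intel_extension_for_pytorch as ipex  # noqa: F401 - registers the 'xpu' device
-
-if torch.xpu.is_available():
-    print(f"Found {torch.xpu.device_count()} XPU device(s)")
-    print(torch.xpu.get_device_name(0))
-else:
-    print("No XPU detected - check the driver and IPEX installation")
-```
-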
- -### 0b. Quick XPU Setup - In this tutorial, when we refer to XPU, we are specifically referring to the Intel® Data Center GPU Max Series. When using the Intel® Extension for PyTorch* package, selecting the device as 'xpu' will refer to this Intel® Data Center GPU Max Series. - - For a successful setup, please follow the steps outlined in the [Installation Guide](https://intel.github.io/intel-extension-for-pytorch/xpu/2.1.10+xpu/tutorials/installation.html). This guide provides detailed information on system requirements and the installation process for the Intel® Extension for PyTorch. For a deeper understanding of features, APIs, and technical details, refer to the [Intel® Extension for PyTorch* Documentation](https://intel.github.io/intel-extension-for-pytorch/xpu/2.1.10+xpu/index.html). - -Hardware Prerequisite: Intel® Data Center GPU Max Series. - -This Jupyter Notebook has been tested and confirmed to work with the following versions: - - - intel-extension-for-pytorch==2.0.120 (xpu) - - pytorch==2.0.1 - - torchvision==0.15.2 - -These versions were obtained from official Intel® channels. - -Additionally, the XPU driver version used in testing was: - - - [XPU_Driver==803](https://dgpu-docs.intel.com/driver/installation.html) - - -
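-The notebook in this tutorial applies the extension inside its training task, but the core XPU pattern is small. Here is a minimal sketch of it, assuming IPEX and a working XPU driver are installed; the `Linear` model and `Adam` optimizer are placeholders for the notebook's MobileNetV2 and Adam:
-
-```python
-import torch
-import intel_extension_for_pytorch as ipex
-
-model = torch.nn.Linear(8, 2).to('xpu')  # placeholder model
-optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
-
-# Let IPEX optimize the model and optimizer together for the XPU.
-model, optimizer = ipex.optimize(model, optimizer=optimizer)
-
-# bfloat16 autocast on the XPU, mirroring what the notebook's training task does.
-x = torch.randn(4, 8, device='xpu')
-with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
-    y = model(x)
-print(y.shape)
-```
-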
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) - -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/PyTorch_TinyImageNet_XPU - ``` - -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` - -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r requirements.txt -./start_envoy.sh env_one envoy_config.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config.yaml -``` - -
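-Each envoy serves a disjoint shard of the dataset, controlled by `rank_worldsize` in `envoy_config.yaml`. The shard descriptor shown later in this diff uses simple strided slicing over the sorted image paths; a minimal sketch of that split (the file names here are made up):
-
-```python
-def shard(items, rank, worldsize):
-    """1-based rank r of w takes every w-th item, starting at offset r - 1."""
-    return items[rank - 1::worldsize]
-
-paths = [f"img_{i}.JPEG" for i in range(6)]  # stand-in for the sorted image paths
-print(shard(paths, 1, 2))  # ['img_0.JPEG', 'img_2.JPEG', 'img_4.JPEG']
-print(shard(paths, 2, 2))  # ['img_1.JPEG', 'img_3.JPEG', 'img_5.JPEG']
-```
-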
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab pytorch_tinyimagenet_XPU.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the pytorch_tinyimagenet_XPU.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/director/director_config.yaml deleted file mode 100644 index 3fc4137943..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['64', '64', '3'] - target_shape: ['64', '64'] diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/director/start_director_with_tls.sh deleted file mode 100755 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/envoy_config.yaml deleted file mode 100644 index cb04056976..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/envoy_config.yaml +++ /dev/null @@ -1,10 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: tinyimagenet_shard_descriptor.TinyImageNetShardDescriptor - params: - data_folder: tinyimagenet_data - rank_worldsize: 1,1 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/requirements.txt deleted file mode 100644 index 7f361a8e94..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -Pillow==10.3.0 diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/start_envoy.sh deleted file mode 100755 index 1dd6591439..0000000000 --- 
a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config.yaml -dh localhost -dp 50051 \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/start_envoy_with_tls.sh deleted file mode 100755 index 06b2916a4f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -DIRECTOR_FQDN=$2 - -fx envoy start -n "$ENVOY_NAME" --envoy-config-path envoy_config.yaml -dh "$DIRECTOR_FQDN" -dp 50051 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/tinyimagenet_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/tinyimagenet_shard_descriptor.py deleted file mode 100644 index cb161d9d67..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/envoy/tinyimagenet_shard_descriptor.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (C) 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""TinyImageNet Shard Descriptor.""" - -import glob -import logging -import os -import shutil -from pathlib import Path -from typing import Tuple - -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class TinyImageNetDataset(ShardDataset): - """TinyImageNet shard dataset class.""" - - NUM_IMAGES_PER_CLASS = 500 - - def __init__(self, data_folder: Path, data_type='train', rank=1, worldsize=1): - """Initialize TinyImageNetDataset.""" - self.data_type = data_type - self._common_data_folder = data_folder - self._data_folder = os.path.join(data_folder, data_type) - self.labels = {} # fname - label number mapping - self.image_paths = sorted( - glob.iglob( - os.path.join(self._data_folder, '**', '*.JPEG'), - recursive=True - ) - )[rank - 1::worldsize] - wnids_path = os.path.join(self._common_data_folder, 'wnids.txt') - with open(wnids_path, 'r', encoding='utf-8') as fp: - self.label_texts = sorted([text.strip() for text in fp.readlines()]) - self.label_text_to_number = {text: i for i, text in enumerate(self.label_texts)} - self.fill_labels() - - def __len__(self) -> int: - """Return the len of the shard dataset.""" - return len(self.image_paths) - - def __getitem__(self, index: int) -> Tuple['Image', int]: - """Return an item by the index.""" - file_path = self.image_paths[index] - label = self.labels[os.path.basename(file_path)] - return self.read_image(file_path), label - - def read_image(self, path: Path) -> Image: - """Read the image.""" - img = Image.open(path) - return img - - def fill_labels(self) -> None: - """Fill labels.""" - if self.data_type == 'train': - for label_text, i in self.label_text_to_number.items(): - for cnt in range(self.NUM_IMAGES_PER_CLASS): - self.labels[f'{label_text}_{cnt}.JPEG'] = i - elif self.data_type == 'val': - val_annotations_path = os.path.join(self._data_folder, 'val_annotations.txt') - with open(val_annotations_path, 'r', encoding='utf-8') as fp: - for line 
in fp.readlines(): - terms = line.split('\t') - file_name, label_text = terms[0], terms[1] - self.labels[file_name] = self.label_text_to_number[label_text] - - -class TinyImageNetShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__( - self, - data_folder: str = 'data', - rank_worldsize: str = '1,1', - **kwargs - ): - """Initialize TinyImageNetShardDescriptor.""" - self.common_data_folder = Path.cwd() / data_folder - self.data_folder = Path.cwd() / data_folder / 'tiny-imagenet-200' - self.download_data() - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - def download_data(self): - """Download prepared shard dataset.""" - zip_file_path = self.common_data_folder / 'tiny-imagenet-200.zip' - os.makedirs(self.common_data_folder, exist_ok=True) - os.system(f'wget --no-clobber http://cs231n.stanford.edu/tiny-imagenet-200.zip' - f' -O {zip_file_path}') - shutil.unpack_archive(str(zip_file_path), str(self.common_data_folder)) - - def get_dataset(self, dataset_type): - """Return a shard dataset by type.""" - return TinyImageNetDataset( - data_folder=self.data_folder, - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self): - """Return the sample shape info.""" - return ['64', '64', '3'] - - @property - def target_shape(self): - """Return the target shape info.""" - return ['64', '64'] - - @property - def dataset_description(self) -> str: - """Return the shard dataset description.""" - return (f'TinyImageNetDataset dataset, shard number {self.rank}' - f' out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/workspace/pytorch_tinyimagenet_XPU.ipynb b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/workspace/pytorch_tinyimagenet_XPU.ipynb deleted file mode 100644 index 02d9008146..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/workspace/pytorch_tinyimagenet_XPU.ipynb +++ /dev/null @@ -1,530 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated PyTorch TinyImageNet Tutorial XPU Version" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "895288d0", - "metadata": {}, - "outputs": [], - "source": [ - "# For interactive api requirements. 
For XPU requirements, review Readme Quick XPU Setup section.\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "billion-drunk", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import glob\n", - "\n", - "from PIL import Image\n", - "\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment\n", - "from copy import deepcopy\n", - "import torchvision\n", - "from torchvision import transforms as T\n", - "from torch.utils.data import Dataset\n", - "from torch.utils.data import DataLoader\n", - "import tqdm\n", - "\n", - "import intel_extension_for_pytorch as ipex\n", - "device = torch.device(\"xpu\")\n", - "\n", - "torch.manual_seed(0)\n", - "np.random.seed(0)" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'api'\n", - "cert_dir = 'cert'\n", - "director_node_fqdn = 'localhost'\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS their must provide CA root chain, and signed key pair to the federation interface\n", - "# cert_chain = f'{cert_dir}/root_ca.crt'\n", - "# api_certificate = f'{cert_dir}/{client_id}.crt'\n", - "# api_private_key = f'{cert_dir}/{client_id}.key'\n", - "\n", - "# federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051',\n", - "# cert_chain=cert_chain, api_cert=api_certificate, api_private_key=api_private_key)\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(client_id=client_id, director_node_fqdn=director_node_fqdn, director_port='50051', tls=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1abebd90", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "print(sample.shape)\n", - "print(target.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "id": "7dda1680", - "metadata": {}, - "outputs": [], - "source": [ - "normalize = T.Normalize(\n", - " mean=[0.485, 0.456, 0.406],\n", - " std=[0.229, 0.224, 0.225]\n", - ")\n", - "\n", - "augmentation = T.RandomApply(\n", - " [T.RandomHorizontalFlip(),\n", - " T.RandomRotation(10),\n", - " T.RandomResizedCrop(64)], \n", - " p=.8\n", - ")\n", - "\n", - "training_transform = T.Compose(\n", - " [T.Lambda(lambda x: x.convert(\"RGB\")),\n", - " T.ToTensor(),\n", - " augmentation,\n", - " normalize]\n", - ")\n", - "\n", - "valid_transform = T.Compose(\n", - " [T.Lambda(lambda x: x.convert(\"RGB\")),\n", - " T.ToTensor(),\n", - " normalize]\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0314d5bf", - "metadata": {}, - "outputs": [], - "source": [ - "class TransformedDataset(Dataset):\n", - " \"\"\"Image Person ReID Dataset.\"\"\"\n", - "\n", - " def __init__(self, dataset, transform=None, target_transform=None):\n", - " \"\"\"Initialize Dataset.\"\"\"\n", - " self.dataset = dataset\n", - " self.transform = transform\n", - " self.target_transform = target_transform\n", - "\n", - " def __len__(self):\n", - " \"\"\"Length of dataset.\"\"\"\n", - " return len(self.dataset)\n", - "\n", - " def __getitem__(self, index):\n", - " img, label = self.dataset[index]\n", - " label = self.target_transform(label) if self.target_transform else label\n", - " img = self.transform(img) if self.transform else img\n", - " return img, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01369e3b", - "metadata": {}, - "outputs": [], - "source": [ - "class TinyImageNetDataset(DataInterface):\n", - " def __init__(self, **kwargs):\n", - " self.kwargs = kwargs\n", - " \n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - " \n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " \n", - " self.train_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('train'),\n", - " transform=training_transform\n", - " )\n", - " self.valid_set = TransformedDataset(\n", - " self._shard_descriptor.get_dataset('val'),\n", - " transform=valid_transform\n", - " )\n", - " \n", - " def get_train_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " generator=torch.Generator()\n", - " generator.manual_seed(0)\n", - " return DataLoader(\n", - " self.train_set, batch_size=self.kwargs['train_bs'], shuffle=True, generator=generator\n", - " )\n", - "\n", - " def get_valid_loader(self, **kwargs):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " return DataLoader(self.valid_set, batch_size=self.kwargs['valid_bs'])\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4a6cedef", - "metadata": 
{}, - "outputs": [], - "source": [ - "fed_dataset = TinyImageNetDataset(train_bs=64, valid_bs=64)" - ] - }, - { - "cell_type": "markdown", - "id": "74cac654", - "metadata": {}, - "source": [ - "### Describe the model and optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43e25fe3", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "MobileNetV2 model\n", - "\"\"\"\n", - "\n", - "class Net(nn.Module):\n", - " def __init__(self):\n", - " torch.manual_seed(0)\n", - " super(Net, self).__init__()\n", - " self.model = torchvision.models.mobilenet_v2(pretrained=True)\n", - " self.model.requires_grad_(False)\n", - " self.model.classifier[1] = torch.nn.Linear(in_features=1280, \\\n", - " out_features=200, bias=True)\n", - "\n", - " def forward(self, x):\n", - " x = self.model.forward(x)\n", - " return x\n", - "\n", - "model_net = Net()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79021778", - "metadata": {}, - "outputs": [], - "source": [ - "params_to_update = []\n", - "for param in model_net.parameters():\n", - " if param.requires_grad == True:\n", - " params_to_update.append(param)\n", - " \n", - "optimizer_adam = optim.Adam(params_to_update, lr=1e-4)\n", - "\n", - "def cross_entropy(output, target):\n", - " \"\"\"Binary cross-entropy metric\n", - " \"\"\"\n", - " return F.cross_entropy(input=output,target=target)" - ] - }, - { - "cell_type": "markdown", - "id": "f097cdc5", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06a8cca8", - "metadata": {}, - "outputs": [], - "source": [ - "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n", - "model_interface = ModelInterface(model=model_net, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model_net)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d7e010d-b885-4b77-83d5-e69e239be5a6", - "metadata": {}, - "outputs": [], - "source": [ - "# Counter class for the Rounds\n", - "class Counter:\n", - " def __init__(self):\n", - " self.value = 0\n", - " \n", - " def current(self):\n", - " return self.value\n", - " \n", - " def add(self):\n", - " self.value += 1\n", - "\n", - "counter = Counter()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9649385", - "metadata": {}, - "outputs": [], - "source": [ - "task_interface = TaskInterface()\n", - "\n", - "\n", - "# Task interface currently supports only standalone functions.\n", - "@task_interface.add_kwargs(**{ 'device':'xpu'})\n", - "@task_interface.register_fl_task(model='model_net', data_loader='train_loader', \\\n", - " device='device', optimizer='optimizer') \n", - "def train(model_net, train_loader, optimizer, device, loss_fn=cross_entropy, some_parameter=None):\n", - " torch.manual_seed(0)\n", - " \n", - " # Start counter of rounds\n", - " round = counter.current()\n", - " \n", - " model_net.train()\n", - " # On first round move model to device\n", - " if round == 0:\n", - " model_net.to(device)\n", - " model_net, optimizer = ipex.optimize(model_net, optimizer=optimizer_adam)\n", - " \n", - " counter.add()\n", - " \n", - " train_loader = tqdm.tqdm(train_loader, desc=\"train\")\n", - " model_net.train()\n", - " model_net.to(device)\n", - "\n", - 
" losses = []\n", - "\n", - " for data, target in train_loader:\n", - " data, target = torch.tensor(data).to(device), torch.tensor(\n", - " target).to(device)\n", - " optimizer.zero_grad()\n", - " # Code changes\n", - " with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16):\n", - " output = model_net(data)\n", - " loss = loss_fn(output=output, target=target)\n", - " loss.backward()\n", - " optimizer.step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - " \n", - " return {'train_loss': np.mean(losses),}\n", - "\n", - "@task_interface.add_kwargs(**{ 'device':'xpu'})\n", - "@task_interface.register_fl_task(model='model_net', device='device',data_loader='val_loader') \n", - "def validate(model_net, val_loader, device):\n", - " torch.manual_seed(0)\n", - " model_net.eval()\n", - " model_net.to(device)\n", - " \n", - " val_loader = tqdm.tqdm(val_loader, desc=\"validate\")\n", - " val_score = 0\n", - " total_samples = 0\n", - "\n", - " with torch.no_grad():\n", - " for data, target in val_loader:\n", - " samples = target.shape[0]\n", - " total_samples += samples\n", - " data, target = torch.tensor(data).to(device), \\\n", - " torch.tensor(target).to(device, dtype=torch.int64)\n", - " # Code changes\n", - " with torch.xpu.amp.autocast(enabled=True, dtype=torch.int64):\n", - " output = model_net(data)\n", - " pred = output.argmax(dim=1,keepdim=True)\n", - " val_score += pred.eq(target).sum().cpu().numpy()\n", - " \n", - " return {'acc': val_score / total_samples,}" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = 'tinyimagenet_test_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n", - "fl_experiment.start(\n", - " model_provider=model_interface, \n", - " task_keeper=task_interface,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=5,\n", - " opt_treatment='CONTINUE_GLOBAL'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83edd88f", - "metadata": {}, - "outputs": [], - "source": [ - "# If user want to stop IPython session, then reconnect and check how experiment is going\n", - "# fl_experiment.restore_experiment_state(model_interface)\n", - "\n", - "fl_experiment.stream_metrics(tensorboard_logs=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "conda_IPEX", - "language": "python", - "name": "conda_ipex" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.18" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/workspace/requirements.txt b/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/workspace/requirements.txt deleted file mode 100644 index bd0fcb51ad..0000000000 --- 
a/openfl-tutorials/deprecated/interactive_api/PyTorch_TinyImageNet_XPU/workspace/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -torch==2.3.1 -torchvision==0.18.1 -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/README.md b/openfl-tutorials/deprecated/interactive_api/README.md deleted file mode 100644 index 454bab5e7b..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# **Interactive API (Director Workflow) Tutorials** - -### The Director Workflow tutorials explore a variety of frameworks, models, and datasets. The steps are similar for each tutorial, however each one has its own README with specific instructions and requirements. - diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/README.md b/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/README.md deleted file mode 100644 index 527b5dba8a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# CIFAR10 Federated Classification Tutorial - -## I. About the Dataset - -The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. -
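-A quick way to verify those counts, assuming TensorFlow is installed (this is the same loader the tutorial's shard descriptor uses):
-
-```python
-import tensorflow as tf
-
-(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
-print(x_train.shape, y_train.shape)  # (50000, 32, 32, 3) (50000, 1)
-print(x_test.shape, y_test.shape)    # (10000, 32, 32, 3) (10000, 1)
-```
-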
- -## II. About the Model - -A simple multi-layer CNN is used. Definition provided in the notebook. -
- -## III. About the Federation - -Data is equally partitioned between envoys/participants. - -You can write your own splitting scheme in the shard descriptor class; a sketch follows below. -
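-For example, as an alternative to the strided NumPy slicing this tutorial's shard descriptor uses, an equal split could lean on `tf.data.Dataset.shard`. This is an illustrative sketch only; note that `rank` is 1-based in the envoy config, while `shard()` expects a 0-based index:
-
-```python
-import tensorflow as tf
-
-(x_train, y_train), _ = tf.keras.datasets.cifar10.load_data()
-ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
-
-rank, worldsize = 1, 2  # as parsed from `rank_worldsize` in the envoy config
-shard_ds = ds.shard(num_shards=worldsize, index=rank - 1)
-print(shard_ds.cardinality().numpy())  # 25000 for this 2-way split
-```
-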
- -## IV. How to run this tutorial (without TLS and locally as a simulation): - -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/Tensorflow_CIFAR_tfdata - ``` -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` -
- -### 4. In the second terminal, run the envoy: - -```sh -cd envoy -./start_envoy.sh env_one envoy_config_one.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure step 2 is complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config_two.yaml -``` -
- -### 5. In the third terminal (or fourth terminal, if you chose to do two envoys) run the Jupyter Notebook: - -```sh -cd workspace -jupyter lab Tensorflow_CIFAR.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the Tensorflow_CIFAR.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment finished successfully. \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/director/director_config.yaml deleted file mode 100644 index 1d3c067543..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['32', '32', '3'] - target_shape: ['1'] diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/cifar10_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/cifar10_shard_descriptor.py deleted file mode 100644 index aabaf70005..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/cifar10_shard_descriptor.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""CIFAR10 Shard Descriptor (using `tf.data.Dataset` API)""" -import logging -from typing import List, Tuple - -import tensorflow as tf - -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - -logger = logging.getLogger(__name__) - - -class CIFAR10ShardDescriptor(ShardDescriptor): - """ - CIFAR10 Shard Descriptor - - This example is based on `tf.data.Dataset` pipelines. - Note that model/task ingestion only requires an iterable dataloader, - so these pipelines can be used without the need for a - new interface. - """ - - def __init__( - self, - rank_worldsize: str = '1, 1', - **kwargs - ): - """Download/Prepare CIFAR10 dataset""" - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - # Load dataset - train_ds, valid_ds = self._download_and_prepare_dataset( - rank=self.rank, - worldsize=self.worldsize - ) - - # Set attributes - self._sample_shape = train_ds.element_spec[0].shape - self._target_shape = train_ds.element_spec[1].shape - - self.splits = { - 'train': train_ds, - 'valid': valid_ds - } - - @staticmethod - def _download_and_prepare_dataset(rank: int, worldsize: int) -> Tuple[tf.data.Dataset, tf.data.Dataset]: - """ - Load CIFAR10 as `tf.data.Dataset`. - - Provide `rank` and `worldsize` to auto-split uniquely for each client - for simulation purposes. 
- """ - (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() - - # Split - x_train, y_train = x_train[rank - 1::worldsize], y_train[rank - 1::worldsize] - x_test, y_test = x_test[rank - 1::worldsize], y_test[rank - 1::worldsize] - - train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)) - return train_ds, test_ds - - def get_shard_dataset_types(self) -> List[str]: - """Get available split names""" - return list(self.splits) - - def get_split(self, name: str) -> tf.data.Dataset: - """Return a shard dataset by type.""" - if name not in self.splits: - raise Exception(f'Split name `{name}` not found.' - f' Expected one of {list(self.splits.keys())}') - return self.splits[name] - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - return list(map(str, self._sample_shape)) - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - return list(map(str, self._target_shape)) - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - n_train = len(self.splits['train']) - n_test = len(self.splits['valid']) - return (f'CIFAR10 dataset, shard number {self.rank}/{self.worldsize}.' - f'\nSamples [Train/Valid]: [{n_train}/{n_test}]') diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/envoy_config_one.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/envoy_config_one.yaml deleted file mode 100644 index bf9f9e51fd..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/envoy_config_one.yaml +++ /dev/null @@ -1,9 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: cifar10_shard_descriptor.CIFAR10ShardDescriptor - params: - rank_worldsize: 1, 2 diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/envoy_config_two.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/envoy_config_two.yaml deleted file mode 100644 index 22d82117f6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/envoy_config_two.yaml +++ /dev/null @@ -1,9 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: cifar10_shard_descriptor.CIFAR10ShardDescriptor - params: - rank_worldsize: 2, 2 diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/start_envoy.sh deleted file mode 100755 index cdd84e7fb6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/workspace/Tensorflow_CIFAR.ipynb b/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/workspace/Tensorflow_CIFAR.ipynb deleted file mode 100644 index 87dee451a0..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_CIFAR_tfdata/workspace/Tensorflow_CIFAR.ipynb +++ /dev/null @@ -1,375 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated Tensorflow 
CIFAR10 Tutorial\n", - "Using `tf.data` API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1329f2e6", - "metadata": {}, - "outputs": [], - "source": [ - "# Install TF if not already. We recommend TF2.13 or greater.\n", - "# !pip install tensorflow==2.13" - ] - }, - { - "cell_type": "markdown", - "id": "e0d30942", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0833dfc9", - "metadata": {}, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "print('TensorFlow', tf.__version__)" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation\n", - "\n", - "Start `Director` and `Envoy` before proceeding with this cell. \n", - "\n", - "This cell connects this notebook to the Federation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'api'\n", - "cert_dir = 'cert'\n", - "director_node_fqdn = 'localhost'\n", - "director_port = 50051\n", - "\n", - "# Create a Federation\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port, \n", - " tls=False\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6efe22a8", - "metadata": {}, - "source": [ - "## Query Datasets from Shard Registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Describing FL experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface\n", - "from openfl.interface.interactive_api.experiment import ModelInterface\n", - "from openfl.interface.interactive_api.experiment import FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "3b468ae1", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06545bbb", - "metadata": {}, - "outputs": [], - "source": [ - "# Define model\n", - "model = tf.keras.Sequential([\n", - " tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),\n", - " tf.keras.layers.MaxPooling2D((2, 2)),\n", - " tf.keras.layers.BatchNormalization(),\n", - " tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),\n", - " tf.keras.layers.MaxPooling2D((2, 2)),\n", - " tf.keras.layers.BatchNormalization(),\n", - " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(10, activation=None),\n", - "], name='simplecnn')\n", - 
"model.summary()\n", - "\n", - "# Define optimizer\n", - "optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=1e-4)\n", - "\n", - "# Loss and metrics. These will be used later.\n", - "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()\n", - "val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()\n", - "\n", - "# Create ModelInterface\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.keras_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8c9eb50", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import DataInterface\n", - "\n", - "class CIFAR10FedDataset(DataInterface):\n", - "\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " \n", - " # shard_descriptor.get_split(...) returns a tf.data.Dataset\n", - " # Check cifar10_shard_descriptor.py for details\n", - " self.train_set = shard_descriptor.get_split('train')\n", - " self.valid_set = shard_descriptor.get_split('valid')\n", - "\n", - " def get_train_loader(self):\n", - " \"\"\"Output of this method will be provided to tasks with optimizer in contract\"\"\"\n", - " bs = self.kwargs.get('train_bs', 32)\n", - " return self.train_set.batch(bs)\n", - "\n", - " def get_valid_loader(self):\n", - " \"\"\"Output of this method will be provided to tasks without optimizer in contract\"\"\"\n", - " bs = self.kwargs.get('valid_bs', 32)\n", - " return self.valid_set.batch(bs)\n", - " \n", - " def get_train_data_size(self) -> int:\n", - " \"\"\"Information for aggregation\"\"\"\n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self) -> int:\n", - " \"\"\"Information for aggregation\"\"\"\n", - " return len(self.valid_set)" - ] - }, - { - "cell_type": "markdown", - "id": "b0dfb459", - "metadata": {}, - "source": [ - "### Create CIFAR10 federated dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4af5c4c2", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = CIFAR10FedDataset(train_bs=64, valid_bs=512)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9649385", - "metadata": {}, - "outputs": [], - "source": [ - "from tensorflow.keras.utils import Progbar\n", - "\n", - "TI = TaskInterface()\n", - "\n", - "@TI.register_fl_task(model='model', data_loader='dataset', optimizer='optimizer', device='device') \n", - "def train(model, dataset, optimizer, device, loss_fn=loss_fn, warmup=False):\n", - "\n", - " # Iterate over the batches of the dataset.\n", - " pbar = Progbar(len(dataset))\n", - " \n", - " for step, (x, y) in 
enumerate(dataset):\n", - " \n", - " # Gradient\n", - " with tf.GradientTape() as tape:\n", - " logits = model(x, training=True)\n", - " loss_value = loss_fn(y, logits)\n", - " grads = tape.gradient(loss_value, model.trainable_weights)\n", - " optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", - "\n", - " # Update training metric.\n", - " train_acc_metric.update_state(y, logits)\n", - " pbar.update(step+1, \n", - " values={'loss': loss_value, 'acc': train_acc_metric.result()}.items())\n", - " if warmup: break\n", - " \n", - " # Display metrics at the end of each epoch.\n", - " train_acc = train_acc_metric.result()\n", - " print(\"Training acc over epoch: %.4f\" % (float(train_acc),))\n", - "\n", - " # Reset training metrics at the end of each epoch\n", - " train_acc_metric.reset_states()\n", - " return {'train_acc': train_acc,}\n", - "\n", - "\n", - "@TI.register_fl_task(model='model', data_loader='dataset', device='device') \n", - "def validate(model, dataset, device):\n", - " # Run a validation loop at the end of each epoch.\n", - " for x, y in dataset:\n", - " logits = model(x, training=False)\n", - " # Update val metrics\n", - " val_acc_metric.update_state(y, logits)\n", - " val_acc = val_acc_metric.result()\n", - " val_acc_metric.reset_states()\n", - " print(\"Validation acc: %.4f\" % (float(val_acc),))\n", - " \n", - " return {'validation_accuracy': val_acc,}" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experiment in the federation\n", - "experiment_name = 'cifar10_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name, serializer_plugin='openfl.plugins.interface_serializer.keras_serializer.KerasSerializer')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and Python requirements to be transferred to collaborator nodes\n", - "ROUNDS_TO_TRAIN = 10\n", - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=ROUNDS_TO_TRAIN,\n", - " opt_treatment='CONTINUE_GLOBAL')\n", - "fl_experiment.stream_metrics()" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "f82a63373a71051274245dbf52f7a790e1979bab025fdff4da684b10eb9978bd" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/README.md b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/README.md deleted file mode 100644 index 271ad4e28e..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/README.md +++ /dev/null @@ -1,68 +0,0 @@ -# MNIST Classification Tutorial - -![mnist digits](http://i.ytimg.com/vi/0QI3xgXuB-Q/hqdefault.jpg "MNIST Digits") - -## I.
About the dataset - -MNIST is a dataset of 60,000 small square 28×28 pixel grayscale images of handwritten single digits -between 0 and 9. More info at [wiki](https://en.wikipedia.org/wiki/MNIST_database). - -## II. About the model - -We use a simple fully-connected neural network defined in the -[layers.py](./workspace/layers.py) file. - -## III. How to run this tutorial (without TLS and locally as a simulation): -
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) and do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/Tensorflow_MNIST - ``` - - -
- -### 2. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` -
- -### 3. In the second terminal, run the envoy: - -```sh -cd envoy -./start_envoy.sh env_one envoy_config_one.yaml -``` - -Optional: Run a second envoy in an additional terminal: - - Ensure steps 0 and 1 are complete for this terminal as well. - - Run the second envoy: -```sh -cd envoy -./start_envoy.sh env_two envoy_config_two.yaml -``` - - Notice that "env_one" was changed to "env_two", and "envoy_config_one.yaml" was changed to "envoy_config_two.yaml" -
- -### 4. In the third terminal (or fourth terminal, if you chose to run two envoys) run the `Tensorflow_MNIST.ipynb` Jupyter Notebook: - -```sh -cd workspace -jupyter lab Tensorflow_MNIST.ipynb -``` diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/director/director_config.yaml deleted file mode 100644 index 4621e98cd6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/director/director_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50051 - sample_shape: ['28', '28', '1'] - target_shape: ['1'] diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/director/start_director_with_tls.sh deleted file mode 100755 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/envoy_config_one.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/envoy_config_one.yaml deleted file mode 100644 index 053d5ef9cb..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/envoy_config_one.yaml +++ /dev/null @@ -1,9 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: mnist_shard_descriptor.MnistShardDescriptor - params: - rank_worldsize: 1, 2 diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/envoy_config_two.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/envoy_config_two.yaml deleted file mode 100644 index b8b9685171..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/envoy_config_two.yaml +++ /dev/null @@ -1,9 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: mnist_shard_descriptor.MnistShardDescriptor - params: - rank_worldsize: 2, 2 diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/mnist_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/mnist_shard_descriptor.py deleted file mode 100644 index a8089b1ddf..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/mnist_shard_descriptor.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Mnist Shard Descriptor.""" - -import logging -import os -from typing import List - -import numpy as np -import requests - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import 
ShardDescriptor - -logger = logging.getLogger(__name__) - - -class MnistShardDataset(ShardDataset): - """Mnist Shard dataset class.""" - - def __init__(self, x, y, data_type, rank=1, worldsize=1): - """Pick rank-specific subset of (x, y)""" - self.data_type = data_type - self.rank = rank - self.worldsize = worldsize - self.x = x[self.rank - 1::self.worldsize] - self.y = y[self.rank - 1::self.worldsize] - - def __getitem__(self, index: int): - """Return an item by the index.""" - return self.x[index], self.y[index] - - def __len__(self): - """Return the len of the dataset.""" - return len(self.x) - - -class MnistShardDescriptor(ShardDescriptor): - """Mnist Shard descriptor class.""" - - def __init__( - self, - rank_worldsize: str = '1, 1', - **kwargs - ): - """Initialize MnistShardDescriptor.""" - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - (x_train, y_train), (x_test, y_test) = self.download_data() - self.data_by_type = { - 'train': (x_train, y_train), - 'val': (x_test, y_test) - } - - def get_shard_dataset_types(self) -> List[str]: - """Get available shard dataset types.""" - return list(self.data_by_type) - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - if dataset_type not in self.data_by_type: - raise Exception(f'Wrong dataset type: {dataset_type}') - return MnistShardDataset( - *self.data_by_type[dataset_type], - data_type=dataset_type, - rank=self.rank, - worldsize=self.worldsize - ) - - @property - def sample_shape(self): - """Return the sample shape info.""" - return ['28', '28', '1'] - - @property - def target_shape(self): - """Return the target shape info.""" - return ['1'] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Mnist dataset, shard number {self.rank}' - f' out of {self.worldsize}') - - def download_data(self): - """Download prepared dataset.""" - local_file_path = 'mnist.npz' - mnist_url = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz' - response = requests.get(mnist_url) - with open(local_file_path, 'wb') as f: - f.write(response.content) - - with np.load(local_file_path) as f: - x_train, y_train = f['x_train'], f['y_train'] - x_test, y_test = f['x_test'], f['y_test'] - x_train = np.reshape(x_train, (-1, 28, 28, 1)) - x_test = np.reshape(x_test, (-1, 28, 28, 1)) - - os.remove(local_file_path) # remove mnist.npz - print('Mnist data was loaded!') - return (x_train, y_train), (x_test, y_test) diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/start_envoy.sh deleted file mode 100755 index cdd84e7fb6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50051 diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/start_envoy_with_tls.sh deleted file mode 100755 index 2585cc9a01..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 -DIRECTOR_FQDN=$3 - -fx envoy start -n "$ENVOY_NAME" --envoy-config-path "$ENVOY_CONF" -dh 
"$DIRECTOR_FQDN" -dp 50051 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/workspace/Tensorflow_MNIST.ipynb b/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/workspace/Tensorflow_MNIST.ipynb deleted file mode 100644 index accf12f0b0..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_MNIST/workspace/Tensorflow_MNIST.ipynb +++ /dev/null @@ -1,481 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "26fdd9ed", - "metadata": {}, - "source": [ - "# Federated Tensorflow MNIST Tutorial" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1329f2e6", - "metadata": {}, - "outputs": [], - "source": [ - "# Install TF if not already. We recommend TF2.7 or greater.\n", - "# !pip install tensorflow==2.8" - ] - }, - { - "cell_type": "markdown", - "id": "e0d30942", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0833dfc9", - "metadata": {}, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "print('TensorFlow', tf.__version__)" - ] - }, - { - "cell_type": "markdown", - "id": "246f9c98", - "metadata": {}, - "source": [ - "## Connect to the Federation\n", - "\n", - "Start `Director` and `Envoy` before proceeding with this cell. \n", - "\n", - "This cell connects this notebook to the Federation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d657e463", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identificator that was used in signed certificate\n", - "client_id = 'api'\n", - "cert_dir = 'cert'\n", - "director_node_fqdn = 'localhost'\n", - "director_port = 50051\n", - "# 1) Run with API layer - Director mTLS \n", - "# If the user wants to enable mTLS their must provide CA root chain, and signed key pair to the federation interface\n", - "# cert_chain = f'{cert_dir}/root_ca.crt'\n", - "# api_certificate = f'{cert_dir}/{client_id}.crt'\n", - "# api_private_key = f'{cert_dir}/{client_id}.key'\n", - "\n", - "# federation = Federation(\n", - "# client_id=client_id,\n", - "# director_node_fqdn=director_node_fqdn,\n", - "# director_port=director_port,\n", - "# cert_chain=cert_chain,\n", - "# api_cert=api_certificate,\n", - "# api_private_key=api_private_key\n", - "# )\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "\n", - "# Create a Federation\n", - "federation = Federation(\n", - " client_id=client_id,\n", - " director_node_fqdn=director_node_fqdn,\n", - " director_port=director_port, \n", - " tls=False\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6efe22a8", - "metadata": {}, - "source": [ - "## Query Datasets from Shard Registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47dcfab3", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2a6c237", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "dummy_shard_dataset = 
dummy_shard_desc.get_dataset('train')\n", - "sample, target = dummy_shard_dataset[0]\n", - "f\"Sample shape: {sample.shape}, target shape: {target.shape}\"" - ] - }, - { - "cell_type": "markdown", - "id": "cc0dbdbd", - "metadata": {}, - "source": [ - "## Describing FL experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc88700a", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface\n", - "from openfl.interface.interactive_api.experiment import ModelInterface\n", - "from openfl.interface.interactive_api.experiment import FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "3b468ae1", - "metadata": {}, - "source": [ - "### Register model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06545bbb", - "metadata": {}, - "outputs": [], - "source": [ - "# Define model\n", - "model = tf.keras.Sequential([\n", - " tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),\n", - " tf.keras.layers.MaxPooling2D((2, 2)),\n", - " tf.keras.layers.BatchNormalization(),\n", - " tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)),\n", - " tf.keras.layers.MaxPooling2D((2, 2)),\n", - " tf.keras.layers.BatchNormalization(),\n", - " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(10, activation=None),\n", - "], name='simplecnn')\n", - "model.summary()\n", - "\n", - "# Define optimizer\n", - "optimizer = tf.optimizers.Adam(learning_rate=1e-3)\n", - "\n", - "# Loss and metrics. These will be used later.\n", - "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()\n", - "val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()\n", - "\n", - "# Create ModelInterface\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.keras_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)" - ] - }, - { - "cell_type": "markdown", - "id": "b0979470", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8c9eb50", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from tensorflow.keras.utils import Sequence\n", - "\n", - "from openfl.interface.interactive_api.experiment import DataInterface\n", - "\n", - "\n", - "class DataGenerator(Sequence):\n", - "\n", - " def __init__(self, shard_descriptor, batch_size):\n", - " self.shard_descriptor = shard_descriptor\n", - " self.batch_size = batch_size\n", - " self.indices = np.arange(len(shard_descriptor))\n", - " self.on_epoch_end()\n", - "\n", - " def __len__(self):\n", - " return len(self.indices) // self.batch_size\n", - "\n", - " def __getitem__(self, index):\n", - " index = self.indices[index * self.batch_size:(index + 1) * self.batch_size]\n", - " batch = [self.indices[k] for k in index]\n", - "\n", - " X, y = self.shard_descriptor[batch]\n", - " return X, y\n", - "\n", - " def on_epoch_end(self):\n", - " np.random.shuffle(self.indices)\n", - "\n", - "\n", - "class MnistFedDataset(DataInterface):\n", - "\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator 
procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - " \n", - " self.train_set = shard_descriptor.get_dataset('train')\n", - " self.valid_set = shard_descriptor.get_dataset('val')\n", - "\n", - " def __getitem__(self, index):\n", - " return self.shard_descriptor[index]\n", - "\n", - " def __len__(self):\n", - " return len(self.shard_descriptor)\n", - "\n", - " def get_train_loader(self):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " if self.kwargs['train_bs']:\n", - " batch_size = self.kwargs['train_bs']\n", - " else:\n", - " batch_size = 32\n", - " return DataGenerator(self.train_set, batch_size=batch_size)\n", - "\n", - " def get_valid_loader(self):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " if self.kwargs['valid_bs']:\n", - " batch_size = self.kwargs['valid_bs']\n", - " else:\n", - " batch_size = 32\n", - " \n", - " return DataGenerator(self.valid_set, batch_size=batch_size)\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " \n", - " return len(self.train_set)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.valid_set)" - ] - }, - { - "cell_type": "markdown", - "id": "b0dfb459", - "metadata": {}, - "source": [ - "### Create Mnist federated dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4af5c4c2", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = MnistFedDataset(train_bs=64, valid_bs=512)" - ] - }, - { - "cell_type": "markdown", - "id": "849c165b", - "metadata": {}, - "source": [ - "## Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9649385", - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "\n", - "\n", - "\n", - "TI = TaskInterface()\n", - "\n", - "# from openfl.interface.aggregation_functions import AdagradAdaptiveAggregation # Uncomment this lines to use \n", - "# agg_fn = AdagradAdaptiveAggregation(model_interface=MI, learning_rate=0.4) # Adaptive Federated Optimization\n", - "# @TI.set_aggregation_function(agg_fn) # alghorithm!\n", - "# # See details in the:\n", - "# # https://arxiv.org/abs/2003.00295\n", - "\n", - "@TI.register_fl_task(model='model', data_loader='train_dataset', device='device', optimizer='optimizer') \n", - "def train(model, train_dataset, optimizer, device, loss_fn=loss_fn, warmup=False):\n", - " start_time = time.time()\n", - "\n", - " # Iterate over the batches of the dataset.\n", - " for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):\n", - " with tf.GradientTape() as tape:\n", - " logits = model(x_batch_train, training=True)\n", - " loss_value = loss_fn(y_batch_train, logits)\n", - " grads = tape.gradient(loss_value, model.trainable_weights)\n", - " optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", - "\n", - " # Update training metric.\n", - " train_acc_metric.update_state(y_batch_train, logits)\n", - "\n", - " # Log every 200 batches.\n", - " if step % 200 == 0:\n", - " print(\n", - " \"Training loss (for one batch) at step %d: %.4f\"\n", - " % (step, float(loss_value))\n", - " )\n", - " print(\"Seen so far: %d 
samples\" % ((step + 1) * 64))\n", - " if warmup:\n", - " break\n", - "\n", - " # Display metrics at the end of each epoch.\n", - " train_acc = train_acc_metric.result()\n", - " print(\"Training acc over epoch: %.4f\" % (float(train_acc),))\n", - "\n", - " # Reset training metrics at the end of each epoch\n", - " train_acc_metric.reset_states()\n", - "\n", - " \n", - " return {'train_acc': train_acc,}\n", - "\n", - "\n", - "@TI.register_fl_task(model='model', data_loader='val_dataset', device='device') \n", - "def validate(model, val_dataset, device):\n", - " # Run a validation loop at the end of each epoch.\n", - " for x_batch_val, y_batch_val in val_dataset:\n", - " val_logits = model(x_batch_val, training=False)\n", - " # Update val metrics\n", - " val_acc_metric.update_state(y_batch_val, val_logits)\n", - " val_acc = val_acc_metric.result()\n", - " val_acc_metric.reset_states()\n", - " print(\"Validation acc: %.4f\" % (float(val_acc),))\n", - " \n", - " return {'validation_accuracy': val_acc,}" - ] - }, - { - "cell_type": "markdown", - "id": "8f0ebf2d", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41b7896", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = 'mnist_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name,serializer_plugin='openfl.plugins.interface_serializer.keras_serializer.KerasSerializer)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "480700cc-400b-4073-9daf-71b1f0d60d62", - "metadata": {}, - "outputs": [], - "source": [ - "# print the default federated learning plan\n", - "import openfl.native as fx\n", - "print(fx.get_plan(fl_plan=fl_experiment.plan))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41b44de9", - "metadata": {}, - "outputs": [], - "source": [ - "# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n", - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=5,\n", - " opt_treatment='CONTINUE_GLOBAL',\n", - " override_config={'aggregator.settings.db_store_rounds': 1, 'compression_pipeline.template': 'openfl.pipelines.KCPipeline', 'compression_pipeline.settings.n_clusters': 2})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01fa7cea", - "metadata": {}, - "outputs": [], - "source": [ - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "98fb4f07-a19f-498d-985d-1b606984fb9f", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "interpreter": { - "hash": "f82a63373a71051274245dbf52f7a790e1979bab025fdff4da684b10eb9978bd" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/README.md b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/README.md deleted file mode 100644 index 2aa84245ec..0000000000 --- 
a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# Next Word Prediction Tutorial on Keras - -## I. GPU support - -TensorFlow does not currently support GPUs with CUDA 11 properly, so we disabled CUDA in this -tutorial. Otherwise, you can follow the steps in the first answer of this Stack Overflow question (https://stackoverflow.com/questions/41991101/importerror-libcudnn-when-running-a-tensorflow-program) -to fix your environment and enable the GPU. Don't forget to -change `os.environ['CUDA_VISIBLE_DEVICES'] = '-1'` to a non-negative value. - -Alternatively, you can set the CUDA variable for each envoy before starting -it: `export CUDA_VISIBLE_DEVICES=0` -
-
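For reference, a minimal sketch (not part of the original tutorial) of how the `CUDA_VISIBLE_DEVICES` toggle above behaves in Python; the variable must be set before TensorFlow is imported:

```python
import os

# Hide all GPUs from TensorFlow (CPU-only mode); set this before importing tensorflow.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# To enable the first GPU instead, use a non-negative device index:
# os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import tensorflow as tf
print('Visible GPUs:', tf.config.list_physical_devices('GPU'))
```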
- -## II. Data - -Different envoys can have different texts, so in this tutorial each envoy uses one of these 3 fairy tale books: - -- Polish Fairy Tales by A. J. Gliński https://www.gutenberg.org/files/36668/36668-h/36668-h.htm -- English Fairy Tales by Joseph Jacobs https://www.gutenberg.org/cache/epub/7439/pg7439-images.html -- American Fairy Tales by L. FRANK BAUM https://www.gutenberg.org/files/4357/4357-h/4357-h.htm -
-
- -## III. Keras Model - -At this point OpenFL supports the Keras Sequential API and Functional API. Keras submodels are not supported; see -https://github.com/securefederatedai/openfl/issues/185 -
-
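To illustrate the two supported styles, here is a hedged sketch of this tutorial's LSTM stack written with the Functional API instead of the Sequential API (the shapes follow this tutorial: 3-grams of 96-dimensional word vectors and a 10719-word vocabulary):

```python
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model

# Functional API version of the Sequential LSTM stack used in the workspace notebook.
inputs = Input(shape=(3, 96))                    # 3-gram of 96-dim word vectors
x = LSTM(1000, return_sequences=True)(inputs)
x = LSTM(1000)(x)
x = Dense(1000, activation='tanh')(x)
outputs = Dense(10719, activation='softmax')(x)  # one-hot row over the vocabulary
model = Model(inputs=inputs, outputs=outputs)
```

Both definitions produce equivalent graphs; either form can be passed to `ModelInterface`.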
- -## IV. To run this experiment: -
- -### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: - - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package). -
- -### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment) -
- -### 2. Do the following in each terminal: - - Activate the virtual environment from step 0: - - ```sh - source venv/bin/activate - ``` - - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. - - Navigate to the tutorial: - - ```sh - cd openfl/openfl-tutorials/interactive_api/Tensorflow_Word_Prediction - ``` -
- -### 3. In the first terminal, run the director: - -```sh -cd director -./start_director.sh -``` -
- -### 4. In the second terminal, install requirements and run the envoy: - -```sh -cd envoy -pip install -r sd_requirements.txt -./start_envoy.sh env_one envoy_config_one.yaml -``` - -Optional: Run a second or third envoy in additional terminals: - - Ensure step 2 is complete for these terminals as well. - - Follow step 4 for each envoy, changing the envoy name and config file accordingly. For example: - - Envoy two would use: - ```sh - ./start_envoy.sh env_two envoy_config_two.yaml - ``` -
- -### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: - -```sh -cd workspace -jupyter lab Tensorflow_Word_Prediction.ipynb -``` -- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the Tensorflow_Word_Prediction.ipynb file. -- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". -- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. - diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/director/director_config.yaml deleted file mode 100644 index 3183a709a3..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/director/director_config.yaml +++ /dev/null @@ -1,4 +0,0 @@ -settings: - listen_ip: localhost - sample_shape: ['3', '96'] - target_shape: ['10719'] \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/director/start_director_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/director/start_director_with_tls.sh deleted file mode 100755 index 5d6d46a792..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/director/start_director_with_tls.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e -FQDN=$1 -fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/envoy_config_one.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/envoy_config_one.yaml deleted file mode 100644 index 3db7be6176..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/envoy_config_one.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# https://www.gutenberg.org/files/36668/36668-h/36668-h.htm -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: shard_descriptor.NextWordShardDescriptor - params: - title: Polish Fairy Tales - author: A. J. 
Gliński \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/envoy_config_three.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/envoy_config_three.yaml deleted file mode 100644 index a8ebdd7b3d..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/envoy_config_three.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# https://www.gutenberg.org/files/4357/4357-h/4357-h.htm -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: shard_descriptor.NextWordShardDescriptor - params: - title: American Fairy Tales - author: L. FRANK BAUM \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/envoy_config_two.yaml b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/envoy_config_two.yaml deleted file mode 100644 index fbdf200ee5..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/envoy_config_two.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# https://www.gutenberg.org/cache/epub/7439/pg7439-images.html -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: shard_descriptor.NextWordShardDescriptor - params: - title: English Fairy Tales - author: Joseph Jacobs \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/sd_requirements.txt b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/sd_requirements.txt deleted file mode 100644 index 5f3956ba14..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/sd_requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -gdown==3.13.0 -numpy==1.22.2 -pandas==1.3.3 -pyarrow==14.0.1 diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/shard_descriptor.py deleted file mode 100644 index b5dba87601..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/shard_descriptor.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Shard descriptor for text.""" - -import re -import urllib.request -import zipfile -from pathlib import Path - -import gdown -import numpy as np -import pandas as pd - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - - -class NextWordShardDataset(ShardDataset): - """Shard Dataset for text.""" - - def __init__(self, X, y): - """Initialize NextWordShardDataset.""" - self.X = X - self.y = y - - def __len__(self): - """Count number of sequences.""" - return len(self.X) - - def __getitem__(self, index: int): - """Return an item by the index.""" - return self.X[index], self.y[index] - - -class NextWordShardDescriptor(ShardDescriptor): - """Data is any text.""" - - def __init__(self, title: str = '', author: str = '') -> None: - """Initialize NextWordShardDescriptor.""" - super().__init__() - - self.title = title - self.author = author - - dataset_dir = self.download_data(title) - data = self.load_data(dataset_dir) # list of words - self.X, self.y = self.get_sequences(data) - - def get_dataset(self, dataset_type='train', train_val_split=0.8): - """Return a dataset by 
type.""" - train_size = round(len(self.X) * train_val_split) - if dataset_type == 'train': - X = self.X[:train_size] - y = self.y[:train_size] - elif dataset_type == 'val': - X = self.X[train_size:] - y = self.y[train_size:] - else: - raise Exception(f'Wrong dataset type: {dataset_type}.' - f'Choose from the list: [train, val]') - return NextWordShardDataset(X, y) - - @property - def sample_shape(self): - """Return the sample shape info.""" - length, n_gram, vector_size = self.X.shape - return [str(n_gram), str(vector_size)] # three vectors - - @property - def target_shape(self): - """Return the target shape info.""" - length, vocab_size = self.y.shape - return [str(vocab_size)] # row at one-hot matrix with n = vocab_size - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return f'Dataset from {self.title} by {self.author}' - - @staticmethod - def load_data(path): - """Load text file, return list of words.""" - file = open(path, 'r', encoding='utf8').read() - data = re.findall(r'[a-z]+', file.lower()) - return data - - @staticmethod - def get_sequences(data): - """Transform words to sequences, for X transform to vectors as well.""" - # spacy en_core_web_sm vocab_size = 10719, vector_size = 96 - x_seq = [] - y_seq = [] - - # created with make_vocab.py from - # https://gist.github.com/katerina-merkulova/e351b11c67832034b49652835b14adb0 - NextWordShardDescriptor.download_vectors() - vectors = pd.read_feather('keyed_vectors.feather') - vectors.set_index('index', inplace=True) - - for i in range(len(data) - 3): - x = data[i:i + 3] # make 3-grams - y = data[i + 3] - cur_x = [vectors.vector[word] for word in x if word in vectors.index] - if len(cur_x) == 3 and y in vectors.index: - x_seq.append(cur_x) - y_seq.append(vectors.index.get_loc(y)) - - x_seq = np.array(x_seq) - y_seq = np.array(y_seq) - y = np.zeros((y_seq.size, 10719)) - y[np.arange(y_seq.size), y_seq] = 1 - return x_seq, y - - @staticmethod - def download_data(title): - """Download text by title form Github Gist.""" - url = ('https://gist.githubusercontent.com/katerina-merkulova/e351b11c67832034b49652835b' - '14adb0/raw/5b6667c3a2e1266f3d9125510069d23d8f24dc73/' + title.replace(' ', '_') - + '.txt') - filepath = Path.cwd() / f'{title}.txt' - if not filepath.exists(): - with urllib.request.urlopen(url) as response: - content = response.read().decode('utf-8') - with open(filepath, 'w', encoding='utf-8') as file: - file.write(content) - return filepath - - @staticmethod - def download_vectors(): - """Download vectors.""" - if Path('keyed_vectors.feather').exists(): - return None - - output = 'keyed_vectors.zip' - if not Path(output).exists(): - url = 'https://drive.google.com/uc?id=1QfidtkJ9qxzNLs1pgXoY_hqnBjsDI_2i' - gdown.download(url, output, quiet=False) - - with zipfile.ZipFile(output, 'r') as zip_ref: - zip_ref.extractall(Path.cwd()) - - Path(output).unlink() # remove zip diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/start_envoy.sh deleted file mode 100755 index 3ddcc4932a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/start_envoy.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx envoy start -n env_one --disable-tls -dh localhost -dp 50051 -ec envoy_config_one.yaml \ No newline at end of file diff --git 
a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/start_envoy_with_tls.sh b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/start_envoy_with_tls.sh deleted file mode 100755 index 873ebcb3d6..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/envoy/start_envoy_with_tls.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -DIRECTOR_FQDN=$2 - -fx envoy start -n "$ENVOY_NAME" --envoy-config-path envoy_config.yaml -d "$DIRECTOR_FQDN":50051 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/workspace/Tensorflow_Word_Prediction.ipynb b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/workspace/Tensorflow_Word_Prediction.ipynb deleted file mode 100644 index 85bc5d9672..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/workspace/Tensorflow_Word_Prediction.ipynb +++ /dev/null @@ -1,481 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "2390f64a", - "metadata": {}, - "source": [ - "# Federated Next Word Prediction with Director example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e3f166dd", - "metadata": {}, - "outputs": [], - "source": [ - "# install requirements\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "52a6f355", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "import os\n", - "# disable GPUs because TensorFlow does not properly support CUDA 11\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '-1'" - ] - }, - { - "cell_type": "markdown", - "id": "1a8e22c1", - "metadata": {}, - "source": [ - "# Connect to the Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5c479dad", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identifier that was used in the signed certificate\n", - "client_id = 'frontend'\n", - "\n", - "# 1) Run with API layer - Director mTLS\n", - "# If the user wants to enable mTLS they must provide the CA root chain and a signed key pair to the federation interface\n", - "# cert_chain = 'cert/root_ca.crt'\n", - "# API_certificate = 'cert/frontend.crt'\n", - "# API_private_key = 'cert/frontend.key'\n", - "\n", - "# federation = Federation(client_id='frontend', director_node_fqdn='localhost', director_port='50051', disable_tls=False,\n", - "# cert_chain=cert_chain, api_cert=API_certificate, api_private_key=API_private_key)\n", - "\n", - "# --------------------------------------------------------------------------------------------------------------------\n", - "\n", - "# 2) Run with TLS disabled (trusted environment)\n", - "# Federation can also determine local fqdn automatically\n", - "federation = Federation(client_id='frontend', director_node_fqdn='localhost', director_port='50051', tls=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "466edd2c", - "metadata": {}, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "381a2e96", - "metadata": {}, - "outputs": [], - "source": [ - "federation.target_shape" - ] - }, - { - "cell_type": 
"execution_count": null, - "id": "d72d9aef", - "metadata": {}, - "outputs": [], - "source": [ - "# First, request a dummy_shard_desc that holds information about the federated dataset \n", - "dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)\n", - "sample, target = dummy_shard_desc.get_dataset(dataset_type='')[0]" - ] - }, - { - "cell_type": "markdown", - "id": "obvious-tyler", - "metadata": {}, - "source": [ - "## Creating a FL experiment using Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9001b56e", - "metadata": {}, - "outputs": [], - "source": [ - "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment" - ] - }, - { - "cell_type": "markdown", - "id": "sustainable-public", - "metadata": {}, - "source": [ - "### Register dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41ba9a55", - "metadata": {}, - "outputs": [], - "source": [ - "from tensorflow.keras.utils import Sequence\n", - "\n", - "class DataGenerator(Sequence):\n", - "\n", - " def __init__(self, dataset, batch_size):\n", - " self.dataset = dataset\n", - " self.batch_size = batch_size\n", - " self.on_epoch_end()\n", - "\n", - " def __len__(self):\n", - " return len(self.dataset) // self.batch_size\n", - "\n", - " def __getitem__(self, index):\n", - " return self.dataset[index * self.batch_size:(index + 1) * self.batch_size]\n", - "\n", - "# Now you can implement you data loaders using dummy_shard_desc\n", - "class NextWordSD(DataInterface):\n", - "\n", - " def __init__(self, train_val_split=0.8, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self.train_val_split = train_val_split\n", - "\n", - " @property\n", - " def shard_descriptor(self):\n", - " return self._shard_descriptor\n", - "\n", - " @shard_descriptor.setter\n", - " def shard_descriptor(self, shard_descriptor):\n", - " \"\"\"\n", - " Describe per-collaborator procedures or sharding.\n", - "\n", - " This method will be called during a collaborator initialization.\n", - " Local shard_descriptor will be set by Envoy.\n", - " \"\"\"\n", - " self._shard_descriptor = shard_descriptor\n", - "\n", - " def __getitem__(self, index):\n", - " return self.shard_descriptor[index]\n", - "\n", - " def __len__(self):\n", - " return len(self.shard_descriptor)\n", - "\n", - " def get_train_loader(self):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks with optimizer in contract\n", - " \"\"\"\n", - " if self.kwargs['train_bs']:\n", - " batch_size = self.kwargs['train_bs']\n", - " else:\n", - " batch_size = 64\n", - "\n", - " self.train_dataset = self.shard_descriptor.get_dataset('train', self.train_val_split)\n", - " return DataGenerator(self.train_dataset, batch_size=batch_size)\n", - "\n", - " def get_valid_loader(self):\n", - " \"\"\"\n", - " Output of this method will be provided to tasks without optimizer in contract\n", - " \"\"\"\n", - " if self.kwargs['valid_bs']:\n", - " batch_size = self.kwargs['valid_bs']\n", - " else:\n", - " batch_size = 512\n", - "\n", - " self.val_dataset = self.shard_descriptor.get_dataset('val', self.train_val_split)\n", - " return DataGenerator(self.val_dataset, batch_size=batch_size)\n", - "\n", - " def get_train_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return len(self.train_dataset)\n", - "\n", - " def get_valid_data_size(self):\n", - " \"\"\"\n", - " Information for aggregation\n", - " \"\"\"\n", - " return 
len(self.val_dataset)\n" - ] - }, - { - "cell_type": "markdown", - "id": "caring-distinction", - "metadata": {}, - "source": [ - "### Describe a model and optimizer\n", - "#### Sequential API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3524931d", - "metadata": {}, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "from tensorflow.keras.layers import LSTM, Dense\n", - "from tensorflow.keras.optimizers.legacy import Adam\n", - "from tensorflow.keras.metrics import TopKCategoricalAccuracy\n", - "from tensorflow.keras.losses import CategoricalCrossentropy\n", - "from tensorflow.keras.models import Sequential\n", - "\n", - "model = Sequential()\n", - "model.add(LSTM(1000, return_sequences=True))\n", - "model.add(LSTM(1000))\n", - "model.add(Dense(1000, activation='tanh'))\n", - "model.add(Dense(10719, activation='softmax'))\n", - "\n", - "optimizer = Adam(learning_rate=0.001)\n", - "loss_fn = CategoricalCrossentropy()\n", - "train_acc_metric = TopKCategoricalAccuracy(k=10)\n", - "val_acc_metric = TopKCategoricalAccuracy(k=10)\n", - "\n", - "batch_size = 64\n", - "model.build(input_shape=[batch_size, 3, 96])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b26d13e", - "metadata": {}, - "outputs": [], - "source": [ - "fed_dataset = NextWordSD(train_bs=64, valid_bs=512)" - ] - }, - { - "cell_type": "markdown", - "id": "portuguese-groove", - "metadata": {}, - "source": [ - "### Define and register FL tasks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e326d8f", - "metadata": {}, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "# https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit\n", - "@TI.register_fl_task(model='model', data_loader='train_loader', device='device', optimizer='optimizer')\n", - "def train(model, train_loader, device, optimizer):\n", - "\n", - " # Iterate over the batches of the dataset.\n", - " for step, (x_batch_train, y_batch_train) in enumerate(train_loader):\n", - "\n", - " y = tf.convert_to_tensor(y_batch_train)\n", - " with tf.GradientTape() as tape:\n", - " y_pred = model(x_batch_train, training=True) # Forward pass\n", - " # Compute the loss value\n", - " # using the `loss_fn` defined above\n", - " loss = loss_fn(y, y_pred)\n", - "\n", - " # Compute gradients\n", - " trainable_vars = model.trainable_variables\n", - " gradients = tape.gradient(loss, trainable_vars)\n", - "\n", - " # Update weights\n", - " optimizer.apply_gradients(zip(gradients, trainable_vars))\n", - "\n", - " # Update metrics\n", - " train_acc_metric.update_state(y, y_pred)\n", - " \n", - " # Reset training metrics at the end of each epoch\n", - " train_acc = train_acc_metric.result()\n", - " train_acc_metric.reset_states()\n", - " return {'train_acc': train_acc, 'loss': loss}\n", - "\n", - "\n", - "@TI.register_fl_task(model='model', data_loader='val_loader', device='device')\n", - "def validate(model, val_loader, device=''):\n", - " for x_batch_val, y_batch_val in val_loader:\n", - " y = tf.convert_to_tensor(y_batch_val)\n", - " # Compute predictions\n", - " y_pred = model(x_batch_val, training=False)\n", - " # Update the metrics.\n", - " val_acc_metric.update_state(y, y_pred)\n", - " val_acc = val_acc_metric.result()\n", - " val_acc_metric.reset_states()\n", - " return {'validation_accuracy': val_acc}\n" - ] - }, - { - "cell_type": "markdown", - "id": "caroline-passion", - "metadata": {}, - "source": [ - "#### Register model" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "id": "145676d1", - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "framework_adapter = 'openfl.plugins.frameworks_adapters.keras_adapter.FrameworkAdapterPlugin'\n", - "MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)\n", - "# Save the initial model state\n", - "initial_model = deepcopy(model)" - ] - }, - { - "cell_type": "markdown", - "id": "derived-bride", - "metadata": {}, - "source": [ - "## Time to start a federated learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "df9d0e68", - "metadata": {}, - "outputs": [], - "source": [ - "# create an experimnet in federation\n", - "experiment_name = 'word_prediction_test_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name,serializer_plugin='openfl.plugins.interface_serializer.keras_serializer.KerasSerializer)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1fdb59e1", - "metadata": {}, - "outputs": [], - "source": [ - "# If I use autoreload I got a pickling error\n", - "\n", - "# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n", - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=fed_dataset,\n", - " rounds_to_train=20,\n", - " opt_treatment='RESET')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92510227", - "metadata": {}, - "outputs": [], - "source": [ - "# If user want to stop IPython session, then reconnect and check how experiment is going \n", - "# fl_experiment.restore_experiment_state(MI)\n", - "\n", - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "markdown", - "id": "e9e0c0a8", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Testing the best model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0b55ddee", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -r ../envoy/sd_requirements.txt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b23a6ec6", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.insert(1, '../envoy')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6bbdfedc", - "metadata": {}, - "outputs": [], - "source": [ - "from shard_descriptor import NextWordShardDescriptor\n", - "\n", - "# https://www.gutenberg.org/files/2892/2892-h/2892-h.htm\n", - "fed_dataset = NextWordSD(train_bs=64, valid_bs=512, train_val_split=0)\n", - "fed_dataset.shard_descriptor = NextWordShardDescriptor(title='Irish Fairy Tales', author='James Stephens')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "821b69a7", - "metadata": {}, - "outputs": [], - "source": [ - "best_model = fl_experiment.get_best_model()\n", - "\n", - "# We remove data from director\n", - "fl_experiment.remove_experiment_data()\n", - "\n", - "# Validating initial model\n", - "validate(initial_model, fed_dataset.get_valid_loader())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5c75b1f5", - "metadata": {}, - "outputs": [], - "source": [ - "# Validating trained model\n", - "validate(best_model, fed_dataset.get_valid_loader())" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/workspace/requirements.txt b/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/workspace/requirements.txt deleted file mode 100644 index 906c8971fd..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/Tensorflow_Word_Prediction/workspace/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -numpy==1.22.2 -tensorflow==2.13 diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/README.md b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/README.md deleted file mode 100644 index 966d81073c..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/README.md +++ /dev/null @@ -1,72 +0,0 @@ -# JAX based Linear Regression Tutorial - -### 1. About dataset - -Generate a random regression problem using `make_regression` from sklearn.datasets with pre-defined parameters. - -Define the below param in envoy.yaml config to shard the dataset across participants/envoy. -- rank_worldsize - - -### 2. About model - -Simple Regression Model with XLA compiled and Auto-grad based parameter updates. - - -### 3. How to run this tutorial (without TLC and locally as a simulation): - -1. Run director: - -```sh -cd director_folder -./start_director.sh -``` - -2. Run envoy: - -Step 1: Activate virtual environment and install packages -``` -cd envoy_folder -pip install -r requirements.txt -``` -Step 2: start the envoy -```sh -./start_envoy.sh env_instance_1 envoy_config_1.yaml -``` - -Optional: start second envoy: - -- Copy `envoy_folder` to another place and follow the same process as above: - -```sh -./start_envoy.sh env_instance_2 envoy_config_2.yaml -``` - -3. Run `jax_linear_regression.ipynb` jupyter notebook: - -```sh -cd workspace -jupyter lab jax_linear_regression.ipynb -``` - -4. Visualization - -``` -tensorboard --logdir logs/ -``` - - -### 4. Known issues - -1. ##### CUDA_ERROR_OUT_OF_MEMORY Exception - JAX XLA pre-allocates 90% of the GPU at start - -- Below flag to restrict max GPU allocation to 50% -``` -%env XLA_PYTHON_CLIENT_MEM_FRACTION=.5 -``` -OR - -- set XLA_PYTHON_CLIENT_PREALLOCATE to start with a small footprint. -``` -%env XLA_PYTHON_CLIENT_PREALLOCATE=false -``` \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/director/director_config.yaml deleted file mode 100644 index d22b4b7766..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/director/director_config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -settings: - listen_host: localhost - listen_port: 50050 - sample_shape: ['1'] # Modify this param if experimenting with `n_features` of shard_descriptor. 
- target_shape: ['1'] - envoy_health_check_period: 5 # in seconds diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/envoy_config_1.yaml b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/envoy_config_1.yaml deleted file mode 100644 index 8f35387ca8..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/envoy_config_1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: regression_shard_descriptor.RegressionShardDescriptor - params: - rank_worldsize: 1, 2 \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/regression_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/regression_shard_descriptor.py deleted file mode 100644 index 2a9e1f468f..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/regression_shard_descriptor.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (C) 2020-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Noisy-Sin Shard Descriptor.""" - -from typing import List - -import jax.numpy as jnp -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split - -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - - -class RegressionShardDescriptor(ShardDescriptor): - """Regression Shard descriptor class.""" - - def __init__(self, rank_worldsize: str = '1, 1', **kwargs) -> None: - """ - Initialize Regression Data Shard Descriptor. - - This Shard Descriptor generate random regression data with some gaussian centered noise - using make_regression method from sklearn.datasets. - Shards data across participants using rank and world size. 
- """ - - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - X_train, y_train, X_test, y_test = self.generate_data() - self.data_by_type = { - 'train': jnp.concatenate((X_train, y_train[:, None]), axis=1), - 'val': jnp.concatenate((X_test, y_test[:, None]), axis=1) - } - - def generate_data(self): - """Generate regression dataset with predefined params.""" - x, y = make_regression(n_samples=1000, n_features=1, noise=14, random_state=24) - X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=24) - self.data = jnp.concatenate((x, y[:, None]), axis=1) - return X_train, y_train, X_test, y_test - - def get_shard_dataset_types(self) -> List[str]: - """Get available shard dataset types.""" - return list(self.data_by_type) - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - if dataset_type not in self.data_by_type: - raise Exception(f'Incorrect dataset type: {dataset_type}') - - if dataset_type in ['train', 'val']: - return self.data_by_type[dataset_type][self.rank - 1::self.worldsize] - else: - raise ValueError - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - (*x, _) = self.data[0] - return [str(i) for i in jnp.array(x, ndmin=1).shape] - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - (*_, y) = self.data[0] - return [str(i) for i in jnp.array(y, ndmin=1).shape] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Regression dataset, shard number {self.rank}' - f' out of {self.worldsize}') diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/requirements.txt deleted file mode 100644 index 9af7b85004..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -jax==0.3.13 -jaxlib==0.3.10 -mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability -openfl==1.3 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/start_envoy.sh deleted file mode 100755 index 4da07821af..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/workspace/JAX_linear_regression.ipynb b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/workspace/JAX_linear_regression.ipynb deleted file mode 100644 index e2562e1349..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/workspace/JAX_linear_regression.ipynb +++ /dev/null @@ -1,536 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "6705776d-d264-493d-9696-7623231e558a", - "metadata": {}, - "source": [ - "# JAX Regression Example - Interactive API" - ] - }, - { - "cell_type": "markdown", - "id": "de0fb48a-e902-4c5a-a7b7-39728f845707", - "metadata": {}, - "source": [ - "Run this jupyter 
diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/workspace/JAX_linear_regression.ipynb b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/workspace/JAX_linear_regression.ipynb
deleted file mode 100644
index e2562e1349..0000000000
--- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/workspace/JAX_linear_regression.ipynb
+++ /dev/null
@@ -1,536 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "6705776d-d264-493d-9696-7623231e558a",
-   "metadata": {},
-   "source": [
-    "# JAX Regression Example - Interactive API"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "de0fb48a-e902-4c5a-a7b7-39728f845707",
-   "metadata": {},
-   "source": [
-    "Run this Jupyter notebook in a virtual environment."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cd66b4cd-03c2-4dc6-9113-0c3fc309e5db",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install jax==0.3.13 jaxlib==0.3.10 -q"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "8b95eeb4-9a6b-401d-9d35-2deaa4b51860",
-   "metadata": {},
-   "source": [
-    "GPU version of JAX: pick the JAX wheel compatible with your pre-installed CUDA and cuDNN."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1e0e9cff-71f9-4710-87ae-cc5c37925008",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# !pip install --upgrade pip # Careful with the pip upgrade; it may cause package dependency problems during OpenFL workflow execution.\n",
-    "\n",
-    "# Installs the wheel compatible with CUDA 11 and cuDNN 8.2 or newer.\n",
-    "# Note: wheels only available on linux.\n",
-    "# !pip install --upgrade \"jax[cuda]\" -f https://storage.googleapis.com/jax-releases/jax_releases.html\n",
-    "\n",
-    "# Installs the wheel compatible with Cuda >= 11.4 and cudnn >= 8.2\n",
-    "# !pip install \"jax[cuda11_cudnn82]\" -f https://storage.googleapis.com/jax-releases/jax_releases.html\n",
-    "\n",
-    "# Installs the wheel compatible with Cuda >= 11.1 and cudnn >= 8.0.5\n",
-    "# !pip install \"jax[cuda11_cudnn805]\" -f https://storage.googleapis.com/jax-releases/jax_releases.html"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0b3fcc5b-ee99-4ea8-95e8-6ce08db00809",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Without either of the flags below, JAX XLA raises a CUDA_ERROR_OUT_OF_MEMORY exception:\n",
-    "# JAX XLA pre-allocates 90% of GPU memory at startup.\n",
-    "\n",
-    "# Set the flag below to restrict the maximum GPU allocation to 50%:\n",
-    "%env XLA_PYTHON_CLIENT_MEM_FRACTION=.5\n",
-    "\n",
-    "# OR\n",
-    "\n",
-    "# Set XLA_PYTHON_CLIENT_PREALLOCATE=false to allocate GPU memory incrementally as required (it may still grow to the entire GPU eventually).\n",
-    "# %env XLA_PYTHON_CLIENT_PREALLOCATE=false\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9008000e-fe07-4ff5-91cf-facb2decc560",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Mandatory imports for Federation\n",
-    "\n",
-    "import jax\n",
-    "import jax.numpy as jnp\n",
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d292230e-bc1d-423d-836d-b4630a1c4042",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Both MSE implementations below are equivalent and correct.\n",
-    "\n",
-    "# Calculate MSE approach 1\n",
-    "def mse_loss_function1(W, X, y):\n",
-    "    y_pred = jnp.dot(X, W)\n",
-    "    mse_error = y_pred - y\n",
-    "    return jnp.mean(jnp.square(mse_error))\n",
-    "\n",
-    "# Calculate MSE approach 2\n",
-    "def mse_loss_function2(W, X, Y):\n",
-    "    def squared_error(x, y):\n",
-    "        y_pred = jnp.dot(x, W)\n",
-    "        return jnp.inner(y-y_pred, y-y_pred)\n",
-    "    vectorized_square_error = jax.vmap(squared_error)\n",
-    "    return jnp.mean(vectorized_square_error(X, Y), axis=0)\n",
-    "\n",
-    "# Weight update as a JAX-compiled function; subsequent executions are much faster.\n",
-    "def update(W, x, y, lr):\n",
-    "    W = W - lr * jax.grad(mse_loss_function1)(W, x, y)\n",
-    "    return W"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2cdf9a57-29c4-4787-bbd4-97553187b354",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class LinearRegression:\n",
-    "    def __init__(self, n_feat: int) -> None:\n",
-    "        self.weights = jnp.ones(n_feat)\n",
-    "    \n",
-    "    def mse(self, X, y) -> float:\n",
-    "        return mse_loss_function1(self.weights, X, y)\n",
-    "    \n",
-    "    def predict(self, X):\n",
-    "        return jnp.dot(X, self.weights)\n",
-    "    \n",
-    "    def fit(self, X, Y, n_epochs: int, learning_rate: float, silent: bool) -> None:\n",
-    "        \n",
-    "        # Speed up weight updates with consecutive calls to the jitted `update` function.\n",
-    "        update_weights = jax.jit(update)\n",
-    "        \n",
-    "        start_time = time.time()\n",
-    "        print('Training Loss at start: ', self.mse(X, Y))\n",
-    "        for i in range(n_epochs):\n",
-    "            self.weights = update_weights(self.weights, X, Y, learning_rate)\n",
-    "            if i % int(n_epochs/10) == 0 and not silent:\n",
-    "                print(str(i), 'Training Loss: ', self.mse(X, Y))\n",
-    "\n",
-    "        print(\"--- %s seconds ---\" % (time.time() - start_time))\n",
-    "\n",
-    "    "
-   ]
-  },
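(Editor's note: a small timing sketch, assuming the `update` function defined above, to back up the claim that the jitted update is much faster after its first call; the first call includes XLA compilation.)

```python
# Sketch: JIT compilation cost is paid once; later calls reuse the compiled code.
import time

W = jnp.ones(1)
X_demo = jnp.linspace(0.0, 1.0, 1000).reshape(-1, 1)
y_demo = jnp.sin(X_demo).ravel()

update_jit = jax.jit(update)

t0 = time.time()
update_jit(W, X_demo, y_demo, 0.01).block_until_ready()  # compiles + runs
print('first call:', time.time() - t0)

t0 = time.time()
update_jit(W, X_demo, y_demo, 0.01).block_until_ready()  # cached, fast
print('second call:', time.time() - t0)
```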
-  {
-   "cell_type": "markdown",
-   "id": "ffd4d2d7-5537-496a-88c1-301da87d979c",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "# JAX Linear Regression with federation"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "09cf7090-da51-4f4e-9d28-2a5c6e3bca02",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "## Connect to a Federation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1b3c0039-e1f7-4047-b98b-a2d4bd42f015",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Create a federation\n",
-    "from openfl.interface.interactive_api.federation import Federation\n",
-    "\n",
-    "# Please use the same identifier that was used in the signed certificate\n",
-    "client_id = 'frontend'\n",
-    "director_node_fqdn = 'localhost'\n",
-    "director_port = 50050\n",
-    "\n",
-    "federation = Federation(\n",
-    "    client_id=client_id,\n",
-    "    director_node_fqdn=director_node_fqdn,\n",
-    "    director_port=director_port,\n",
-    "    tls=False\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7815120e-b704-4a7d-a65a-3c7542023ead",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "shard_registry = federation.get_shard_registry()\n",
-    "shard_registry"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b011dd95-64a7-4a8b-91ec-e61cdf885bbb",
-   "metadata": {},
-   "source": [
-    "### Initialize Data Interface"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b1985ac9-a2b1-4561-a962-6adfe35c3b97",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment\n",
-    "\n",
-    "class LinearRegressionDataSet(DataInterface):\n",
-    "    def __init__(self, **kwargs):\n",
-    "        \"\"\"Initialize DataLoader.\"\"\"\n",
-    "        self.kwargs = kwargs\n",
-    "\n",
-    "    @property\n",
-    "    def shard_descriptor(self):\n",
-    "        \"\"\"Return shard descriptor.\"\"\"\n",
-    "        return self._shard_descriptor\n",
-    "    \n",
-    "    @shard_descriptor.setter\n",
-    "    def shard_descriptor(self, shard_descriptor):\n",
-    "        \"\"\"\n",
-    "        Describe per-collaborator procedures or sharding.\n",
-    "        \n",
-    "        This method will be called during a collaborator initialization.\n",
-    "        Local shard_descriptor will be set by Envoy.\n",
-    "        \"\"\"\n",
-    "        self._shard_descriptor = shard_descriptor\n",
-    "        self.train_set = shard_descriptor.get_dataset('train')\n",
-    "        self.val_set = shard_descriptor.get_dataset('val')\n",
-    "        \n",
-    "    def get_train_loader(self, **kwargs):\n",
-    "        \"\"\"Output of this method will be provided to tasks with optimizer in contract.\"\"\"\n",
-    "        return self.train_set\n",
-    "\n",
-    "    def get_valid_loader(self, **kwargs):\n",
-    "        \"\"\"Output of this method will be provided to tasks without optimizer in contract.\"\"\"\n",
-    "        return self.val_set\n",
-    "\n",
-    "    def get_train_data_size(self):\n",
-    "        \"\"\"Information for aggregation.\"\"\"\n",
-    "        return len(self.train_set)\n",
-    "\n",
-    "    def get_valid_data_size(self):\n",
-    "        \"\"\"Information for aggregation.\"\"\"\n",
-    "        return len(self.val_set)\n",
-    "    \n",
-    "lin_reg_dataset = LinearRegressionDataSet()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b8909127-99d1-4dba-86fe-01a1b86585e7",
-   "metadata": {},
-   "source": [
-    "### Define Model Interface"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9523c9a2-a259-461f-937f-1fb054bd2886",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "framework_adapter = 'custom_adapter.CustomFrameworkAdapter'\n",
-    "\n",
-    "# The LinearRegression class accepts a parameter n_feat; it should match `sample_shape` from `director_config.yaml`.\n",
-    "fed_model = LinearRegression(1)\n",
-    "MI = ModelInterface(model=fed_model, optimizer=None, framework_plugin=framework_adapter)\n",
-    "\n",
-    "# Save the initial model state\n",
-    "initial_model = LinearRegression(1)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "2e3558bb-b21b-48ac-b07e-43cf75e6907b",
-   "metadata": {},
-   "source": [
-    "### Register Tasks\n",
-    "We need to employ a trick when reporting metrics: OpenFL decides which model is best based on an *increasing* metric."
-   ]
-  },
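(Editor's note: the notebook does not spell this trick out, so here is one hedged reading: since a lower MSE is better but best-model selection assumes a higher metric is better, a task could report the negated MSE. The `validate_neg_mse` task below is hypothetical and reuses the `TI` TaskInterface defined in the next cell.)

```python
# Hypothetical variant of the validation task: negate the MSE so that
# "larger metric value" really does mean "better model" during selection.
@TI.register_fl_task(model='my_model', data_loader='val_data', device='device')
def validate_neg_mse(my_model, val_data, device):
    X, Y = val_data[:, :-1], val_data[:, -1]
    return {'neg_validation_MSE': -my_model.mse(X, Y)}
```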
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f73e1ff9-d54a-49b5-9ce8-8bc72c6a2c6f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "TI = TaskInterface()\n",
-    "\n",
-    "@TI.add_kwargs(**{'lr': 0.01,\n",
-    "                  'epochs': 101})\n",
-    "@TI.register_fl_task(model='my_model', data_loader='train_data', \\\n",
-    "                     device='device', optimizer='optimizer') \n",
-    "def train(my_model, train_data, optimizer, device, lr, epochs):\n",
-    "    X, Y = train_data[:,:-1], train_data[:,-1]\n",
-    "    my_model.fit(X, Y, epochs, lr, silent=False)\n",
-    "    return {'train_MSE': my_model.mse(X, Y),}\n",
-    "\n",
-    "@TI.register_fl_task(model='my_model', data_loader='val_data', device='device')\n",
-    "def validate(my_model, val_data, device):\n",
-    "    X, Y = val_data[:,:-1], val_data[:,-1] \n",
-    "    return {'validation_MSE': my_model.mse(X, Y),}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ee7659cc-6e03-43f5-9078-95707fa0e4d5",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "### Run the federation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "749100e8-05ce-418c-a980-545e3beb900b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "experiment_name = 'jax_linear_regression_experiment'\n",
-    "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "16bf1df7-8ca8-4a5e-a833-47c265c11e05",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fl_experiment.start(model_provider=MI,\n",
-    "                    task_keeper=TI,\n",
-    "                    data_loader=lin_reg_dataset,\n",
-    "                    rounds_to_train=2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1178d1ea-05e6-46be-ac07-21620bd6ec76",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "fl_experiment.stream_metrics()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "6f23cac5-caec-4161-b4b2-dddceb3eab80",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "# JAX Linear Regression without federation (Optional Simulation)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "94be17d7-90c9-4e5f-aeb1-102271826370",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install matplotlib scikit-learn -q"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3f5ced27-a808-4eca-be6e-d8b62929f32b",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# Imports for running the JAX Linear Regression example without OpenFL.\n",
-    "\n",
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "%matplotlib inline\n",
-    "from matplotlib.pylab import rcParams\n",
-    "rcParams['figure.figsize'] = 7, 5\n",
-    "\n",
-    "from jax import make_jaxpr\n",
-    "from sklearn.datasets import make_regression\n",
-    "from sklearn.model_selection import train_test_split"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "58128c30-7865-4412-b4fd-86b0fe53f1a7",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "#### Simple Linear Regression\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "140705fc-4af3-4668-a26b-2ebe2d42575e",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# create a dataset with n_features\n",
-    "X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)\n",
-    "\n",
-    "# Train test split - Default 0.75/0.25\n",
-    "X, X_test, y, y_test = train_test_split(X, y, random_state=42)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a2665d0b-78ce-4caf-a4ca-a9decffaa96f",
-   "metadata": {},
-   "source": [
-    "Visualize data distribution"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "68f5afbf-e010-4d4a-bb92-46de06d39866",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "_ = plt.scatter(X, y)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ebe555dd-9e8d-4e80-bfde-87e3cafac14c",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "_ = plt.scatter(X_test, y_test)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "68a7ddb3-166e-447f-a8f1-881511cfbd9f",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# JAX logical execution plan\n",
-    "print(jax.make_jaxpr(update)(jnp.ones(X.shape[1]), X, y, 0.01))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "574bac29-b5c2-4f41-8655-e98450fdef8d",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# X.shape -> (n_samples, n_features)\n",
-    "\n",
-    "lr_model = LinearRegression(X.shape[1])\n",
-    "lr = 0.01\n",
-    "epochs = 101\n",
-    "\n",
-    "print(f\"Initial Test MSE: {lr_model.mse(X_test,y_test)}\")\n",
-    "\n",
-    "# silent: logging verbosity\n",
-    "lr_model.fit(X,y, epochs, lr, silent=False)\n",
-    "\n",
-    "print(f\"Final Test MSE: {lr_model.mse(X_test,y_test)}\")\n",
-    "\n",
-    "print(f\"Final parameters: {lr_model.weights}\")"
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
-  },
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/workspace/custom_adapter.py b/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/workspace/custom_adapter.py
deleted file mode 100644
index 35345dfaec..0000000000
--- a/openfl-tutorials/deprecated/interactive_api/jax_linear_regression/workspace/custom_adapter.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (C) 2020-2022 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-"""Custom model numpy adapter."""
-
-from openfl.plugins.frameworks_adapters.framework_adapter_interface import (
-    FrameworkAdapterPluginInterface,
-)
-
-
-class CustomFrameworkAdapter(FrameworkAdapterPluginInterface):
-    """Framework adapter plugin class."""
-
-    @staticmethod
-    def get_tensor_dict(model, optimizer=None):
-        """Extract tensors from a model."""
-        return {'w': model.weights}
-
-    @staticmethod
-    def set_tensor_dict(model, tensor_dict, optimizer=None, device='cpu'):
-        """Load tensors to a model."""
-        model.weights = tensor_dict['w']
diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/README.md b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/README.md
deleted file mode 100644
index 8a2c360898..0000000000
--- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Linear Regression with Numpy and OpenFL
-
-This example demonstrates several techniques for working with OpenFL.
-
-1. The Envoy workspace contains a Shard Descriptor designed to generate 1-dimensional noisy data for linear regression of a sinusoid.
The random seed used for data generation on a specific Envoy is parameterized by the `rank` argument in `shard_config`.
-2. The LinReg frontend jupyter notebook (the data scientist's entry point) features a simple numpy-based model for linear regression trained with Lasso (L1) regularization.
-3. The data scientist's workspace also contains a custom framework adapter that allows extracting weights from, and loading weights into, the custom model.
-4. The start_federation notebook provides shortcut methods to start a Federation with an arbitrary number of Envoys with different datasets. It may save time for people running single-node experiments.
-5. The SingleNotebook jupyter notebook combines the two aforementioned notebooks and allows running the whole pipeline in Google Colab. Besides the previously mentioned components, it contains scripts for pulling the OpenFL repo with the example workspaces and installing dependencies.
-
-## How to use this example
-### Locally:
-1. Start a Federation
-Distributed experiments:
-Use the OpenFL CLI to start the Director and Envoy services from the corresponding folders.
-Single-node experiments:
-Users may follow the same path or use the start_federation notebook in the workspace folder
-
-2. Submit an experiment
-Follow the LinReg jupyter notebook.
-
-### Google Colab:
-
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/intel/openfl/blob/develop/openfl-tutorials/interactive_api/numpy_linear_regression/workspace/SingleNotebook.ipynb)
diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/director/director_config.yaml
deleted file mode 100644
index 478cd5cbc0..0000000000
--- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/director/director_config.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-settings:
-  listen_host: localhost
-  listen_port: 50049
-  sample_shape: ['1']
-  target_shape: ['1']
-  envoy_health_check_period: 5 # in seconds
diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/envoy/envoy_config.yaml
deleted file mode 100644
index 107c566945..0000000000
--- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/envoy/envoy_config.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-params:
-  cuda_devices: []
-
-optional_plugin_components: {}
-
-shard_descriptor:
-  template: linreg_shard_descriptor.LinRegSD
-  params:
-    rank: 1
-    n_samples: 80
-    noise: 0.15
-  
\ No newline at end of file
diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/envoy/linreg_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/envoy/linreg_shard_descriptor.py
deleted file mode 100644
index 12bfed9df4..0000000000
--- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/envoy/linreg_shard_descriptor.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (C) 2020-2021 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-"""Noisy-Sin Shard Descriptor."""
-
-from typing import List
-
-import numpy as np
-
-from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor
-
-
-class LinRegSD(ShardDescriptor):
-    """Shard descriptor class."""
-
-    def __init__(self, rank: int, n_samples: int = 10, noise: float = 0.15) -> None:
-        """
-        Initialize LinReg Shard Descriptor.
-
-        This Shard Descriptor generates random data. Sample features are
-        floats between pi/3 and 5*pi/3, and targets are calculated
-        as sin(feature) + normal_noise.
-        """
-        np.random.seed(rank)  # Setting seed for reproducibility
-        self.n_samples = max(n_samples, 5)
-        self.interval = 240
-        self.x_start = 60
-        x = np.random.rand(n_samples, 1) * self.interval + self.x_start
-        x *= np.pi / 180
-        y = np.sin(x) + np.random.normal(0, noise, size=(n_samples, 1))
-        self.data = np.concatenate((x, y), axis=1)
-
-    def get_dataset(self, dataset_type: str) -> np.ndarray:
-        """
-        Return a shard dataset by type.
-
-        A simple list with elements (x, y) implements the Shard Dataset interface.
-        """
-        if dataset_type == 'train':
-            return self.data[:self.n_samples // 2]
-        elif dataset_type == 'val':
-            return self.data[self.n_samples // 2:]
-        else:
-            raise ValueError(f'Unsupported dataset type: {dataset_type}')
-
-    @property
-    def sample_shape(self) -> List[str]:
-        """Return the sample shape info."""
-        (*x, _) = self.data[0]
-        return [str(i) for i in np.array(x, ndmin=1).shape]
-
-    @property
-    def target_shape(self) -> List[str]:
-        """Return the target shape info."""
-        (*_, y) = self.data[0]
-        return [str(i) for i in np.array(y, ndmin=1).shape]
-
-    @property
-    def dataset_description(self) -> str:
-        """Return the dataset description."""
-        return 'Allowed dataset types are `train` and `val`'
diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/envoy/requirements.txt
deleted file mode 100644
index fb452e0913..0000000000
--- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/envoy/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability
-numpy
-openfl==1.2.1
-setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
-wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/LinReg.ipynb b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/LinReg.ipynb
deleted file mode 100644
index 3e4131d3e2..0000000000
--- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/LinReg.ipynb
+++ /dev/null
@@ -1,485 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "689ee822",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install -r requirements.txt"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d63e64c6-9955-4afc-8d04-d8c85bb28edc",
-   "metadata": {},
-   "source": [
-    "# Linear Regression with Numpy and OpenFL"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6c9eee14-22a1-4d48-a7da-e68d01037cd4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from typing import List, Union\n",
-    "import numpy as np\n",
-    "import random\n",
-    "import matplotlib.pyplot as plt\n",
-    "%matplotlib inline\n",
-    "from matplotlib.pylab import rcParams\n",
-    "rcParams['figure.figsize'] = 7, 5"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "c4b334ef-6a72-4b82-b978-1401973d0512",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "# We will use MSE as the loss function and Lasso (L1) weights regularization\n",
-    "![image.png](https://www.analyticsvidhya.com/wp-content/uploads/2016/01/eq5-1.png)"
-   ]
-  },
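(Editor's note: the heading above links to an external image of the objective. Reconstructed in LaTeX from the comments inside the `LinRegLasso` class below, under the assumption that the L1 penalty described there is the intended one, the cost being minimized and its subgradient are:)

```latex
J(W, b) = \frac{1}{n}\sum_{i=1}^{n}\left(y_i - W x_i - b\right)^2 + \lambda \lVert W \rVert_1,
\qquad
\frac{\partial J}{\partial W} = -\frac{2}{n}\sum_{i=1}^{n} x_i \left(y_i - W x_i - b\right) + \lambda\,\operatorname{sign}(W)
```

where `\lambda` corresponds to the weight-decay parameter `wd` in the code and `sign(W)` is the L1 subgradient.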
"source": [ - "class LinRegLasso:\n", - " def __init__(self, n_feat: int) -> None:\n", - " self.weights = np.ones((n_feat + 1)) # (n_feat + 1,) weights + bias\n", - " \n", - " def predict(self, feature_vector: Union[np.ndarray, List[int]]) -> float:\n", - " '''\n", - " feature_vector may be a list or have shape (n_feat,)\n", - " or it may be a bunch of vectors (n_vec, nfeat)\n", - " '''\n", - " feature_vector = np.array(feature_vector)\n", - " if len(feature_vector.shape) == 1:\n", - " feature_vector = feature_vector[:,np.newaxis]\n", - " assert feature_vector.shape[-1] == self.weights.shape[0] - 1, \\\n", - " f\"sample shape is {feature_vector.shape} and weights shape is f{self.weights}\"\n", - " \n", - " return self.weights @ np.concatenate((feature_vector.T, [[1]*feature_vector.shape[0]]))\n", - " \n", - " def mse(self, X: np.ndarray, Y: np.ndarray) -> float:\n", - " Y_hat = self.predict(X)\n", - " return np.sum((Y - Y_hat)**2) / Y.shape[0]\n", - "\n", - " def _update_weights(self, X: np.ndarray, Y: np.ndarray, lr: float, wd: float) -> None:\n", - " '''\n", - " X: (n_samples, n_features)\n", - " Y: (n_samples,)\n", - " self.weights: (n_features + 1)\n", - " \n", - " Cost function is MSE: (y - W*X - b)**2;\n", - " its derivative with resp to any x is -2*X*(y - W*X - b),\n", - " and with resp to b is -2*(y - W*X - b).\n", - " \n", - " Regularisation function is L1 |W|;\n", - " its derivative is SIGN(w)\n", - " '''\n", - " predictions = self.predict(X)\n", - " error = Y - predictions # (n_samples,)\n", - " X_with_bias = np.concatenate((X.T, [[1]*X.shape[0]])).T\n", - " updates = -2 * X_with_bias.T @ error / Y.shape[0]\n", - " regression_term = np.sign(self.weights)\n", - " \n", - " self.weights = self.weights - lr * updates + wd * regression_term\n", - " \n", - " def fit(self, X: np.ndarray, Y: np.ndarray,\n", - " n_epochs: int, lr: float, wd: float,\n", - " silent: bool=False) -> None:\n", - " for i in range(n_epochs):\n", - " self._update_weights(X, Y, lr, wd)\n", - " mse = self.mse(X, Y)\n", - " if not silent:\n", - " print(f'epoch: {i}, \\t MSE: {mse}')\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af89e7e5-6cfc-46bc-acd2-7d5bfb373091", - "metadata": {}, - "outputs": [], - "source": [ - "# Define input array with angles from 60deg to 300deg converted to radians\n", - "x = np.array([i*np.pi/180 for i in range(60,300,4)])\n", - "np.random.seed(10) # Setting seed for reproducibility\n", - "y = np.sin(x) + np.random.normal(0,0.15,len(x))\n", - "# plt.plot(x,y,'.')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ffefca2b-d7f6-4111-8872-c017c182a2de", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "lr_model = LinRegLasso(1)\n", - "wd = 0.0001\n", - "lr = 0.08\n", - "epochs = 100\n", - "\n", - "print(f\"Initila MSE: {lr_model.mse(x,y)}\")\n", - "lr_model.fit(x[:,np.newaxis],y, epochs, lr, wd, silent=True)\n", - "print(f\"Final MSE: {lr_model.mse(x,y)}\")\n", - "print(f\"Final parameters: {lr_model.weights}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "410f2d80-989a-43ab-958f-7b68fd8f2e90", - "metadata": {}, - "outputs": [], - "source": [ - "# We can also solve this 1D problem using Numpy\n", - "numpy_solution = np.polyfit(x,y,1)\n", - "predictor_np = np.poly1d(numpy_solution)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6cb323db-9f3a-42af-94da-4b170adef867", - "metadata": {}, - "outputs": [], - "source": [ - "y_hat = lr_model.predict(x)\n", - "y_np = 
-    "y_np = predictor_np(x)\n",
-    "plt.plot(x,y,'.')\n",
-    "plt.plot(x,y_hat,'.')\n",
-    "plt.plot(x,y_np,'--')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ffd4d2d7-5537-496a-88c1-301da87d979c",
-   "metadata": {},
-   "source": [
-    "# Now we run the same training on federated data"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "09cf7090-da51-4f4e-9d28-2a5c6e3bca02",
-   "metadata": {},
-   "source": [
-    "## Connect to a Federation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1b3c0039-e1f7-4047-b98b-a2d4bd42f015",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Create a federation\n",
-    "from openfl.interface.interactive_api.federation import Federation\n",
-    "\n",
-    "# Please use the same identifier that was used in the signed certificate\n",
-    "client_id = 'frontend'\n",
-    "director_node_fqdn = 'localhost'\n",
-    "director_port = 50049\n",
-    "\n",
-    "federation = Federation(\n",
-    "    client_id=client_id,\n",
-    "    director_node_fqdn=director_node_fqdn,\n",
-    "    director_port=director_port,\n",
-    "    tls=False\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7815120e-b704-4a7d-a65a-3c7542023ead",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "shard_registry = federation.get_shard_registry()\n",
-    "shard_registry"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b011dd95-64a7-4a8b-91ec-e61cdf885bbb",
-   "metadata": {},
-   "source": [
-    "### Data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b1985ac9-a2b1-4561-a962-6adfe35c3b97",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment\n",
-    "\n",
-    "class LinRegDataSet(DataInterface):\n",
-    "    def __init__(self, **kwargs):\n",
-    "        \"\"\"Initialize DataLoader.\"\"\"\n",
-    "        self.kwargs = kwargs\n",
-    "\n",
-    "    @property\n",
-    "    def shard_descriptor(self):\n",
-    "        \"\"\"Return shard descriptor.\"\"\"\n",
-    "        return self._shard_descriptor\n",
-    "    \n",
-    "    @shard_descriptor.setter\n",
-    "    def shard_descriptor(self, shard_descriptor):\n",
-    "        \"\"\"\n",
-    "        Describe per-collaborator procedures or sharding.\n",
-    "\n",
-    "        This method will be called during a collaborator initialization.\n",
-    "        Local shard_descriptor will be set by Envoy.\n",
-    "        \"\"\"\n",
-    "        self._shard_descriptor = shard_descriptor\n",
-    "        self.train_set = shard_descriptor.get_dataset(\"train\")\n",
-    "        self.val_set = shard_descriptor.get_dataset(\"val\")\n",
-    "\n",
-    "    def get_train_loader(self, **kwargs):\n",
-    "        \"\"\"Output of this method will be provided to tasks with optimizer in contract.\"\"\"\n",
-    "        return self.train_set\n",
-    "\n",
-    "    def get_valid_loader(self, **kwargs):\n",
-    "        \"\"\"Output of this method will be provided to tasks without optimizer in contract.\"\"\"\n",
-    "        return self.val_set\n",
-    "\n",
-    "    def get_train_data_size(self):\n",
-    "        \"\"\"Information for aggregation.\"\"\"\n",
-    "        return len(self.train_set)\n",
-    "\n",
-    "    def get_valid_data_size(self):\n",
-    "        \"\"\"Information for aggregation.\"\"\"\n",
-    "        return len(self.val_set)\n",
-    "    \n",
-    "lin_reg_dataset = LinRegDataSet()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b8909127-99d1-4dba-86fe-01a1b86585e7",
-   "metadata": {},
-   "source": [
-    "### Model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9523c9a2-a259-461f-937f-1fb054bd2886",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "framework_adapter = 'custom_adapter.CustomFrameworkAdapter'\n",
-    "fed_model = LinRegLasso(1)\n",
-    "MI = ModelInterface(model=fed_model, optimizer=None, framework_plugin=framework_adapter)\n",
-    "\n",
-    "# Save the initial model state\n",
-    "initial_model = LinRegLasso(1)"
-   ]
-  },
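(Editor's note: a short sketch of what the framework adapter plugin actually does for this model, based on the `CustomFrameworkAdapter` shown at the end of this diff; the round trip below is illustrative usage, not an OpenFL requirement.)

```python
# Sketch: the adapter's contract is just a named-tensor round trip.
from custom_adapter import CustomFrameworkAdapter

model = LinRegLasso(1)

tensors = CustomFrameworkAdapter.get_tensor_dict(model)  # {'w': array([1., 1.])}
tensors['w'] = tensors['w'] * 0.5                        # e.g. aggregated weights arrive
CustomFrameworkAdapter.set_tensor_dict(model, tensors)   # load them back into the model

print(model.weights)  # -> [0.5 0.5]
```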
-  {
-   "cell_type": "markdown",
-   "id": "2e3558bb-b21b-48ac-b07e-43cf75e6907b",
-   "metadata": {},
-   "source": [
-    "### Tasks\n",
-    "We need to employ a trick when reporting metrics: OpenFL decides which model is best based on an *increasing* metric."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f73e1ff9-d54a-49b5-9ce8-8bc72c6a2c6f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "TI = TaskInterface()\n",
-    "\n",
-    "@TI.add_kwargs(**{'lr': 0.001,\n",
-    "                  'wd': 0.0001,\n",
-    "                  'epochs': 1})\n",
-    "@TI.register_fl_task(model='my_model', data_loader='train_data', \\\n",
-    "                     device='device', optimizer='optimizer') \n",
-    "def train(my_model, train_data, optimizer, device, lr, wd, epochs):\n",
-    "    X, Y = train_data[:,:-1], train_data[:,-1]\n",
-    "    my_model.fit(X, Y, epochs, lr, wd, silent=True)\n",
-    "    return {'train_MSE': my_model.mse(X, Y),}\n",
-    "\n",
-    "@TI.register_fl_task(model='my_model', data_loader='val_data', device='device') \n",
-    "def validate(my_model, val_data, device):\n",
-    "    X, Y = val_data[:,:-1], val_data[:,-1] \n",
-    "    return {'validation_MSE': my_model.mse(X, Y),}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ee7659cc-6e03-43f5-9078-95707fa0e4d5",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "### Run"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "749100e8-05ce-418c-a980-545e3beb900b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "experiment_name = 'linear_regression_experiment'\n",
-    "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name,\n",
-    "                             )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "16bf1df7-8ca8-4a5e-a833-47c265c11e05",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fl_experiment.start(model_provider=MI, \n",
-    "                    task_keeper=TI,\n",
-    "                    data_loader=lin_reg_dataset,\n",
-    "                    rounds_to_train=10,)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1178d1ea-05e6-46be-ac07-21620bd6ec76",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "fl_experiment.stream_metrics()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "af331ccd-66b4-4925-8627-52cf03ceea5e",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "### Optional: start tensorboard"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fde4ed4d-dda5-4bab-8dd3-e1ac44f5acf9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%script /bin/bash --bg\n",
-    "tensorboard --host $(hostname --all-fqdns | awk '{print $1}') --logdir logs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7aa78602-b66a-4378-bea9-e915f2a1fdd8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "last_model = fl_experiment.get_last_model()\n",
-    "best_model = fl_experiment.get_best_model()\n",
-    "print(best_model.weights)\n",
-    "print(last_model.weights)\n",
-    "print(f\"last model MSE: {last_model.mse(x,y)}\")\n",
-    "print(f\"best model MSE: {best_model.mse(x,y)}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ae66d688",
-   "metadata": {},
-   "source": [
-    "### Evaluate results"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "573417e0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_cols = 20\n",
-    "n_samples = 4\n",
-    "interval = 240\n",
- "x_start = 60\n", - "noise = 0.3\n", - "\n", - "X = None\n", - "\n", - "for rank in range(n_cols):\n", - " np.random.seed(rank) # Setting seed for reproducibility\n", - " x = np.random.rand(n_samples, 1) * interval + x_start\n", - " x *= np.pi / 180\n", - " X = x if X is None else np.vstack((X,x))\n", - " y = np.sin(x) + np.random.normal(0, noise, size=(n_samples, 1))\n", - " plt.plot(x,y,'+')\n", - " \n", - "X.sort() \n", - "Y_hat = last_model.predict(X)\n", - "plt.plot(X,Y_hat,'--')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "84e927c8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/SingleNotebook.ipynb b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/SingleNotebook.ipynb deleted file mode 100644 index d0de783101..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/SingleNotebook.ipynb +++ /dev/null @@ -1,810 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "4dd5da0c-1ae1-43e6-8ad9-360c8974476c", - "metadata": {}, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/intel/openfl/blob/develop/openfl-tutorials/interactive_api/numpy_linear_regression/workspace/SingleNotebook.ipynb)" - ] - }, - { - "cell_type": "markdown", - "id": "1637381d-84d0-4132-92c3-bf1a1e9c7f7a", - "metadata": {}, - "source": [ - "# Linear Regression with Numpy and OpenFL" - ] - }, - { - "cell_type": "markdown", - "id": "ee73e205-d273-4b6c-878a-5ea958bfe267", - "metadata": {}, - "source": [ - "### Preparations in colab:\n", - "1. Install OpenFL \n", - "2. Clone the OpenFL repository, it contains infrastructure configs for this example.\n", - "3. 
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c6fafe9e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install openfl\n",
-    "!git clone https://github.com/securefederatedai/openfl.git"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2698f1da-fa69-4543-bb15-c7c0dcb776b9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import logging\n",
-    "import os\n",
-    "\n",
-    "# Logging fix for Google Colab\n",
-    "log = logging.getLogger()\n",
-    "log.setLevel(logging.INFO)\n",
-    "\n",
-    "os.chdir('./openfl/openfl-tutorials/interactive_api/numpy_linear_regression/workspace')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fde856d8-da4e-4d2f-bee2-85e673050623",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from typing import List, Union\n",
-    "import numpy as np\n",
-    "import random\n",
-    "import matplotlib.pyplot as plt\n",
-    "%matplotlib inline\n",
-    "from matplotlib.pylab import rcParams\n",
-    "rcParams['figure.figsize'] = 7, 5"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "72e76f9d",
-   "metadata": {},
-   "source": [
-    "# Define a linear model and train it locally\n",
-    "We start by training a linear model locally on a synthetic dataset so we have a baseline solution."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5e06d979-c582-4f44-b092-e5d60cce88bf",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "## We will use MSE as the loss function and Lasso (L1) weights regularization\n",
-    "![image.png](https://www.analyticsvidhya.com/wp-content/uploads/2016/01/eq5-1.png)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c9a860ab-91a5-410e-9d1e-4b9bd5a33d70",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class LinRegLasso:\n",
-    "    def __init__(self, n_feat: int) -> None:\n",
-    "        self.weights = np.ones((n_feat + 1)) # (n_feat + 1,) weights + bias\n",
-    "    \n",
-    "    def predict(self, feature_vector: Union[np.ndarray, List[int]]) -> float:\n",
-    "        '''\n",
-    "        feature_vector may be a list or have shape (n_feat,)\n",
-    "        or it may be a bunch of vectors (n_vec, nfeat)\n",
-    "        '''\n",
-    "        feature_vector = np.array(feature_vector)\n",
-    "        if len(feature_vector.shape) == 1:\n",
-    "            feature_vector = feature_vector[:,np.newaxis]\n",
-    "        assert feature_vector.shape[-1] == self.weights.shape[0] - 1, \\\n",
-    "            f\"sample shape is {feature_vector.shape} and weights shape is {self.weights.shape}\"\n",
-    "        \n",
-    "        return self.weights @ np.concatenate((feature_vector.T, [[1]*feature_vector.shape[0]]))\n",
-    "    \n",
-    "    def mse(self, X: np.ndarray, Y: np.ndarray) -> float:\n",
-    "        Y_hat = self.predict(X)\n",
-    "        return np.sum((Y - Y_hat)**2) / Y.shape[0]\n",
-    "\n",
-    "    def _update_weights(self, X: np.ndarray, Y: np.ndarray, lr: float, wd: float) -> None:\n",
-    "        '''\n",
-    "        X: (n_samples, n_features)\n",
-    "        Y: (n_samples,)\n",
-    "        self.weights: (n_features + 1)\n",
-    "        \n",
-    "        Cost function is MSE: (y - W*X - b)**2;\n",
-    "        its derivative with respect to any weight w is -2*X*(y - W*X - b),\n",
-    "        and with respect to b is -2*(y - W*X - b).\n",
-    "        \n",
-    "        Regularization function is L1 |W|;\n",
-    "        its subgradient is sign(W).\n",
-    "        '''\n",
-    "        predictions = self.predict(X)\n",
-    "        error = Y - predictions # (n_samples,)\n",
-    "        X_with_bias = np.concatenate((X.T, [[1]*X.shape[0]])).T\n",
-    "        updates = -2 * X_with_bias.T @ error / Y.shape[0]\n",
-    "        regression_term = np.sign(self.weights)\n",
-    "        \n",
-    "        # Gradient step on the MSE plus the L1 subgradient, both scaled by lr.\n",
-    "        self.weights = self.weights - lr * (updates + wd * regression_term)\n",
-    "    \n",
-    "    def fit(self, X: np.ndarray, Y: np.ndarray,\n",
-    "            n_epochs: int, lr: float, wd: float,\n",
-    "            silent: bool=False) -> None:\n",
-    "        for i in range(n_epochs):\n",
-    "            self._update_weights(X, Y, lr, wd)\n",
-    "            mse = self.mse(X, Y)\n",
-    "            if not silent:\n",
-    "                print(f'epoch: {i}, \\t MSE: {mse}')\n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "796b0de6-cb80-4ca6-91e9-503011d6851f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Define input array with angles from 60deg to 300deg converted to radians\n",
-    "noise=0.2\n",
-    "\n",
-    "x = np.array([i*np.pi/180 for i in range(60,300,4)])\n",
-    "np.random.seed(10)  # Setting seed for reproducibility\n",
-    "y = np.sin(x) + np.random.normal(0, noise, len(x))\n",
-    "# plt.plot(x,y,'.')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "033a74b4-3bc2-4a19-b734-007ad8a4c037",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "lr_model = LinRegLasso(1)\n",
-    "wd = 0.0001\n",
-    "lr = 0.08\n",
-    "epochs = 100\n",
-    "\n",
-    "print(f\"Initial MSE: {lr_model.mse(x,y)}\")\n",
-    "lr_model.fit(x[:,np.newaxis],y, epochs, lr, wd, silent=True)\n",
-    "print(f\"Final MSE: {lr_model.mse(x,y)}\")\n",
-    "print(f\"Final parameters: {lr_model.weights}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "25873a87-7564-4e4a-8ef5-79a9415b209f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# We can also solve this 1D problem using Numpy\n",
-    "numpy_solution = np.polyfit(x,y,1)\n",
-    "predictor_np = np.poly1d(numpy_solution)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2e5b7a9b-4d4b-4222-8eef-4ef4ba63434a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "y_hat = lr_model.predict(x)\n",
-    "y_np = predictor_np(x)\n",
-    "plt.plot(x,y,'.')\n",
-    "plt.plot(x,y_hat,'.')\n",
-    "plt.plot(x,y_np,'--')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cd2e26a5-9f4e-4011-a999-e428246aa8c1",
-   "metadata": {},
-   "source": [
-    "# Now we run the same training on federated data"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "83378ece-9cd5-4d40-a134-24cf68bdb79a",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "## 1. 
Start the Director service and several envoys with generated data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b0d105a0-04c4-4c26-81c7-a350e14393c2", - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "# Here are the main parameters for our Federation\n", - "n_cols=10 # Number of Envoys / Collaborators\n", - "n_samples_per_col=10\n", - "noise=0.2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c0c3e78b-6e9d-4efc-9b30-3ddc413c0423", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from time import sleep\n", - "from typing import Dict, List, Union\n", - "import yaml" - ] - }, - { - "cell_type": "markdown", - "id": "463a2821-b657-4e12-90ac-33b7810c5ff4", - "metadata": {}, - "source": [ - "### Start the Director service" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e736d33f-5df2-4a2f-8210-f1feba9fd367", - "metadata": {}, - "outputs": [], - "source": [ - "cwd = Path.cwd()\n", - "director_workspace_path = Path('../director/').absolute()\n", - "director_config_file = director_workspace_path / 'director_config.yaml'\n", - "director_logfile = director_workspace_path / 'director.log'\n", - "if director_logfile.is_file(): director_logfile.unlink()\n", - "\n", - "os.environ['main_folder'] = str(cwd)\n", - "os.environ['director_workspace_path'] = str(director_workspace_path)\n", - "os.environ['director_logfile'] = str(director_logfile)\n", - "os.environ['director_config_file'] = str(director_config_file)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bb950328-c1e6-4062-8b36-b42486d60241", - "metadata": {}, - "outputs": [], - "source": [ - "%%script /bin/bash --bg\n", - "cd $director_workspace_path\n", - "fx director start --disable-tls -c $director_config_file > $director_logfile &\n", - "cd $main_folder" - ] - }, - { - "cell_type": "markdown", - "id": "223f0037-c87e-440d-b8df-8fe9211c34dc", - "metadata": { - "tags": [] - }, - "source": [ - "## Start Envoys" - ] - }, - { - "cell_type": "markdown", - "id": "f6deeee4-5dc8-433d-a4ea-c464c74b1b2b", - "metadata": {}, - "source": [ - "#### First, we create several envoy config files " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c0e65a39-15f7-4cca-90bb-a2970b7be9f0", - "metadata": {}, - "outputs": [], - "source": [ - "# Read the original envoy config file content\n", - "with open(Path('../envoy/envoy_config.yaml'), \"r\") as stream:\n", - " orig_config = yaml.safe_load(stream)\n", - "\n", - "def generate_envoy_configs(config: Dict,\n", - " save_path: Union[str, Path] = '../envoy/',\n", - " n_cols: int = 10,\n", - " n_samples_per_col: int = 10,\n", - " noise: float = 0.15) -> List[Path]:\n", - " # Prevent installing requirements by Envoys as they will run in the same environment\n", - " config['params']['install_requirements'] = False\n", - "\n", - " # Pass parameters for Shard Descriptors so they can generate relevant datasets\n", - " config['shard_descriptor']['params']['n_samples'] = n_samples_per_col\n", - " config['shard_descriptor']['params']['noise'] = noise\n", - " \n", - " config_paths = [(Path(save_path) / f'{i}_envoy_config.yaml').absolute()\n", - " for i in range(1, n_cols + 1)]\n", - "\n", - " for i, path in enumerate(config_paths):\n", - " config['shard_descriptor']['params']['rank'] = i\n", - " with open(path, \"w\") as stream:\n", - " yaml.safe_dump(config, stream)\n", - " \n", - " return config_paths\n", - " \n", - "def 
remove_configs(config_paths):\n",
-    "    for path in config_paths:\n",
-    "        path.unlink()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "90109c5b-c785-4af7-ace9-dcd913018dca",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "config_paths = generate_envoy_configs(orig_config,\n",
-    "                                      n_cols=n_cols,\n",
-    "                                      n_samples_per_col=n_samples_per_col,\n",
-    "                                      noise=noise)\n",
-    "# remove_configs(config_paths)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "70c3078a-7beb-47c5-bcee-2de264ef3266",
-   "metadata": {},
-   "source": [
-    "#### Now start Envoy processes in a loop"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "843f698e-5582-4918-828c-cf095988da92",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# envoy_workspace_path = Path('../envoy/').absolute()\n",
-    "def start_envoys(config_paths: List[Path]) -> None:\n",
-    "    envoy_workspace_path = config_paths[0].parent\n",
-    "    cwd = Path.cwd()\n",
-    "    os.chdir(envoy_workspace_path)\n",
-    "    for i, path in enumerate(config_paths):\n",
-    "        os.system(f'fx envoy start -n env_{i + 1} --disable-tls '\n",
-    "                  f'--envoy-config-path {path} -dh localhost -dp 50049 '\n",
-    "                  f'>env_{i + 1}.log &')\n",
-    "    os.chdir(cwd)\n",
-    "\n",
-    "sleep(5)\n",
-    "\n",
-    "start_envoys(config_paths)\n",
-    "\n",
-    "sleep(25)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "6216f14c-78d8-444c-9144-ee8316d1487b",
-   "metadata": {},
-   "source": [
-    "## 2. Connect to the Director service of our Federation as a Data scientist"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b9d3b764-cb86-4eec-ba8e-df119da7a27f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Create a federation\n",
-    "from openfl.interface.interactive_api.federation import Federation\n",
-    "\n",
-    "# Please use the same identifier that was used in the signed certificate\n",
-    "client_id = 'frontend'\n",
-    "director_node_fqdn = 'localhost'\n",
-    "director_port = 50049\n",
-    "\n",
-    "federation = Federation(\n",
-    "    client_id=client_id,\n",
-    "    director_node_fqdn=director_node_fqdn,\n",
-    "    director_port=director_port,\n",
-    "    tls=False\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1bed370b-d0c0-46bc-8114-ea8255b2478b",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# A Data scientist may request a list of connected envoys\n",
-    "shard_registry = federation.get_shard_registry()\n",
-    "\n",
-    "# WARNING!\n",
-    "\n",
-    "# Make sure the shard registry contains all the envoys you started!\n",
-    "# Otherwise, try to run this cell again or reconnect to the Director (the cell above).\n",
-    "shard_registry"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "c6401026-795f-491e-90cb-dd59b451df5f",
-   "metadata": {},
-   "source": [
-    "### Now we will prepare an FL experiment using the OpenFL Python API"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "8166c689-9dde-4500-b05c-5b1ddf968978",
-   "metadata": {},
-   "source": [
-    "### Data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "55fb1a98-b44f-47ff-950d-5a40a1cca0d8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment\n",
-    "\n",
-    "class LinRegDataSet(DataInterface):\n",
-    "    def __init__(self, **kwargs):\n",
-    "        \"\"\"Initialize DataLoader.\"\"\"\n",
-    "        self.kwargs = kwargs\n",
-    "\n",
-    "    @property\n",
-    "    def shard_descriptor(self):\n",
-    "        \"\"\"Return shard descriptor.\"\"\"\n",
-    "        return self._shard_descriptor\n",
-    "    \n",
-    "    @shard_descriptor.setter\n",
-    "    def shard_descriptor(self, shard_descriptor):\n",
-    "        \"\"\"\n",
-    "        Describe per-collaborator procedures or sharding.\n",
-    "\n",
-    "        This method will be called during a collaborator initialization.\n",
-    "        Local shard_descriptor will be set by Envoy.\n",
-    "        \"\"\"\n",
-    "        self._shard_descriptor = shard_descriptor\n",
-    "        self.train_set = shard_descriptor.get_dataset(\"train\")\n",
-    "        self.val_set = shard_descriptor.get_dataset(\"val\")\n",
-    "\n",
-    "    def get_train_loader(self, **kwargs):\n",
-    "        \"\"\"Output of this method will be provided to tasks with optimizer in contract.\"\"\"\n",
-    "        return self.train_set\n",
-    "\n",
-    "    def get_valid_loader(self, **kwargs):\n",
-    "        \"\"\"Output of this method will be provided to tasks without optimizer in contract.\"\"\"\n",
-    "        return self.val_set\n",
-    "\n",
-    "    def get_train_data_size(self):\n",
-    "        \"\"\"Information for aggregation.\"\"\"\n",
-    "        return len(self.train_set)\n",
-    "\n",
-    "    def get_valid_data_size(self):\n",
-    "        \"\"\"Information for aggregation.\"\"\"\n",
-    "        return len(self.val_set)\n",
-    "    \n",
-    "lin_reg_dataset = LinRegDataSet()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "6233e1ed-a2f2-456f-9417-f35a2c27b236",
-   "metadata": {},
-   "source": [
-    "### Model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "885a8530-6248-4060-a30a-45cdc79bc41a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# You can inspect the framework adapter used in this example.\n",
-    "# It is a plug-in component allowing OpenFL to manage model parameters.\n",
-    "framework_adapter = 'custom_adapter.CustomFrameworkAdapter'\n",
-    "fed_model = LinRegLasso(1)\n",
-    "MI = ModelInterface(model=fed_model, optimizer=None, framework_plugin=framework_adapter)\n",
-    "\n",
-    "# Save the initial model state\n",
-    "initial_model = LinRegLasso(1)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "dc9da235-02a8-4e7a-9455-5fe2462aa317",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "### Tasks\n",
-    "Using an Optimizer does not make sense for this experiment, yet it is a required part of the training task contract in the current version of OpenFL, so we just pass None.\n",
-    "We need to employ a trick when reporting metrics: OpenFL decides which model is best based on an *increasing* metric."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7e101689-8a63-4562-98ff-09443b1ab9f2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "TI = TaskInterface()\n",
-    "\n",
-    "@TI.add_kwargs(**{'lr': 0.001,\n",
-    "                  'wd': 0.0001,\n",
-    "                  'epochs': 1})\n",
-    "@TI.register_fl_task(model='my_model', data_loader='train_data', \\\n",
-    "                     device='device', optimizer='optimizer') \n",
-    "def train(my_model, train_data, optimizer, device, lr, wd, epochs):\n",
-    "    X, Y = train_data[:,:-1], train_data[:,-1]\n",
-    "    my_model.fit(X, Y, epochs, lr, wd, silent=True)\n",
-    "    return {'train_MSE': my_model.mse(X, Y),}\n",
-    "\n",
-    "@TI.register_fl_task(model='my_model', data_loader='val_data', device='device') \n",
-    "def validate(my_model, val_data, device):\n",
-    "    X, Y = val_data[:,:-1], val_data[:,-1] \n",
-    "    return {'validation_MSE': my_model.mse(X, Y),}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "40a4623e-6559-4d4c-b199-f9afe16c0bbd",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "### Run"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fb357a88-7098-45b2-85f4-71fe2f2e0f82",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "experiment_name = 'linear_regression_experiment'\n",
-    "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name,\n",
-    "                             )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "db20124a-949d-4218-abfd-aaf4d0758284",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fl_experiment.start(model_provider=MI, \n",
-    "                    task_keeper=TI,\n",
-    "                    data_loader=lin_reg_dataset,\n",
-    "                    rounds_to_train=10,)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4909be2b-d23b-4356-b2af-10a212382d52",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# This method not only prints messages received from the director, \n",
-    "# but also saves logs in the tensorboard format (by default)\n",
-    "fl_experiment.stream_metrics()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "dd479019-1579-42c4-a446-f7d0a12596df",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "### Optional: start tensorboard"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a6faaaea",
-   "metadata": {},
-   "source": [
-    "If running locally on your own machine, start tensorboard in the background and open localhost:6006 in your browser:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c5f4b673-e6b1-4bbe-8294-d2b61a65d40b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%script /bin/bash --bg\n",
-    "tensorboard --host $(hostname --all-fqdns | awk '{print $1}') --logdir logs"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "68867a02",
-   "metadata": {},
-   "source": [
-    "In Google Colab you may use the inline tensorboard extension:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f3684b26",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%load_ext tensorboard\n",
-    "%tensorboard --logdir logs"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b4d27834-ec5b-4290-8c9d-4c3c5589a7e6",
-   "metadata": {},
-   "source": [
-    "### 3. 
Retrieve the trained model from the Director" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ad915ab3-0032-4a06-b2c0-00710585e24d", - "metadata": {}, - "outputs": [], - "source": [ - "last_model = fl_experiment.get_last_model()\n", - "best_model = fl_experiment.get_best_model()\n", - "print(best_model.weights)\n", - "print(last_model.weights)\n", - "print(f\"last model MSE: {last_model.mse(x,y)}\")\n", - "print(f\"best model MSE: {best_model.mse(x,y)}\")" - ] - }, - { - "cell_type": "markdown", - "id": "1930789e-b7b5-415e-844d-14ccc3844482", - "metadata": {}, - "source": [ - "## Let's see what the unified dataset looks like\n", - "And see how the trained model performs.\n", - "Note: dots of the same colour belong to the same Envoy's dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c51e8a56-d2f1-4758-a5a1-6d6652e4355e", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# These values must match the ones passed to generate_envoy_configs\n", - "# in start_federation.ipynb (n_cols=20, n_samples_per_col=8, noise=0.3).\n", - "n_cols = 20\n", - "n_samples = 8\n", - "interval = 240\n", - "x_start = 60\n", - "noise = 0.3\n", - "\n", - "X = None\n", - "\n", - "for rank in range(n_cols):\n", - "    np.random.seed(rank)  # Setting seed for reproducibility\n", - "    x = np.random.rand(n_samples, 1) * interval + x_start\n", - "    x *= np.pi / 180\n", - "    X = x if X is None else np.vstack((X,x))\n", - "    y = np.sin(x) + np.random.normal(0, noise, size=(n_samples, 1))\n", - "    plt.plot(x,y,'+')\n", - "    \n", - "X.sort(axis=0)  # Sort the samples so the prediction plots as a single line\n", - "Y_hat = last_model.predict(X)\n", - "plt.plot(X,Y_hat,'--')" - ] - }, - { - "cell_type": "markdown", - "id": "9e365766-4ea6-40bc-96ae-a183274e8b8c", - "metadata": {}, - "source": [ - "## Cleanup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e5d793be-6c20-4a22-bad7-c082c1ee76ca", - "metadata": {}, - "outputs": [], - "source": [ - "# To stop all services\n", - "!pkill fx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "809b8eb3-4775-43d9-8f96-de84a089a54e", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "remove_configs(config_paths)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0b28b29-48d3-4f21-bc69-40259b83f93b", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/custom_adapter.py b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/custom_adapter.py deleted file mode 100644 index e7bb5b372a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/custom_adapter.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Custom model numpy adapter.""" - -from openfl.plugins.frameworks_adapters.framework_adapter_interface import ( -    FrameworkAdapterPluginInterface, -) - - -class CustomFrameworkAdapter(FrameworkAdapterPluginInterface): -    """Framework adapter plugin class.""" - -    @staticmethod -    def get_tensor_dict(model, optimizer=None): -        """Extract tensors from a model.""" -        return {'w': model.weights} - -
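-    # Hedged editorial note (an editor's assumption, not taken from the plugin -    # interface docs): OpenFL is expected to aggregate each tensor in the dict -    # returned above across collaborators and to write the aggregated values -    # back via set_tensor_dict below. - - 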
@staticmethod - def set_tensor_dict(model, tensor_dict, optimizer=None, device='cpu'): - """Load tensors to a model.""" - model.weights = tensor_dict['w'] diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/requirements.txt b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/requirements.txt deleted file mode 100644 index ce2479f563..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -jupyterlab -matplotlib -mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability -numpy -openfl==1.2.1 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/start_federation.ipynb b/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/start_federation.ipynb deleted file mode 100644 index 9b71200dd5..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/numpy_linear_regression/workspace/start_federation.ipynb +++ /dev/null @@ -1,193 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "f813b6ae-b082-49bb-b64f-fd619b6de14a", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "import yaml\n", - "from typing import Dict, List, Union" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1ee62ab-09e4-4f4c-984f-bdb6909d6106", - "metadata": {}, - "outputs": [], - "source": [ - "# Read the original envoy config file content\n", - "with open(Path('../envoy/envoy_config.yaml'), \"r\") as stream:\n", - " orig_config = yaml.safe_load(stream)\n", - "\n", - "def generate_envoy_configs(config: Dict,\n", - " save_path: Union[str, Path] = '../envoy/',\n", - " n_cols: int = 10,\n", - " n_samples_per_col: int = 10,\n", - " noise: float = 0.15) -> List[Path]:\n", - "\n", - " config['shard_descriptor']['params']['n_samples'] = n_samples_per_col\n", - " config['shard_descriptor']['params']['noise'] = noise\n", - " \n", - " config_paths = [(Path(save_path) / f'{i}_envoy_config.yaml').absolute()\n", - " for i in range(1, n_cols + 1)]\n", - "\n", - " for i, path in enumerate(config_paths):\n", - " config['shard_descriptor']['params']['rank'] = i\n", - " with open(path, \"w\") as stream:\n", - " yaml.safe_dump(config, stream)\n", - " \n", - " return config_paths\n", - " \n", - "def remove_configs(config_paths):\n", - " for path in config_paths:\n", - " path.unlink()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d058340-22d4-4630-b8e3-9c3fc29198ab", - "metadata": {}, - "outputs": [], - "source": [ - "config_paths = generate_envoy_configs(orig_config, n_cols=20, n_samples_per_col=8, noise=0.3)\n", - "# remove_configs(config_paths)" - ] - }, - { - "cell_type": "markdown", - "id": "ec065be9-c2c6-4a81-9a2a-ea54794e52ba", - "metadata": {}, - "source": [ - "## Start the Director service" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "60bcaa49-aabb-42ec-a279-9e32b31ce6ca", - "metadata": {}, - "outputs": [], - "source": [ - "cwd = Path.cwd()\n", - "director_workspace_path = Path('../director/').absolute()\n", - "director_config_file = director_workspace_path / 'director_config.yaml'\n", - "director_logfile = director_workspace_path / 'director.log'\n", - 
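"# Remove any stale log from a previous run (hedged editorial comment).\n", -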
"director_logfile.unlink(missing_ok=True)\n", - "# \n", - "\n", - "os.environ['main_folder'] = str(cwd)\n", - "os.environ['director_workspace_path'] = str(director_workspace_path)\n", - "os.environ['director_logfile'] = str(director_logfile)\n", - "os.environ['director_config_file'] = str(director_config_file)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72a9268a-ee1e-4dda-a4c4-cfb29428f45e", - "metadata": {}, - "outputs": [], - "source": [ - "%%script /bin/bash --bg\n", - "cd $director_workspace_path\n", - "fx director start --disable-tls -c $director_config_file > $director_logfile &\n", - "cd $main_folder" - ] - }, - { - "cell_type": "markdown", - "id": "e0a634ea-9c62-4048-bb91-099fe9097b55", - "metadata": {}, - "source": [ - "## Start Envoys" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13470bfd-d67e-48dc-b1ff-10c7ff526c0c", - "metadata": {}, - "outputs": [], - "source": [ - "# envoy_workspace_path = Path('../envoy/').absolute()\n", - "def start_envoys(config_paths: List[Path]) -> None:\n", - " envoy_workspace_path = config_paths[0].parent\n", - " cwd = Path.cwd()\n", - " os.chdir(envoy_workspace_path)\n", - " for i, path in enumerate(config_paths):\n", - " os.system(f'fx envoy start -n env_{i + 1} --disable-tls '\n", - " f'--envoy-config-path {path} -dh localhost -dp 50049 '\n", - " f'>env_{i + 1}.log &')\n", - " os.chdir(cwd)\n", - " \n", - "start_envoys(config_paths)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2fc8a569-6978-4c80-88d1-741799407239", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a5fdc3af-63b5-41b5-b9d6-be2aac8626e0", - "metadata": {}, - "outputs": [], - "source": [ - "# To stop all services run\n", - "!pkill fx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4e69ae57-bfa3-4047-af7f-3e1cf24ac35e", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "remove_configs(config_paths)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "46095127-f116-4ae3-a3b4-6be24064b49f", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/README.md b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/README.md deleted file mode 100644 index 2485306208..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/README.md +++ /dev/null @@ -1,55 +0,0 @@ -# Scikit-learn based Linear Regression Tutorial - -### 1. About dataset - -Generate 1-dimensional noisy data for linear regression of sinusoid. - -Define the below pamameter in shard_config in the envoy_config.yaml file as the random seed for the dataset generation for a specific Envoy -- rank - -### 2. About model - -Linear Regression Lasso Model based on Scikit-learn. - - -### 3. How to run this tutorial (without TLC and locally as a simulation): - -1. Run director: - -```sh -cd director folder -./start_director.sh -``` - -2. 
Run envoy: - -Step 1: Activate the virtual environment and install packages - -```sh -cd envoy -pip install -r requirements.txt -``` -Step 2: Start the envoy -```sh -./start_envoy.sh env_instance_1 envoy_config.yaml -``` - -Optional: start second envoy: - -- Copy the `envoy` folder to another place and follow the same process as above: - -```sh -./start_envoy.sh env_instance_2 envoy_config_2.yaml -``` - -3. Run the `scikit_learn_linear_regression.ipynb` Jupyter notebook: - -```sh -cd workspace -jupyter lab scikit_learn_linear_regression.ipynb -``` - -4. Visualization: - -```sh -tensorboard --logdir logs/ -``` diff --git a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/director/director_config.yaml b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/director/director_config.yaml deleted file mode 100644 index d22b4b7766..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/director/director_config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -settings: -  listen_host: localhost -  listen_port: 50050 -  sample_shape: ['1'] # Modify this param if experimenting with `n_features` of shard_descriptor. -  target_shape: ['1'] -  envoy_health_check_period: 5 # in seconds diff --git a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/director/start_director.sh b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/director/start_director.sh deleted file mode 100755 index 5806a6cc0a..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/envoy_config.yaml b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/envoy_config.yaml deleted file mode 100644 index 107c566945..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/envoy_config.yaml +++ /dev/null @@ -1,12 +0,0 @@ -params: -  cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: -  template: linreg_shard_descriptor.LinRegSD -  params: -    rank: 1 -    n_samples: 80 -    noise: 0.15 -  \ No newline at end of file diff --git a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/linreg_shard_descriptor.py b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/linreg_shard_descriptor.py deleted file mode 100644 index 12bfed9df4..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/linreg_shard_descriptor.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Noisy-Sin Shard Descriptor.""" - -from typing import List - -import numpy as np - -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - - -class LinRegSD(ShardDescriptor): -    """Shard descriptor class.""" - -    def __init__(self, rank: int, n_samples: int = 10, noise: float = 0.15) -> None: -        """ -        Initialize LinReg Shard Descriptor. - -        This Shard Descriptor generates random data. Sample features are -        floats between pi/3 and 5*pi/3, and targets are -        calculated as sin(feature) + normal_noise. 
- """ - np.random.seed(rank) # Setting seed for reproducibility - self.n_samples = max(n_samples, 5) - self.interval = 240 - self.x_start = 60 - x = np.random.rand(n_samples, 1) * self.interval + self.x_start - x *= np.pi / 180 - y = np.sin(x) + np.random.normal(0, noise, size=(n_samples, 1)) - self.data = np.concatenate((x, y), axis=1) - - def get_dataset(self, dataset_type: str) -> np.ndarray: - """ - Return a shard dataset by type. - - A simple list with elements (x, y) implemets the Shard Dataset interface. - """ - if dataset_type == 'train': - return self.data[:self.n_samples // 2] - elif dataset_type == 'val': - return self.data[self.n_samples // 2:] - else: - pass - - @property - def sample_shape(self) -> List[str]: - """Return the sample shape info.""" - (*x, _) = self.data[0] - return [str(i) for i in np.array(x, ndmin=1).shape] - - @property - def target_shape(self) -> List[str]: - """Return the target shape info.""" - (*_, y) = self.data[0] - return [str(i) for i in np.array(y, ndmin=1).shape] - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return 'Allowed dataset types are `train` and `val`' diff --git a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/requirements.txt b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/requirements.txt deleted file mode 100644 index 40d18d92d5..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -matplotlib>=2.0.0 -mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability -numpy>=1.13.3 -openfl>=1.2.1 -scikit-learn>=0.24.1 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/start_envoy.sh b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/start_envoy.sh deleted file mode 100755 index 4da07821af..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/envoy/start_envoy.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -ENVOY_NAME=$1 -ENVOY_CONF=$2 - -fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/workspace/custom_adapter.py b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/workspace/custom_adapter.py deleted file mode 100644 index 6991d2b0ff..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/workspace/custom_adapter.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Custom model numpy adapter.""" - -from openfl.plugins.frameworks_adapters.framework_adapter_interface import ( - FrameworkAdapterPluginInterface, -) - - -class CustomFrameworkAdapter(FrameworkAdapterPluginInterface): - """Framework adapter plugin class.""" - - @staticmethod - def get_tensor_dict(model, optimizer=None): - """Extract tensors from a model.""" - return {'w': model.weights} - - @staticmethod - def set_tensor_dict(model, tensor_dict, optimizer=None, device='cpu'): - """Load tensors to a model.""" - model.weights = tensor_dict['w'] diff --git 
a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/workspace/requirements.txt b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/workspace/requirements.txt deleted file mode 100644 index 40d18d92d5..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/workspace/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -matplotlib>=2.0.0 -mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability -numpy>=1.13.3 -openfl>=1.2.1 -scikit-learn>=0.24.1 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/workspace/scikit_learn_linear_regression.ipynb b/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/workspace/scikit_learn_linear_regression.ipynb deleted file mode 100644 index 4ef7023cc9..0000000000 --- a/openfl-tutorials/deprecated/interactive_api/scikit_learn_linear_regression/workspace/scikit_learn_linear_regression.ipynb +++ /dev/null @@ -1,420 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "689ee822", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "id": "d63e64c6-9955-4afc-8d04-d8c85bb28edc", - "metadata": {}, - "source": [ - "# Scikit-learn Linear Regression Example - Interactive API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6c9eee14-22a1-4d48-a7da-e68d01037cd4", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from typing import List, Union\n", - "\n", - "from sklearn.linear_model import Lasso\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.datasets import make_regression\n", - "from sklearn.metrics import mean_squared_error\n", - "import numpy as np\n", - "import random\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "from matplotlib.pylab import rcParams\n", - "rcParams['figure.figsize'] = 7, 5" - ] - }, - { - "cell_type": "markdown", - "id": "c4b334ef-6a72-4b82-b978-1401973d0512", - "metadata": { - "tags": [] - }, - "source": [ - "# We will use MSE as the loss function and Lasso weight regularization\n", - "![image.png](https://www.analyticsvidhya.com/wp-content/uploads/2016/01/eq5-1.png)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f4cc8ec2-b818-4db8-8700-39c1a12917df", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "class SklearnLinearRegressionLasso:\n", - "    def __init__(self, n_feat: int, alpha: float = 1.0) -> None:\n", - "        self.model = Lasso(alpha=alpha)\n", - "        self.scaler = StandardScaler()\n", - "        self.weights = np.ones((n_feat + 1)) \n", - "        \n", - "    def predict(self, feature_vector: Union[np.ndarray, List[int]]) -> float:\n", - "        '''\n", - "        feature_vector may be a list or have shape (n_feat,)\n", - "        or it may be a bunch of vectors (n_vec, n_feat)\n", - "        '''\n", - "        feature_vector = np.array(feature_vector)\n", - "        if len(feature_vector.shape) == 1:\n", - "            feature_vector = feature_vector[:,np.newaxis]\n", - "        \n", - "        feature_vector = self.scaler.transform(feature_vector)\n", - "        return self.model.predict(feature_vector)\n", - "    \n", - "    def mse(self, X: np.ndarray, Y: np.ndarray) -> float:\n", - "        Y_predict = self.predict(X)\n", - "        return mean_squared_error(Y, Y_predict)\n", - "    \n", - "    
def fit(self, X: np.ndarray, Y: np.ndarray, silent: bool=False) -> None:\n", - "        \n", - "        X = self.scaler.fit_transform(X)\n", - "        self.model.fit(X, Y)\n", - "        mse = self.mse(X, Y)\n", - "        # Keep the flat weights vector in sync with the fitted model so the\n", - "        # custom framework adapter can extract and restore it.\n", - "        self.weights[:-1] = self.model.coef_\n", - "        self.weights[-1] = self.model.intercept_\n", - "        if not silent:\n", - "            print(f'MSE: {mse}')\n", - "            \n", - "    def print_parameters(self) -> None:\n", - "        print('Final parameters: ')\n", - "        print(f'Weights: {self.model.coef_}')\n", - "        print(f'Bias: {self.model.intercept_}')\n", - "        " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af89e7e5-6cfc-46bc-acd2-7d5bfb373091", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Define input array with angles from 60deg to 300deg converted to radians\n", - "x = np.array([i*np.pi/180 for i in range(60,300,4)])\n", - "np.random.seed(10)  # Setting seed for reproducibility\n", - "y = np.sin(x) + np.random.normal(0,0.15,len(x))\n", - "# plt.plot(x,y,'.')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ffefca2b-d7f6-4111-8872-c017c182a2de", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "lr_model = SklearnLinearRegressionLasso(n_feat=1, alpha=0.1)\n", - "\n", - "lr_model.fit(x[:,np.newaxis], y)\n", - "\n", - "# print the final parameters\n", - "lr_model.print_parameters()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "410f2d80-989a-43ab-958f-7b68fd8f2e90", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# We can also solve this 1D problem using Numpy\n", - "numpy_solution = np.polyfit(x,y,1)\n", - "predictor_np = np.poly1d(numpy_solution)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6cb323db-9f3a-42af-94da-4b170adef867", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "y_hat = lr_model.predict(x)\n", - "y_np = predictor_np(x)\n", - "plt.plot(x,y,'.')\n", - "plt.plot(x,y_hat,'.')\n", - "plt.plot(x,y_np,'--')" - ] - }, - { - "cell_type": "markdown", - "id": "ffd4d2d7-5537-496a-88c1-301da87d979c", - "metadata": {}, - "source": [ - "# Now we run the same training on federated data" - ] - }, - { - "cell_type": "markdown", - "id": "09cf7090-da51-4f4e-9d28-2a5c6e3bca02", - "metadata": {}, - "source": [ - "## Connect to a Federation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b3c0039-e1f7-4047-b98b-a2d4bd42f015", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Create a federation\n", - "from openfl.interface.interactive_api.federation import Federation\n", - "\n", - "# please use the same identifier that was used in the signed certificate\n", - "client_id = 'frontend'\n", - "director_node_fqdn = 'localhost'\n", - "director_port = 50050\n", - "\n", - "federation = Federation(\n", - "    client_id=client_id,\n", - "    director_node_fqdn=director_node_fqdn,\n", - "    director_port=director_port,\n", - "    tls=False\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7815120e-b704-4a7d-a65a-3c7542023ead", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "shard_registry = federation.get_shard_registry()\n", - "shard_registry" - ] - }, - { - "cell_type": "markdown", - "id": "b011dd95-64a7-4a8b-91ec-e61cdf885bbb", - "metadata": {}, - "source": [ - "### Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1985ac9-a2b1-4561-a962-6adfe35c3b97", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from 
openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment\n", - "\n", - "class LinRegDataSet(DataInterface):\n", - "    def __init__(self, **kwargs):\n", - "        \"\"\"Initialize DataLoader.\"\"\"\n", - "        self.kwargs = kwargs\n", - "\n", - "    @property\n", - "    def shard_descriptor(self):\n", - "        \"\"\"Return shard descriptor.\"\"\"\n", - "        return self._shard_descriptor\n", - "    \n", - "    @shard_descriptor.setter\n", - "    def shard_descriptor(self, shard_descriptor):\n", - "        \"\"\"\n", - "        Describe per-collaborator procedures or sharding.\n", - "\n", - "        This method will be called during a collaborator initialization.\n", - "        Local shard_descriptor will be set by Envoy.\n", - "        \"\"\"\n", - "        self._shard_descriptor = shard_descriptor\n", - "        self.train_set = shard_descriptor.get_dataset(\"train\")\n", - "        self.val_set = shard_descriptor.get_dataset(\"val\")\n", - "\n", - "    def get_train_loader(self, **kwargs):\n", - "        \"\"\"Output of this method will be provided to tasks with optimizer in contract.\"\"\"\n", - "        return self.train_set\n", - "\n", - "    def get_valid_loader(self, **kwargs):\n", - "        \"\"\"Output of this method will be provided to tasks without optimizer in contract.\"\"\"\n", - "        return self.val_set\n", - "\n", - "    def get_train_data_size(self):\n", - "        \"\"\"Information for aggregation.\"\"\"\n", - "        return len(self.train_set)\n", - "\n", - "    def get_valid_data_size(self):\n", - "        \"\"\"Information for aggregation.\"\"\"\n", - "        return len(self.val_set)\n", - "    \n", - "lin_reg_dataset = LinRegDataSet()" - ] - }, - { - "cell_type": "markdown", - "id": "b8909127-99d1-4dba-86fe-01a1b86585e7", - "metadata": {}, - "source": [ - "### Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9523c9a2-a259-461f-937f-1fb054bd2886", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "framework_adapter = 'custom_adapter.CustomFrameworkAdapter'\n", - "fed_model = SklearnLinearRegressionLasso(n_feat=1, alpha=0.1)\n", - "MI = ModelInterface(model=fed_model, optimizer=None, framework_plugin=framework_adapter)\n", - "\n", - "# Save the initial model state\n", - "initial_model = SklearnLinearRegressionLasso(n_feat=1, alpha=0.1)" - ] - }, - { - "cell_type": "markdown", - "id": "2e3558bb-b21b-48ac-b07e-43cf75e6907b", - "metadata": {}, - "source": [ - "### Tasks\n", - "We need to employ a trick when reporting metrics: OpenFL decides which model is the best based on an *increasing* metric.\n",
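- "A hedged workaround (an editorial suggestion, not in the original cell): also report -my_model.mse(X, Y) under a separate key so that the increasing-metric comparison favors the lowest MSE."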
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f73e1ff9-d54a-49b5-9ce8-8bc72c6a2c6f", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "TI = TaskInterface()\n", - "\n", - "@TI.register_fl_task(model='my_model', data_loader='train_data', \\\n", - " device='device', optimizer='optimizer') \n", - "def train(my_model, train_data, optimizer, device):\n", - " X, Y = train_data[:,:-1], train_data[:,-1]\n", - " my_model.fit(X, Y, silent=True)\n", - " return {'train_MSE': my_model.mse(X, Y),}\n", - "\n", - "@TI.register_fl_task(model='my_model', data_loader='val_data', device='device') \n", - "def validate(my_model, val_data, device):\n", - " X, Y = val_data[:,:-1], val_data[:,-1] \n", - " return {'validation_MSE': my_model.mse(X, Y),}" - ] - }, - { - "cell_type": "markdown", - "id": "ee7659cc-6e03-43f5-9078-95707fa0e4d5", - "metadata": { - "tags": [] - }, - "source": [ - "### Run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "749100e8-05ce-418c-a980-545e3beb900b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "experiment_name = 'linear_regression_experiment'\n", - "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name,\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16bf1df7-8ca8-4a5e-a833-47c265c11e05", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "fl_experiment.start(model_provider=MI, \n", - " task_keeper=TI,\n", - " data_loader=lin_reg_dataset,\n", - " rounds_to_train=10,)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1178d1ea-05e6-46be-ac07-21620bd6ec76", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "fl_experiment.stream_metrics()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c04b4ab2-1d40-44c7-907b-a6a7d176c959", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfl-workspace/default/director.yaml b/openfl-workspace/default/director.yaml deleted file mode 100644 index 04c6109c72..0000000000 --- a/openfl-workspace/default/director.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Director's config. -# Parameters: -# 1. sample_shape - sample shape interface unified across the Federation -# 2. 
target_shape - target shape interface unified across the Federation - -settings: -  sample_shape: [] -  target_shape: [] -  listen_host: localhost  # listen FQDN or ip -  listen_port: 50051  # listen port -  envoy_health_check_period: 60  # in seconds -  install_requirements: false -  review_experiment: false diff --git a/openfl-workspace/default/envoy_config.yaml b/openfl-workspace/default/envoy_config.yaml deleted file mode 100644 index 1c3e92f099..0000000000 --- a/openfl-workspace/default/envoy_config.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# SETUP ENVOY PARAMETERS - -# cuda_devices - field allows you to put indices of the CUDA devices you want to allow using -# in Federated experiments - -# SETUP SHARD DESCRIPTOR -# To start an envoy, implement the LocalShardDescriptor class in the shard_descriptor module. -# Alternatively, point to an implemented Shard Descriptor in the 'template' field. - -# Put in the 'params' group any argument needed to initialize the chosen Shard Descriptor. -# Parameters are optional. For clarity, this example config contains three parameters: -# 1. data_path - string with data location on disk -# 2. sample_shape - shape of the sample's numpy representation that will be returned from __getitem__ -# 3. target_shape - shape of the target's numpy representation that will be returned from __getitem__ - - -params: -  install_requirements: true -  cuda_devices: [] -  review_experiment: False - -optional_plugin_components: -  cuda_device_monitor: -    template: openfl.plugins.processing_units_monitor.pynvml_monitor.PynvmlCUDADeviceMonitor -    settings: [] - -shard_descriptor: -  template: shard_descriptor.LocalShardDescriptor -  params: -    data_path: data -    sample_shape: [] -    target_shape: [] \ No newline at end of file diff --git a/openfl-workspace/default/plan/cols.yaml b/openfl-workspace/default/plan/cols.yaml deleted file mode 100644 index 95307de3bc..0000000000 --- a/openfl-workspace/default/plan/cols.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -collaborators: -  \ No newline at end of file diff --git a/openfl-workspace/default/plan/data.yaml b/openfl-workspace/default/plan/data.yaml deleted file mode 100644 index 9825b4ce09..0000000000 --- a/openfl-workspace/default/plan/data.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -# collaborator_name,data_directory_path -one,1 - diff --git a/openfl-workspace/default/plan/plan.yaml b/openfl-workspace/default/plan/plan.yaml deleted file mode 100644 index d122719909..0000000000 --- a/openfl-workspace/default/plan/plan.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
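- -# Hedged editorial note: each section below appears to merge its 'settings' over -# the defaults file it names (e.g. plan/defaults/aggregator.yaml); this is an -# editor's reading of the layout, not taken from OpenFL documentation.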
- -aggregator : - defaults : plan/defaults/aggregator.yaml - template : openfl.component.Aggregator - settings : - init_state_path : save/init.pbuf - best_state_path : save/best.pbuf - last_state_path : save/last.pbuf - rounds_to_train : 10 - -collaborator : - defaults : plan/defaults/collaborator.yaml - template : openfl.component.Collaborator - settings : - delta_updates : false - opt_treatment : RESET - -data_loader : - defaults : plan/defaults/data_loader.yaml - -task_runner : - defaults : plan/defaults/task_runner.yaml - -network : - defaults : plan/defaults/network.yaml - -assigner : - defaults : plan/defaults/assigner.yaml - -tasks : - defaults : plan/defaults/tasks_fast_estimator.yaml - -compression_pipeline : - defaults : plan/defaults/compression_pipeline.yaml diff --git a/openfl-workspace/default/requirements.txt b/openfl-workspace/default/requirements.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/openfl-workspace/default/shard_descriptor.py b/openfl-workspace/default/shard_descriptor.py deleted file mode 100644 index 08db460737..0000000000 --- a/openfl-workspace/default/shard_descriptor.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -""" -Shard Descriptor template. - -It is recommended to perform tensor manipulations using numpy. -""" - -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - - -class LocalShardDescriptor(ShardDescriptor): - """Shard descriptor subclass.""" - - def __init__(self, data_path: str, sample_shape: tuple, target_shape: tuple) -> None: - """ - Initialize local Shard Descriptor. - - Parameters are arbitrary, set up the ShardDescriptor-related part - of the envoy_config.yaml as you need. - """ - super().__init__() diff --git a/openfl/component/director/__init__.py b/openfl/component/director/__init__.py deleted file mode 100644 index fa55e39e8b..0000000000 --- a/openfl/component/director/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -from openfl.component.director.director import Director diff --git a/openfl/component/director/director.py b/openfl/component/director/director.py deleted file mode 100644 index 79ac85d961..0000000000 --- a/openfl/component/director/director.py +++ /dev/null @@ -1,531 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Director module.""" - -import asyncio -import logging -import time -from collections import defaultdict -from pathlib import Path -from typing import Callable, Iterable, List, Union - -from openfl.component.director.experiment import Experiment, ExperimentsRegistry, Status -from openfl.transport.grpc.exceptions import ShardNotFoundError - -logger = logging.getLogger(__name__) - - -class Director: - """Director class. The Director is the central node of the federation - (Director-Based Workflow). - - Attributes: - tls (bool): A flag indicating if TLS should be used for connections. - root_certificate (Union[Path, str]): The path to the root certificate - for TLS. - private_key (Union[Path, str]): The path to the private key for TLS. - certificate (Union[Path, str]): The path to the certificate for TLS. - sample_shape (list): The shape of the sample data. - target_shape (list): The shape of the target data. - review_plan_callback (Union[None, Callable]): A callback function for - reviewing the plan. 
- envoy_health_check_period (int): The period for health check of envoys - in seconds. - install_requirements (bool): A flag indicating if the requirements - should be installed. - _shard_registry (dict): A dictionary to store the shard registry. - experiments_registry (ExperimentsRegistry): An object of - ExperimentsRegistry to store the experiments. - col_exp_queues (defaultdict): A defaultdict to store the experiment - queues for collaborators. - col_exp (dict): A dictionary to store the experiments for - collaborators. - logger (Logger): A logger for logging activities. - """ - - def __init__( - self, - *, - tls: bool = True, - root_certificate: Union[Path, str] = None, - private_key: Union[Path, str] = None, - certificate: Union[Path, str] = None, - sample_shape: list = None, - target_shape: list = None, - review_plan_callback: Union[None, Callable] = None, - envoy_health_check_period: int = 60, - install_requirements: bool = False, - ) -> None: - """Initialize the Director object. - - Args: - tls (bool, optional): A flag indicating if TLS should be used for - connections. Defaults to True. - root_certificate (Union[Path, str], optional): The path to the - root certificate for TLS. Defaults to None. - private_key (Union[Path, str], optional): The path to the private - key for TLS. Defaults to None. - certificate (Union[Path, str], optional): The path to the - certificate for TLS. Defaults to None. - sample_shape (list, optional): The shape of the sample data. - Defaults to None. - target_shape (list, optional): The shape of the target data. - Defaults to None. - review_plan_callback (Union[None, Callable], optional): A callback - function for reviewing the plan. Defaults to None. - envoy_health_check_period (int, optional): The period for health - check of envoys in seconds. Defaults to 60. - install_requirements (bool, optional): A flag indicating if the - requirements should be installed. Defaults to False. - """ - self.sample_shape, self.target_shape = sample_shape, target_shape - self._shard_registry = {} - self.tls = tls - self.root_certificate = root_certificate - self.private_key = private_key - self.certificate = certificate - self.experiments_registry = ExperimentsRegistry() - self.col_exp_queues = defaultdict(asyncio.Queue) - self.col_exp = {} - self.review_plan_callback = review_plan_callback - self.envoy_health_check_period = envoy_health_check_period - self.install_requirements = install_requirements - - def acknowledge_shard(self, shard_info: dict) -> bool: - """Save shard info to shard registry if it's acceptable. - - Args: - shard_info (dict): The shard info dictionary should be able to - store registries. - - Returns: - is_accepted (bool): Bool value to accept o deny the addition of - the shard info. 
- """ - is_accepted = False - if ( - self.sample_shape != shard_info["sample_shape"] - or self.target_shape != shard_info["target_shape"] - ): - logger.info( - "Director did not accept shard for %s", - shard_info["node_info"]["name"], - ) - return is_accepted - logger.info("Director accepted shard for %s", shard_info["node_info"]["name"]) - self._shard_registry[shard_info["node_info"]["name"]] = { - "shard_info": shard_info, - "is_online": True, - "is_experiment_running": False, - "valid_duration": 2 * self.envoy_health_check_period, - "last_updated": time.time(), - } - is_accepted = True - return is_accepted - - async def set_new_experiment( - self, - *, - experiment_name: str, - sender_name: str, - tensor_dict: dict, - collaborator_names: Iterable[str], - experiment_archive_path: Path, - ) -> bool: - """Set new experiment. - - Args: - experiment_name (str): String id for experiment. - sender_name (str): The name of the sender. - tensor_dict (dict): Dictionary of tensors. - collaborator_names (Iterable[str]): Names of collaborators. - experiment_archive_path (Path): Path of the experiment. - - Returns: - bool : Boolean returned if the experiment register was successful. - """ - experiment = Experiment( - name=experiment_name, - archive_path=experiment_archive_path, - collaborators=list(collaborator_names), - users=[sender_name], - sender=sender_name, - init_tensor_dict=tensor_dict, - ) - self.experiments_registry.add(experiment) - return True - - async def get_experiment_status(self, experiment_name: str, caller: str): - """Get experiment status. - - Args: - experiment_name (str): String id for experiment. - caller (str): String id for experiment owner. - - Returns: - str: The status of the experiment can be one of the following: - - PENDING = 'pending' - - FINISHED = 'finished' - - IN_PROGRESS = 'in_progress' - - FAILED = 'failed' - - REJECTED = 'rejected' - """ - if ( - experiment_name not in self.experiments_registry - or caller not in self.experiments_registry[experiment_name].users - ): - logger.error("No experiment data in the stash") - return None - return self.experiments_registry[experiment_name].status - - def get_trained_model(self, experiment_name: str, caller: str, model_type: str): - """Get trained model. - - Args: - experiment_name (str): String id for experiment. - caller (str): String id for experiment owner. - model_type (str): The type of the model. - - Returns: - None: One of the following: [No experiment data in the stash] or - [Aggregator have no aggregated model to return] or [Unknown - model type required]. - dict: Dictionary of tensors from the aggregator when the model - type is 'best' or 'last'. - """ - if ( - experiment_name not in self.experiments_registry - or caller not in self.experiments_registry[experiment_name].users - ): - logger.error("No experiment data in the stash") - return None - - aggregator = self.experiments_registry[experiment_name].aggregator - - if aggregator.last_tensor_dict is None: - logger.error("Aggregator has no aggregated model to return") - return None - - if model_type == "best": - return aggregator.best_tensor_dict - elif model_type == "last": - return aggregator.last_tensor_dict - else: - logger.error("Unknown model type required.") - return None - - def get_experiment_data(self, experiment_name: str) -> Path: - """Get experiment data. - - Args: - experiment_name (str): String id for experiment. - - Returns: - str: Path of archive. 
- """ - return self.experiments_registry[experiment_name].archive_path - - async def wait_experiment(self, envoy_name: str) -> str: - """Wait an experiment. - - Args: - envoy_name (str): The name of the envoy. - - Returns: - str: The name of the experiment on the queue. - """ - experiment_name = self.col_exp.get(envoy_name) - if experiment_name and experiment_name in self.experiments_registry: - # Experiment already set, but the envoy hasn't received experiment - # name (e.g. was disconnected) - experiment = self.experiments_registry[experiment_name] - if experiment.aggregator.round_number < experiment.aggregator.rounds_to_train: - return experiment_name - - self.col_exp[envoy_name] = None - queue = self.col_exp_queues[envoy_name] - experiment_name = await queue.get() - self.col_exp[envoy_name] = experiment_name - - return experiment_name - - def get_dataset_info(self): - """Get dataset info.""" - return self.sample_shape, self.target_shape - - def get_registered_shards(self) -> list: # Why is it here? - """Get registered shard infos.""" - return [shard_status["shard_info"] for shard_status in self._shard_registry.values()] - - async def stream_metrics(self, experiment_name: str, caller: str): - """Stream metrics from the aggregator. - - This method takes next metric dictionary from the aggregator's queue - and returns it to the caller. - - Args: - experiment_name (str): String id for experiment. - caller (str): String id for experiment owner. - - Returns: - metric_dict: {'metric_origin','task_name','metric_name','metric_value','round'} - if the queue is not empty. - None: queue is empty but the experiment is still running. - - Raises: - StopIteration: if the experiment is finished and there is no more metrics to report. - """ - if ( - experiment_name not in self.experiments_registry - or caller not in self.experiments_registry[experiment_name].users - ): - raise Exception( - f'No experiment name "{experiment_name}" in experiments list, or caller "{caller}"' - f" does not have access to this experiment" - ) - - while not self.experiments_registry[experiment_name].aggregator: - await asyncio.sleep(1) - aggregator = self.experiments_registry[experiment_name].aggregator - - while True: - if not aggregator.metric_queue.empty(): - yield aggregator.metric_queue.get() - continue - - if aggregator.all_quit_jobs_sent() and aggregator.metric_queue.empty(): - return - - yield None - - def remove_experiment_data(self, experiment_name: str, caller: str): - """Remove experiment data from stash. - - Args: - experiment_name (str): String id for experiment. - caller (str): String id for experiment owner. - """ - if ( - experiment_name in self.experiments_registry - and caller in self.experiments_registry[experiment_name].users - ): - self.experiments_registry.remove(experiment_name) - - def set_experiment_failed(self, *, experiment_name: str, collaborator_name: str): - """Envoys Set experiment failed RPC. - - Args: - experiment_name (str): String id for experiment. - collaborator_name (str): String id for collaborator. - - Return: - None - """ - """This method shoud call `experiment.abort()` and all the code should - be pushed down to that method. 
- - It would also be good to be able to interrupt the aggregator async task - with the following code: - ``` - run_aggregator_atask = self.experiments_registry[experiment_name].run_aggregator_atask - if asyncio.isfuture(run_aggregator_atask) and not run_aggregator_atask.done(): - run_aggregator_atask.cancel() - ``` - unfortunately, cancelling does not work on an already awaited future. - """ - - if experiment_name not in self.experiments_registry: - return - aggregator = self.experiments_registry[experiment_name].aggregator - aggregator.stop(failed_collaborator=collaborator_name) - self.experiments_registry[experiment_name].status = Status.FAILED - - def update_envoy_status( - self, - *, - envoy_name: str, - is_experiment_running: bool, - cuda_devices_status: list = None, - ) -> int: - """Accept health check from envoy. - - Args: - envoy_name (str): String id for envoy. - is_experiment_running (bool): Boolean value for the status of the - experiment. - cuda_devices_status (list, optional): List of cuda devices and - status. Defaults to None. - - Raises: - ShardNotFoundError: When Unknown shard {envoy_name}. - - Returns: - int: Value of the envoy_health_check_period. - """ - shard_info = self._shard_registry.get(envoy_name) - if not shard_info: - raise ShardNotFoundError(f"Unknown shard {envoy_name}") - - shard_info["is_online"] = True - shard_info["is_experiment_running"] = is_experiment_running - shard_info["valid_duration"] = 2 * self.envoy_health_check_period - shard_info["last_updated"] = time.time() - - if cuda_devices_status is not None: - for i in range(len(cuda_devices_status)): - shard_info["shard_info"]["node_info"]["cuda_devices"][i] = cuda_devices_status[i] - - return self.envoy_health_check_period - - def get_envoys(self) -> list: - """Get status information about envoys. - - Returns: - list: List with the status information about envoys. - """ - logger.debug("Shard registry: %s", self._shard_registry) - for envoy_info in self._shard_registry.values(): - envoy_info["is_online"] = time.time() < envoy_info.get( - "last_updated", 0 - ) + envoy_info.get("valid_duration", 0) - envoy_name = envoy_info["shard_info"]["node_info"]["name"] - envoy_info["experiment_name"] = self.col_exp[envoy_name] - - return self._shard_registry.values() - - def get_experiments_list(self, caller: str) -> list: - """Get experiments list for a specific user. - - Args: - caller (str): String id for experiment owner. - - Returns: - list: List with the info of the experiments for the specific user. - """ - experiments = self.experiments_registry.get_user_experiments(caller) - result = [] - for exp in experiments: - exp_data = { - "name": exp.name, - "status": exp.status, - "collaborators_amount": len(exp.collaborators), - } - progress = _get_experiment_progress(exp) - if progress is not None: - exp_data["progress"] = progress - if exp.aggregator: - tasks_amount = len( - {task["function"] for task in exp.aggregator.assigner.tasks.values()} - ) - exp_data["tasks_amount"] = tasks_amount - result.append(exp_data) - - return result - - def get_experiment_description(self, caller: str, name: str) -> dict: - """Get experiment information by name for a specific user. - - Args: - caller (str): String id for experiment owner. - name (str): String id for experiment name. - - Returns: - dict: Dictionary with the info from the experiment. 
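- - Example shape (illustrative; it mirrors the dict constructed below): - {'name', 'status', 'current_round', 'total_rounds', - 'download_statuses', 'collaborators', 'tasks', 'progress'} 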
- """ - exp = self.experiments_registry.get(name) - if not exp or caller not in exp.users: - return {} - progress = _get_experiment_progress(exp) - model_statuses = _get_model_download_statuses(exp) - tasks = _get_experiment_tasks(exp) - collaborators = _get_experiment_collaborators(exp) - result = { - "name": name, - "status": exp.status, - "current_round": exp.aggregator.round_number, - "total_rounds": exp.aggregator.rounds_to_train, - "download_statuses": { - "models": model_statuses, - "logs": [{"name": "aggregator", "status": "ready"}], - }, - "collaborators": collaborators, - "tasks": tasks, - "progress": progress, - } - return result - - async def start_experiment_execution_loop(self): - """Run task to monitor and run experiments.""" - loop = asyncio.get_event_loop() - while True: - async with self.experiments_registry.get_next_experiment() as experiment: - # Review experiment block starts. - if self.review_plan_callback: - if not await experiment.review_experiment(self.review_plan_callback): - logger.info( - f'"{experiment.name}" Plan was rejected by the Director manager.' - ) - continue - # Review experiment block ends. - - run_aggregator_future = loop.create_task( - experiment.start( - root_certificate=self.root_certificate, - certificate=self.certificate, - private_key=self.private_key, - tls=self.tls, - install_requirements=self.install_requirements, - ) - ) - # Adding the experiment to collaborators queues - for col_name in experiment.collaborators: - queue = self.col_exp_queues[col_name] - await queue.put(experiment.name) - await run_aggregator_future - - -def _get_model_download_statuses(experiment) -> List[dict]: - best_model_status = "ready" if experiment.aggregator.best_tensor_dict else "pending" - last_model_status = "ready" if experiment.aggregator.last_tensor_dict else "pending" - model_statuses = [ - { - "name": "best", - "status": best_model_status, - }, - { - "name": "last", - "status": last_model_status, - }, - {"name": "init", "status": "ready"}, - ] - return model_statuses - - -def _get_experiment_progress(experiment) -> Union[float, None]: - if experiment.status == Status.IN_PROGRESS: - return experiment.aggregator.round_number / experiment.aggregator.rounds_to_train - - -def _get_experiment_tasks(experiment) -> List[dict]: - return [ - { - "name": task["function"], - "description": "Task description Mock", - } - for task in experiment.aggregator.assigner.tasks.values() - ] - - -def _get_experiment_collaborators(experiment) -> List[dict]: - return [ - { - "name": name, - "status": "pending_mock", - "progress": 0.0, - "round": 0, - "current_task": "Current Task Mock", - "next_task": "Next Task Mock", - } - for name in experiment.aggregator.authorized_cols - ] diff --git a/openfl/component/director/experiment.py b/openfl/component/director/experiment.py deleted file mode 100644 index 2e40f43247..0000000000 --- a/openfl/component/director/experiment.py +++ /dev/null @@ -1,353 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Experiment module.""" - -import asyncio -import logging -from contextlib import asynccontextmanager -from pathlib import Path -from typing import Callable, Iterable, List, Union - -from openfl.federated import Plan -from openfl.transport import AggregatorGRPCServer -from openfl.utilities.workspace import ExperimentWorkspace - -logger = logging.getLogger(__name__) - - -class Status: - """Experiment's statuses.""" - - PENDING = "pending" - FINISHED = "finished" - IN_PROGRESS = "in_progress" - FAILED = 
"failed" - REJECTED = "rejected" - - -class Experiment: - """Experiment class. - - Attributes: - name (str): The name of the experiment. - archive_path (Union[Path, str]): The path to the experiment - archive. - collaborators (List[str]): The list of collaborators. - sender (str): The name of the sender. - init_tensor_dict (dict): The initial tensor dictionary. - plan_path (Union[Path, str]): The path to the plan. - users (Iterable[str]): The list of users. - status (str): The status of the experiment. - aggregator (object): The aggregator object. - run_aggregator_atask (object): The run aggregator async task - object. - """ - - def __init__( - self, - *, - name: str, - archive_path: Union[Path, str], - collaborators: List[str], - sender: str, - init_tensor_dict: dict, - plan_path: Union[Path, str] = "plan/plan.yaml", - users: Iterable[str] = None, - ) -> None: - """Initialize an experiment object. - - Args: - name (str): The name of the experiment. - archive_path (Union[Path, str]): The path to the experiment - archive. - collaborators (List[str]): The list of collaborators. - sender (str): The name of the sender. - init_tensor_dict (dict): The initial tensor dictionary. - plan_path (Union[Path, str], optional): The path to the plan. - Defaults to 'plan/plan.yaml'. - users (Iterable[str], optional): The list of users. Defaults to - None. - """ - self.name = name - self.archive_path = Path(archive_path).absolute() - self.collaborators = collaborators - self.sender = sender - self.init_tensor_dict = init_tensor_dict - self.plan_path = Path(plan_path) - self.users = set() if users is None else set(users) - self.status = Status.PENDING - self.aggregator = None - self.run_aggregator_atask = None - - async def start( - self, - *, - tls: bool = True, - root_certificate: Union[Path, str] = None, - private_key: Union[Path, str] = None, - certificate: Union[Path, str] = None, - install_requirements: bool = False, - ): - """Run experiment. - - Args: - tls (bool, optional): A flag indicating if TLS should be used for - connections. Defaults to True. - root_certificate (Union[Path, str], optional): The path to the - root certificate for TLS. Defaults to None. - private_key (Union[Path, str], optional): The path to the private - key for TLS. Defaults to None. - certificate (Union[Path, str], optional): The path to the - certificate for TLS. Defaults to None. - install_requirements (bool, optional): A flag indicating if the - requirements should be installed. Defaults to False. - """ - self.status = Status.IN_PROGRESS - try: - logger.info(f"New experiment {self.name} for collaborators {self.collaborators}") - - with ExperimentWorkspace( - experiment_name=self.name, - data_file_path=self.archive_path, - install_requirements=install_requirements, - ): - aggregator_grpc_server = self._create_aggregator_grpc_server( - tls=tls, - root_certificate=root_certificate, - private_key=private_key, - certificate=certificate, - ) - self.aggregator = aggregator_grpc_server.aggregator - - self.run_aggregator_atask = asyncio.create_task( - self._run_aggregator_grpc_server( - aggregator_grpc_server=aggregator_grpc_server, - ) - ) - await self.run_aggregator_atask - self.status = Status.FINISHED - logger.info("Experiment %s was finished successfully.", self.name) - except Exception as e: - self.status = Status.FAILED - logger.exception("Experiment %s failed with error: %s.", self.name, e) - - async def review_experiment(self, review_plan_callback: Callable) -> bool: - """Get plan approve in console. 
- - Args: - review_plan_callback (Callable): A callback function for reviewing the plan. - - Returns: - bool: True if the plan was approved, False otherwise. - """ - logger.debug("Experiment Review starts") - # Extract the workspace for review (without installing requirements) - with ExperimentWorkspace( - self.name, - self.archive_path, - is_install_requirements=False, - remove_archive=False, - ): - loop = asyncio.get_event_loop() - # Call for a review in a separate thread (server is not blocked) - review_approve = await loop.run_in_executor( - None, review_plan_callback, self.name, self.plan_path - ) - if not review_approve: - self.status = Status.REJECTED - self.archive_path.unlink(missing_ok=True) - return False - - logger.debug("Experiment Review succeeded") - return True - - def _create_aggregator_grpc_server( - self, - *, - tls: bool = True, - root_certificate: Union[Path, str] = None, - private_key: Union[Path, str] = None, - certificate: Union[Path, str] = None, - ) -> AggregatorGRPCServer: - """Create an aggregator gRPC server. - - Args: - tls (bool, optional): A flag indicating if TLS should be used for - connections. Defaults to True. - root_certificate (Union[Path, str], optional): The path to the - root certificate for TLS. Defaults to None. - private_key (Union[Path, str], optional): The path to the private - key for TLS. Defaults to None. - certificate (Union[Path, str], optional): The path to the - certificate for TLS. Defaults to None. - - Returns: - AggregatorGRPCServer: The created aggregator gRPC server. - """ - plan = Plan.parse(plan_config_path=self.plan_path) - plan.authorized_cols = list(self.collaborators) - - logger.info("🧿 Created an Aggregator Server for %s experiment.", self.name) - aggregator_grpc_server = plan.interactive_api_get_server( - tensor_dict=self.init_tensor_dict, - root_certificate=root_certificate, - certificate=certificate, - private_key=private_key, - tls=tls, - ) - return aggregator_grpc_server - - @staticmethod - async def _run_aggregator_grpc_server( - aggregator_grpc_server: AggregatorGRPCServer, - ) -> None: - """Run aggregator. - - Args: - aggregator_grpc_server (AggregatorGRPCServer): The aggregator gRPC - server to run. - """ - logger.info("🧿 Starting the Aggregator Service.") - grpc_server = aggregator_grpc_server.get_server() - grpc_server.start() - logger.info("Starting Aggregator gRPC Server") - - try: - while not aggregator_grpc_server.aggregator.all_quit_jobs_sent(): - # Awaiting quit job sent to collaborators - await asyncio.sleep(10) - logger.debug("Aggregator sent quit jobs calls to all collaborators") - except KeyboardInterrupt: - pass - finally: - grpc_server.stop(0) - # Temporary solution to free RAM used by TensorDB - aggregator_grpc_server.aggregator.tensor_db.clean_up(0) - - -class ExperimentsRegistry: - """ExperimentsList class.""" - - def __init__(self) -> None: - """Initialize an experiments list object.""" - self.__active_experiment_name = None - self.__pending_experiments = [] - self.__archived_experiments = [] - self.__dict = {} - - @property - def active_experiment(self) -> Union[Experiment, None]: - """Get active experiment. - - Returns: - Union[Experiment, None]: The active experiment if exists, None - otherwise. - """ - if self.__active_experiment_name is None: - return None - return self.__dict[self.__active_experiment_name] - - @property - def pending_experiments(self) -> List[str]: - """Get queue of not started experiments. - - Returns: - List[str]: The list of pending experiments. 
- """ - return self.__pending_experiments - - def add(self, experiment: Experiment) -> None: - """Add experiment to queue of not started experiments. - - Args: - experiment (Experiment): The experiment to add. - """ - self.__dict[experiment.name] = experiment - self.__pending_experiments.append(experiment.name) - - def remove(self, name: str) -> None: - """Remove experiment from everywhere. - - Args: - name (str): The name of the experiment to remove. - """ - if self.__active_experiment_name == name: - self.__active_experiment_name = None - if name in self.__pending_experiments: - self.__pending_experiments.remove(name) - if name in self.__archived_experiments: - self.__archived_experiments.remove(name) - if name in self.__dict: - del self.__dict[name] - - def __getitem__(self, key: str) -> Experiment: - """Get experiment by name. - - Args: - key (str): The name of the experiment. - - Returns: - Experiment: The experiment with the given name. - """ - return self.__dict[key] - - def get(self, key: str, default=None) -> Experiment: - """Get experiment by name. - - Args: - key (str): The name of the experiment. - default (optional): The default value to return if the experiment - does not exist. - - Returns: - Experiment: The experiment with the given name, or the default - value if the experiment does not exist. - """ - return self.__dict.get(key, default) - - def get_user_experiments(self, user: str) -> List[Experiment]: - """Get list of experiments for specific user. - - Args: - user (str): The name of the user. - - Returns: - List[Experiment]: The list of experiments for the specific user. - """ - return [exp for exp in self.__dict.values() if user in exp.users] - - def __contains__(self, key: str) -> bool: - """Check if experiment exists. - - Args: - key (str): The name of the experiment. - - Returns: - bool: True if the experiment exists, False otherwise. - """ - return key in self.__dict - - def finish_active(self) -> None: - """Finish active experiment.""" - self.__archived_experiments.insert(0, self.__active_experiment_name) - self.__active_experiment_name = None - - @asynccontextmanager - async def get_next_experiment(self): - """Context manager. - - On enter get experiment from pending_experiments. On exit put finished - experiment to archive_experiments. 
- """ - while True: - if self.active_experiment is None and self.pending_experiments: - break - await asyncio.sleep(10) - - try: - self.__active_experiment_name = self.pending_experiments.pop(0) - yield self.active_experiment - finally: - self.finish_active() diff --git a/openfl/component/envoy/__init__.py b/openfl/component/envoy/__init__.py deleted file mode 100644 index 36f2c3ece4..0000000000 --- a/openfl/component/envoy/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -from .envoy import Envoy - -__all__ = [ - "Envoy", -] diff --git a/openfl/component/envoy/envoy.py b/openfl/component/envoy/envoy.py deleted file mode 100644 index f98b7e21ba..0000000000 --- a/openfl/component/envoy/envoy.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Envoy module.""" - -import logging -import sys -import time -import traceback -import uuid -from concurrent.futures import ThreadPoolExecutor -from pathlib import Path -from typing import Callable, Optional, Type, Union - -from openfl.federated import Plan -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor -from openfl.plugins.processing_units_monitor.cuda_device_monitor import CUDADeviceMonitor -from openfl.transport.grpc.director_client import ShardDirectorClient -from openfl.transport.grpc.exceptions import ShardNotFoundError -from openfl.utilities.workspace import ExperimentWorkspace - -logger = logging.getLogger(__name__) - -DEFAULT_RETRY_TIMEOUT_IN_SECONDS = 5 - - -class Envoy: - """Envoy class. The Envoy is a long-lived entity that runs on collaborator - nodes connected to the Director. - - Attributes: - name (str): The name of the shard. - root_certificate (Union[Path, str]): The path to the root certificate - for TLS. - private_key (Union[Path, str]): The path to the private key for TLS. - certificate (Union[Path, str]): The path to the certificate for TLS. - director_client (ShardDirectorClient): The director client. - shard_descriptor (Type[ShardDescriptor]): The shard descriptor. - cuda_devices (tuple): The CUDA devices. - install_requirements (bool): A flag indicating if the requirements - should be installed. - review_plan_callback (Union[None, Callable]): A callback function for - reviewing the plan. - cuda_device_monitor (Optional[Type[CUDADeviceMonitor]]): The CUDA - device monitor. - executor (ThreadPoolExecutor): The executor for running tasks. - running_experiments (dict): A dictionary to store the running - experiments. - is_experiment_running (bool): A flag indicating if an experiment is - running. - _health_check_future (object): The future object for the health check. - """ - - def __init__( - self, - *, - shard_name: str, - director_host: str, - director_port: int, - shard_descriptor: Type[ShardDescriptor], - root_certificate: Optional[Union[Path, str]] = None, - private_key: Optional[Union[Path, str]] = None, - certificate: Optional[Union[Path, str]] = None, - tls: bool = True, - install_requirements: bool = True, - cuda_devices: Union[tuple, list] = (), - cuda_device_monitor: Optional[Type[CUDADeviceMonitor]] = None, - review_plan_callback: Union[None, Callable] = None, - ) -> None: - """Initialize a envoy object. - - Args: - shard_name (str): The name of the shard. - director_host (str): The host of the director. - director_port (int): The port of the director. - shard_descriptor (Type[ShardDescriptor]): The shard descriptor. 
-            root_certificate (Optional[Union[Path, str]], optional): The path
-                to the root certificate for TLS. Defaults to None.
-            private_key (Optional[Union[Path, str]], optional): The path to
-                the private key for TLS. Defaults to None.
-            certificate (Optional[Union[Path, str]], optional): The path to
-                the certificate for TLS. Defaults to None.
-            tls (bool, optional): A flag indicating if TLS should be used for
-                connections. Defaults to True.
-            install_requirements (bool, optional): A flag indicating if the
-                requirements should be installed. Defaults to True.
-            cuda_devices (Union[tuple, list], optional): The CUDA devices.
-                Defaults to ().
-            cuda_device_monitor (Optional[Type[CUDADeviceMonitor]], optional):
-                The CUDA device monitor. Defaults to None.
-            review_plan_callback (Union[None, Callable], optional): A callback
-                function for reviewing the plan. Defaults to None.
-        """
-        self.name = shard_name
-        self.root_certificate = (
-            Path(root_certificate).absolute() if root_certificate is not None else None
-        )
-        self.private_key = Path(private_key).absolute() if private_key is not None else None
-        self.certificate = Path(certificate).absolute() if certificate is not None else None
-        self.director_client = ShardDirectorClient(
-            director_host=director_host,
-            director_port=director_port,
-            shard_name=shard_name,
-            tls=tls,
-            root_certificate=root_certificate,
-            private_key=private_key,
-            certificate=certificate,
-        )
-
-        self.shard_descriptor = shard_descriptor
-        self.cuda_devices = tuple(cuda_devices)
-        self.install_requirements = install_requirements
-
-        self.review_plan_callback = review_plan_callback
-
-        # Optional plugins
-        self.cuda_device_monitor = cuda_device_monitor
-
-        self.executor = ThreadPoolExecutor()
-        self.running_experiments = {}
-        self.is_experiment_running = False
-        self._health_check_future = None
-
-    def run(self):
-        """Run the envoy working cycle."""
-        while True:
-            try:
-                # Workspace import should not be done by the gRPC client!
-                experiment_name = self.director_client.wait_experiment()
-                data_stream = self.director_client.get_experiment_data(experiment_name)
-            except Exception as exc:
-                logger.exception("Failed to get experiment: %s", exc)
-                time.sleep(DEFAULT_RETRY_TIMEOUT_IN_SECONDS)
-                continue
-
-            data_file_path = self._save_data_stream_to_file(data_stream)
-
-            try:
-                with ExperimentWorkspace(
-                    experiment_name=f"{self.name}_{experiment_name}",
-                    data_file_path=data_file_path,
-                    install_requirements=self.install_requirements,
-                ):
-                    # If a review callback was passed, let the envoy review
-                    # the experiment before starting it
-                    if self.review_plan_callback:
-                        if not self.review_plan_callback("plan", "plan/plan.yaml"):
-                            self.director_client.set_experiment_failed(
-                                experiment_name,
-                                error_description="Experiment is rejected"
-                                f' by Envoy "{self.name}" manager.',
-                            )
-                            continue
-                        logger.debug(
-                            f'Experiment "{experiment_name}" was accepted by the Envoy manager'
-                        )
-                    self.is_experiment_running = True
-                    self._run_collaborator()
-            except Exception as exc:
-                logger.exception("Collaborator failed with error: %s", exc)
-                self.director_client.set_experiment_failed(
-                    experiment_name,
-                    error_code=1,
-                    error_description=traceback.format_exc(),
-                )
-            finally:
-                self.is_experiment_running = False
-
-    @staticmethod
-    def _save_data_stream_to_file(data_stream):
-        """Save a data stream to file.
-
-        Args:
-            data_stream: The data stream to save.
-
-        Returns:
-            Path: The path to the saved data file.
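On a collaborator node, the class above was typically instantiated directly. A sketch with placeholder connection details and a dummy descriptor; it assumes `ShardDescriptor` can be subclassed this thinly, whereas a real deployment uses a descriptor that actually exposes local data:

```python
class DummyShardDescriptor(ShardDescriptor):
    """Illustrative stand-in; real descriptors report sample/target shapes and data."""


envoy = Envoy(
    shard_name="envoy_one",
    director_host="localhost",   # placeholder FQDN
    director_port=50051,         # placeholder port
    shard_descriptor=DummyShardDescriptor(),
    tls=False,
    install_requirements=False,
)
envoy.start()  # reports shard info, then enters the run() loop (see start() below)
```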
- """ - data_file_path = Path(str(uuid.uuid4())).absolute() - with open(data_file_path, "wb") as data_file: - for response in data_stream: - if response.size == len(response.npbytes): - data_file.write(response.npbytes) - else: - raise Exception("Broken archive") - return data_file_path - - def send_health_check(self): - """Send health check to the director.""" - logger.debug("Sending envoy node status to director.") - timeout = DEFAULT_RETRY_TIMEOUT_IN_SECONDS - while True: - cuda_devices_info = self._get_cuda_device_info() - try: - timeout = self.director_client.send_health_check( - envoy_name=self.name, - is_experiment_running=self.is_experiment_running, - cuda_devices_info=cuda_devices_info, - ) - except ShardNotFoundError: - logger.info("The director has lost information about current shard. Resending...") - self.director_client.report_shard_info( - shard_descriptor=self.shard_descriptor, - cuda_devices=self.cuda_devices, - ) - time.sleep(timeout) - - def _get_cuda_device_info(self): - """Get CUDA device info. - - Returns: - list: A list of dictionaries containing info about each CUDA - device. - """ - cuda_devices_info = None - try: - if self.cuda_device_monitor is not None: - cuda_devices_info = [] - cuda_driver_version = self.cuda_device_monitor.get_driver_version() - cuda_version = self.cuda_device_monitor.get_cuda_version() - for device_id in self.cuda_devices: - memory_total = self.cuda_device_monitor.get_device_memory_total(device_id) - memory_utilized = self.cuda_device_monitor.get_device_memory_utilized(device_id) - device_utilization = self.cuda_device_monitor.get_device_utilization(device_id) - device_name = self.cuda_device_monitor.get_device_name(device_id) - cuda_devices_info.append( - { - "index": device_id, - "memory_total": memory_total, - "memory_utilized": memory_utilized, - "device_utilization": device_utilization, - "cuda_driver_version": cuda_driver_version, - "cuda_version": cuda_version, - "name": device_name, - } - ) - except Exception as exc: - logger.exception( - f"Failed to get cuda device info: {exc}. Check your cuda device monitor plugin." - ) - return cuda_devices_info - - def _run_collaborator(self, plan="plan/plan.yaml"): - """Run the collaborator for the experiment running. - - Args: - plan (str, optional): The path to the plan. Defaults to 'plan/plan.yaml'. 
- """ - plan = Plan.parse(plan_config_path=Path(plan)) - - # TODO: Need to restructure data loader config file loader - logger.debug("Data = %s", plan.cols_data_paths) - logger.info("🧿 Starting the Collaborator Service.") - - col = plan.get_collaborator( - self.name, - self.root_certificate, - self.private_key, - self.certificate, - shard_descriptor=self.shard_descriptor, - ) - col.set_available_devices(cuda=self.cuda_devices) - col.run() - - def start(self): - """Start the envoy.""" - try: - is_accepted = self.director_client.report_shard_info( - shard_descriptor=self.shard_descriptor, - cuda_devices=self.cuda_devices, - ) - except Exception as exc: - logger.exception("Failed to report shard info: %s", exc) - sys.exit(1) - else: - if is_accepted: - logger.info("Shard was accepted by director") - # Shard accepted for participation in the federation - self._health_check_future = self.executor.submit(self.send_health_check) - self.run() - else: - # Shut down - logger.error("Report shard info was not accepted") - sys.exit(1) diff --git a/openfl/federated/plan/plan.py b/openfl/federated/plan/plan.py index 5a36dd5d49..f61bc814dc 100644 --- a/openfl/federated/plan/plan.py +++ b/openfl/federated/plan/plan.py @@ -422,7 +422,7 @@ def get_straggler_handling_policy(self): return self.straggler_policy_ - # legacy api (TaskRunner subclassing) + # TaskRunner API def get_data_loader(self, collaborator_name): """Get data loader for a specific collaborator. @@ -444,20 +444,6 @@ def get_data_loader(self, collaborator_name): return self.loader_ - # Python interactive api - def initialize_data_loader(self, data_loader, shard_descriptor): - """Initialize data loader. - - Args: - data_loader (DataLoader): Data loader to initialize. - shard_descriptor (ShardDescriptor): Descriptor of the data shard. - - Returns: - DataLoader: Initialized data loader. - """ - data_loader.shard_descriptor = shard_descriptor - return data_loader - # legacy api (TaskRunner subclassing) def get_task_runner(self, data_loader): """Get task runner. @@ -483,49 +469,6 @@ def get_task_runner(self, data_loader): return self.runner_ - # Python interactive api - def get_core_task_runner(self, data_loader=None, model_provider=None, task_keeper=None): - """Get core task runner. - - Args: - data_loader (DataLoader, optional): Data loader for the tasks. - Defaults to None. - model_provider (ModelProvider, optional): Provider for the model. - Defaults to None. - task_keeper (TaskKeeper, optional): Keeper for the tasks. Defaults - to None. - - Returns: - CoreTaskRunner: Core task runner for the tasks. - """ - defaults = self.config.get( - "task_runner", - { - TEMPLATE: "openfl.federated.task.task_runner.CoreTaskRunner", - SETTINGS: {}, - }, - ) - - # We are importing a CoreTaskRunner instance!!! - if self.runner_ is None: - self.runner_ = Plan.build(**defaults) - - self.runner_.set_data_loader(data_loader) - - self.runner_.set_model_provider(model_provider) - self.runner_.set_task_provider(task_keeper) - - framework_adapter = Plan.build( - self.config["task_runner"]["required_plugin_components"]["framework_adapters"], - {}, - ) - - # This step initializes tensorkeys - # Which have no sens if task provider is not set up - self.runner_.set_framework_adapter(framework_adapter) - - return self.runner_ - def get_collaborator( self, collaborator_name, @@ -534,7 +477,6 @@ def get_collaborator( certificate=None, task_runner=None, client=None, - shard_descriptor=None, ): """Get collaborator. @@ -554,8 +496,6 @@ def get_collaborator( collaborator. 
Defaults to None. client (Client, optional): Client for the collaborator. Defaults to None. - shard_descriptor (ShardDescriptor, optional): Descriptor of the - data shard. Defaults to None. Returns: self.collaborator_ (Collaborator): The collaborator instance. @@ -574,24 +514,9 @@ def get_collaborator( if task_runner is not None: defaults[SETTINGS]["task_runner"] = task_runner else: - # Here we support new interactive api as well as old task_runner subclassing interface - # If Task Runner class is placed incide openfl `task-runner` subpackage it is - # a part of the New API and it is a part of OpenFL kernel. - # If Task Runner is placed elsewhere, somewhere in user workspace, than it is - # a part of the old interface and we follow legacy initialization procedure. - if "openfl.federated.task.task_runner" in self.config["task_runner"]["template"]: - # Interactive API - model_provider, task_keeper, data_loader = self.deserialize_interface_objects() - data_loader = self.initialize_data_loader(data_loader, shard_descriptor) - defaults[SETTINGS]["task_runner"] = self.get_core_task_runner( - data_loader=data_loader, - model_provider=model_provider, - task_keeper=task_keeper, - ) - else: - # TaskRunner subclassing API - data_loader = self.get_data_loader(collaborator_name) - defaults[SETTINGS]["task_runner"] = self.get_task_runner(data_loader) + # TaskRunner subclassing API + data_loader = self.get_data_loader(collaborator_name) + defaults[SETTINGS]["task_runner"] = self.get_task_runner(data_loader) defaults[SETTINGS]["compression_pipeline"] = self.get_tensor_pipe() defaults[SETTINGS]["task_config"] = self.config.get("tasks", {}) @@ -703,85 +628,6 @@ def get_server( return self.server_ - def interactive_api_get_server( - self, *, tensor_dict, root_certificate, certificate, private_key, tls - ): - """Get gRPC server of the aggregator instance for interactive API. - - Args: - tensor_dict (dict): Dictionary of tensors. - root_certificate (str): Root certificate for the server. - certificate (str): Certificate for the server. - private_key (str): Private key for the server. - tls (bool): Whether to use Transport Layer Security. - - Returns: - AggregatorGRPCServer: gRPC server of the aggregator instance. - """ - server_args = self.config["network"][SETTINGS] - - # patch certificates - server_args["root_certificate"] = root_certificate - server_args["certificate"] = certificate - server_args["private_key"] = private_key - server_args["use_tls"] = tls - - server_args["aggregator"] = self.get_aggregator(tensor_dict) - - if self.server_ is None: - self.server_ = AggregatorGRPCServer(**server_args) - - return self.server_ - - def deserialize_interface_objects(self): - """Deserialize objects for TaskRunner. - - Returns: - tuple: Tuple containing the deserialized objects. - """ - api_layer = self.config["api_layer"] - filenames = [ - "model_interface_file", - "tasks_interface_file", - "dataloader_interface_file", - ] - return (self.restore_object(api_layer["settings"][filename]) for filename in filenames) - - def get_serializer_plugin(self, **kwargs): - """Get serializer plugin. - - This plugin is used for serialization of interfaces in new interactive - API. - - Args: - **kwargs: Additional keyword arguments. - - Returns: - SerializerPlugin: Serializer plugin. 
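Stepping back to the `get_collaborator` change above: with the interactive branch gone, construction always goes through the TaskRunner path. Roughly (plan path and collaborator name are placeholders; a prepared workspace is assumed):

```python
from pathlib import Path

from openfl.federated import Plan

plan = Plan.parse(plan_config_path=Path("plan/plan.yaml"))
collaborator = plan.get_collaborator("collaborator_one")
collaborator.run()  # connects to the aggregator and processes assigned tasks
```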
- """ - if self.serializer_ is None: - if "api_layer" not in self.config: # legacy API - return None - required_plugin_components = self.config["api_layer"]["required_plugin_components"] - serializer_plugin = required_plugin_components["serializer_plugin"] - self.serializer_ = Plan.build(serializer_plugin, kwargs) - return self.serializer_ - - def restore_object(self, filename): - """Deserialize an object. - - Args: - filename (str): Name of the file. - - Returns: - object: Deserialized object. - """ - serializer_plugin = self.get_serializer_plugin() - if serializer_plugin is None: - return None - obj = serializer_plugin.restore_object(filename) - return obj - def save_model_to_state_file(self, tensor_dict, round_number, output_path): """Save model weights to a protobuf state file. diff --git a/openfl/federated/task/task_runner.py b/openfl/federated/task/task_runner.py deleted file mode 100644 index ef6d83e4ea..0000000000 --- a/openfl/federated/task/task_runner.py +++ /dev/null @@ -1,455 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Interactive API package.""" - -from logging import getLogger - -import numpy as np - -from openfl.utilities import TensorKey, change_tags -from openfl.utilities.split import split_tensor_dict_for_holdouts - - -class CoreTaskRunner: - """Federated Learning Task Runner Class. - - Attributes: - kwargs (dict): Additional parameters passed to the function. - TASK_REGISTRY (dict): Registry of tasks. - training_round_completed (bool): Flag indicating if a training round - has been completed. - tensor_dict_split_fn_kwargs (dict): Key word arguments for determining - which parameters to hold out from aggregation. - required_tensorkeys_for_function (dict): Required tensorkeys for all - public functions in CoreTaskRunner. - logger (logging.Logger): Logger object for logging events. - opt_treatment (str): Treatment of current instance optimizer. - """ - - def _prepare_tensorkeys_for_agggregation( - self, metric_dict, validation_flag, col_name, round_num - ): - """Prepare tensorkeys for aggregation. - Args: - metric_dict (dict): Dictionary of metrics. - validation_flag (bool): Flag indicating if validation is to be - performed. - col_name (str): The column name. - round_num (int): The round number. - - Returns: - tuple: Tuple containing global_tensor_dict and local_tensor_dict. - """ - global_tensor_dict, local_tensor_dict = {}, {} - origin = col_name - if not validation_flag: - # Output metric tensors (scalar) - tags = ("trained",) - - # output model tensors (Doesn't include TensorKey) - output_model_dict = self.get_tensor_dict(with_opt_vars=True) - global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( - self.logger, output_model_dict, **self.tensor_dict_split_fn_kwargs - ) - - # Create global tensorkeys - global_tensorkey_model_dict = { - TensorKey(tensor_name, origin, round_num, False, tags): nparray - for tensor_name, nparray in global_model_dict.items() - } - # Create tensorkeys that should stay local - local_tensorkey_model_dict = { - TensorKey(tensor_name, origin, round_num, False, tags): nparray - for tensor_name, nparray in local_model_dict.items() - } - # The train/validate aggregated function of the next - # round will look for the updated model parameters. 
- # This ensures they will be resolved locally - next_local_tensorkey_model_dict = { - TensorKey(tensor_name, origin, round_num + 1, False, ("model",)): nparray - for tensor_name, nparray in local_model_dict.items() - } - - global_tensor_dict = global_tensorkey_model_dict - local_tensor_dict = { - **local_tensorkey_model_dict, - **next_local_tensorkey_model_dict, - } - - # Update the required tensors if they need to be - # pulled from the aggregator - # TODO this logic can break if different collaborators - # have different roles between rounds. - # For example, if a collaborator only performs validation - # in the first round but training - # in the second, it has no way of knowing the optimizer - # state tensor names to request from the aggregator - # because these are only created after training occurs. - # A work around could involve doing a single epoch of training - # on random data to get the optimizer names, - # and then throwing away the model. - if self.opt_treatment == "CONTINUE_GLOBAL": - self.initialize_tensorkeys_for_functions(with_opt_vars=True) - - # This will signal that the optimizer values are now present, - # and can be loaded when the model is rebuilt - self.training_round_completed = True - - else: - suffix = "validate" + validation_flag - tags = (suffix,) - tags = change_tags(tags, add_field="metric") - metric_dict = { - TensorKey(metric, origin, round_num, True, tags): np.array(value) - for metric, value in metric_dict.items() - } - global_tensor_dict = {**global_tensor_dict, **metric_dict} - - return global_tensor_dict, local_tensor_dict - - def adapt_tasks(self): - """Prepare tasks for the collaborator. - - Using functions from a task provider (deserialized interface object) - and registered task contracts prepares callable tasks to be invoked by - the collaborator. - - Preparing includes conditional model rebuilding and filling output - dicts with tensors for aggregation and storing in local DB. - - There is an assumption that any training task accepts optimizer as one - of the arguments, thus the model should be aggregated after such tasks. 
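The keying convention used above can be shown in isolation. A small sketch of how a trained tensor and a validation metric would be keyed (tensor and metric names are made up; the tag tuples follow the ones built above):

```python
import numpy as np

from openfl.utilities import TensorKey

origin, round_num = "collaborator_one", 0

trained_key = TensorKey("conv1.weight", origin, round_num, False, ("trained",))
metric_key = TensorKey("accuracy", origin, round_num, True, ("validate_agg", "metric"))

tensors = {trained_key: np.zeros((3, 3)), metric_key: np.array(0.93)}
```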
-
-        Returns:
-            None
-        """
-
-        def task_binder(task_name, callable_task):
-            def collaborator_adapted_task(col_name, round_num, input_tensor_dict, **kwargs):
-                task_contract = self.task_provider.task_contract[task_name]
-                # Validation flag can be [False, '_local', '_agg']
-                validation_flag = task_contract["optimizer"] is None
-                task_settings = self.task_provider.task_settings[task_name]
-
-                device = kwargs.get("device", "cpu")
-
-                self.rebuild_model(input_tensor_dict, validation=validation_flag, device=device)
-                task_kwargs = {}
-                if validation_flag:
-                    loader = self.data_loader.get_valid_loader()
-                    if kwargs["apply"] == "local":
-                        validation_flag = "_local"
-                    else:
-                        validation_flag = "_agg"
-                else:
-                    loader = self.data_loader.get_train_loader()
-                    # If it is a train task, we also pass the optimizer
-                    task_kwargs[task_contract["optimizer"]] = self.optimizer
-
-                if task_contract["round_num"] is not None:
-                    task_kwargs[task_contract["round_num"]] = round_num
-
-                for en_name, entity in zip(
-                    ["model", "data_loader", "device"],
-                    [self.model, loader, device],
-                ):
-                    task_kwargs[task_contract[en_name]] = entity
-
-                # Add task settings to the keyword arguments
-                task_kwargs.update(task_settings)
-
-                # Here is the actual task method call
-                metric_dict = callable_task(**task_kwargs)
-
-                return self._prepare_tensorkeys_for_agggregation(
-                    metric_dict, validation_flag, col_name, round_num
-                )
-
-            return collaborator_adapted_task
-
-        for (
-            task_name,
-            callable_task,
-        ) in self.task_provider.task_registry.items():
-            self.TASK_REGISTRY[task_name] = task_binder(task_name, callable_task)
-
-    def __init__(self, **kwargs):
-        """Initialize the Task Runner object.
-
-        This class is part of the interactive Python API release.
-        It is no longer a user-facing entity meant to be subclassed,
-        but a part of the OpenFL kernel.
-
-        Args:
-            **kwargs: Additional parameters to pass to the function.
-        """
-        self.set_logger()
-
-        self.kwargs = kwargs
-
-        self.TASK_REGISTRY = {}
-
-        # Default optimizer treatment; may be changed via set_optimizer_treatment()
-        self.opt_treatment = "RESET"
-        self.tensor_dict_split_fn_kwargs = {}
-        self.required_tensorkeys_for_function = {}
-
-        # Internal state for the optimizer-treatment logic below
-        self.training_round_completed = False
-        # Overwrite attribute to account for one optimizer param (in every
-        # child model that does not overwrite get and set tensordict) that is
-        # not a numpy array
-        self.tensor_dict_split_fn_kwargs.update({"holdout_tensor_names": ["__opt_state_needed"]})
-
-    def set_task_provider(self, task_provider):
-        """Set the task registry.
-
-        This method receives a Task Interface object as an argument
-        and uses the provided callables and information to prepare
-        tasks that may be called by the collaborator component.
-
-        Args:
-            task_provider: Task provider object.
-
-        Returns:
-            None
-        """
-        if task_provider is None:
-            return
-        self.task_provider = task_provider
-        self.adapt_tasks()
-
-    def set_data_loader(self, data_loader):
-        """Register a data loader initialized with local data path.
-
-        Args:
-            data_loader: Data loader object.
-
-        Returns:
-            None
-        """
-        self.data_loader = data_loader
-
-    def set_model_provider(self, model_provider):
-        """Retrieve a model and an optimizer from the interface object.
-
-        Args:
-            model_provider: Model provider object.
-
-        Returns:
-            None
-        """
-        self.model_provider = model_provider
-        self.model = self.model_provider.provide_model()
-        self.optimizer = self.model_provider.provide_optimizer()
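For orientation, the task contract consumed by `task_binder` above maps framework entities to the user function's parameter names. A hypothetical entry for a training task defined as `def train(model, train_loader, optimizer, device, round_num=None)` might look like:

```python
task_contract = {
    "model": "model",             # pass self.model as `model`
    "data_loader": "train_loader",
    "optimizer": "optimizer",     # non-None marks this as a training task
    "device": "device",
    "round_num": "round_num",     # or None when the task ignores the round
}
```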
-
-    def set_framework_adapter(self, framework_adapter):
-        """Set the framework adapter.
-
-        Setting a framework adapter allows the model weights to be extracted
-        up front in order to build the list of parameters to be aggregated.
-
-        Args:
-            framework_adapter: Framework adapter object.
-
-        Returns:
-            None
-        """
-        self.framework_adapter = framework_adapter
-        aggregate_optimizer_parameters = self.opt_treatment == "CONTINUE_GLOBAL"
-        self.initialize_tensorkeys_for_functions(with_opt_vars=aggregate_optimizer_parameters)
-
-    def set_logger(self):
-        """Set up the log object.
-
-        Returns:
-            None
-        """
-        self.logger = getLogger(__name__)
-
-    def set_optimizer_treatment(self, opt_treatment):
-        # Should be removed: we have this info at initialization time
-        # and it does not change during training.
-        """Change the treatment of the current instance's optimizer.
-
-        Args:
-            opt_treatment (str): The optimizer treatment.
-
-        Returns:
-            None
-        """
-        self.opt_treatment = opt_treatment
-
-    def rebuild_model(self, input_tensor_dict, validation=False, device="cpu"):
-        """
-        Parse tensor names and update the weights of the model. Handles the
-        optimizer treatment.
-
-        Args:
-            input_tensor_dict (dict): The input tensor dictionary.
-            validation (bool): If True, perform validation. Default is False.
-            device (str): The device to use. Default is 'cpu'.
-
-        Returns:
-            None
-        """
-        if self.opt_treatment == "RESET":
-            self.reset_opt_vars()
-            self.set_tensor_dict(input_tensor_dict, with_opt_vars=False, device=device)
-        elif (
-            self.training_round_completed
-            and self.opt_treatment == "CONTINUE_GLOBAL"
-            and not validation
-        ):
-            self.set_tensor_dict(input_tensor_dict, with_opt_vars=True, device=device)
-        else:
-            self.set_tensor_dict(input_tensor_dict, with_opt_vars=False, device=device)
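The branching in `rebuild_model` reduces to a single question: should optimizer variables be loaded from the incoming tensors? A compact, self-checking restatement for illustration:

```python
def needs_opt_vars(opt_treatment: str, round_completed: bool, validation: bool) -> bool:
    """True only for CONTINUE_GLOBAL after a completed round, outside validation."""
    return opt_treatment == "CONTINUE_GLOBAL" and round_completed and not validation


assert needs_opt_vars("CONTINUE_GLOBAL", True, False)
assert not needs_opt_vars("RESET", True, False)           # RESET also reinitializes the optimizer
assert not needs_opt_vars("CONTINUE_GLOBAL", True, True)  # validation never loads optimizer state
```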
-
-    def get_required_tensorkeys_for_function(self, func_name, **kwargs):
-        """Get the required tensors for the specified function that could be
-        called as part of a task.
-
-        By default, this is just all of the layers and optimizer of the model.
-
-        Args:
-            func_name (str): The function name.
-            **kwargs: Additional parameters to pass to the function.
-
-        Returns:
-            list: List of required TensorKey.
-        """
-        # We rely on the `apply` parameter of validation-type tasks.
-        # In the interface layer we add those parameters automatically.
-        if "apply" not in kwargs:
-            return [
-                TensorKey(tensor_name, "GLOBAL", 0, False, ("model",))
-                for tensor_name in self.required_tensorkeys_for_function["global_model_dict"]
-            ] + [
-                TensorKey(tensor_name, "LOCAL", 0, False, ("model",))
-                for tensor_name in self.required_tensorkeys_for_function["local_model_dict"]
-            ]
-
-        if kwargs["apply"] == "local":
-            return [
-                TensorKey(tensor_name, "LOCAL", 0, False, ("trained",))
-                for tensor_name in {
-                    **self.required_tensorkeys_for_function["local_model_dict_val"],
-                    **self.required_tensorkeys_for_function["global_model_dict_val"],
-                }
-            ]
-
-        elif kwargs["apply"] == "global":
-            return [
-                TensorKey(tensor_name, "GLOBAL", 0, False, ("model",))
-                for tensor_name in self.required_tensorkeys_for_function["global_model_dict_val"]
-            ] + [
-                TensorKey(tensor_name, "LOCAL", 0, False, ("model",))
-                for tensor_name in self.required_tensorkeys_for_function["local_model_dict_val"]
-            ]
-
-    def initialize_tensorkeys_for_functions(self, with_opt_vars=False):
-        """Set the required tensors for all publicly accessible task methods.
-
-        By default, this is just all of the layers and optimizer of the model.
-        Custom tensors should be added to this function.
-
-        Args:
-            with_opt_vars (bool): Specify if we also want to set the variables
-                of the optimizer. Default is False.
-
-        Returns:
-            None
-        """
-        # TODO: Framework adapters should have separate methods for dealing
-        # with the optimizer. Set the model dict for validation tasks
-        output_model_dict = self.get_tensor_dict(with_opt_vars=False)
-        global_model_dict_val, local_model_dict_val = split_tensor_dict_for_holdouts(
-            self.logger, output_model_dict, **self.tensor_dict_split_fn_kwargs
-        )
-        # Now set the model dict for training tasks
-        if with_opt_vars:
-            output_model_dict = self.get_tensor_dict(with_opt_vars=True)
-            global_model_dict, local_model_dict = split_tensor_dict_for_holdouts(
-                self.logger, output_model_dict, **self.tensor_dict_split_fn_kwargs
-            )
-        else:
-            global_model_dict = global_model_dict_val
-            local_model_dict = local_model_dict_val
-
-        self.required_tensorkeys_for_function["global_model_dict"] = global_model_dict
-        self.required_tensorkeys_for_function["local_model_dict"] = local_model_dict
-        self.required_tensorkeys_for_function["global_model_dict_val"] = global_model_dict_val
-        self.required_tensorkeys_for_function["local_model_dict_val"] = local_model_dict_val
-
-    def reset_opt_vars(self):
-        """Reset optimizer variables.
-
-        Returns:
-            None
-        """
-        self.optimizer = self.model_provider.provide_optimizer()
-
-    def get_train_data_size(self):
-        """Get the number of training examples.
-
-        It will be used for weighted averaging in aggregation.
-
-        Returns:
-            int: The number of training examples.
-        """
-        return self.data_loader.get_train_data_size()
-
-    def get_valid_data_size(self):
-        """Get the number of validation examples.
-
-        It will be used for weighted averaging in aggregation.
-
-        Returns:
-            int: The number of validation examples.
-        """
-        return self.data_loader.get_valid_data_size()
-
-    def get_tensor_dict(self, with_opt_vars=False):
-        """Return the tensor dictionary.
-
-        Args:
-            with_opt_vars (bool): Return the tensor dictionary including the
-                optimizer tensors (Default=False).
-
-        Returns:
-            dict: Tensor dictionary {**dict, **optimizer_dict}.
-        """
-        args = [self.model]
-        if with_opt_vars:
-            args.append(self.optimizer)
-
-        return self.framework_adapter.get_tensor_dict(*args)
-
-    def set_tensor_dict(self, tensor_dict, with_opt_vars=False, device="cpu"):
-        """Set the tensor dictionary.
-
-        Args:
-            tensor_dict: The tensor dictionary.
-            with_opt_vars (bool): If True, also set the optimizer tensors
-                (Default=False).
-        """
-        # Sets tensors for model layers and optimizer state.
-        # FIXME: self.parameters() instead? Unclear if load_state_dict() or
-        # simple assignment is better
-        # for now, state dict gives us names, which is good
-        # FIXME: do both and sanity check each time?
- args = [self.model, tensor_dict] - if with_opt_vars: - args.append(self.optimizer) - - kwargs = { - "device": device, - } - - return self.framework_adapter.set_tensor_dict(*args, **kwargs) diff --git a/openfl/interface/director.py b/openfl/interface/director.py deleted file mode 100644 index bd73d865c1..0000000000 --- a/openfl/interface/director.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Director CLI.""" - -import logging -import shutil -import sys -from pathlib import Path - -import click -from click import Path as ClickPath -from click import group, option, pass_context -from dynaconf import Validator - -from openfl.component.director import Director -from openfl.interface.cli import review_plan_callback -from openfl.interface.cli_helper import WORKSPACE -from openfl.transport import DirectorGRPCServer -from openfl.utilities import merge_configs -from openfl.utilities.path_check import is_directory_traversal - -logger = logging.getLogger(__name__) - - -@group() -@pass_context -def director(context): - """Manage Federated Learning Director. - - Args: - context (click.core.Context): Click context. - """ - context.obj["group"] = "director" - - -@director.command(name="start") -@option( - "-c", - "--director-config-path", - default="director.yaml", - help="The director config file path", - type=ClickPath(exists=True), -) -@option( - "--tls/--disable-tls", - default=True, - is_flag=True, - help="Use TLS or not (By default TLS is enabled)", -) -@option( - "-rc", - "--root-cert-path", - "root_certificate", - required=False, - type=ClickPath(exists=True), - default=None, - help="Path to a root CA cert", -) -@option( - "-pk", - "--private-key-path", - "private_key", - required=False, - type=ClickPath(exists=True), - default=None, - help="Path to a private key", -) -@option( - "-oc", - "--public-cert-path", - "certificate", - required=False, - type=ClickPath(exists=True), - default=None, - help="Path to a signed certificate", -) -def start(director_config_path, tls, root_certificate, private_key, certificate): - """Start the director service. - - Args: - director_config_path (str): The director config file path. - tls (bool): Use TLS or not. - root_certificate (str): Path to a root CA cert. - private_key (str): Path to a private key. - certificate (str): Path to a signed certificate. 
- """ - - director_config_path = Path(director_config_path).absolute() - logger.info("🧿 Starting the Director Service.") - if is_directory_traversal(director_config_path): - click.echo("The director config file path is out of the openfl workspace scope.") - sys.exit(1) - config = merge_configs( - settings_files=director_config_path, - overwrite_dict={ - "root_certificate": root_certificate, - "private_key": private_key, - "certificate": certificate, - }, - validators=[ - Validator("settings.listen_host", default="localhost"), - Validator("settings.listen_port", default=50051, gte=1024, lte=65535), - Validator("settings.sample_shape", default=[]), - Validator("settings.target_shape", default=[]), - Validator("settings.install_requirements", default=False), - Validator( - "settings.envoy_health_check_period", - default=60, # in seconds - gte=1, - lte=24 * 60 * 60, - ), - Validator("settings.review_experiment", default=False), - ], - value_transform=[ - ("settings.sample_shape", lambda x: list(map(str, x))), - ("settings.target_shape", lambda x: list(map(str, x))), - ], - ) - - logger.info( - f"Sample shape: {config.settings.sample_shape}, " - f"target shape: {config.settings.target_shape}" - ) - - if config.root_certificate: - config.root_certificate = Path(config.root_certificate).absolute() - - if config.private_key: - config.private_key = Path(config.private_key).absolute() - - if config.certificate: - config.certificate = Path(config.certificate).absolute() - - # We pass the `review_experiment` callback only if it is needed. - # Otherwise we pass None. - overwritten_review_plan_callback = None - if config.settings.review_experiment: - overwritten_review_plan_callback = review_plan_callback - - director_server = DirectorGRPCServer( - director_cls=Director, - tls=tls, - sample_shape=config.settings.sample_shape, - target_shape=config.settings.target_shape, - root_certificate=config.root_certificate, - private_key=config.private_key, - certificate=config.certificate, - listen_host=config.settings.listen_host, - listen_port=config.settings.listen_port, - review_plan_callback=overwritten_review_plan_callback, - envoy_health_check_period=config.settings.envoy_health_check_period, - install_requirements=config.settings.install_requirements, - ) - director_server.start() - - -@director.command(name="create-workspace") -@option( - "-p", - "--director-path", - required=True, - help="The director path", - type=ClickPath(), -) -def create(director_path): - """Create a director workspace. - - Args: - director_path (str): The director path. - """ - if is_directory_traversal(director_path): - click.echo("The director path is out of the openfl workspace scope.") - sys.exit(1) - director_path = Path(director_path).absolute() - if director_path.exists(): - if not click.confirm("Director workspace already exists. 
Recreate?", default=True): - sys.exit(1) - shutil.rmtree(director_path) - (director_path / "cert").mkdir(parents=True, exist_ok=True) - (director_path / "logs").mkdir(parents=True, exist_ok=True) - shutil.copyfile(WORKSPACE / "default/director.yaml", director_path / "director.yaml") diff --git a/openfl/interface/envoy.py b/openfl/interface/envoy.py deleted file mode 100644 index 0fd5eb0cd7..0000000000 --- a/openfl/interface/envoy.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Envoy CLI.""" - -import logging -import shutil -import sys -from importlib import import_module -from pathlib import Path - -import click -from click import Path as ClickPath -from click import group, option, pass_context -from dynaconf import Validator - -from openfl.component.envoy.envoy import Envoy -from openfl.interface.cli import review_plan_callback -from openfl.interface.cli_helper import WORKSPACE -from openfl.utilities import click_types, merge_configs -from openfl.utilities.path_check import is_directory_traversal - -logger = logging.getLogger(__name__) - - -@group() -@pass_context -def envoy(context): - """Manage Federated Learning Envoy. - - Args: - context (click.core.Context): Click context. - """ - context.obj["group"] = "envoy" - - -@envoy.command(name="start") -@option("-n", "--shard-name", required=True, help="Current shard name") -@option( - "-dh", - "--director-host", - required=True, - help="The FQDN of the federation director", - type=click_types.FQDN, -) -@option( - "-dp", - "--director-port", - required=True, - help="The federation director port", - type=click.IntRange(1, 65535), -) -@option( - "--tls/--disable-tls", - default=True, - is_flag=True, - help="Use TLS or not (By default TLS is enabled)", -) -@option( - "-ec", - "--envoy-config-path", - default="envoy_config.yaml", - help="The envoy config path", - type=ClickPath(exists=True), -) -@option( - "-rc", - "--root-cert-path", - "root_certificate", - default=None, - help="Path to a root CA cert", - type=ClickPath(exists=True), -) -@option( - "-pk", - "--private-key-path", - "private_key", - default=None, - help="Path to a private key", - type=ClickPath(exists=True), -) -@option( - "-oc", - "--public-cert-path", - "certificate", - default=None, - help="Path to a signed certificate", - type=ClickPath(exists=True), -) -def start_( - shard_name, - director_host, - director_port, - tls, - envoy_config_path, - root_certificate, - private_key, - certificate, -): - """Start the Envoy. - - Args: - shard_name (str): Current shard name. - director_host (str): The FQDN of the federation director. - director_port (int): The federation director port. - tls (bool): Use TLS or not. - envoy_config_path (str): The envoy config path. - root_certificate (str): Path to a root CA cert. - private_key (str): Path to a private key. - certificate (str): Path to a signed certificate. 
- """ - - logger.info("🧿 Starting the Envoy.") - if is_directory_traversal(envoy_config_path): - click.echo("The shard config path is out of the openfl workspace scope.") - sys.exit(1) - - config = merge_configs( - settings_files=envoy_config_path, - overwrite_dict={ - "root_certificate": root_certificate, - "private_key": private_key, - "certificate": certificate, - }, - validators=[ - Validator("shard_descriptor.template", required=True), - Validator("params.cuda_devices", default=[]), - Validator("params.install_requirements", default=True), - Validator("params.review_experiment", default=False), - ], - ) - - if config.root_certificate: - config.root_certificate = Path(config.root_certificate).absolute() - if config.private_key: - config.private_key = Path(config.private_key).absolute() - if config.certificate: - config.certificate = Path(config.certificate).absolute() - - # Parse envoy parameters - envoy_params = config.get("params", {}) - - # Build optional plugin components - optional_plugins_section = config.get("optional_plugin_components") - if optional_plugins_section is not None: - for plugin_name, plugin_settings in optional_plugins_section.items(): - template = plugin_settings.get("template") - if not template: - raise Exception(f"You should put a templatefor plugin {plugin_name}") - module_path, _, class_name = template.rpartition(".") - plugin_params = plugin_settings.get("params", {}) - - module = import_module(module_path) - instance = getattr(module, class_name)(**plugin_params) - envoy_params[plugin_name] = instance - - # We pass the `review_experiment` callback only if it is needed. - # Otherwise we pass None. - overwritten_review_plan_callback = None - if envoy_params.review_experiment: - overwritten_review_plan_callback = review_plan_callback - del envoy_params.review_experiment - - # Instantiate Shard Descriptor - shard_descriptor = shard_descriptor_from_config(config.get("shard_descriptor", {})) - envoy = Envoy( - shard_name=shard_name, - director_host=director_host, - director_port=director_port, - tls=tls, - shard_descriptor=shard_descriptor, - root_certificate=config.root_certificate, - private_key=config.private_key, - certificate=config.certificate, - review_plan_callback=overwritten_review_plan_callback, - **envoy_params, - ) - - envoy.start() - - -@envoy.command(name="create-workspace") -@option("-p", "--envoy-path", required=True, help="The Envoy path", type=ClickPath()) -def create(envoy_path): - """Create an envoy workspace. - - Args: - envoy_path (str): The Envoy path. - """ - if is_directory_traversal(envoy_path): - click.echo("The Envoy path is out of the openfl workspace scope.") - sys.exit(1) - envoy_path = Path(envoy_path).absolute() - if envoy_path.exists(): - if not click.confirm("Envoy workspace already exists. Recreate?", default=True): - sys.exit(1) - shutil.rmtree(envoy_path) - (envoy_path / "cert").mkdir(parents=True, exist_ok=True) - (envoy_path / "logs").mkdir(parents=True, exist_ok=True) - (envoy_path / "data").mkdir(parents=True, exist_ok=True) - shutil.copyfile( - WORKSPACE / "default/envoy_config.yaml", - envoy_path / "envoy_config.yaml", - ) - shutil.copyfile( - WORKSPACE / "default/shard_descriptor.py", - envoy_path / "shard_descriptor.py", - ) - shutil.copyfile(WORKSPACE / "default/requirements.txt", envoy_path / "requirements.txt") - - -def shard_descriptor_from_config(shard_config: dict): - """Build a shard descriptor from config. - - Args: - shard_config (dict): Shard configuration. 
- - Returns: - instance: Shard descriptor instance. - """ - template = shard_config.get("template") - if not template: - raise Exception("You should define a shard descriptor template in the envoy config") - class_name = template.split(".")[-1] - module_path = ".".join(template.split(".")[:-1]) - params = shard_config.get("params", {}) - - module = import_module(module_path) - instance = getattr(module, class_name)(**params) - - return instance diff --git a/openfl/interface/interactive_api/__init__.py b/openfl/interface/interactive_api/__init__.py deleted file mode 100644 index 4549e54838..0000000000 --- a/openfl/interface/interactive_api/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Interactive API package.""" diff --git a/openfl/interface/interactive_api/experiment.py b/openfl/interface/interactive_api/experiment.py deleted file mode 100644 index 0a9b383b2f..0000000000 --- a/openfl/interface/interactive_api/experiment.py +++ /dev/null @@ -1,949 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Python low-level API module.""" - -import os -import time -from collections import defaultdict -from copy import deepcopy -from logging import getLogger -from os import getcwd, makedirs -from os.path import basename -from pathlib import Path -from shutil import copytree, ignore_patterns, make_archive -from typing import Dict, Tuple - -from openfl.component.assigner.tasks import Task, TrainTask, ValidateTask -from openfl.federated import Plan -from openfl.interface.aggregation_functions import AggregationFunction, WeightedAverage -from openfl.interface.cli import setup_logging -from openfl.interface.cli_helper import WORKSPACE -from openfl.native import update_plan -from openfl.utilities.split import split_tensor_dict_for_holdouts -from openfl.utilities.utils import rmtree -from openfl.utilities.workspace import dump_requirements_file - - -class ModelStatus: - """Model statuses. - - This class defines the various statuses a model can have during an - experiment. - - Attributes: - INITIAL (str): Initial status of the model. - BEST (str): Status indicating the model with the best score. - LAST (str): Status indicating the last model used in the experiment. - RESTORED (str): Status indicating a model that has been restored. - """ - - INITIAL = "initial" - BEST = "best" - LAST = "last" - RESTORED = "restored" - - -class FLExperiment: - """Central class for FL experiment orchestration. - - This class is responsible for orchestrating the federated learning - experiment. It manages - the experiment's lifecycle and interacts with the federation. - - Attributes: - federation: The federation that this experiment is part of. - experiment_name (str): The name of the experiment. - summary_writer (SummaryWriter): The summary writer. - serializer_plugin (str): The serializer plugin to use. - experiment_submitted (bool): Whether the experiment has been submitted. - is_validate_task_exist (bool): Whether a validate task exists. - logger (Logger): The logger to use. - plan (Plan): The plan for the experiment. - """ - - def __init__( - self, - federation, - experiment_name: str = None, - serializer_plugin: str = "openfl.plugins.interface_serializer." - "cloudpickle_serializer.CloudpickleSerializer", - ) -> None: - """Initialize an experiment inside a federation. - - Experiment makes sense in a scope of some machine learning problem. 
- Information about the data on collaborators is contained on the - federation level. - - Args: - federation: The federation that this experiment is part of. - experiment_name (str, optional): The name of the experiment. - Defaults to None. - serializer_plugin (str, optional): The serializer plugin. Defaults - to 'openfl.plugins.interface_serializer. - cloudpickle_serializer.CloudpickleSerializer'. - """ - self.federation = federation - self.experiment_name = experiment_name or "test-" + time.strftime("%Y%m%d-%H%M%S") - self.summary_writer = None - self.serializer_plugin = serializer_plugin - - self.experiment_submitted = False - - self.is_validate_task_exist = False - - self.logger = getLogger(__name__) - setup_logging() - - self._initialize_plan() - - def _initialize_plan(self): - """Setup plan from base plan interactive api.""" - # Create a folder to store plans - os.makedirs("./plan", exist_ok=True) - os.makedirs("./save", exist_ok=True) - os.makedirs("./local_state", exist_ok=True) - # Load the default plan - base_plan_path = WORKSPACE / "workspace/plan/plans/default/base_plan_interactive_api.yaml" - plan = Plan.parse(base_plan_path, resolve=False) - # Change plan name to default one - plan.name = "plan.yaml" - - self.plan = deepcopy(plan) - - def _assert_experiment_submitted(self): - """Assure experiment is sent to director and accepted.""" - if not self.experiment_submitted: - self.logger.error("The experiment was not submitted to a Director service.") - self.logger.error("Report the experiment first: use the Experiment.start() method.") - return False - return True - - def get_experiment_status(self): - """Returns the current state of the experiment.""" - if not self._assert_experiment_submitted(): - return - exp_status = self.federation.dir_client.get_experiment_status( - experiment_name=self.experiment_name - ) - return exp_status.experiment_status - - def get_best_model(self): - """Retrieve the model with the best score.""" - if not self._assert_experiment_submitted(): - return - tensor_dict = self.federation.dir_client.get_best_model( - experiment_name=self.experiment_name - ) - - return self._rebuild_model(tensor_dict, upcoming_model_status=ModelStatus.BEST) - - def get_last_model(self): - """Retrieve the aggregated model after the last round.""" - if not self._assert_experiment_submitted(): - return - tensor_dict = self.federation.dir_client.get_last_model( - experiment_name=self.experiment_name - ) - - return self._rebuild_model(tensor_dict, upcoming_model_status=ModelStatus.LAST) - - def _rebuild_model(self, tensor_dict, upcoming_model_status=ModelStatus.BEST): - """Use tensor dict to update model weights. - - This method updates the model weights using the provided tensor - dictionary. If the tensor dictionary is empty, it logs a warning and - returns the current model. Otherwise, it rebuilds the model with the - new weights and updates the current model status. - - Args: - tensor_dict (dict): A dictionary containing tensor names as keys - and tensor values as values. - upcoming_model_status (ModelStatus, optional): The upcoming status - of the model. Defaults to ModelStatus.BEST. - - Returns: - The updated model. - """ - if len(tensor_dict) == 0: - warning_msg = ( - "No tensors received from director\n" - "Possible reasons:\n" - "\t1. Aggregated model is not ready\n" - "\t2. Experiment data removed from director" - ) - - if upcoming_model_status == ModelStatus.BEST and not self.is_validate_task_exist: - warning_msg += "\n\t3. 
No validation tasks are provided" - - warning_msg += f"\nReturn {self.current_model_status} model" - - self.logger.warning(warning_msg) - - else: - self.task_runner_stub.rebuild_model(tensor_dict, validation=True, device="cpu") - self.current_model_status = upcoming_model_status - - return deepcopy(self.task_runner_stub.model) - - def stream_metrics(self, tensorboard_logs: bool = True) -> None: - """Stream metrics.""" - if not self._assert_experiment_submitted(): - return - for metric_message_dict in self.federation.dir_client.stream_metrics(self.experiment_name): - self.logger.metric( - f"Round {metric_message_dict['round']}, " - f"collaborator {metric_message_dict['metric_origin']} " - f"{metric_message_dict['task_name']} result " - f"{metric_message_dict['metric_name']}:\t{metric_message_dict['metric_value']:f}" - ) - - if tensorboard_logs: - self.write_tensorboard_metric(metric_message_dict) - - def write_tensorboard_metric(self, metric: dict) -> None: - """Write metric callback.""" - if not self.summary_writer: - from tensorboardX import SummaryWriter - - self.summary_writer = SummaryWriter(f"./logs/{self.experiment_name}", flush_secs=5) - - self.summary_writer.add_scalar( - f"{metric['metric_origin']}/{metric['task_name']}/{metric['metric_name']}", - metric["metric_value"], - metric["round"], - ) - - def remove_experiment_data(self): - """Remove experiment data.""" - if not self._assert_experiment_submitted(): - return - log_message = "Removing experiment data " - if self.federation.dir_client.remove_experiment_data(name=self.experiment_name): - log_message += "succeed." - self.experiment_submitted = False - else: - log_message += "failed." - - self.logger.info(log_message) - - def prepare_workspace_distribution( - self, - model_provider, - task_keeper, - data_loader, - task_assigner, - pip_install_options: Tuple[str] = (), - ): - """Prepare an archive from a user workspace. - - This method serializes interface objects and saves them to disk, - dumps the prepared plan, prepares a requirements file to restore - the Python environment, and compresses the workspace to restore it on - a collaborator. - - Args: - model_provider: The model provider object. - task_keeper: The task keeper object. - data_loader: The data loader object. - task_assigner: The task assigner object. - pip_install_options (tuple, optional): A tuple of options for pip - install. Defaults to an empty tuple. - - Returns: - None - """ - # Save serialized python objects to disc - self._serialize_interface_objects(model_provider, task_keeper, data_loader, task_assigner) - # Save the prepared plan - Plan.dump(Path(f"./plan/{self.plan.name}"), self.plan.config, freeze=False) - - # PACK the WORKSPACE! - # Prepare requirements file to restore python env - dump_requirements_file(keep_original_prefixes=True, prefixes=pip_install_options) - - # Compress te workspace to restore it on collaborator - self.arch_path = self._pack_the_workspace() - - def start( - self, - *, - model_provider, - task_keeper, - data_loader, - rounds_to_train: int, - task_assigner=None, - override_config: dict = None, - delta_updates: bool = False, - opt_treatment: str = "RESET", - device_assignment_policy: str = "CPU_ONLY", - pip_install_options: Tuple[str] = (), - ) -> None: - """ - Prepare workspace distribution and send to Director. - - A successful call of this function will result in sending the - experiment workspace to the Director service and experiment start. - - Args: - model_provider: Model Interface instance. 
- task_keeper: Task Interface instance. - data_loader: Data Interface instance. - rounds_to_train (int): Required number of training rounds for the - experiment. - task_assigner (optional): Task assigner instance. Defaults to None. - override_config (dict, optional): Configuration to override the - default settings. Defaults to None. - delta_updates (bool, optional): Flag to indicate if delta updates - should be sent. Defaults to False. - opt_treatment (str, optional): Optimizer state treatment policy. - Defaults to 'RESET'. - Valid options: 'RESET' - reinitialize optimizer for every - round, - 'CONTINUE_LOCAL' - keep local optimizer state, - 'CONTINUE_GLOBAL' - aggregate optimizer state. - device_assignment_policy (str, optional): Device assignment policy. - Defaults to 'CPU_ONLY'. - Valid options: 'CPU_ONLY' - device parameter passed to tasks - will always be 'cpu', - 'CUDA_PREFERRED' - enable passing CUDA device identifiers to - tasks by collaborators, works with cuda-device-monitor plugin - equipped Envoys. - pip_install_options (Tuple[str], optional): Options for the remote - `pip install` calls. Defaults to (). - example: ('-f some.website', '--no-index') - """ - if not task_assigner: - task_assigner = self.define_task_assigner(task_keeper, rounds_to_train) - - self._prepare_plan( - model_provider, - data_loader, - rounds_to_train, - delta_updates=delta_updates, - opt_treatment=opt_treatment, - device_assignment_policy=device_assignment_policy, - override_config=override_config, - model_interface_file="model_obj.pkl", - tasks_interface_file="tasks_obj.pkl", - dataloader_interface_file="loader_obj.pkl", - ) - - self.prepare_workspace_distribution( - model_provider, - task_keeper, - data_loader, - task_assigner, - pip_install_options, - ) - - self.logger.info("Starting experiment!") - self.plan.resolve() - initial_tensor_dict = self._get_initial_tensor_dict(model_provider) - try: - response = self.federation.dir_client.set_new_experiment( - name=self.experiment_name, - col_names=self.plan.authorized_cols, - arch_path=self.arch_path, - initial_tensor_dict=initial_tensor_dict, - ) - finally: - self.remove_workspace_archive() - - if response.accepted: - self.logger.info("Experiment was submitted to the director!") - self.experiment_submitted = True - else: - self.logger.info("Experiment could not be submitted to the director.") - - def define_task_assigner(self, task_keeper, rounds_to_train): # noqa: C901 - """Define task assigner by registered tasks. - - This method defines a task assigner based on the registered tasks. - It checks if there are 'train' and 'validate' tasks among the - registered tasks and defines the task assigner accordingly. If there - are both 'train' and 'validate' tasks, the task assigner assigns these - tasks to each collaborator. If there are only 'validate' tasks, the - task assigner assigns only these tasks to each collaborator. - If there are no 'train' or 'validate' tasks, an exception is raised. - - Args: - task_keeper: The task keeper object that holds the registered - tasks. - rounds_to_train (int): The number of rounds to train. - - Returns: - assigner: A function that assigns tasks to each collaborator. - - Raises: - Exception: If there are no 'train' tasks and rounds_to_train is - not 1. - Exception: If there are no 'validate' tasks. - Exception: If there are no 'train' or 'validate' tasks. 
- """ - tasks = task_keeper.get_registered_tasks() - is_train_task_exist = False - self.is_validate_task_exist = False - for task in tasks.values(): - if task.task_type == "train": - is_train_task_exist = True - if task.task_type == "validate": - self.is_validate_task_exist = True - - if not is_train_task_exist and rounds_to_train != 1: - # Since we have only validation tasks, we do not have to train it multiple times - raise Exception( - "Variable rounds_to_train must be equal 1, because only validation tasks were given" - ) - if is_train_task_exist and self.is_validate_task_exist: - - def assigner(collaborators, round_number, **kwargs): - tasks_by_collaborator = {} - for collaborator in collaborators: - tasks_by_collaborator[collaborator] = [ - tasks["train"], - tasks["locally_tuned_model_validate"], - tasks["aggregated_model_validate"], - ] - return tasks_by_collaborator - - return assigner - - elif not is_train_task_exist and self.is_validate_task_exist: - - def assigner(collaborators, round_number, **kwargs): - tasks_by_collaborator = {} - for collaborator in collaborators: - tasks_by_collaborator[collaborator] = [ - tasks["aggregated_model_validate"], - ] - return tasks_by_collaborator - - return assigner - - elif is_train_task_exist and not self.is_validate_task_exist: - raise Exception("You should define validate task!") - else: - raise Exception("You should define train and validate tasks!") - - def restore_experiment_state(self, model_provider): - """Restores the state of an accepted experiment object. - - This method restores the state of an accepted experiment object by - getting the core task runner from the plan and setting the current - model status to RESTORED. It also sets the experiment_submitted - attribute to True. - - Args: - model_provider: The provider of the model used in the experiment. - - """ - self.task_runner_stub = self.plan.get_core_task_runner(model_provider=model_provider) - self.current_model_status = ModelStatus.RESTORED - self.experiment_submitted = True - - @staticmethod - def _pack_the_workspace(): - """Packing the archive.""" - - archive_type = "zip" - archive_name = basename(getcwd()) - - tmp_dir = "temp_" + archive_name - makedirs(tmp_dir, exist_ok=True) - - ignore = ignore_patterns( - "__pycache__", - "data", - "cert", - tmp_dir, - "*.crt", - "*.key", - "*.csr", - "*.srl", - "*.pem", - "*.pbuf", - "*zip", - ) - - copytree("./", tmp_dir + "/workspace", ignore=ignore) - - arch_path = make_archive(archive_name, archive_type, tmp_dir + "/workspace") - - rmtree(tmp_dir) - - return arch_path - - def remove_workspace_archive(self): - """Remove the workspace archive.""" - os.remove(self.arch_path) - del self.arch_path - - def _get_initial_tensor_dict(self, model_provider): - """Extracts initial weights from the model. - - This method extracts the initial weights from the model by getting the - core task runner from the plan and setting the current model status to - INITIAL. It then splits the tensor dictionary for holdouts and returns - the tensor dictionary. - - Args: - model_provider: The provider of the model used in the experiment. - - Returns: - dict: The tensor dictionary. 
- """ - self.task_runner_stub = self.plan.get_core_task_runner(model_provider=model_provider) - self.current_model_status = ModelStatus.INITIAL - tensor_dict, _ = split_tensor_dict_for_holdouts( - self.logger, - self.task_runner_stub.get_tensor_dict(False), - **self.task_runner_stub.tensor_dict_split_fn_kwargs, - ) - return tensor_dict - - def _prepare_plan( - self, - model_provider, - data_loader, - rounds_to_train, - delta_updates, - opt_treatment, - device_assignment_policy, - override_config=None, - model_interface_file="model_obj.pkl", - tasks_interface_file="tasks_obj.pkl", - dataloader_interface_file="loader_obj.pkl", - aggregation_function_interface_file="aggregation_function_obj.pkl", - task_assigner_file="task_assigner_obj.pkl", - ): - """ - Fills the plan.yaml file using user-provided settings. - - It sets up the network, aggregator, collaborator, data loader, task - runner, and API layer according to the user's specifications. - - Args: - model_provider: The provider of the model used in the experiment. - data_loader: The data loader to be used in the experiment. - rounds_to_train (int): The number of rounds to train. - delta_updates (bool): Whether to use delta updates. - opt_treatment (str): The optimization treatment to be used. - device_assignment_policy (str): The device assignment policy to be - used. - override_config (dict, optional): The configuration to override - the default settings. - model_interface_file (str, optional): The file for the model - interface. Defaults to 'model_obj.pkl'. - tasks_interface_file (str, optional): The file for the tasks - interface. Defaults to 'tasks_obj.pkl'. - dataloader_interface_file (str, optional): The file for the data - loader interface. Defaults to 'loader_obj.pkl'. - aggregation_function_interface_file (str, optional): The file for - the aggregation function interface. Defaults to - 'aggregation_function_obj.pkl'. - task_assigner_file (str, optional): The file for the task assigner. - Defaults to 'task_assigner_obj.pkl'. - """ - - # Seems like we still need to fill authorized_cols list - # So aggregator know when to start sending tasks - # We also could change the aggregator logic so it will send tasks to - # aggregator as soon as it connects. 
This change should be a part of a - # bigger PR bringing in fault tolerance changes - - shard_registry = self.federation.get_shard_registry() - self.plan.authorized_cols = [ - name for name, info in shard_registry.items() if info["is_online"] - ] - # Network part of the plan - # We keep in mind that the aggregator FQDN will be the same as the - # director's FQDN - # We just choose a port randomly from the plan hash - director_fqdn = self.federation.director_node_fqdn.split(":")[0] # We drop the port - self.plan.config["network"]["settings"]["agg_addr"] = director_fqdn - self.plan.config["network"]["settings"]["use_tls"] = self.federation.tls - - # Aggregator part of the plan - self.plan.config["aggregator"]["settings"]["rounds_to_train"] = rounds_to_train - - # Collaborator part - self.plan.config["collaborator"]["settings"]["delta_updates"] = delta_updates - self.plan.config["collaborator"]["settings"]["opt_treatment"] = opt_treatment - self.plan.config["collaborator"]["settings"]["device_assignment_policy"] = ( - device_assignment_policy - ) - - # DataLoader part - for setting, value in data_loader.kwargs.items(): - self.plan.config["data_loader"]["settings"][setting] = value - - # TaskRunner framework plugin - # ['required_plugin_components'] should be already in the default plan with all the fields - # filled with the default values - self.plan.config["task_runner"]["required_plugin_components"] = { - "framework_adapters": model_provider.framework_plugin - } - - # API layer - self.plan.config["api_layer"] = { - "required_plugin_components": {"serializer_plugin": self.serializer_plugin}, - "settings": { - "model_interface_file": model_interface_file, - "tasks_interface_file": tasks_interface_file, - "dataloader_interface_file": dataloader_interface_file, - "aggregation_function_interface_file": aggregation_function_interface_file, - "task_assigner_file": task_assigner_file, - }, - } - - if override_config: - self.plan = update_plan(override_config, plan=self.plan, resolve=False) - - def _serialize_interface_objects(self, model_provider, task_keeper, data_loader, task_assigner): - """ - Save python objects to be restored on collaborators. - - This method serializes the provided python objects and saves them for - later use. The objects are serialized using the serializer plugin - specified in the plan configuration. - - Args: - model_provider: The ModelInterface instance to be serialized. - task_keeper: The TaskKeeper instance to be serialized. - data_loader: The DataInterface instance to be serialized. - task_assigner: The task assigner to be serialized. - """ - serializer = self.plan.build( - self.plan.config["api_layer"]["required_plugin_components"]["serializer_plugin"], - {}, - ) - framework_adapter = Plan.build(model_provider.framework_plugin, {}) - # Model provider serialization may need preprocessing steps - framework_adapter.serialization_setup() - - obj_dict = { - "model_interface_file": model_provider, - "tasks_interface_file": task_keeper, - "dataloader_interface_file": data_loader, - "aggregation_function_interface_file": task_keeper.aggregation_functions, - "task_assigner_file": task_assigner, - } - - for filename, object_ in obj_dict.items(): - serializer.serialize(object_, self.plan.config["api_layer"]["settings"][filename]) - - -class TaskKeeper: - """Task keeper class. - - This class is responsible for managing tasks in a federated learning - experiment. It keeps track of registered tasks, their settings, and - aggregation functions.
- - Task should accept the following entities that exist on collaborator nodes: - 1. model - will be rebuilt with relevant weights for every task by - `TaskRunner`. - 2. data_loader - data loader equipped with `repository adapter` that - provides local data. - 3. device - a device to be used on collaborator machines. - 4. optimizer (optional). - - Task returns a dictionary {metric name: metric value for this task} - - Attributes: - task_registry (dict): A dictionary mapping task names to callable - functions. - task_contract (dict): A dictionary mapping task names to their - contract. - task_settings (dict): A dictionary mapping task names to their - settings. - aggregation_functions (dict): A dictionary mapping task names to their - aggregation functions. - _tasks (dict): A dictionary mapping task aliases to Task objects. - """ - - def __init__(self) -> None: - """Initialize task registry.""" - # Mapping 'task name' -> callable - self.task_registry = {} - # Mapping 'task name' -> arguments - self.task_contract = {} - # Mapping 'task name' -> arguments - self.task_settings = defaultdict(dict) - # Mapping 'task name' -> callable - self.aggregation_functions = defaultdict(WeightedAverage) - # Mapping 'task_alias' -> Task - self._tasks: Dict[str, Task] = {} - - def register_fl_task(self, model, data_loader, device, optimizer=None, round_num=None): - """Register FL tasks. - - The task contract should be set up by providing variable names: - [model, data_loader, device] - necessarily - and optimizer - optionally - - All tasks should accept contract entities to be run on a collaborator - node. - Moreover, we ask users to return dict{'metric': value} from every task - ` - TI = TaskInterface() - - task_settings = { - 'batch_size': 32, - 'some_arg': 228, - } - @TI.add_kwargs(**task_settings) - @TI.register_fl_task(model='my_model', data_loader='train_loader', - device='device', optimizer='my_Adam_opt') - def foo_task(my_model, train_loader, my_Adam_opt, device, batch_size, - some_arg=356): - ... - return {'metric_name': metric, 'metric_name_2': metric_2,} - ` - - Args: - model: The model to be used in the task. - data_loader: The data loader to be used in the task. - device: The device to be used in the task. - optimizer (optional): The optimizer to be used in the task. - Defaults to None. - round_num (optional): The round number for the task. Defaults - to None. - """ - - # The highest level wrapper for allowing arguments for the decorator - def decorator_with_args(training_method): - """A high-level wrapper that allows arguments for the decorator. - - This function is a decorator that wraps a training method. It - saves the task and the contract for later serialization. It also - defines tasks based on whether an optimizer is provided. - - Args: - training_method: The training method to be wrapped. - - Returns: - function: The wrapped training method.
- """ - - # We could pass hooks to the decorator - # @functools.wraps(training_method) - - def wrapper_decorator(**task_keywords): - metric_dict = training_method(**task_keywords) - return metric_dict - - # Saving the task and the contract for later serialization - function_name = training_method.__name__ - self.task_registry[function_name] = wrapper_decorator - contract = { - "model": model, - "data_loader": data_loader, - "device": device, - "optimizer": optimizer, - "round_num": round_num, - } - self.task_contract[function_name] = contract - # define tasks - if optimizer: - self._tasks["train"] = TrainTask( - name="train", - function_name=function_name, - ) - else: - self._tasks["locally_tuned_model_validate"] = ValidateTask( - name="locally_tuned_model_validate", - function_name=function_name, - apply_local=True, - ) - self._tasks["aggregated_model_validate"] = ValidateTask( - name="aggregated_model_validate", - function_name=function_name, - ) - # We do not alter user environment - - return training_method - - return decorator_with_args - - def add_kwargs(self, **task_kwargs): - """Register tasks settings. - - Warning! We do not actually need to register additional kwargs, - we ust serialize them. - This one is a decorator because we need task name and - to be consistent with the main registering method - - Args: - **task_kwargs: Keyword arguments for the task settings. - """ - - # The highest level wrapper for allowing arguments for the decorator - def decorator_with_args(training_method): - # Saving the task's settings to be written in plan - self.task_settings[training_method.__name__] = task_kwargs - - return training_method - - return decorator_with_args - - def set_aggregation_function(self, aggregation_function: AggregationFunction): - """Set aggregation function for the task. - - To be serialized and sent to aggregator node. - - There is no support for aggregation functions - containing logic from workspace-related libraries - that are not present on director yet. - - You might need to override default FedAvg aggregation with built-in - aggregation types: - - openfl.interface.aggregation_functions.GeometricMedian - - openfl.interface.aggregation_functions.Median - or define your own AggregationFunction subclass. - See more details on `Overriding the aggregation function`_ - documentation page. - .. _Overriding the aggregation function: - https://openfl.readthedocs.io/en/latest/overriding_agg_fn.html - - Args: - aggregation_function: The aggregation function to be used for - the task. - """ - - def decorator_with_args(training_method): - if not isinstance(aggregation_function, AggregationFunction): - raise Exception( - "aggregation_function must implement AggregationFunction interface." - ) - self.aggregation_functions[training_method.__name__] = aggregation_function - return training_method - - return decorator_with_args - - def get_registered_tasks(self) -> Dict[str, Task]: - """Return registered tasks. - - Returns: - A dictionary mapping task names to Task objects. - """ - return self._tasks - - -# Backward compatibility -TaskInterface = TaskKeeper - - -class ModelInterface: - """Registers model graph and optimizer. - - This class is responsible for managing the model and optimizer in a - federated learning experiment. - - To be serialized and sent to collaborator nodes. - This is the place to determine correct framework adapter - as they are needed to fill the model graph with trained tensors. - - There is no support for several models / optimizers yet. 
- - Attributes: - model: The model to be used in the experiment. - optimizer: The optimizer to be used in the experiment. - framework_plugin: The framework plugin to be used in the experiment. - """ - - def __init__(self, model, optimizer, framework_plugin) -> None: - """Initialize model keeper. - - Tensors in provided graphs will be used for - initialization of the global model. - - Args: - model (Union[Path, str]): The model to be used in the experiment. - optimizer (Union[tuple, optimizer]): The optimizer to be used in - the experiment. - framework_plugin: The framework plugin to be used in the - experiment. - """ - self.model = model - self.optimizer = optimizer - self.framework_plugin = framework_plugin - - def provide_model(self): - """Retrieve model. - - Returns: - The model used in the experiment. - """ - return self.model - - def provide_optimizer(self): - """Retrieve optimizer. - - Returns: - The optimizer used in the experiment. - """ - return self.optimizer - - -class DataInterface: - """The class to define dataloaders. - - This class is responsible for managing the data loaders in a federated - learning experiment. - - In the future users will have to adapt `unified data interface hook` - in their dataloaders. - For now, we can provide `data_path` variable on every collaborator node - at initialization time for dataloader customization. - - Attributes: - kwargs (dict): The keyword arguments for the data loaders. - shard_descriptor: The shard descriptor for the dataloader. - """ - - def __init__(self, **kwargs): - """Initialize DataLoader.""" - self.kwargs = kwargs - - @property - def shard_descriptor(self): - """Return shard descriptor. - - Returns: - The shard descriptor for the data loaders. - """ - return self._shard_descriptor - - @shard_descriptor.setter - def shard_descriptor(self, shard_descriptor): - """Describe per-collaborator procedures or sharding. - - This method will be called during a collaborator initialization. - Local shard_descriptor will be set by Envoy. Subclasses must - override this setter. - - Args: - shard_descriptor: The shard descriptor for the data loaders. - """ - raise NotImplementedError - - def get_train_loader(self, **kwargs): - """Output of this method will be provided to tasks with optimizer in - contract.""" - raise NotImplementedError - - def get_valid_loader(self, **kwargs): - """Output of this method will be provided to tasks without optimizer - in contract.""" - raise NotImplementedError - - def get_train_data_size(self): - """Information for aggregation.""" - raise NotImplementedError - - def get_valid_data_size(self): - """Information for aggregation.""" - raise NotImplementedError diff --git a/openfl/interface/interactive_api/federation.py b/openfl/interface/interactive_api/federation.py deleted file mode 100644 index 11d220c718..0000000000 --- a/openfl/interface/interactive_api/federation.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Federation API module.""" - -from openfl.interface.interactive_api.shard_descriptor import DummyShardDescriptor -from openfl.transport.grpc.director_client import DirectorClient -from openfl.utilities.utils import getfqdn_env - - -class Federation: - """Federation class. - - Manages information about collaborator settings, local data, and network settings.
- - The Federation class is used to maintain information about collaborator-related settings, - their local data, and network settings to enable communication within the federation. - - Attributes: - director_node_fqdn (str): The fully qualified domain name (FQDN) of the director node. - tls (bool): A boolean indicating whether mTLS (mutual Transport Layer Security) is enabled. - cert_chain (str): The path to the certificate chain to the Certificate Authority (CA). - api_cert (str): The path to the API certificate. - api_private_key (str): The path to the API private key. - dir_client (DirectorClient): An instance of the DirectorClient class. - sample_shape (tuple): The shape of the samples in the dataset. - target_shape (tuple): The shape of the targets in the dataset. - """ - - def __init__( - self, - client_id=None, - director_node_fqdn=None, - director_port=None, - tls=True, - cert_chain=None, - api_cert=None, - api_private_key=None, - ) -> None: - """ - Initialize federation. - - Federation API class should be initialized with the Director node FQDN - and encryption settings. One may disable mTLS in trusted environments or - provide paths to a certificate chain to CA, API certificate and - private key to enable mTLS. - - Args: - client_id (str): Name of the created Frontend API instance. This must - match the name on the user's certificate. - director_node_fqdn (str): FQDN of the node the director's service is - running on. A port may be appended after a colon; it is stripped - when the aggregator address is derived. - director_port (int): Port a director's service is running on. - tls (bool): Enable mTLS. - cert_chain (str): Path to a certificate chain to CA. - api_cert (str): Path to API certificate. - api_private_key (str): Path to API private key. - """ - if director_node_fqdn is None: - self.director_node_fqdn = getfqdn_env() - else: - self.director_node_fqdn = director_node_fqdn - - self.tls = tls - - self.cert_chain = cert_chain - self.api_cert = api_cert - self.api_private_key = api_private_key - - # Create Director client - self.dir_client = DirectorClient( - client_id=client_id, - director_host=director_node_fqdn, - director_port=director_port, - tls=tls, - root_certificate=cert_chain, - private_key=api_private_key, - certificate=api_cert, - ) - - # Request sample and target shapes from Director. - # This is an internal method for finding out dataset properties in a Federation. - self.sample_shape, self.target_shape = self.dir_client.get_dataset_info() - - def get_dummy_shard_descriptor(self, size): - """Return a dummy shard descriptor. - - Args: - size (int): Size of the shard descriptor. - - Returns: - DummyShardDescriptor: A dummy shard descriptor. - """ - return DummyShardDescriptor(self.sample_shape, self.target_shape, size) - - def get_shard_registry(self): - """Return a shard registry. - - Returns: - dict: A registry of envoys with their shard info and statuses.
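A hypothetical Federation setup for a trusted (mTLS-off) environment; the host name and port are placeholders:

```python
# Hypothetical Federation construction sketch (mTLS disabled).
federation = Federation(
    client_id="frontend_api",
    director_node_fqdn="director.example.com",
    director_port=50051,
    tls=False,
)
print(federation.get_shard_registry())                      # online envoys and shard info
dummy_sd = federation.get_dummy_shard_descriptor(size=10)   # for offline debugging
```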
- """ - return self.dir_client.get_envoys() diff --git a/openfl/interface/interactive_api/shard_descriptor.py b/openfl/interface/interactive_api/shard_descriptor.py deleted file mode 100644 index 816a50e4be..0000000000 --- a/openfl/interface/interactive_api/shard_descriptor.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Shard descriptor.""" - -from typing import Iterable, List - -import numpy as np - - -class ShardDataset: - """Shard dataset class.""" - - def __len__(self) -> int: - """Return the len of the shard dataset.""" - raise NotImplementedError - - def __getitem__(self, index: int): - """Return an item by the index.""" - raise NotImplementedError - - -class ShardDescriptor: - """Shard descriptor class.""" - - def get_dataset(self, dataset_type: str) -> ShardDataset: - """Return a shard dataset by type. - - Args: - dataset_type (str): The type of the dataset. - - Returns: - ShardDataset: The shard dataset. - """ - raise NotImplementedError - - @property - def sample_shape(self) -> List[int]: - """Return the sample shape info. - - Returns: - List[int]: The sample shape. - """ - raise NotImplementedError - - @property - def target_shape(self) -> List[int]: - """Return the target shape info. - - Returns: - List[int]: The target shape. - """ - raise NotImplementedError - - @property - def dataset_description(self) -> str: - """Return the dataset description. - - Returns: - str: The dataset description. - """ - return "" - - -class DummyShardDataset(ShardDataset): - """Dummy shard dataset class.""" - - def __init__(self, *, size: int, sample_shape: List[int], target_shape: List[int]): - """ - Initialize DummyShardDataset. - - Args: - size (int): The size of the dataset. - sample_shape (List[int]): The shape of the samples. - target_shape (List[int]): The shape of the targets. - """ - self.size = size - self.samples = np.random.randint(0, 255, (self.size, *sample_shape), np.uint8) - self.targets = np.random.randint(0, 255, (self.size, *target_shape), np.uint8) - - def __len__(self) -> int: - """Return the len of the dataset. - - Returns: - int: The length of the dataset. - """ - return self.size - - def __getitem__(self, index: int): - """Return a item by the index. - - Args: - index (int): The index of the item. - - Returns: - tuple: The sample and target at the given index. - """ - return self.samples[index], self.targets[index] - - -class DummyShardDescriptor(ShardDescriptor): - """Dummy shard descriptor class.""" - - def __init__( - self, - sample_shape: Iterable[str], - target_shape: Iterable[str], - size: int, - ) -> None: - """ - Initialize DummyShardDescriptor. - - Args: - sample_shape (Iterable[str]): The shape of the samples. - target_shape (Iterable[str]): The shape of the targets. - size (int): The size of the dataset. - """ - self._sample_shape = [int(dim) for dim in sample_shape] - self._target_shape = [int(dim) for dim in target_shape] - self.size = size - - def get_dataset(self, dataset_type: str) -> ShardDataset: - """ - Return a shard dataset by type. - - Args: - dataset_type (str): The type of the dataset. - - Returns: - ShardDataset: The shard dataset. - """ - return DummyShardDataset( - size=self.size, - sample_shape=self._sample_shape, - target_shape=self._target_shape, - ) - - @property - def sample_shape(self) -> List[int]: - """Return the sample shape info. - - Returns: - List[int]: The sample shape. 
- """ - return self._sample_shape - - @property - def target_shape(self) -> List[int]: - """Return the target shape info. - - Returns: - List[int]: The target shape. - """ - return self._target_shape - - @property - def dataset_description(self) -> str: - """Return the dataset description. - - Returns: - str: The dataset description. - """ - return "Dummy shard descriptor" diff --git a/openfl/native/__init__.py b/openfl/native/__init__.py deleted file mode 100644 index 230ded708d..0000000000 --- a/openfl/native/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""openfl.native package.""" - -from openfl.native.native import * # NOQA diff --git a/openfl/native/fastestimator.py b/openfl/native/fastestimator.py deleted file mode 100644 index d91b1ebc01..0000000000 --- a/openfl/native/fastestimator.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""FederatedFastEstimator module.""" - -import os -from logging import getLogger -from pathlib import Path -from sys import path - -import fastestimator as fe -from fastestimator.trace.io.best_model_saver import BestModelSaver - -import openfl.native as fx -from openfl.federated import Plan -from openfl.federated.data import FastEstimatorDataLoader -from openfl.federated.task import FastEstimatorTaskRunner -from openfl.protocols import utils -from openfl.utilities.split import split_tensor_dict_for_holdouts - - -class FederatedFastEstimator: - """A wrapper for fastestimator.estimator that allows running in federated - mode. - - Attributes: - estimator: The FastEstimator to be used. - logger: A logger to record events. - rounds: The number of rounds to train. - """ - - def __init__(self, estimator, override_config: dict = None, **kwargs): - """Initializes a new instance of the FederatedFastEstimator class. - - Args: - estimator: The FastEstimator to be used. - override_config (dict, optional): A dictionary to override the - default configuration. Defaults to None. - **kwargs: Additional keyword arguments. - """ - self.estimator = estimator - self.logger = getLogger(__name__) - fx.init(**kwargs) - if override_config: - fx.update_plan(override_config) - - def fit(self): # noqa: C901 - """Runs the estimator in federated mode.""" - file = Path(__file__).resolve() - # interface root, containing command modules - root = file.parent.resolve() - work = Path.cwd().resolve() - path.append(str(root)) - path.insert(0, str(work)) - - # TODO: Fix this implementation. 
The full plan parsing is reused here, - # but the model and data will be overwritten based on - # user specifications - plan_config = Path(fx.WORKSPACE_PREFIX) / "plan" / "plan.yaml" - cols_config = Path(fx.WORKSPACE_PREFIX) / "plan" / "cols.yaml" - data_config = Path(fx.WORKSPACE_PREFIX) / "plan" / "data.yaml" - - plan = Plan.parse( - plan_config_path=plan_config, - cols_config_path=cols_config, - data_config_path=data_config, - ) - - self.rounds = plan.config["aggregator"]["settings"]["rounds_to_train"] - data_loader = FastEstimatorDataLoader(self.estimator.pipeline) - runner = FastEstimatorTaskRunner(self.estimator, data_loader=data_loader) - # Overwrite plan values - tensor_pipe = plan.get_tensor_pipe() - # Initialize model weights - init_state_path = plan.config["aggregator"]["settings"]["init_state_path"] - tensor_dict, holdout_params = split_tensor_dict_for_holdouts( - self.logger, runner.get_tensor_dict(False) - ) - - model_snap = utils.construct_model_proto( - tensor_dict=tensor_dict, round_number=0, tensor_pipe=tensor_pipe - ) - - self.logger.info(f"Creating Initial Weights File 🠆 {init_state_path}") - - utils.dump_proto(model_proto=model_snap, fpath=init_state_path) - - self.logger.info("Starting Experiment...") - - aggregator = plan.get_aggregator() - - model_states = dict.fromkeys(plan.authorized_cols, None) - runners = {} - save_dir = {} - data_path = 1 - for col in plan.authorized_cols: - data = self.estimator.pipeline.data - train_data, eval_data, test_data = split_data( - data["train"], - data["eval"], - data["test"], - data_path, - len(plan.authorized_cols), - ) - pipeline_kwargs = {} - for k, v in self.estimator.pipeline.__dict__.items(): - if k in [ - "batch_size", - "ops", - "num_process", - "drop_last", - "pad_value", - "collate_fn", - ]: - pipeline_kwargs[k] = v - pipeline_kwargs.update( - { - "train_data": train_data, - "eval_data": eval_data, - "test_data": test_data, - } - ) - pipeline = fe.Pipeline(**pipeline_kwargs) - - data_loader = FastEstimatorDataLoader(pipeline) - self.estimator.system.pipeline = pipeline - - runners[col] = FastEstimatorTaskRunner( - estimator=self.estimator, data_loader=data_loader - ) - runners[col].set_optimizer_treatment("CONTINUE_LOCAL") - - for trace in runners[col].estimator.system.traces: - if isinstance(trace, BestModelSaver): - save_dir_path = f"{trace.save_dir}/{col}" - os.makedirs(save_dir_path, exist_ok=True) - save_dir[col] = save_dir_path - - data_path += 1 - - # Create the collaborators - collaborators = { - collaborator: fx.create_collaborator( - plan, collaborator, runners[collaborator], aggregator - ) - for collaborator in plan.authorized_cols - } - - model = None - for round_num in range(self.rounds): - for col in plan.authorized_cols: - collaborator = collaborators[col] - - if round_num != 0: - # For FastEstimator Jupyter notebook, models must be - # saved in different directories (i.e. 
path must be - # reset here) - - runners[col].estimator.system.load_state(f"save/{col}_state") - runners[col].rebuild_model(round_num, model_states[col]) - - # Reset the save directory if BestModelSaver is present - # in traces - for trace in runners[col].estimator.system.traces: - if isinstance(trace, BestModelSaver): - trace.save_dir = save_dir[col] - - collaborator.run_simulation() - - model_states[col] = runners[col].get_tensor_dict(with_opt_vars=True) - model = runners[col].model - runners[col].estimator.system.save_state(f"save/{col}_state") - - # TODO This will return the model from the last collaborator, - # NOT the final aggregated model (though they should be similar). - # There should be a method added to the aggregator that will load - # the best model from disk and return it - return model - - -def split_data(train, eva, test, rank, collaborator_count): - """Split data into N parts, where N is the collaborator count. - - Args: - train : The training data. - eva : The evaluation data. - test : The testing data. - rank (int): The rank of the current collaborator. - collaborator_count (int): The total number of collaborators. - - Returns: - tuple: The training, evaluation, and testing data for the current - collaborator. - """ - if collaborator_count == 1: - return train, eva, test - - fraction = [1.0 / float(collaborator_count)] - fraction *= collaborator_count - 1 - - # Expand the split list into individual parameters - train_split = train.split(*fraction) - eva_split = eva.split(*fraction) - test_split = test.split(*fraction) - - train = [train] - eva = [eva] - test = [test] - - if type(train_split) is not list: - train.append(train_split) - eva.append(eva_split) - test.append(test_split) - else: - # Combine all partitions into a single list - train = [train] + train_split - eva = [eva] + eva_split - test = [test] + test_split - - # Extract the right shard - train = train[rank - 1] - eva = eva[rank - 1] - test = test[rank - 1] - - return train, eva, test diff --git a/openfl/native/native.py b/openfl/native/native.py deleted file mode 100644 index f0ed3f3071..0000000000 --- a/openfl/native/native.py +++ /dev/null @@ -1,370 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""OpenFL Native functions module. - -This file defines openfl entrypoints to be used directly through python (not -CLI) -""" - -import json -import logging -import os -from copy import copy -from importlib import util -from logging import basicConfig, getLogger -from pathlib import Path -from sys import path - -import flatten_json -from rich.console import Console -from rich.logging import RichHandler - -import openfl.interface.aggregator as aggregator -import openfl.interface.collaborator as collaborator -import openfl.interface.workspace as workspace -from openfl.federated import Plan -from openfl.protocols import utils -from openfl.utilities import add_log_level -from openfl.utilities.split import split_tensor_dict_for_holdouts - -logger = getLogger(__name__) - -WORKSPACE_PREFIX = os.path.join(os.path.expanduser("~"), ".local", "workspace") - - -def setup_plan(log_level="CRITICAL"): - """ - Dump the plan with all defaults + overrides set. - - Args: - log_level (str, optional): The log level Whether to save the plan to - disk. - Defaults to 'CRITICAL'. - - Returns: - plan: Plan object. 
- """ - plan_config = "plan/plan.yaml" - cols_config = "plan/cols.yaml" - data_config = "plan/data.yaml" - - current_level = logging.root.level - getLogger().setLevel(log_level) - plan = Plan.parse( - plan_config_path=Path(plan_config), - cols_config_path=Path(cols_config), - data_config_path=Path(data_config), - resolve=False, - ) - getLogger().setLevel(current_level) - - return plan - - -def flatten(config, return_complete=False): - """ - Flatten nested config. - - Args: - config (dict): The configuration dictionary to flatten. - return_complete (bool, optional): Whether to return the complete - flattened config. Defaults to False. - - Returns: - flattened_config (dict): The flattened configuration dictionary. - """ - flattened_config = flatten_json.flatten(config, ".") - if not return_complete: - keys_to_remove = [k for k, v in flattened_config.items() if ("defaults" in k or v is None)] - else: - keys_to_remove = [k for k, v in flattened_config.items() if v is None] - for k in keys_to_remove: - del flattened_config[k] - - return flattened_config - - -def update_plan(override_config, plan=None, resolve=True): # noqa: C901 - """Updates the plan with the provided override and saves it to disk. - - For a list of available override options, call `fx.get_plan()` - - Args: - override_config (dict): A dictionary of values to override in the plan. - plan (Plan, optional): The plan to update. If None, a new plan is set - up. Defaults to None. - resolve (bool, optional): Whether to resolve the plan. Defaults to - True. - - Returns: - plan (object): The updated plan. - """ - if plan is None: - plan = setup_plan() - flat_plan_config = flatten(plan.config, return_complete=True) - - org_list_keys_with_count = {} - for k in flat_plan_config: - k_split = k.rsplit(".", 1) - if k_split[1].isnumeric(): - if k_split[0] in org_list_keys_with_count: - org_list_keys_with_count[k_split[0]] += 1 - else: - org_list_keys_with_count[k_split[0]] = 1 - - for key, val in override_config.items(): - if key in org_list_keys_with_count: - # remove old list corresponding to this key entirely - for idx in range(org_list_keys_with_count[key]): - del flat_plan_config[f"{key}.{idx}"] - logger.info("Updating %s to %s... ", key, val) - elif key in flat_plan_config: - logger.info("Updating %s to %s... ", key, val) - else: - # TODO: We probably need to validate the new key somehow - logger.info( - "Did not find %s in config. Make sure it should exist. Creating...", - key, - ) - if type(val) is list: - for idx, v in enumerate(val): - flat_plan_config[f"{key}.{idx}"] = v - else: - flat_plan_config[key] = val - - plan.config = unflatten(flat_plan_config, ".") - if resolve: - plan.resolve() - return plan - - -def unflatten(config, separator="."): - """Unfolds `config` settings that have `separator` in their names. - - Args: - config (dict): The flattened configuration dictionary to unfold. - separator (str, optional): The separator used in the flattened config. - Defaults to '.'. - - Returns: - config (dict): The unfolded configuration dictionary. - """ - config = flatten_json.unflatten_list(config, separator) - return config - - -def setup_logging(level="INFO", log_file=None): - """Initializes logging settings. - - Args: - level (str, optional): The log level. Defaults to 'INFO'. - log_file (str, optional): The name of the file to log to. - If None, logs are not saved to a file. Defaults to None. 
- """ - # Setup logging - - if util.find_spec("tensorflow") is not None: - import tensorflow as tf # pylint: disable=import-outside-toplevel - - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - metric = 25 - add_log_level("METRIC", metric) - - if isinstance(level, str): - level = level.upper() - - handlers = [] - if log_file: - fh = logging.FileHandler(log_file) - formatter = logging.Formatter( - "%(asctime)s %(levelname)s %(message)s %(filename)s:%(lineno)d" - ) - fh.setFormatter(formatter) - handlers.append(fh) - - console = Console(width=160) - handlers.append(RichHandler(console=console)) - basicConfig(level=level, format="%(message)s", datefmt="[%X]", handlers=handlers) - - -def init( - workspace_template: str = "default", - log_level: str = "INFO", - log_file: str = None, - agg_fqdn: str = None, - col_names=None, -): - """ - Initialize the openfl package. - - It performs the following tasks: - - 1. Creates a workspace in ~/.local/workspace (Equivalent to `fx - workspace create --prefix ~/.local/workspace --template - $workspace_template) - 2. Setup certificate authority (equivalent to `fx workspace certify`) - 3. Setup aggregator PKI (equivalent to `fx aggregator - generate-cert-request` followed by `fx aggregator certify`) - 4. Setup list of collaborators (col_names) and their PKI. (Equivalent - to running `fx collaborator generate-cert-request` followed by `fx - collaborator certify` for each of the collaborators in col_names) - 5. Setup logging - - Args: - workspace_template (str): The template that should be used as the - basis for the experiment. Defaults to 'default'. - Other options include are any of the template names - [keras/mnist, torch/histology, torch/mnist]. - log_level (str): Log level for logging. METRIC level is available. - Defaults to 'INFO'. - log_file (str): Name of the file in which the log will be duplicated. - If None, logs are not saved to a file. Defaults to None. - agg_fqdn (str): The local node's fully qualified domain name (if it - can't be resolved automatically). Defaults to None. - col_names (list[str]): The names of the collaborators that will be - created. These collaborators will be set up to participate in the - experiment, but are not required to. Defaults to None. - - Returns: - None - """ - if col_names is None: - col_names = ["one", "two"] - workspace.create(WORKSPACE_PREFIX, workspace_template) - os.chdir(WORKSPACE_PREFIX) - workspace.certify() - aggregator.generate_cert_request(agg_fqdn) - aggregator.certify(agg_fqdn, silent=True) - data_path = 1 - for col_name in col_names: - collaborator.create(col_name, str(data_path), silent=True) - collaborator.generate_cert_request(col_name, silent=True, skip_package=True) - collaborator.certify(col_name, silent=True) - data_path += 1 - - setup_logging(level=log_level, log_file=log_file) - - -def get_collaborator(plan, name, model, aggregator): - """Create the collaborator. - - Using the same plan object to create multiple collaborators leads to - identical collaborator objects. This function can be removed once - collaborator generation is fixed in openfl/federated/plan/plan.py - - Args: - plan (Plan): The plan to use to create the collaborator. - name (str): The name of the collaborator. - model (Model): The model to use for the collaborator. - aggregator (Aggregator): The aggregator to use for the collaborator. - - Returns: - Collaborator: The created collaborator. 
- """ - plan = copy(plan) - - return plan.get_collaborator(name, task_runner=model, client=aggregator) - - -def run_experiment(collaborator_dict: dict, override_config: dict = None): - """Core function that executes the FL Plan. - - Args: - collaborator_dict (dict): A dictionary mapping collaborator names to - their federated models. - Example: {collaborator_name(str): FederatedModel} - This dictionary defines which collaborators will participate in - the experiment, as well as a reference to that collaborator's - federated model. - override_config (dict, optional): A dictionary of values to override - in the plan. Defaults to None. - Example: dict {flplan.key : flplan.value} - Override any of the plan parameters at runtime using this - dictionary. To get a list of the available options, execute - `fx.get_plan()` - - Returns: - model: Final Federated model. The model resulting from the federated - learning experiment - """ - - if override_config is None: - override_config = {} - - file = Path(__file__).resolve() - root = file.parent.resolve() # interface root, containing command modules - work = Path.cwd().resolve() - - path.append(str(root)) - path.insert(0, str(work)) - - # Update the plan if necessary - plan = update_plan(override_config) - # Overwrite plan values - plan.authorized_cols = list(collaborator_dict) - tensor_pipe = plan.get_tensor_pipe() - - # This must be set to the final index of the list (this is the last - # tensorflow session to get created) - plan.runner_ = list(collaborator_dict.values())[-1] - model = plan.runner_ - - # Initialize model weights - init_state_path = plan.config["aggregator"]["settings"]["init_state_path"] - rounds_to_train = plan.config["aggregator"]["settings"]["rounds_to_train"] - tensor_dict, holdout_params = split_tensor_dict_for_holdouts( - logger, model.get_tensor_dict(False) - ) - - model_snap = utils.construct_model_proto( - tensor_dict=tensor_dict, round_number=0, tensor_pipe=tensor_pipe - ) - - logger.info("Creating Initial Weights File 🠆 %s", init_state_path) - - utils.dump_proto(model_proto=model_snap, fpath=init_state_path) - - logger.info("Starting Experiment...") - - aggregator = plan.get_aggregator() - - # get the collaborators - collaborators = { - collaborator: get_collaborator( - plan, collaborator, collaborator_dict[collaborator], aggregator - ) - for collaborator in plan.authorized_cols - } - - for _ in range(rounds_to_train): - for col in plan.authorized_cols: - collaborator = collaborators[col] - collaborator.run_simulation() - - # Set the weights for the final model - model.rebuild_model(rounds_to_train - 1, aggregator.last_tensor_dict, validation=True) - return model - - -def get_plan(fl_plan=None, indent=4, sort_keys=True): - """Returns a string representation of the current Plan. - - Args: - fl_plan (Plan): The plan to get a string representation of. If None, a - new plan is set up. Defaults to None. - indent (int): The number of spaces to use for indentation in the - string representation. Defaults to 4. - sort_keys (bool): Whether to sort the keys in the string - representation. Defaults to True. - - Returns: - str: A string representation of the plan. 
- """ - if fl_plan is None: - plan = setup_plan() - else: - plan = fl_plan - flat_plan_config = flatten(plan.config) - return json.dumps(flat_plan_config, indent=indent, sort_keys=sort_keys) diff --git a/openfl/transport/__init__.py b/openfl/transport/__init__.py index 895ec38054..72bc7864c8 100644 --- a/openfl/transport/__init__.py +++ b/openfl/transport/__init__.py @@ -2,4 +2,4 @@ # SPDX-License-Identifier: Apache-2.0 -from openfl.transport.grpc import AggregatorGRPCClient, AggregatorGRPCServer, DirectorGRPCServer +from openfl.transport.grpc import AggregatorGRPCClient, AggregatorGRPCServer diff --git a/openfl/transport/grpc/__init__.py b/openfl/transport/grpc/__init__.py index 4f560a0a84..cd452bfe6c 100644 --- a/openfl/transport/grpc/__init__.py +++ b/openfl/transport/grpc/__init__.py @@ -4,8 +4,3 @@ from openfl.transport.grpc.aggregator_client import AggregatorGRPCClient from openfl.transport.grpc.aggregator_server import AggregatorGRPCServer -from openfl.transport.grpc.director_server import DirectorGRPCServer - - -class ShardNotFoundError(Exception): - """Indicates that director has no information about that shard.""" diff --git a/openfl/transport/grpc/director_client.py b/openfl/transport/grpc/director_client.py deleted file mode 100644 index 2acf3ca3e0..0000000000 --- a/openfl/transport/grpc/director_client.py +++ /dev/null @@ -1,522 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Director clients module.""" - -import logging -from datetime import datetime -from typing import List, Type - -import grpc - -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor -from openfl.pipelines import NoCompressionPipeline -from openfl.protocols import director_pb2, director_pb2_grpc, interceptors -from openfl.protocols.utils import construct_model_proto, deconstruct_model_proto -from openfl.transport.grpc.director_server import CLIENT_ID_DEFAULT -from openfl.transport.grpc.exceptions import ShardNotFoundError -from openfl.transport.grpc.grpc_channel_options import channel_options - -logger = logging.getLogger(__name__) - - -class ShardDirectorClient: - """ - The internal director client class. - - This class communicates with the director to manage the shard's - participation in the federation. - - Attributes: - shard_name (str): The name of the shard. - stub (director_pb2_grpc.DirectorStub): The gRPC stub for communication - with the director. - """ - - def __init__( - self, - *, - director_host, - director_port, - shard_name, - tls=True, - root_certificate=None, - private_key=None, - certificate=None, - ) -> None: - """ - Initialize a shard director client object. - - Args: - director_host (str): The host of the director. - director_port (int): The port of the director. - shard_name (str): The name of the shard. - tls (bool): Whether to use TLS for the connection. - root_certificate (str): The path to the root certificate for the - TLS connection. - private_key (str): The path to the private key for the TLS - connection. - certificate (str): The path to the certificate for the TLS - connection. 
- """ - self.shard_name = shard_name - director_addr = f"{director_host}:{director_port}" - logger.info("Director address: %s", director_addr) - if not tls: - channel = grpc.insecure_channel(director_addr, options=channel_options) - else: - if not (root_certificate and private_key and certificate): - raise Exception("No certificates provided") - try: - with open(root_certificate, "rb") as f: - root_certificate_b = f.read() - with open(private_key, "rb") as f: - private_key_b = f.read() - with open(certificate, "rb") as f: - certificate_b = f.read() - except FileNotFoundError as exc: - raise Exception(f"Provided certificate file is not exist: {exc.filename}") - - credentials = grpc.ssl_channel_credentials( - root_certificates=root_certificate_b, - private_key=private_key_b, - certificate_chain=certificate_b, - ) - channel = grpc.secure_channel(director_addr, credentials, options=channel_options) - self.stub = director_pb2_grpc.DirectorStub(channel) - - def report_shard_info( - self, shard_descriptor: Type[ShardDescriptor], cuda_devices: tuple - ) -> bool: - """ - Report shard info to the director. - - Args: - shard_descriptor (Type[ShardDescriptor]): The descriptor of the - shard. - cuda_devices (tuple): The CUDA devices available on the shard. - - Returns: - acknowledgement (bool): Whether the report was accepted by the - director. - """ - logger.info("Sending %s shard info to director", self.shard_name) - # True considered as successful registration - shard_info = director_pb2.ShardInfo( - shard_description=shard_descriptor.dataset_description, - sample_shape=shard_descriptor.sample_shape, - target_shape=shard_descriptor.target_shape, - ) - - shard_info.node_info.name = self.shard_name - shard_info.node_info.cuda_devices.extend( - director_pb2.CudaDeviceInfo(index=cuda_device) for cuda_device in cuda_devices - ) - - request = director_pb2.UpdateShardInfoRequest(shard_info=shard_info) - acknowledgement = self.stub.UpdateShardInfo(request) - return acknowledgement.accepted - - def wait_experiment(self): - """ - Wait an experiment data from the director. - - Returns: - experiment_name (str): The name of the experiment. - """ - logger.info("Waiting for an experiment to run...") - response = self.stub.WaitExperiment(self._get_experiment_data()) - logger.info("New experiment received: %s", response) - experiment_name = response.experiment_name - if not experiment_name: - raise Exception("No experiment") - - return experiment_name - - def get_experiment_data(self, experiment_name): - """ - Get an experiment data from the director. - - Args: - experiment_name (str): The name of the experiment. - - Returns: - data_stream (grpc._channel._MultiThreadedRendezvous): The data - stream of the experiment data. - """ - logger.info("Getting experiment data for %s...", experiment_name) - request = director_pb2.GetExperimentDataRequest( - experiment_name=experiment_name, collaborator_name=self.shard_name - ) - data_stream = self.stub.GetExperimentData(request) - - return data_stream - - def set_experiment_failed( - self, - experiment_name: str, - error_code: int = 1, - error_description: str = "", - ): - """ - Set the experiment failed. - - Args: - experiment_name (str): The name of the experiment. - error_code (int, optional): The error code. Defaults to 1. - error_description (str, optional): The description of the error. - Defaults to ''. 
- """ - logger.info("Experiment %s failed", experiment_name) - request = director_pb2.SetExperimentFailedRequest( - experiment_name=experiment_name, - collaborator_name=self.shard_name, - error_code=error_code, - error_description=error_description, - ) - self.stub.SetExperimentFailed(request) - - def _get_experiment_data(self): - """Generate the experiment data request. - - Returns: - director_pb2.WaitExperimentRequest: The request for experiment - data. - """ - return director_pb2.WaitExperimentRequest(collaborator_name=self.shard_name) - - def send_health_check( - self, - *, - envoy_name: str, - is_experiment_running: bool, - cuda_devices_info: List[dict] = None, - ) -> int: - """Send envoy health check. - - Args: - envoy_name (str): The name of the envoy. - is_experiment_running (bool): Whether an experiment is currently - running. - cuda_devices_info (List[dict], optional): Information about the - CUDA devices. Defaults to None. - - Returns: - health_check_period (int): The period for health checks. - """ - status = director_pb2.UpdateEnvoyStatusRequest( - name=envoy_name, - is_experiment_running=is_experiment_running, - ) - - cuda_messages = [] - if cuda_devices_info is not None: - try: - cuda_messages = [director_pb2.CudaDeviceInfo(**item) for item in cuda_devices_info] - except Exception as e: - logger.info("%s", e) - - status.cuda_devices.extend(cuda_messages) - - logger.debug("Sending health check status: %s", status) - - try: - response = self.stub.UpdateEnvoyStatus(status) - except grpc.RpcError as rpc_error: - logger.error(rpc_error) - if rpc_error.code() == grpc.StatusCode.NOT_FOUND: - raise ShardNotFoundError - else: - health_check_period = response.health_check_period.seconds - - return health_check_period - - -class DirectorClient: - """Director client class for users. - - This class communicates with the director to manage the user's - participation in the federation. - - Attributes: - stub (director_pb2_grpc.DirectorStub): The gRPC stub for communication - with the director. - """ - - def __init__( - self, - *, - client_id: str, - director_host: str, - director_port: int, - tls: bool, - root_certificate: str, - private_key: str, - certificate: str, - ) -> None: - """ - Initialize director client object. - - Args: - client_id (str): The ID of the client. - director_host (str): The host of the director. - director_port (int): The port of the director. - tls (bool): Whether to use TLS for the connection. - root_certificate (str): The path to the root certificate for the - TLS connection. - private_key (str): The path to the private key for the TLS - connection. - certificate (str): The path to the certificate for the TLS - connection. 
- """ - director_addr = f"{director_host}:{director_port}" - if not tls: - if not client_id: - client_id = CLIENT_ID_DEFAULT - channel = grpc.insecure_channel(director_addr, options=channel_options) - headers = { - "client_id": client_id, - } - header_interceptor = interceptors.headers_adder(headers) - channel = grpc.intercept_channel(channel, header_interceptor) - else: - if not (root_certificate and private_key and certificate): - raise Exception("No certificates provided") - try: - with open(root_certificate, "rb") as f: - root_certificate_b = f.read() - with open(private_key, "rb") as f: - private_key_b = f.read() - with open(certificate, "rb") as f: - certificate_b = f.read() - except FileNotFoundError as exc: - raise Exception(f"Provided certificate file is not exist: {exc.filename}") - - credentials = grpc.ssl_channel_credentials( - root_certificates=root_certificate_b, - private_key=private_key_b, - certificate_chain=certificate_b, - ) - - channel = grpc.secure_channel(director_addr, credentials, options=channel_options) - self.stub = director_pb2_grpc.DirectorStub(channel) - - def set_new_experiment(self, name, col_names, arch_path, initial_tensor_dict=None): - """ - Send the new experiment to director to launch. - - Args: - name (str): The name of the experiment. - col_names (List[str]): The names of the collaborators. - arch_path (str): The path to the architecture. - initial_tensor_dict (dict, optional): The initial tensor - dictionary. Defaults to None. - - Returns: - resp (director_pb2.SetNewExperimentResponse): The response from - the director. - """ - logger.info("Submitting new experiment %s to director", name) - if initial_tensor_dict: - model_proto = construct_model_proto(initial_tensor_dict, 0, NoCompressionPipeline()) - experiment_info_gen = self._get_experiment_info( - arch_path=arch_path, - name=name, - col_names=col_names, - model_proto=model_proto, - ) - resp = self.stub.SetNewExperiment(experiment_info_gen) - return resp - - def _get_experiment_info(self, arch_path, name, col_names, model_proto): - """ - Generate the experiment data request. - - This method generates a stream of experiment data to be sent to the - director. - - Args: - arch_path (str): The path to the architecture. - name (str): The name of the experiment. - col_names (List[str]): The names of the collaborators. - model_proto (ModelProto): The initial model. - - Yields: - director_pb2.ExperimentInfo: The experiment data. - """ - with open(arch_path, "rb") as arch: - max_buffer_size = 2 * 1024 * 1024 - chunk = arch.read(max_buffer_size) - while chunk != b"": - if not chunk: - raise StopIteration - # TODO: add hash or/and size to check - experiment_info = director_pb2.ExperimentInfo( - name=name, - collaborator_names=col_names, - model_proto=model_proto, - ) - experiment_info.experiment_data.size = len(chunk) - experiment_info.experiment_data.npbytes = chunk - yield experiment_info - chunk = arch.read(max_buffer_size) - - def get_experiment_status(self, experiment_name): - """ - Check if the experiment was accepted by the director. - - Args: - experiment_name (str): The name of the experiment. - - Returns: - resp (director_pb2.GetExperimentStatusResponse): The response from - the director. - """ - logger.info("Getting experiment Status...") - request = director_pb2.GetExperimentStatusRequest(experiment_name=experiment_name) - resp = self.stub.GetExperimentStatus(request) - return resp - - def get_dataset_info(self): - """Request the dataset info from the director. 
- - Returns: - Tuple[List[int], List[int]]: The sample shape and target shape of - the dataset. - """ - resp = self.stub.GetDatasetInfo(director_pb2.GetDatasetInfoRequest()) - return resp.shard_info.sample_shape, resp.shard_info.target_shape - - def _get_trained_model(self, experiment_name, model_type): - """Get trained model RPC. - - Args: - experiment_name (str): The name of the experiment. - model_type (director_pb2.GetTrainedModelRequest.ModelType): The - type of the model. - - Returns: - tensor_dict (Dict[str, numpy.ndarray]): The trained model. - """ - get_model_request = director_pb2.GetTrainedModelRequest( - experiment_name=experiment_name, - model_type=model_type, - ) - model_proto_response = self.stub.GetTrainedModel(get_model_request) - tensor_dict, _ = deconstruct_model_proto( - model_proto_response.model_proto, - NoCompressionPipeline(), - ) - return tensor_dict - - def get_best_model(self, experiment_name): - """Get best model method. - - Args: - experiment_name (str): The name of the experiment. - - Returns: - Dict[str, numpy.ndarray]: The best model. - """ - model_type = director_pb2.GetTrainedModelRequest.BEST_MODEL - return self._get_trained_model(experiment_name, model_type) - - def get_last_model(self, experiment_name): - """Get last model method. - - Args: - experiment_name (str): The name of the experiment. - - Returns: - Dict[str, numpy.ndarray]: The last model. - """ - model_type = director_pb2.GetTrainedModelRequest.LAST_MODEL - return self._get_trained_model(experiment_name, model_type) - - def stream_metrics(self, experiment_name): - """Stream metrics RPC. - - Args: - experiment_name (str): The name of the experiment. - - Yields: - Dict[str, Any]: The metrics. - """ - request = director_pb2.GetMetricStreamRequest(experiment_name=experiment_name) - for metric_message in self.stub.GetMetricStream(request): - yield { - "metric_origin": metric_message.metric_origin, - "task_name": metric_message.task_name, - "metric_name": metric_message.metric_name, - "metric_value": metric_message.metric_value, - "round": metric_message.round, - } - - def remove_experiment_data(self, name): - """Remove experiment data RPC. - - Args: - name (str): The name of the experiment. - - Returns: - bool: Whether the removal was acknowledged. - """ - request = director_pb2.RemoveExperimentRequest(experiment_name=name) - response = self.stub.RemoveExperimentData(request) - return response.acknowledgement - - def get_envoys(self, raw_result=False): - """Get envoys info. - - Args: - raw_result (bool, optional): Whether to return the raw result. - Defaults to False. - - Returns: - result (Union[director_pb2.GetEnvoysResponse, - Dict[str, Dict[str, Any]]]): The envoys info. - """ - envoys = self.stub.GetEnvoys(director_pb2.GetEnvoysRequest()) - if raw_result: - return envoys - now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - result = {} - for envoy in envoys.envoy_infos: - result[envoy.shard_info.node_info.name] = { - "shard_info": envoy.shard_info, - "is_online": envoy.is_online or False, - "is_experiment_running": envoy.is_experiment_running or False, - "last_updated": datetime.fromtimestamp(envoy.last_updated.seconds).strftime( - "%Y-%m-%d %H:%M:%S" - ), - "current_time": now, - "valid_duration": envoy.valid_duration, - "experiment_name": "ExperimentName Mock", - } - return result - - def get_experiments_list(self): - """ - Get experiments list. - - Returns: - List[str]: The list of experiments. 
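Tying the frontend RPCs above together, a hypothetical monitoring snippet; `dir_client` is a DirectorClient and the experiment name is a placeholder:

```python
# Hypothetical frontend monitoring sketch.
status = dir_client.get_experiment_status("mnist_demo")
for metric in dir_client.stream_metrics("mnist_demo"):
    print(metric["round"], metric["metric_name"], metric["metric_value"])
best_model = dir_client.get_best_model("mnist_demo")  # {tensor_name: np.ndarray}
```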
- """ - response = self.stub.GetExperimentsList(director_pb2.GetExperimentsListRequest()) - return response.experiments - - def get_experiment_description(self, name): - """Get experiment info. - - Args: - name (str): The name of the experiment. - - Returns: - director_pb2.ExperimentDescription: The description of the - experiment. - """ - response = self.stub.GetExperimentDescription( - director_pb2.GetExperimentDescriptionRequest(name=name) - ) - return response.experiment diff --git a/openfl/transport/grpc/director_server.py b/openfl/transport/grpc/director_server.py deleted file mode 100644 index 5a606c9ecf..0000000000 --- a/openfl/transport/grpc/director_server.py +++ /dev/null @@ -1,566 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""Director server.""" - -import asyncio -import logging -import uuid -from pathlib import Path -from typing import Callable, Optional, Union - -import grpc -from google.protobuf.json_format import MessageToDict, ParseDict -from grpc import aio, ssl_server_credentials - -from openfl.pipelines import NoCompressionPipeline -from openfl.protocols import base_pb2, director_pb2, director_pb2_grpc -from openfl.protocols.utils import construct_model_proto, deconstruct_model_proto, get_headers -from openfl.transport.grpc.exceptions import ShardNotFoundError -from openfl.transport.grpc.grpc_channel_options import channel_options - -logger = logging.getLogger(__name__) - -CLIENT_ID_DEFAULT = "__default__" - - -class DirectorGRPCServer(director_pb2_grpc.DirectorServicer): - """ - Director transport class. - - This class implements a gRPC server for the Director, allowing it to - communicate with collaborators. - - Attributes: - director (Director): The director that this server is serving. - listen_uri (str): The URI that the server is serving on. - tls (bool): Whether to use TLS for the connection. - root_certificate (Path): The path to the root certificate for the TLS - connection. - private_key (Path): The path to the server's private key for the TLS - connection. - certificate (Path): The path to the server's certificate for the TLS - connection. - server (grpc.Server): The gRPC server. - """ - - def __init__( - self, - *, - director_cls, - tls: bool = True, - root_certificate: Optional[Union[Path, str]] = None, - private_key: Optional[Union[Path, str]] = None, - certificate: Optional[Union[Path, str]] = None, - review_plan_callback: Union[None, Callable] = None, - listen_host: str = "[::]", - listen_port: int = 50051, - envoy_health_check_period: int = 0, - **kwargs, - ) -> None: - """ - Initialize a director object. - - Args: - director_cls (Type[Director]): The class of the director. - tls (bool, optional): Whether to use TLS for the connection. - Defaults to True. - root_certificate (Optional[Union[Path, str]], optional): The path - to the root certificate for the TLS connection. Defaults to - None. - private_key (Optional[Union[Path, str]], optional): The path to - the server's private key for the TLS connection. Defaults to - None. - certificate (Optional[Union[Path, str]], optional): The path to - the server's certificate for the TLS connection. Defaults to - None. - review_plan_callback (Union[None, Callable], optional): The - callback for reviewing the plan. Defaults to None. - listen_host (str, optional): The host to listen on. Defaults to - '[::]'. - listen_port (int, optional): The port to listen on. Defaults to - 50051. - envoy_health_check_period (int, optional): The period for health - checks. 
Defaults to 0. - **kwargs: Additional keyword arguments. - """ - # TODO: add working directory - super().__init__() - - self.listen_uri = f"{listen_host}:{listen_port}" - self.tls = tls - self.root_certificate = None - self.private_key = None - self.certificate = None - self._fill_certs(root_certificate, private_key, certificate) - self.server = None - self.root_dir = Path.cwd() - self.director = director_cls( - tls=self.tls, - root_certificate=self.root_certificate, - private_key=self.private_key, - certificate=self.certificate, - review_plan_callback=review_plan_callback, - envoy_health_check_period=envoy_health_check_period, - **kwargs, - ) - - def _fill_certs(self, root_certificate, private_key, certificate): - """Fill certificates. - - Args: - root_certificate (Union[Path, str]): The path to the root - certificate for the TLS connection. - private_key (Union[Path, str]): The path to the server's private - key for the TLS connection. - certificate (Union[Path, str]): The path to the server's - certificate for the TLS connection. - """ - if self.tls: - if not (root_certificate and private_key and certificate): - raise Exception("No certificates provided") - self.root_certificate = Path(root_certificate).absolute() - self.private_key = Path(private_key).absolute() - self.certificate = Path(certificate).absolute() - - def get_caller(self, context): - """Get caller name from context. - - if tls == True: get caller name from auth_context - if tls == False: get caller name from context header 'client_id' - - Args: - context (grpc.ServicerContext): The context of the request. - - Returns: - str: The name of the caller. - """ - if self.tls: - return context.auth_context()["x509_common_name"][0].decode("utf-8") - headers = get_headers(context) - client_id = headers.get("client_id", CLIENT_ID_DEFAULT) - return client_id - - def start(self): - """Launch the director GRPC server.""" - loop = asyncio.get_event_loop() # TODO: refactor after end of support for python3.6 - loop.create_task(self.director.start_experiment_execution_loop()) - loop.run_until_complete(self._run_server()) - - async def _run_server(self): - """Run the gRPC server.""" - self.server = aio.server(options=channel_options) - director_pb2_grpc.add_DirectorServicer_to_server(self, self.server) - - if not self.tls: - self.server.add_insecure_port(self.listen_uri) - else: - with open(self.private_key, "rb") as f: - private_key_b = f.read() - with open(self.certificate, "rb") as f: - certificate_b = f.read() - with open(self.root_certificate, "rb") as f: - root_certificate_b = f.read() - server_credentials = ssl_server_credentials( - ((private_key_b, certificate_b),), - root_certificates=root_certificate_b, - require_client_auth=True, - ) - self.server.add_secure_port(self.listen_uri, server_credentials) - logger.info("Starting director server on %s", self.listen_uri) - await self.server.start() - await self.server.wait_for_termination() - - async def UpdateShardInfo(self, request, context): # NOQA:N802 - """ - Receive acknowledge shard info. - - Args: - request (director_pb2.UpdateShardInfoRequest): The request from - the shard. - context (grpc.ServicerContext): The context of the request. - - Returns: - reply (director_pb2.UpdateShardInfoResponse): The response to the - request. 
- """ - logger.info("Updating shard info: %s", request.shard_info) - dict_shard_info = MessageToDict(request.shard_info, preserving_proto_field_name=True) - is_accepted = self.director.acknowledge_shard(dict_shard_info) - reply = director_pb2.UpdateShardInfoResponse(accepted=is_accepted) - - return reply - - async def SetNewExperiment(self, stream, context): # NOQA:N802 - """Request to set new experiment. - - Args: - stream (grpc.aio._MultiThreadedRendezvous): The stream of - experiment data. - context (grpc.ServicerContext): The context of the request. - - Returns: - director_pb2.SetNewExperimentResponse: The response to the request. - """ - # TODO: add streaming reader - data_file_path = self.root_dir / str(uuid.uuid4()) - with open(data_file_path, "wb") as data_file: - async for request in stream: - if request.experiment_data.size == len(request.experiment_data.npbytes): - data_file.write(request.experiment_data.npbytes) - else: - raise Exception("Could not register new experiment") - - tensor_dict = None - if request.model_proto: - tensor_dict, _ = deconstruct_model_proto(request.model_proto, NoCompressionPipeline()) - - caller = self.get_caller(context) - - is_accepted = await self.director.set_new_experiment( - experiment_name=request.name, - sender_name=caller, - tensor_dict=tensor_dict, - collaborator_names=request.collaborator_names, - experiment_archive_path=data_file_path, - ) - - logger.info("Experiment %s registered", request.name) - return director_pb2.SetNewExperimentResponse(accepted=is_accepted) - - async def GetExperimentStatus(self, request, context): # NOQA: N802 - """ - Get experiment status and update if experiment was approved. - - Args: - request (director_pb2.GetExperimentStatusRequest): The request - from the collaborator. - context (grpc.ServicerContext): The context of the request. - - Returns: - director_pb2.GetExperimentStatusResponse: The response to the - request. - """ - logger.debug("GetExperimentStatus request received") - caller = self.get_caller(context) - experiment_status = await self.director.get_experiment_status( - experiment_name=request.experiment_name, caller=caller - ) - logger.debug("Sending GetExperimentStatus response") - return director_pb2.GetExperimentStatusResponse(experiment_status=experiment_status) - - async def GetTrainedModel(self, request, context): # NOQA:N802 - """ - RPC for retrieving trained models. - - Args: - request (director_pb2.GetTrainedModelRequest): The request from - the collaborator. - context (grpc.ServicerContext): The context of the request. - - Returns: - director_pb2.TrainedModelResponse: The response to the request. 
- """ - logger.debug("Received request for trained model...") - - if request.model_type == director_pb2.GetTrainedModelRequest.BEST_MODEL: - model_type = "best" - elif request.model_type == director_pb2.GetTrainedModelRequest.LAST_MODEL: - model_type = "last" - else: - logger.error("Incorrect model type") - return director_pb2.TrainedModelResponse() - - caller = self.get_caller(context) - - trained_model_dict = self.director.get_trained_model( - experiment_name=request.experiment_name, - caller=caller, - model_type=model_type, - ) - - if trained_model_dict is None: - return director_pb2.TrainedModelResponse() - - model_proto = construct_model_proto(trained_model_dict, 0, NoCompressionPipeline()) - - logger.debug("Sending trained model") - - return director_pb2.TrainedModelResponse(model_proto=model_proto) - - async def GetExperimentData(self, request, context): # NOQA:N802 - """Receive experiment data. - - Args: - request (director_pb2.GetExperimentDataRequest): The request from - the collaborator. - context (grpc.ServicerContext): The context of the request. - - Yields: - director_pb2.ExperimentData: The experiment data. - """ - # TODO: add size filling - # TODO: add experiment name field - # TODO: rename npbytes to data - data_file_path = self.director.get_experiment_data(request.experiment_name) - max_buffer_size = 2 * 1024 * 1024 - with open(data_file_path, "rb") as df: - while True: - data = df.read(max_buffer_size) - if len(data) == 0: - break - yield director_pb2.ExperimentData(size=len(data), npbytes=data) - - async def WaitExperiment(self, request, context): # NOQA:N802 - """ - Request for wait an experiment. - - Args: - request (director_pb2.WaitExperimentRequest): The request from the - collaborator. - context (grpc.ServicerContext): The context of the request. - - Returns: - director_pb2.WaitExperimentResponse: The response to the request. - """ - logger.debug( - "Request WaitExperiment received from envoy %s", - request.collaborator_name, - ) - experiment_name = await self.director.wait_experiment(request.collaborator_name) - logger.debug( - "Experiment %s is ready for %s", - experiment_name, - request.collaborator_name, - ) - - return director_pb2.WaitExperimentResponse(experiment_name=experiment_name) - - async def GetDatasetInfo(self, request, context): # NOQA:N802 - """ - Request the info about target and sample shapes in the dataset. - - Args: - request (director_pb2.GetDatasetInfoRequest): The request from the - collaborator. - context (grpc.ServicerContext): The context of the request. - - Returns: - director_pb2.GetDatasetInfoResponse: The response to the request. - """ - logger.debug("Received request for dataset info...") - - sample_shape, target_shape = self.director.get_dataset_info() - shard_info = director_pb2.ShardInfo(sample_shape=sample_shape, target_shape=target_shape) - resp = director_pb2.GetDatasetInfoResponse(shard_info=shard_info) - - logger.debug("Sending dataset info") - return resp - - async def GetMetricStream(self, request, context): # NOQA:N802 - """ - Request to stream metrics from the aggregator to frontend. - - Args: - request (director_pb2.GetMetricStreamRequest): The request from - the collaborator. - context (grpc.ServicerContext): The context of the request. - - Yields: - director_pb2.GetMetricStreamResponse: The metrics. 
- """ - logger.info("Getting metrics for %s...", request.experiment_name) - - caller = self.get_caller(context) - async for metric_dict in self.director.stream_metrics( - experiment_name=request.experiment_name, caller=caller - ): - if metric_dict is None: - await asyncio.sleep(1) - continue - yield director_pb2.GetMetricStreamResponse(**metric_dict) - - async def RemoveExperimentData(self, request, context): # NOQA:N802 - """Remove experiment data RPC. - - Args: - request (director_pb2.RemoveExperimentRequest): The request from - the collaborator. - context (grpc.ServicerContext): The context of the request. - - Returns: - response (director_pb2.RemoveExperimentResponse): The response to - the request. - """ - response = director_pb2.RemoveExperimentResponse(acknowledgement=False) - caller = self.get_caller(context) - self.director.remove_experiment_data( - experiment_name=request.experiment_name, - caller=caller, - ) - - response.acknowledgement = True - return response - - async def SetExperimentFailed(self, request, context): # NOQA:N802 - """Set the experiment failed. - - Args: - request (director_pb2.SetExperimentFailedRequest): The request - from the collaborator. - context (grpc.ServicerContext): The context of the request. - - Returns: - response (director_pb2.SetExperimentFailedResponse): The response - to the request. - """ - response = director_pb2.SetExperimentFailedResponse() - if self.get_caller(context) != CLIENT_ID_DEFAULT: - return response - logger.error( - f"Collaborator {request.collaborator_name} failed with error code:" - f" {request.error_code}, error_description: {request.error_description}" - f"Stopping experiment." - ) - self.director.set_experiment_failed( - experiment_name=request.experiment_name, - collaborator_name=request.collaborator_name, - ) - - return response - - async def UpdateEnvoyStatus(self, request, context): # NOQA:N802 - """ - Accept health check from envoy. - - Args: - request (director_pb2.UpdateEnvoyStatusRequest): The request from - the envoy. - context (grpc.ServicerContext): The context of the request. - - Returns: - resp (director_pb2.UpdateEnvoyStatusResponse): The response to the - request. - """ - logger.debug("Updating envoy status: %s", request) - cuda_devices_info = [ - MessageToDict(message, preserving_proto_field_name=True) - for message in request.cuda_devices - ] - try: - health_check_period = self.director.update_envoy_status( - envoy_name=request.name, - is_experiment_running=request.is_experiment_running, - cuda_devices_status=cuda_devices_info, - ) - except ShardNotFoundError as exc: - logger.error(exc) - await context.abort(grpc.StatusCode.NOT_FOUND, str(exc)) - else: - resp = director_pb2.UpdateEnvoyStatusResponse() - resp.health_check_period.seconds = health_check_period - - return resp - - async def GetEnvoys(self, request, context): # NOQA:N802 - """Get a status information about envoys. - - Args: - request (director_pb2.GetEnvoysRequest): The request from the - collaborator. - context (grpc.ServicerContext): The context of the request. - - Returns: - director_pb2.GetEnvoysResponse: The response to the request. 
- """ - envoy_infos = self.director.get_envoys() - envoy_statuses = [] - for envoy_info in envoy_infos: - envoy_info_message = director_pb2.EnvoyInfo( - shard_info=ParseDict( - envoy_info["shard_info"], - director_pb2.ShardInfo(), - ignore_unknown_fields=True, - ), - is_online=envoy_info["is_online"], - is_experiment_running=envoy_info["is_experiment_running"], - ) - envoy_info_message.valid_duration.seconds = envoy_info["valid_duration"] - envoy_info_message.last_updated.seconds = int(envoy_info["last_updated"]) - - envoy_statuses.append(envoy_info_message) - - return director_pb2.GetEnvoysResponse(envoy_infos=envoy_statuses) - - async def GetExperimentsList(self, request, context): # NOQA:N802 - """Get list of experiments description. - - Args: - request (director_pb2.GetExperimentsListRequest): The request from - the collaborator. - context (grpc.ServicerContext): The context of the request. - - Returns: - director_pb2.GetExperimentsListResponse: The response to the - request. - """ - caller = self.get_caller(context) - experiments = self.director.get_experiments_list(caller) - experiment_list = [director_pb2.ExperimentListItem(**exp) for exp in experiments] - return director_pb2.GetExperimentsListResponse(experiments=experiment_list) - - async def GetExperimentDescription(self, request, context): # NOQA:N802 - """Get an experiment description. - - Args: - request (director_pb2.GetExperimentDescriptionRequest): The - request from the collaborator. - context (grpc.ServicerContext): The context of the request. - - Returns: - director_pb2.GetExperimentDescriptionResponse: The response to the - request. - """ - caller = self.get_caller(context) - experiment = self.director.get_experiment_description(caller, request.name) - models_statuses = [ - base_pb2.DownloadStatus(name=ms["name"], status=ms["status"]) - for ms in experiment["download_statuses"]["models"] - ] - logs_statuses = [ - base_pb2.DownloadStatus(name=ls["name"], status=ls["status"]) - for ls in experiment["download_statuses"]["logs"] - ] - download_statuses = base_pb2.DownloadStatuses( - models=models_statuses, - logs=logs_statuses, - ) - collaborators = [ - base_pb2.CollaboratorDescription( - name=col["name"], - status=col["status"], - progress=col["progress"], - round=col["round"], - current_task=col["current_task"], - next_task=col["next_task"], - ) - for col in experiment["collaborators"] - ] - tasks = [ - base_pb2.TaskDescription(name=task["name"], description=task["description"]) - for task in experiment["tasks"] - ] - - return director_pb2.GetExperimentDescriptionResponse( - experiment=base_pb2.ExperimentDescription( - name=experiment["name"], - status=experiment["status"], - progress=experiment["progress"], - current_round=experiment["current_round"], - total_rounds=experiment["total_rounds"], - download_statuses=download_statuses, - collaborators=collaborators, - tasks=tasks, - ), - ) diff --git a/tests/github/interactive_api_director/exp_1.py b/tests/github/interactive_api_director/exp_1.py deleted file mode 100644 index ea32ed62d6..0000000000 --- a/tests/github/interactive_api_director/exp_1.py +++ /dev/null @@ -1,36 +0,0 @@ -from time import sleep -import getpass - - -from tests.github.interactive_api_director.experiment_runner import run_federation -from tests.github.interactive_api_director.experiment_runner import stop_federation -from tests.github.interactive_api_director.experiment_runner import Shard -from tests.github.interactive_api_director.experiment_runner import create_federation - - -col_names = ['one', 
'two'] -username = getpass.getuser() -director_path = f'/home/{username}/test/exp_1/director' - -director_host = 'localhost' -director_port = 50051 - -shards = { - f'/home/{username}/test/exp_1/{col_name}': - Shard( - shard_name=col_name, - director_host=director_host, - director_port=director_port, - data_path=f'/home/{username}/test/data/{col_name}' - ) - for col_name in col_names -} - -create_federation(director_path, shards.keys()) - -processes = run_federation(shards, director_path) - -# run experiments -sleep(5) - -stop_federation(processes) diff --git a/tests/github/interactive_api_director/experiment_runner.py b/tests/github/interactive_api_director/experiment_runner.py deleted file mode 100644 index f59a89f8ef..0000000000 --- a/tests/github/interactive_api_director/experiment_runner.py +++ /dev/null @@ -1,140 +0,0 @@ -import os -import logging -import shutil -import subprocess -import typing -from time import sleep -from dataclasses import dataclass -from rich.console import Console -from rich.logging import RichHandler - - -root = logging.getLogger() -root.setLevel(logging.INFO) -console = Console(width=160) -handler = RichHandler(console=console) -formatter = logging.Formatter( - "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s" -) -handler.setFormatter(formatter) -root.addHandler(handler) - -logger = logging.getLogger(__name__) - - -def prepare_collaborator_workspace(col_dir, arch_path): - logger.info(f'Prepare collaborator directory: {col_dir}') - if os.path.exists(col_dir): - shutil.rmtree(col_dir) - os.makedirs(col_dir) - arch_col_path = shutil.copy(arch_path, col_dir) - shutil.unpack_archive(arch_col_path, col_dir) - logger.info('Collaborator directory prepared') - - -def run_aggregator(model_interface, fl_experiment): - logger.info('Aggregator has been started.') - fl_experiment.start_experiment(model_interface) - logger.info('Aggregator has been stopped.') - - -def run_experiment(col_data_paths, model_interface, arch_path, fl_experiment): - logger.info('Starting the experiment!') - for col_dir in col_data_paths: - prepare_collaborator_workspace(col_dir, arch_path) - - processes = [] - for col_name in col_data_paths: - logger.info(f'Starting collaborator: {col_name}') - p = subprocess.Popen( - f"fx collaborator start -n {col_name} -p plan/plan.yaml -d data.yaml".split(' '), - cwd=os.path.join(os.getcwd(), col_name) - ) - processes.append(p) - - run_aggregator(model_interface, fl_experiment) - for p in processes: - p.terminate() - - logger.info('The experiment completed!') - - -def create_director(director_path, recreate, config): - logger.info(f'Creating the director in {director_path}!') - if os.path.exists(director_path): - if not recreate: - return - shutil.rmtree(director_path) - subprocess.Popen( - f'fx director create-workspace -p {director_path}', - shell=True - ).wait() - shutil.copy(config, director_path) - - -def create_envoy(col_path, recreate, envoy_config, shard_descriptor): - logger.info(f'Creating the envoy in {col_path}!') - if os.path.exists(col_path): - if not recreate: - return - shutil.rmtree(col_path) - subprocess.Popen( - f'fx envoy create-workspace -p {col_path}', - shell=True - ).wait() - shutil.copy(envoy_config, col_path) - shutil.copy(shard_descriptor, col_path) - - -def create_federation( - director_path: str, - collaborator_paths: typing.Iterable[str], - director_config, - envoy_config, - shard_descriptor, - recreate=False -): - logger.info('Creating the federation!') - create_director(director_path, recreate, director_config) - for 
col_path in collaborator_paths: - create_envoy(col_path, recreate, envoy_config, shard_descriptor) - # TODO: create mTLS - logger.info('Federation was created') - - -@dataclass -class Shard: - shard_name: str - director_host: str - director_port: int - data_path: str - - -def run_federation(shards: typing.Dict[str, Shard], director_path: str): - logger.info('Starting the experiment!') - running_processes = [] - p = subprocess.Popen( - "fx director start --disable-tls", - shell=True, - cwd=os.path.join(director_path) - ) - sleep(2) - running_processes.append(p) - for collaborator_path, shard in shards.items(): - p = subprocess.Popen( - f'fx envoy start ' - f'-n {shard.shard_name} -dh {shard.director_host} ' - f'-dp {shard.director_port} -p {shard.data_path}', - shell=True, - cwd=os.path.join(collaborator_path) - ) - running_processes.append(p) - logger.info('The federation started!') - return running_processes - - -def stop_federation(running_processes): - logger.info('Stopping the federation') - for p in running_processes: - p.terminate() - logger.info('Federation was stopped') diff --git a/tests/github/interactive_api_director/experiments/__init__.py b/tests/github/interactive_api_director/experiments/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/__init__.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/__init__.py deleted file mode 100644 index 100b310ee9..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Pytorch kvasir unet example package.""" diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/data_loader.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/data_loader.py deleted file mode 100644 index 502c806cd6..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/data_loader.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from skimage import io -import numpy as np -from openfl.utilities import validate_file_hash -import requests -from zipfile import ZipFile - - -def load_data(): - os.makedirs('data', exist_ok=True) - with open('./data/kvasir.zip', 'wb') as zf: - response = requests.get('https://datasets.simula.no/downloads/hyper-kvasir/' - 'hyper-kvasir-segmented-images.zip') - for chunk in response.iter_content(1024 * 1024 * 1024): - zf.write(chunk) - zip_sha384 = ('66cd659d0e8afd8c83408174' - '1ade2b75dada8d4648b816f2533c8748b1658efa3d49e205415d4116faade2c5810e241e') - validate_file_hash('./data/kvasir.zip', zip_sha384) - with ZipFile('./data/kvasir.zip') as f: - f.extractall('./data') - - -def read_data(image_path, mask_path): - """ - Read image and mask from disk. 
- """ - img = io.imread(image_path) - assert (img.shape[2] == 3) - mask = io.imread(mask_path) - return img, mask[:, :, 0].astype(np.uint8) diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/dataset.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/dataset.py deleted file mode 100644 index 47939f9951..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/dataset.py +++ /dev/null @@ -1,109 +0,0 @@ -import os -import PIL -from torch.utils.data import Dataset, DataLoader -from torchvision import transforms as tsf - -from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.data_loader import read_data # noqa: E501 -from openfl.interface.interactive_api.experiment import DataInterface - - -class KvasirDataset(Dataset): - """ - Kvasir dataset contains 1000 images for all collaborators. - Args: - data_path: path to dataset on disk - collaborator_count: total number of collaborators - collaborator_num: number of current collaborator - is_validation: validation option - """ - - def __init__(self, images_path='./data/segmented-images/images/', - masks_path='./data/segmented-images/masks/', - validation_fraction=1 / 8, is_validation=False): - - self.images_path = images_path - self.masks_path = masks_path - self.images_names = [ - img_name - for img_name in sorted(os.listdir(self.images_path)) - if len(img_name) > 3 and img_name[-3:] == 'jpg' - ] - - assert (len(self.images_names) > 2), "Too few images" - - validation_size = max(1, int(len(self.images_names) * validation_fraction)) - - if is_validation: - self.images_names = self.images_names[-validation_size:] - else: - self.images_names = self.images_names[: -validation_size] - - # Prepare transforms - self.img_trans = tsf.Compose([ - tsf.ToPILImage(), - tsf.Resize((332, 332)), - tsf.ToTensor(), - tsf.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])]) - self.mask_trans = tsf.Compose([ - tsf.ToPILImage(), - tsf.Resize((332, 332), interpolation=PIL.Image.NEAREST), - tsf.ToTensor()]) - - def __getitem__(self, index): - name = self.images_names[index] - img, mask = read_data(self.images_path + name, self.masks_path + name) - img = self.img_trans(img).numpy() - mask = self.mask_trans(mask).numpy() - return img, mask - - def __len__(self): - return len(self.images_names) - - -class FedDataset(DataInterface): - - def _delayed_init(self, data_path='1,1'): - # With the next command the local dataset will be loaded on the collaborator node - # For this example we have the same dataset on the same path, and we will shard it - # So we use `data_path` information for this purpose. - self.rank, self.world_size = [int(part) for part in data_path.split(',')] - - validation_fraction = 1 / 8 - self.train_set = self.UserDatasetClass(validation_fraction=validation_fraction, - is_validation=False) - self.valid_set = self.UserDatasetClass(validation_fraction=validation_fraction, - is_validation=True) - - # Do the actual sharding - self._do_sharding(self.rank, self.world_size) - - def _do_sharding(self, rank, world_size): - # This method relies on the dataset's implementation - # i.e. 
coupled in a bad way - self.train_set.images_names = self.train_set.images_names[rank - 1:: world_size] - - def get_train_loader(self, **kwargs): - """ - Output of this method will be provided to tasks with optimizer in contract - """ - return DataLoader( - self.train_set, num_workers=8, batch_size=self.kwargs['train_bs'], shuffle=True - ) - - def get_valid_loader(self, **kwargs): - """ - Output of this method will be provided to tasks without optimizer in contract - """ - return DataLoader(self.valid_set, num_workers=8, batch_size=self.kwargs['valid_bs']) - - def get_train_data_size(self): - """ - Information for aggregation - """ - return len(self.train_set) - - def get_valid_data_size(self): - """ - Information for aggregation - """ - return len(self.valid_set) diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/director/director_config.yaml b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/director/director_config.yaml deleted file mode 100644 index ef0397f8ce..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/director/director_config.yaml +++ /dev/null @@ -1,3 +0,0 @@ -settings: - sample_shape: [ '300', '400', '3' ] - target_shape: [ '300', '400' ] diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/envoy/envoy_config.yaml b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/envoy/envoy_config.yaml deleted file mode 100644 index c28b0487c7..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/envoy/envoy_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -params: - cuda_devices: [ ] - -optional_plugin_components: { } - -shard_descriptor: - template: kvasir_shard_descriptor.KvasirShardDescriptor - params: - data_folder: kvasir_data - rank_worldsize: 1,90 - enforce_image_hw: '300,400' \ No newline at end of file diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/envoy/kvasir_shard_descriptor.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/envoy/kvasir_shard_descriptor.py deleted file mode 100644 index 178e121c53..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/envoy/kvasir_shard_descriptor.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Kvasir shard descriptor.""" - -import os -from pathlib import Path -from zipfile import ZipFile - -import numpy as np -from PIL import Image - -from openfl.interface.interactive_api.shard_descriptor import ShardDataset -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor -from openfl.utilities import validate_file_hash -import requests - - -class KvasirShardDataset(ShardDataset): - """Kvasir Shard dataset class.""" - - def __init__(self, dataset_dir: Path, rank=1, worldsize=1, enforce_image_hw=None): - """Initialize KvasirShardDataset.""" - self.rank = rank - self.worldsize = worldsize - self.dataset_dir = dataset_dir - self.enforce_image_hw = enforce_image_hw - self.images_path = self.dataset_dir / 'segmented-images' / 'images' - self.masks_path = self.dataset_dir / 'segmented-images' / 'masks' - - self.images_names = [ - img_name - for img_name in sorted(os.listdir(self.images_path)) - if len(img_name) > 3 and img_name[-3:] == 'jpg' - ] - # Sharding - self.images_names = self.images_names[self.rank - 1::self.worldsize] - - def __getitem__(self, index): - """Return a item by the index.""" - name 
= self.images_names[index] - # Reading data - img = Image.open(self.images_path / name) - mask = Image.open(self.masks_path / name) - if self.enforce_image_hw is not None: - # If we need to resize data - # PIL accepts (w,h) tuple, not (h,w) - img = img.resize(self.enforce_image_hw[::-1]) - mask = mask.resize(self.enforce_image_hw[::-1]) - img = np.asarray(img) - mask = np.asarray(mask) - assert img.shape[2] == 3 - - return img, mask[:, :, 0].astype(np.uint8) - - def __len__(self): - """Return the len of the dataset.""" - return len(self.images_names) - - -class KvasirShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__(self, data_folder: str = 'kvasir_data', - rank_worldsize: str = '1,1', - enforce_image_hw: str = None) -> None: - """Initialize KvasirShardDescriptor.""" - super().__init__() - # Settings for sharding the dataset - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - self.data_folder = Path.cwd() / data_folder - self.download_data(self.data_folder) - - # Settings for resizing data - self.enforce_image_hw = None - if enforce_image_hw is not None: - self.enforce_image_hw = tuple(int(size) for size in enforce_image_hw.split(',')) - - # Calculating data and target shapes - ds = self.get_dataset() - sample, target = ds[0] - self._sample_shape = [str(dim) for dim in sample.shape] - self._target_shape = [str(dim) for dim in target.shape] - - def get_dataset(self, dataset_type='train'): - """Return a shard dataset by type.""" - return KvasirShardDataset( - dataset_dir=self.data_folder, - rank=self.rank, - worldsize=self.worldsize, - enforce_image_hw=self.enforce_image_hw - ) - - @staticmethod - def download_data(data_folder): - """Download data.""" - zip_file_path = data_folder / 'kvasir.zip' - os.makedirs(data_folder, exist_ok=True) - with open(zip_file_path.relative_to(Path.cwd()), 'wb') as zf: - response = requests.get('https://datasets.simula.no/downloads/hyper-kvasir/' - 'hyper-kvasir-segmented-images.zip') - for chunk in response.iter_content(1024 * 1024 * 1024): - zf.write(chunk) - zip_sha384 = ('66cd659d0e8afd8c83408174' - '1ade2b75dada8d4648b816f2533c8748b1658efa3d49e205415d4116faade2c5810e241e') - validate_file_hash(zip_file_path, zip_sha384) - with ZipFile(zip_file_path.relative_to(Path.cwd())) as f: - f.extractall(data_folder.relative_to(Path.cwd())) - - @property - def sample_shape(self): - """Return the sample shape info.""" - return self._sample_shape - - @property - def target_shape(self): - """Return the target shape info.""" - return self._target_shape - - @property - def dataset_description(self) -> str: - """Return the dataset description.""" - return (f'Kvasir dataset, shard number {self.rank} ' - f'out of {self.worldsize}') - - -if __name__ == '__main__': - from openfl.interface.cli import setup_logging - - setup_logging() - - data_folder = 'data' - rank_worldsize = '1,100' - enforce_image_hw = '529,622' - - kvasir_sd = KvasirShardDescriptor( - data_folder, - rank_worldsize=rank_worldsize, - enforce_image_hw=enforce_image_hw) - - print(kvasir_sd.dataset_description) - print(kvasir_sd.sample_shape, kvasir_sd.target_shape) - - from openfl.component.envoy.envoy import Envoy - - shard_name = 'one' - director_host = 'localhost' - director_port = 50051 - - keeper = Envoy( - shard_name=shard_name, - director_host=director_host, - director_port=director_port, - shard_descriptor=kvasir_sd, - tls=True, - root_certificate='./cert/root_ca.crt', - private_key='./cert/one.key', - certificate='./cert/one.crt', - ) 
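# Envoy.start() below blocks: it registers the shard with the director and then
# loops, waiting for experiments and reporting health. For a local smoke test
# the certificate wiring can be skipped; a minimal sketch of the same setup
# with TLS disabled (constructor arguments as above; the tls=False variant is
# an assumption, not part of the original script):
#
#     keeper = Envoy(
#         shard_name=shard_name,
#         director_host=director_host,
#         director_port=director_port,
#         shard_descriptor=kvasir_sd,
#         tls=False,
#     )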
-
-    keeper.start()
diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/envoy/sd_requirements.txt b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/envoy/sd_requirements.txt
deleted file mode 100644
index 5e3549a040..0000000000
--- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/envoy/sd_requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-numpy
-pillow
-scikit-image
diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/experiment.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/experiment.py
deleted file mode 100644
index da57352169..0000000000
--- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/experiment.py
+++ /dev/null
@@ -1,155 +0,0 @@
-from copy import deepcopy
-
-import numpy as np
-import PIL
-import torch.optim as optim
-from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.model import UNet
-from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.tasks import task_interface  # noqa: E501
-from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.tasks import validate
-from torch.utils.data import DataLoader
-from torch.utils.data import Dataset
-from torch.utils.data import SubsetRandomSampler
-from torchvision import transforms as tsf
-
-from openfl.interface.interactive_api.experiment import DataInterface
-from openfl.interface.interactive_api.experiment import FLExperiment
-from openfl.interface.interactive_api.experiment import ModelInterface
-from openfl.interface.interactive_api.federation import Federation
-
-
-def run():
-    federation = Federation(
-        client_id='frontend',
-        director_node_fqdn='localhost',
-        director_port=50051,
-        tls=False
-    )
-
-    dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)
-    dummy_shard_dataset = dummy_shard_desc.get_dataset('')
-    sample, target = dummy_shard_dataset[0]
-    fed_dataset = KvasirSD(train_bs=4, valid_bs=8)
-    fed_dataset.shard_descriptor = dummy_shard_desc
-    for i, (sample, target) in enumerate(fed_dataset.get_train_loader()):
-        print(sample.shape)
-
-    model_unet = UNet()
-    optimizer_adam = optim.Adam(model_unet.parameters(), lr=1e-4)
-
-    framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'
-    MI = ModelInterface(model=model_unet, optimizer=optimizer_adam,
-                        framework_plugin=framework_adapter)
-
-    # Save the initial model state
-    initial_model = deepcopy(model_unet)
-
-    # create an experiment in the federation
-    experiment_name = 'kvasir_test_experiment'
-    fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)
-
-    fl_experiment.start(model_provider=MI,
-                        task_keeper=task_interface,
-                        data_loader=fed_dataset,
-                        rounds_to_train=1,
-                        opt_treatment='CONTINUE_GLOBAL')
-    fl_experiment.stream_metrics()
-    best_model = fl_experiment.get_best_model()
-    fl_experiment.remove_experiment_data()
-    best_model.inc.conv[0].weight  # inspect a weight tensor of the retrieved model
-    validate(initial_model, fed_dataset.get_valid_loader(), 'cpu')
-    validate(best_model, fed_dataset.get_valid_loader(), 'cpu')
-
-
-# Register dataset
-# We extract the user dataset class implementation.
-# Is it convenient? What if the dataset is not a class?
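# Before the two classes below, a sketch of the small DataInterface surface
# they implement (method names taken from the code that follows; the skeleton
# itself is illustrative, not part of the original file):
#
#     class MyDataSet(DataInterface):
#         @property
#         def shard_descriptor(self): ...            # local data access object
#         @shard_descriptor.setter
#         def shard_descriptor(self, sd): ...        # set by the Envoy on each collaborator
#         def get_train_loader(self, **kwargs): ...  # consumed by tasks that take an optimizer
#         def get_valid_loader(self, **kwargs): ...  # consumed by validation-only tasks
#         def get_train_data_size(self): ...         # sample counts used for weighted aggregation
#         def get_valid_data_size(self): ...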
-class KvasirShardDataset(Dataset): - - def __init__(self, dataset): - self._dataset = dataset - - # Prepare transforms - self.img_trans = tsf.Compose([ - tsf.ToPILImage(), - tsf.Resize((332, 332)), - tsf.ToTensor(), - tsf.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])]) - self.mask_trans = tsf.Compose([ - tsf.ToPILImage(), - tsf.Resize((332, 332), interpolation=PIL.Image.NEAREST), - tsf.ToTensor()]) - - def __getitem__(self, index): - img, mask = self._dataset[index] - img = self.img_trans(img).numpy() - mask = self.mask_trans(mask).numpy() - return img, mask - - def __len__(self): - return len(self._dataset) - - -# Now you can implement you data loaders using dummy_shard_desc -class KvasirSD(DataInterface): - - def __init__(self, validation_fraction=1 / 8, **kwargs): - super().__init__(**kwargs) - - self.validation_fraction = validation_fraction - - @property - def shard_descriptor(self): - return self._shard_descriptor - - @shard_descriptor.setter - def shard_descriptor(self, shard_descriptor): - """ - Describe per-collaborator procedures or sharding. - - This method will be called during a collaborator initialization. - Local shard_descriptor will be set by Envoy. - """ - self._shard_descriptor = shard_descriptor - self._shard_dataset = KvasirShardDataset(shard_descriptor.get_dataset('train')) - - validation_size = max(1, int(len(self._shard_dataset) * self.validation_fraction)) - - self.train_indeces = np.arange(len(self._shard_dataset) - validation_size) - self.val_indeces = np.arange(len(self._shard_dataset) - validation_size, - len(self._shard_dataset)) - - def get_train_loader(self, **kwargs): - """ - Output of this method will be provided to tasks with optimizer in contract - """ - train_sampler = SubsetRandomSampler(self.train_indeces) - return DataLoader( - self._shard_dataset, - num_workers=1, - batch_size=self.kwargs['train_bs'], - sampler=train_sampler - ) - - def get_valid_loader(self, **kwargs): - """ - Output of this method will be provided to tasks without optimizer in contract - """ - val_sampler = SubsetRandomSampler(self.val_indeces) - return DataLoader( - self._shard_dataset, - num_workers=1, - batch_size=self.kwargs['valid_bs'], - sampler=val_sampler - ) - - def get_train_data_size(self): - """ - Information for aggregation - """ - return len(self.train_indeces) - - def get_valid_data_size(self): - """ - Information for aggregation - """ - return len(self.val_indeces) diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/layers.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/layers.py deleted file mode 100644 index 5165dcc97e..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/layers.py +++ /dev/null @@ -1,100 +0,0 @@ -"""Layers for Unet model.""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -def soft_dice_loss(output, target): - """Calculate loss.""" - num = target.size(0) - m1 = output.view(num, -1) - m2 = target.view(num, -1) - intersection = m1 * m2 - score = 2.0 * (intersection.sum(1) + 1) / (m1.sum(1) + m2.sum(1) + 1) - score = 1 - score.sum() / num - return score - - -def soft_dice_coef(output, target): - """Calculate soft DICE coefficient.""" - num = target.size(0) - m1 = output.view(num, -1) - m2 = target.view(num, -1) - intersection = m1 * m2 - score = 2.0 * (intersection.sum(1) + 1) / (m1.sum(1) + m2.sum(1) + 1) - return score.sum() - - -class DoubleConv(nn.Module): - """Pytorch double conv class.""" - - def __init__(self, 
in_ch, out_ch): - """Initialize layer.""" - super(DoubleConv, self).__init__() - self.in_ch = in_ch - self.out_ch = out_ch - self.conv = nn.Sequential( - nn.Conv2d(in_ch, out_ch, 3, padding=1), - nn.BatchNorm2d(out_ch), - nn.ReLU(inplace=True), - nn.Conv2d(out_ch, out_ch, 3, padding=1), - nn.BatchNorm2d(out_ch), - nn.ReLU(inplace=True), - ) - - def forward(self, x): - """Do forward pass.""" - x = self.conv(x) - return x - - -class Down(nn.Module): - """Pytorch nn module subclass.""" - - def __init__(self, in_ch, out_ch): - """Initialize layer.""" - super(Down, self).__init__() - self.mpconv = nn.Sequential( - nn.MaxPool2d(2), - DoubleConv(in_ch, out_ch) - ) - - def forward(self, x): - """Do forward pass.""" - x = self.mpconv(x) - return x - - -class Up(nn.Module): - """Pytorch nn module subclass.""" - - def __init__(self, in_ch, out_ch, bilinear=False): - """Initialize layer.""" - super(Up, self).__init__() - self.in_ch = in_ch - self.out_ch = out_ch - if bilinear: - self.up = nn.Upsample( - scale_factor=2, - mode='bilinear', - align_corners=True - ) - else: - self.up = nn.ConvTranspose2d(in_ch, in_ch // 2, 2, stride=2) - self.conv = DoubleConv(in_ch, out_ch) - - def forward(self, x1, x2): - """Do forward pass.""" - x1 = self.up(x1) - diff_y = x2.size()[2] - x1.size()[2] - diff_x = x2.size()[3] - x1.size()[3] - - x1 = F.pad( - x1, - (diff_x // 2, diff_x - diff_x // 2, diff_y // 2, diff_y - diff_y // 2) - ) - - x = torch.cat([x2, x1], dim=1) - x = self.conv(x) - return x diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/model.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/model.py deleted file mode 100644 index ab5ce27a84..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/model.py +++ /dev/null @@ -1,37 +0,0 @@ -import torch -import torch.nn as nn - -from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.layers import DoubleConv, Down, Up # noqa: E501 - -""" -UNet model definition -""" - - -class UNet(nn.Module): - def __init__(self, n_channels=3, n_classes=1): - super().__init__() - self.inc = DoubleConv(n_channels, 64) - self.down1 = Down(64, 128) - self.down2 = Down(128, 256) - self.down3 = Down(256, 512) - self.down4 = Down(512, 1024) - self.up1 = Up(1024, 512) - self.up2 = Up(512, 256) - self.up3 = Up(256, 128) - self.up4 = Up(128, 64) - self.outc = nn.Conv2d(64, n_classes, 1) - - def forward(self, x): - x1 = self.inc(x) - x2 = self.down1(x1) - x3 = self.down2(x2) - x4 = self.down3(x3) - x5 = self.down4(x4) - x = self.up1(x5, x4) - x = self.up2(x, x3) - x = self.up3(x, x2) - x = self.up4(x, x1) - x = self.outc(x) - x = torch.sigmoid(x) - return x diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/run.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/run.py deleted file mode 100644 index c4f8d6bad4..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/run.py +++ /dev/null @@ -1,10 +0,0 @@ -from pathlib import Path - -from tests.github.interactive_api_director import utils - - -if __name__ == '__main__': - director = utils.start_director(Path(__file__).parent / 'director') - envoy = utils.start_envoy(Path(__file__).parent / 'envoy') - from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet import experiment - experiment.run() diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/settings.py 
b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/settings.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/tasks.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/tasks.py deleted file mode 100644 index 555240dd6c..0000000000 --- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/tasks.py +++ /dev/null @@ -1,67 +0,0 @@ -import tqdm -import torch -import numpy as np - -from openfl.interface.interactive_api.experiment import TaskInterface -from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.layers import soft_dice_loss, soft_dice_coef # noqa: E501 - - -task_interface = TaskInterface() - - -def function_defined_in_notebook(some_parameter): - print('I will cause problems') - print(f'Also I accept a parameter and it is {some_parameter}') - - -# We do not actually need to register additional kwargs, Just serialize them -@task_interface.add_kwargs(**{'some_parameter': 42}) -@task_interface.register_fl_task(model='unet_model', data_loader='train_loader', - device='device', optimizer='optimizer') -def train(unet_model, train_loader, optimizer, device, loss_fn=soft_dice_loss, some_parameter=None): - if not torch.cuda.is_available(): - device = 'cpu' - - function_defined_in_notebook(some_parameter) - - train_loader = tqdm.tqdm(train_loader, desc="train") - - unet_model.train() - unet_model.to(device) - - losses = [] - - for data, target in train_loader: - data, target = torch.tensor(data).to(device), torch.tensor( - target).to(device, dtype=torch.float32) - optimizer.zero_grad() - output = unet_model(data) - loss = loss_fn(output=output, target=target) - loss.backward() - optimizer.step() - losses.append(loss.detach().cpu().numpy()) - - return {'train_loss': np.mean(losses), } - - -@task_interface.register_fl_task(model='unet_model', data_loader='val_loader', device='device') -def validate(unet_model, val_loader, device): - unet_model.eval() - unet_model.to(device) - - val_loader = tqdm.tqdm(val_loader, desc="validate") - - val_score = 0 - total_samples = 0 - - with torch.no_grad(): - for data, target in val_loader: - samples = target.shape[0] - total_samples += samples - data = torch.tensor(data).to(device) - target = torch.tensor(target).to(device, dtype=torch.int64) - output = unet_model(data) - val = soft_dice_coef(output, target) - val_score += val.sum().cpu().numpy() - - return {'dice_coef': val_score / total_samples, } diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/__init__.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/dataset.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/dataset.py deleted file mode 100644 index f1967bfeb0..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/dataset.py +++ /dev/null @@ -1,63 +0,0 @@ -import tensorflow as tf -import numpy as np - -from openfl.interface.interactive_api.experiment import DataInterface - - -class FedDataset(DataInterface): - def __init__(self, train_bs, valid_bs, **kwargs): - super().__init__(**kwargs) - self.train_bs = train_bs - self.valid_bs = valid_bs - - @property - def shard_descriptor(self): - return self._shard_descriptor - - @shard_descriptor.setter - def shard_descriptor(self, shard_descriptor): - 
""" - Describe per-collaborator procedures or sharding. - - This method will be called during a collaborator initialization. - Local shard_descriptor will be set by Envoy. - """ - self._shard_descriptor = shard_descriptor - validation_size = len(self.shard_descriptor) // 10 - self.train_indices = np.arange(len(self.shard_descriptor) - validation_size) - self.val_indices = np.arange(len(self.shard_descriptor) - validation_size, - len(self.shard_descriptor)) - - def get_train_loader(self, **kwargs): - """ - Output of this method will be provided to tasks with optimizer in contract - """ - samples, targets = [], [] - for i in self.train_indices: - sample, target = self.shard_descriptor[i] - samples.append(sample) - targets.append(target) - samples = np.array(samples) - targets = np.array(targets) - return tf.data.Dataset.from_tensor_slices((samples, targets)).batch(self.train_bs) - - def get_valid_loader(self, **kwargs): - """ - Output of this method will be provided to tasks without optimizer in contract - """ - samples, targets = zip(*[self.shard_descriptor[i] for i in self.val_indices]) - samples = np.array(samples) - targets = np.array(targets) - return tf.data.Dataset.from_tensor_slices((samples, targets)).batch(self.valid_bs) - - def get_train_data_size(self): - """ - Information for aggregation - """ - return len(self.train_indices) - - def get_valid_data_size(self): - """ - Information for aggregation - """ - return len(self.val_indices) diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/director/director_config.yaml b/tests/github/interactive_api_director/experiments/tensorflow_mnist/director/director_config.yaml deleted file mode 100644 index 734dac6f5a..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/director/director_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Director's config. -# Parameters: -# 1. sample_shape - sample shape interface unified across the Federation -# 1. 
target_shape - target shape interface unified across the Federation - -settings: - sample_shape: ['784'] - target_shape: ['0'] \ No newline at end of file diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/director/start_director.sh b/tests/github/interactive_api_director/experiments/tensorflow_mnist/director/start_director.sh deleted file mode 100755 index 8616553963..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/director/start_director.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -fx director start --disable-tls -c config.yaml \ No newline at end of file diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/envoy_config.yaml b/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/envoy_config.yaml deleted file mode 100644 index 42c5c37031..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/envoy_config.yaml +++ /dev/null @@ -1,9 +0,0 @@ -params: - cuda_devices: [] - -optional_plugin_components: {} - -shard_descriptor: - template: shard_descriptor.MNISTShardDescriptor - params: - rank_worldsize: 1,90 \ No newline at end of file diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/sd_requirements.txt b/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/sd_requirements.txt deleted file mode 100644 index 99214efe65..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/sd_requirements.txt +++ /dev/null @@ -1 +0,0 @@ -tensorflow==2.13 diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/shard_descriptor.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/shard_descriptor.py deleted file mode 100644 index 5cf5a9edb4..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/shard_descriptor.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Kvasir shard descriptor.""" - -import numpy as np -from tensorflow import keras - -from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor - - -class MNISTShardDescriptor(ShardDescriptor): - """Shard descriptor class.""" - - def __init__(self, rank_worldsize: str = '1,1') -> None: - """Initialize KvasirShardDescriptor.""" - super().__init__() - - (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() - x_train = np.reshape(x_train, (-1, 784)) - x_test = np.reshape(x_test, (-1, 784)) - self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(',')) - - # Sharding - self.X_train = x_train[self.rank - 1::self.worldsize] - self.y_train = y_train[self.rank - 1::self.worldsize] - self.X_test = x_test[self.rank - 1::self.worldsize] - self.y_test = y_test[self.rank - 1::self.worldsize] - - # Calculating data and target shapes - sample, _ = self[0] - self._sample_shape = [str(dim) for dim in sample.shape] - self._target_shape = ['0'] - - def __getitem__(self, index): - """Return a item by the index.""" - if index < len(self.X_train): - return self.X_train[index], self.y_train[index] - index -= len(self.X_train) + 1 - return self.X_test[index], self.y_test[index] - - def __len__(self): - """Return the len of the dataset.""" - return len(self.X_train) + len(self.X_test) - - @property - def sample_shape(self): - """Return the sample shape info.""" - return self._sample_shape - - @property - def 
-    def target_shape(self):
-        """Return the target shape info."""
-        return self._target_shape
-
-    @property
-    def dataset_description(self) -> str:
-        """Return the dataset description."""
-        return (f'MNIST dataset, shard number {self.rank}'
-                f' out of {self.worldsize}')
-
-
-if __name__ == '__main__':
-    from openfl.interface.cli import setup_logging
-    setup_logging()
-
-    data_folder = 'data'
-    rank_worldsize = '1,100'
-
-    mnist_sd = MNISTShardDescriptor(
-        rank_worldsize=rank_worldsize)
-
-    print(mnist_sd.dataset_description)
-    print(mnist_sd.sample_shape, mnist_sd.target_shape)
-
-    from openfl.component.envoy.envoy import Envoy
-
-    shard_name = 'one'
-    director_uri = 'localhost:50051'
-
-    keeper = Envoy(
-        shard_name=shard_name,
-        director_uri=director_uri,
-        shard_descriptor=mnist_sd,
-        tls=False,
-        root_ca='./cert/root_ca.crt',
-        key='./cert/one.key',
-        cert='./cert/one.crt',
-    )
-
-    keeper.start()
diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/start_envoy.sh b/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/start_envoy.sh
deleted file mode 100755
index 1dd6591439..0000000000
--- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/envoy/start_envoy.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-set -e
-
-fx envoy start -n env_one --disable-tls --envoy-config-path envoy_config.yaml -dh localhost -dp 50051
\ No newline at end of file
diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py
deleted file mode 100644
index ec7bed09da..0000000000
--- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py
+++ /dev/null
@@ -1,126 +0,0 @@
-import tensorflow as tf
-# Create a federation
-from openfl.interface.interactive_api.federation import Federation
-from openfl.interface.interactive_api.experiment import TaskInterface, ModelInterface, FLExperiment
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.dataset import FedDataset
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import model
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import optimizer
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import loss_fn
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import train_acc_metric  # noqa: E501
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import val_acc_metric  # noqa: E501
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.envoy.shard_descriptor import MNISTShardDescriptor  # noqa: E501
-
-
-def run():
-    # please use the same identifier that was used in the signed certificate
-    client_id = 'frontend'
-
-    # 1) Run with API layer - Director mTLS
-    # If the user wants to enable mTLS they must provide the CA root chain
-    # and a signed key pair to the federation interface
-    # cert_chain = 'cert/root_ca.crt'
-    # API_certificate = 'cert/frontend.crt'
-    # API_private_key = 'cert/frontend.key'
-
-    # federation = Federation(client_id='frontend', director_node_fqdn='localhost',
-    #                         director_port='50051', cert_chain=cert_chain,
-    #                         api_cert=API_certificate, api_private_key=API_private_key)
-
-    # ---------------------------------------------------------------------------------------------
-
-    # 2) Run with TLS disabled (trusted environment)
diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py deleted file mode 100644 index ec7bed09da..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py +++ /dev/null @@ -1,126 +0,0 @@ -import tensorflow as tf -# Create a federation -from openfl.interface.interactive_api.federation import Federation -from openfl.interface.interactive_api.experiment import TaskInterface, ModelInterface, FLExperiment -from tests.github.interactive_api_director.experiments.tensorflow_mnist.dataset import FedDataset -from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import model -from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import optimizer -from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import loss_fn -from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import train_acc_metric # noqa: E501 -from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import val_acc_metric # noqa: E501 -from tests.github.interactive_api_director.experiments.tensorflow_mnist.envoy.shard_descriptor import MNISTShardDescriptor # noqa: E501 - - -def run(): - # Please use the same identifier that was used in the signed certificate - client_id = 'frontend' - - # 1) Run with API layer - Director mTLS - # To enable mTLS, the user must provide the CA root chain - # and a signed key pair to the federation interface - # cert_chain = 'cert/root_ca.crt' - # API_certificate = 'cert/frontend.crt' - # API_private_key = 'cert/frontend.key' - - # federation = Federation(client_id='frontend', director_node_fqdn='localhost', - # director_port='50051', cert_chain=cert_chain, - # api_cert=API_certificate, api_private_key=API_private_key) - - # --------------------------------------------------------------------------------------------- - - # 2) Run with TLS disabled (trusted environment) - # Federation can also determine the local FQDN automatically - federation = Federation(client_id=client_id, director_node_fqdn='localhost', - director_port='50051', tls=False) - - shard_registry = federation.get_shard_registry() - print(shard_registry) - print(federation.target_shape) - fed_dataset = FedDataset(train_bs=4, valid_bs=8) - fed_dataset.shard_descriptor = MNISTShardDescriptor() - for batch in fed_dataset.get_train_loader(): - samples, _ = batch - for sample in samples: - print(sample.shape) - - framework_adapter = 'openfl.plugins.frameworks_adapters.keras_adapter.FrameworkAdapterPlugin' - MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter) - - def function_defined_in_notebook(some_parameter): - print(f'Also I accept a parameter and it is {some_parameter}') - - TI = TaskInterface() - - # Task interface currently supports only standalone functions. - @TI.register_fl_task(model='model', data_loader='train_dataset', - device='device', optimizer='optimizer') - def train(model, train_dataset, optimizer, device, loss_fn=loss_fn, warmup=False): - # Iterate over the batches of the dataset. - for step, (x_batch_train, y_batch_train) in enumerate(train_dataset): - with tf.GradientTape() as tape: - logits = model(x_batch_train, training=True) - loss_value = loss_fn(y_batch_train, logits) - grads = tape.gradient(loss_value, model.trainable_weights) - optimizer.apply_gradients(zip(grads, model.trainable_weights)) - - # Update training metric. - train_acc_metric.update_state(y_batch_train, logits) - - # Log every 200 batches. - if step % 200 == 0: - print( - "Training loss (for one batch) at step %d: %.4f" - % (step, float(loss_value)) - ) - print("Seen so far: %d samples" % ((step + 1) * 64)) - if warmup: - break - - # Display metrics at the end of each epoch. - train_acc = train_acc_metric.result() - print("Training acc over epoch: %.4f" % (float(train_acc),)) - - # Reset training metrics at the end of each epoch - train_acc_metric.reset_states() - - return {'train_acc': train_acc} - - @TI.register_fl_task(model='model', data_loader='val_dataset', device='device') - def validate(model, val_dataset, device): - # Run a validation loop at the end of each epoch. - for x_batch_val, y_batch_val in val_dataset: - val_logits = model(x_batch_val, training=False) - # Update val metrics - val_acc_metric.update_state(y_batch_val, val_logits) - val_acc = val_acc_metric.result() - val_acc_metric.reset_states() - print("Validation acc: %.4f" % (float(val_acc),)) - - return {'validation_accuracy': val_acc, } - # Save the initial model state - train(model, fed_dataset.get_train_loader(), optimizer, 'cpu', warmup=True) - initial_model = tf.keras.models.clone_model(model) - - # The Interactive API supports registering functions defined in the main module or imported. - - # Create an experiment in the federation - experiment_name = 'mnist_test_experiment' - fl_experiment = FLExperiment(federation=federation, - experiment_name=experiment_name, - serializer_plugin='openfl.plugins.interface_serializer.' - 'keras_serializer.KerasSerializer') - # Using autoreload causes a pickling error - - # The following command zips the workspace and python requirements - # to be transferred to collaborator nodes - fl_experiment.start(model_provider=MI, - task_keeper=TI, - data_loader=fed_dataset, - rounds_to_train=2, - opt_treatment='CONTINUE_GLOBAL') - - fl_experiment.stream_metrics() - best_model = fl_experiment.get_best_model() - fl_experiment.remove_experiment_data() - validate(initial_model, fed_dataset.get_valid_loader(), 'cpu') - validate(best_model, fed_dataset.get_valid_loader(), 'cpu')
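One caveat about the script above: tf.keras.models.clone_model reproduces the architecture but instantiates fresh, randomly initialized weights, so the clone taken after the warmup step is not a snapshot of the model's trained state. If a true snapshot were the goal, the weights would need to be copied explicitly; a minimal sketch against the same model object:

initial_model = tf.keras.models.clone_model(model)
# clone_model() creates newly initialized weights; copy the current
# weights over if the clone should reflect the model's present state.
initial_model.set_weights(model.get_weights())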
diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/run.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/run.py deleted file mode 100755 index 05e2abe809..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/run.py +++ /dev/null @@ -1,11 +0,0 @@ -from pathlib import Path - -from tests.github.interactive_api_director import utils -from tests.github.interactive_api_director.experiments.tensorflow_mnist import experiment - - -if __name__ == '__main__': - root_dir = Path(__file__).parent - director = utils.start_director(root_dir / 'director') - envoy = utils.start_envoy(root_dir / 'envoy') - experiment.run()
diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/settings.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/settings.py deleted file mode 100644 index b78ab1e5f2..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/settings.py +++ /dev/null @@ -1,35 +0,0 @@ -from tensorflow import keras -from tensorflow.keras import layers -import numpy as np - - -# Describe the model and optimizer - -inputs = keras.Input(shape=(784,), name="digits") -x1 = layers.Dense(64, activation="relu")(inputs) -x2 = layers.Dense(64, activation="relu")(x1) -outputs = layers.Dense(10, name="predictions")(x2) -model = keras.Model(inputs=inputs, outputs=outputs) - -# Instantiate an optimizer. -optimizer = keras.optimizers.legacy.SGD(learning_rate=1e-3) -# Instantiate a loss function. -loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True) -model.compile(optimizer, loss_fn) -# Prepare the metrics. -train_acc_metric = keras.metrics.SparseCategoricalAccuracy() -val_acc_metric = keras.metrics.SparseCategoricalAccuracy() - - -# Prepare data - -# Prepare the training dataset. 
-batch_size = 64 -(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() -x_train = np.reshape(x_train, (-1, 784)) -x_test = np.reshape(x_test, (-1, 784)) - -X_valid = x_train[-10000:] -y_valid = y_train[-10000:] -X_train = x_train[:-10000] -y_train = y_train[:-10000]
diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/tasks.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/tasks.py deleted file mode 100644 index b71c146a27..0000000000 --- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/tasks.py +++ /dev/null @@ -1,55 +0,0 @@ -from openfl.interface.interactive_api.experiment import TaskInterface -from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import loss_fn, \ - train_acc_metric, val_acc_metric - -task_interface = TaskInterface() - - -@task_interface.register_fl_task(model='model', data_loader='train_dataset', - device='device', optimizer='optimizer') -def train(model, train_dataset, optimizer, device, loss_fn=loss_fn, warmup=False): - import tensorflow as tf - - # Iterate over the batches of the dataset. - for step, (x_batch_train, y_batch_train) in enumerate(train_dataset): - with tf.GradientTape() as tape: - logits = model(x_batch_train, training=True) - loss_value = loss_fn(y_batch_train, logits) - grads = tape.gradient(loss_value, model.trainable_weights) - optimizer.apply_gradients(zip(grads, model.trainable_weights)) - - # Update training metric. - train_acc_metric.update_state(y_batch_train, logits) - - # Log every 200 batches. - if step % 200 == 0: - print( - "Training loss (for one batch) at step %d: %.4f" - % (step, float(loss_value)) - ) - print("Seen so far: %d samples" % ((step + 1) * 64)) - if warmup: - break - - # Display metrics at the end of each epoch. - train_acc = train_acc_metric.result() - print("Training acc over epoch: %.4f" % (float(train_acc),)) - - # Reset training metrics at the end of each epoch - train_acc_metric.reset_states() - - return {'train_acc': train_acc} - - -@task_interface.register_fl_task(model='model', data_loader='val_dataset', device='device') -def validate(model, val_dataset, device): - # Run a validation loop at the end of each epoch. 
- for x_batch_val, y_batch_val in val_dataset: - val_logits = model(x_batch_val, training=False) - # Update val metrics - val_acc_metric.update_state(y_batch_val, val_logits) - val_acc = val_acc_metric.result() - val_acc_metric.reset_states() - print("Validation acc: %.4f" % (float(val_acc),)) - - return {'validation_accuracy': val_acc}
diff --git a/tests/github/interactive_api_director/utils.py b/tests/github/interactive_api_director/utils.py deleted file mode 100644 index a2a86eb259..0000000000 --- a/tests/github/interactive_api_director/utils.py +++ /dev/null @@ -1,37 +0,0 @@ -import subprocess -import time -import sys - - -def start_director(cwd): - director = subprocess.Popen( - 'fx director start ' - '--disable-tls ' - '-c director_config.yaml', - cwd=cwd, shell=True - ) - time.sleep(3) - if director.poll() is not None: - print('Error: failed to start director') - sys.exit(1) - return director - - -def start_envoy(cwd): - subprocess.check_call( - [sys.executable, '-m', 'pip', 'install', '-r', 'sd_requirements.txt'], cwd=cwd - ) - envoy = subprocess.Popen( - 'fx envoy start ' - '-n env_one ' - '--disable-tls ' - '--envoy-config-path envoy_config.yaml ' - '-dh localhost ' - '-dp 50051', - cwd=cwd, shell=True - ) - time.sleep(10) - if envoy.poll() is not None: - print('Error: failed to start envoy') - sys.exit(1) - return envoy
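The helpers above launch long-lived fx processes but leave shutdown to the caller, and run.py never terminates the handles it receives. A minimal cleanup sketch under the same subprocess usage; stop_processes is an illustrative name, not part of the deleted module:

import subprocess

def stop_processes(*procs: subprocess.Popen, timeout: int = 10):
    """Ask each process to exit, then force-kill any that ignore the request."""
    for proc in procs:
        proc.terminate()
    for proc in procs:
        try:
            proc.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            proc.kill()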
- """ - self.data_dir = data_dir - self.split = split - image_sets_file = os.path.join(self.data_dir, "ImageSets", "Main", "%s.txt" % self.split) - self.ids = self._read_image_ids(image_sets_file) - self.keep_difficult = keep_difficult - - self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} - - def get_item(self, index: int): - # -> Tuple(np.ndarray, np.ndarray) - img_id = self.ids[index] - img = self._read_image(img_id) - target = self.get_annotation(index)[1] - return img, target - - def __len__(self): - return len(self.ids) - - @property - def sample_shape(self): - # int( sum( [str(dim) for dim in sample.shape] ) ) - return self.get_img_info(0) - - @property - def target_shape(self) -> int: - return 1 - - @property - def dataset_description(self) -> str: - return 'VOC2007' if '2007' in self.data_dir else 'VOC2012' - - def get_annotation(self, index): - image_id = self.ids[index] - return image_id, self._get_annotation(image_id) - - @staticmethod - def _read_image_ids(image_sets_file): - ids = [] - with open(image_sets_file, encoding='utf-8') as f: - for line in f: - ids.append(line.split(' ')[0]) - return ids - - def _get_annotation(self, image_id): - annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % image_id) - objects = ET.parse(annotation_file).findall("object") - boxes = [] - labels = [] - is_difficult = [] - for obj in objects: - class_name = obj.find('name').text.lower().strip() - bbox = obj.find('bndbox') - # VOC dataset format follows Matlab, in which indexes start from 0 - x1 = float(bbox.find('xmin').text) - 1 - y1 = float(bbox.find('ymin').text) - 1 - x2 = float(bbox.find('xmax').text) - 1 - y2 = float(bbox.find('ymax').text) - 1 - boxes.append([x1, y1, x2, y2]) - labels.append(self.class_dict[class_name]) - is_difficult_str = obj.find('difficult').text - is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) - - return (np.array(boxes, dtype=np.float32), - np.array(labels, dtype=np.int64), - np.array(is_difficult, dtype=np.uint8)) - - def get_img_info(self, index): - img_id = self.ids[index] - annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % img_id) - anno = ET.parse(annotation_file).getroot() - size = anno.find("size") - im_info = tuple(map(int, ( - size.find('height').text, size.find('width').text, size.find('depth').text))) - return np.array(im_info) - - def _read_image(self, image_id): - image_file = os.path.join(self.data_dir, "JPEGImages", "%s.jpg" % image_id) - image = Image.open(image_file).convert("RGB") - image = np.array(image) - return image diff --git a/tests/github/pki_insecure_client.py b/tests/github/pki_insecure_client.py deleted file mode 100644 index 6246614f39..0000000000 --- a/tests/github/pki_insecure_client.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -from pathlib import Path -import time -import sys -import shutil -import subprocess -import signal -from openfl.interface.pki import run as run_pki - -from openfl.utilities.workspace import set_directory -from threading import Thread - - -CA_PATH = Path('~/.local/ca').expanduser() -CA_URL = 'localhost:9123' -CA_PASSWORD = 'qwerty' -DIRECTOR_SUBJECT_NAME = 'localhost' -WORKSPACE_PATH = Path('tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/') -EXPECTED_ERROR = ('Handshake failed with fatal error') - - -def start_ca_server(): - def start_server(): - try: - run_pki(str(CA_PATH)) - except subprocess.CalledProcessError as e: - 
if signal.Signals(-e.returncode) is signal.SIGKILL: # intended stop signal - pass - else: - raise - ca_server = Thread(target=start_server, daemon=True) - ca_server.start() - time.sleep(3) # Wait for server to initialize - - if not ca_server.is_alive(): - print('CA Server failed to initialize') - sys.exit(1) - return ca_server - - -def install_ca(): - pki_install = subprocess.Popen([ - 'fx', 'pki', 'install', - '-p', str(CA_PATH), - '--password', str(CA_PASSWORD) - ], stdin=subprocess.PIPE) - time.sleep(3) - pki_install.communicate('y'.encode('utf-8')) - - -def get_token(): - token_output = subprocess.check_output([ - 'fx', 'pki', 'get-token', - '-n', DIRECTOR_SUBJECT_NAME, - '-p', str(CA_PATH) - ]) - return token_output.decode('utf-8').split('\n')[1] - - -def certify_director(token): - subprocess.check_call([ - 'fx', 'pki', 'certify', - '-n', DIRECTOR_SUBJECT_NAME, - '-t', token, - '-c', f'{CA_PATH / "cert"}', - '-p', str(CA_PATH) - ]) - - -def start_director(tls=True): - director_config_path = WORKSPACE_PATH / 'director' / 'director_config.yaml' - root_cert_path = CA_PATH / 'cert' / 'root_ca.crt' - private_key_path = CA_PATH / 'cert' / f'{DIRECTOR_SUBJECT_NAME}.key' - public_cert_path = CA_PATH / 'cert' / f'{DIRECTOR_SUBJECT_NAME}.crt' - params = [ - '-c', str(director_config_path), - '-rc', str(root_cert_path), - '-pk', str(private_key_path), - '-oc', str(public_cert_path) - ] - if not tls: - params.append('--disable-tls') - director = subprocess.Popen([ - 'fx', 'director', 'start', - *params - ], stderr=subprocess.PIPE) - return director - - -def stop_server(ca_server): - subprocess.check_call([ - 'fx', 'pki', 'uninstall', - '-p', str(CA_PATH) - ]) - if ca_server.is_alive(): - ca_server.join() - - -def start_envoy(tls=False): - envoy_folder = WORKSPACE_PATH / 'envoy' - with set_directory(envoy_folder): - subprocess.check_call([ - sys.executable, '-m', 'pip', 'install', '-r', 'sd_requirements.txt' - ]) - params = [ - '-n', 'one', - '-dh', 'localhost', - '-dp', '50051', - '-ec', 'envoy_config.yaml' - ] - if not tls: - params.append('--disable-tls') - envoy = subprocess.Popen([ - 'fx', 'envoy', 'start', - *params - ]) - return envoy - - -if __name__ == '__main__': - shutil.rmtree(CA_PATH, ignore_errors=True) - install_ca() - ca_server = start_ca_server() - try: - token = get_token() - certify_director(token) - director = start_director(tls=True) - time.sleep(3) # Wait for Director to start - try: - envoy = start_envoy(tls=False) - time.sleep(3) # Wait for envoy to start - # Expectation is that secure server won't let insecure client connect - while True: - tmp = director.stderr.readline().decode('utf-8') - if EXPECTED_ERROR in tmp: - print('Handshake failed for insecure connections as expected.') - break - finally: - director.kill() - finally: - stop_server(ca_server)
diff --git a/tests/openfl/api_layer/__init__.py b/tests/openfl/api_layer/__init__.py deleted file mode 100644 index e9f15b3a98..0000000000 --- a/tests/openfl/api_layer/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""tests.openfl.api_layer package."""
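The test module below pulls the federation_object fixture in through a direct import (with # NOQA to quiet the linter); pytest's more conventional sharing mechanism is a conftest.py next to the tests, which makes the fixture discoverable without imports. A sketch of that arrangement, reusing the same fixture body; the file placement is hypothetical:

# tests/openfl/api_layer/conftest.py
from unittest import mock

import pytest

from openfl.interface.interactive_api.federation import Federation


@pytest.fixture
@mock.patch('openfl.interface.interactive_api.federation.DirectorClient')
def federation_object(mock_client_class):
    """Federation fixture, auto-discovered by every test in this package."""
    mock_client_class.return_value.get_dataset_info.return_value = ((10, 10, 3), (2,))
    return Federation(client_id='id test')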
diff --git a/tests/openfl/api_layer/test_experiment.py b/tests/openfl/api_layer/test_experiment.py deleted file mode 100644 index 4dcdd9ba41..0000000000 --- a/tests/openfl/api_layer/test_experiment.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Experiment API tests module.""" - -import pytest - -from openfl.interface.interactive_api.experiment import FLExperiment -from .test_federation import federation_object # NOQA -# TaskInterface, DataInterface, ModelInterface, - -EXPERIMENT_NAME = 'test experiment' - - -@pytest.fixture -def experiment_object(federation_object): # NOQA - """Experiment object fixture.""" - experiment_object = FLExperiment( - federation=federation_object, - experiment_name=EXPERIMENT_NAME) - return experiment_object - - -def test_initialization(experiment_object): - """Test experiment object initialization.""" - assert not experiment_object.experiment_submitted - assert experiment_object.serializer_plugin - - -def test_get_best_model(experiment_object): - """Test get_best_model method.""" - model = experiment_object.get_best_model() - assert model is None
diff --git a/tests/openfl/api_layer/test_federation.py b/tests/openfl/api_layer/test_federation.py deleted file mode 100644 index abaf436c9c..0000000000 --- a/tests/openfl/api_layer/test_federation.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Federation API tests module.""" - -from unittest import mock - -import pytest - -from openfl.interface.interactive_api.federation import Federation - -CLIENT_ID = 'id test' -SAMPLE_SHAPE = (10, 10, 3) -TARGET_SHAPE = (2,) - - -@pytest.fixture -@mock.patch('openfl.interface.interactive_api.federation.DirectorClient') -def federation_object(mock_client_class): - """Federation object fixture.""" - mock_client_instance = mock.Mock() - mock_client_class.return_value = mock_client_instance - mock_client_instance.get_dataset_info.return_value = (SAMPLE_SHAPE, TARGET_SHAPE) - return Federation(client_id=CLIENT_ID) - - -def test_federation_initialization(federation_object): - """Test Federation initialization.""" - assert federation_object.sample_shape == SAMPLE_SHAPE - assert federation_object.target_shape == TARGET_SHAPE - federation_object.dir_client.get_dataset_info.assert_called_once() - - -def test_dummy_shard_descriptor(federation_object): - """Test dummy shard descriptor object.""" - dummy_shard_desc = federation_object.get_dummy_shard_descriptor(10) - dummy_shard_dataset = dummy_shard_desc.get_dataset('') - sample, target = dummy_shard_dataset[0] - assert sample.shape == SAMPLE_SHAPE - assert target.shape == TARGET_SHAPE
diff --git a/tests/openfl/communication/__init__.py b/tests/openfl/communication/__init__.py deleted file mode 100644 index 7cb606da66..0000000000 --- a/tests/openfl/communication/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""tests.openfl.communication package."""
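A note on the version branching in the two gRPC client test modules below: the named .args/.kwargs accessors on mock.call_args only exist from Python 3.8, so older interpreters must index positionally. A minimal illustration, assuming nothing beyond the standard library:

from unittest import mock

stub = mock.Mock()
request = object()
stub.GetTrainedModel(request)

call = stub.GetTrainedModel.call_args
assert call[0][0] is request    # positional indexing works on every version
assert call.args[0] is request  # .args / .kwargs were added in Python 3.8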
diff --git a/tests/openfl/communication/test_api_client.py b/tests/openfl/communication/test_api_client.py deleted file mode 100644 index d781b65dc2..0000000000 --- a/tests/openfl/communication/test_api_client.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Director API client tests module.""" - -import sys -from unittest import mock - -import pytest - -from openfl.protocols import director_pb2 -from openfl.transport.grpc.director_client import DirectorClient - - -@pytest.fixture -@mock.patch('openfl.transport.grpc.director_client.director_pb2_grpc') -def director_client(director_pb2_grpc): - """Director client fixture.""" - director_pb2_grpc.DirectorStub.return_value = mock.Mock() - - client_id = 'one' - director_host = 'localhost' - director_port = 50051 - tls = False - root_certificate, private_key, certificate = None, None, None - director_client = DirectorClient( - director_host=director_host, - director_port=director_port, - client_id=client_id, - tls=tls, - root_certificate=root_certificate, - private_key=private_key, - certificate=certificate - ) - return director_client - - -def test_get_dataset_info(director_client): - """Test get_dataset_info RPC.""" - director_client.get_dataset_info() - director_client.stub.GetDatasetInfo.assert_called_once() - - -@pytest.mark.parametrize( - 'clients_method,model_type', [ - ('get_best_model', 'BEST_MODEL'), - ('get_last_model', 'LAST_MODEL'), - ]) -@mock.patch('openfl.transport.grpc.director_client.deconstruct_model_proto') -def test_get_best_model(deconstruct_model_proto, director_client, - clients_method, model_type): - """Test get_best_model and get_last_model RPCs.""" - deconstruct_model_proto.return_value = {}, {} - getattr(director_client, clients_method)('test name') - director_client.stub.GetTrainedModel.assert_called_once() - - request = director_client.stub.GetTrainedModel.call_args - if sys.version_info < (3, 8): - incoming_model_type = request[0][0].model_type - else: - incoming_model_type = request.args[0].model_type - assert incoming_model_type == getattr(director_pb2.GetTrainedModelRequest, model_type)
diff --git a/tests/openfl/communication/test_envoys_client.py b/tests/openfl/communication/test_envoys_client.py deleted file mode 100644 index 9304f9807a..0000000000 --- a/tests/openfl/communication/test_envoys_client.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Director Envoy client tests module.""" - -import sys -from unittest import mock - -import pytest - -from openfl.transport.grpc.director_client import ShardDirectorClient - - -@pytest.fixture -@mock.patch('openfl.transport.grpc.director_client.director_pb2_grpc') -def director_client(director_pb2_grpc): - """Director client fixture.""" - director_pb2_grpc.DirectorStub.return_value = mock.Mock() - - director_host = 'fqdn' - director_port = 50051 - shard_name = 'test shard' - tls = False - root_certificate, private_key, certificate = None, None, None - director_client = ShardDirectorClient( - director_host=director_host, - director_port=director_port, - shard_name=shard_name, - tls=tls, - root_certificate=root_certificate, - private_key=private_key, - certificate=certificate, - ) - return director_client - - -def test_report_shard_info(director_client): - """Test report_shard_info RPC.""" - shard_descriptor = mock.MagicMock() - shard_descriptor.dataset_description = 'description' - shard_descriptor.__len__.return_value = 10 - shard_descriptor.sample_shape = [str(dim) for dim in (1, 2)] - shard_descriptor.target_shape = [str(dim) for dim in (10,)] - - cuda_devices = () - - director_client.report_shard_info(shard_descriptor, cuda_devices) - - director_client.stub.UpdateShardInfo.assert_called_once() - if sys.version_info < (3, 8): - resp = director_client.stub.UpdateShardInfo.call_args[0][0] - else: - resp = director_client.stub.UpdateShardInfo.call_args.args[0] - assert resp.shard_info.shard_description == shard_descriptor.dataset_description - assert resp.shard_info.sample_shape == shard_descriptor.sample_shape
diff --git a/tests/openfl/component/director/test_experiment_representation.py b/tests/openfl/component/director/test_experiment_representation.py deleted file mode 100644 index 45fca4dd52..0000000000 --- 
a/tests/openfl/component/director/test_experiment_representation.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Experiment representation class tests module.""" - -from pathlib import Path -from unittest import mock - -import pytest - -from openfl.component.director.experiment import Experiment - - -@pytest.fixture -@mock.patch('openfl.component.director.experiment.Path') -def experiment_rep(_): - """Initialize an experiment.""" - experiment = Experiment( - name='test_exp', - archive_path=mock.MagicMock(spec_set=Path), - collaborators=['test_col'], - sender='test_user', - init_tensor_dict={}, - ) - return experiment - - -@pytest.mark.asyncio -@mock.patch('openfl.component.director.experiment.ExperimentWorkspace') -@pytest.mark.parametrize('review_result,archive_unlink_call_count,experiment_status', - [(True, 0, 'pending'), (False, 1, 'rejected')] - ) -async def test_review_experiment( - ExperimentWorkspace, experiment_rep, review_result, # noqa: N803 - archive_unlink_call_count, experiment_status -): - """Review experiment method test.""" - review_callback = mock.Mock() - review_callback.return_value = review_result - - result = await experiment_rep.review_experiment(review_plan_callback=review_callback) - - assert result is review_callback.return_value - ExperimentWorkspace.assert_called_once_with( - 'test_exp', - experiment_rep.archive_path, - is_install_requirements=False, - remove_archive=False - ) - assert experiment_rep.archive_path.unlink.call_count == archive_unlink_call_count - assert experiment_rep.status == experiment_status diff --git a/tests/openfl/interface/interactive_api/test_experiment.py b/tests/openfl/interface/interactive_api/test_experiment.py deleted file mode 100644 index 3c3c0900c5..0000000000 --- a/tests/openfl/interface/interactive_api/test_experiment.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Experiment tests.""" - -from unittest import mock - -import pytest - -from openfl.component.assigner.tasks import TrainTask -from openfl.component.assigner.tasks import ValidateTask -from openfl.interface.interactive_api.experiment import FLExperiment -from openfl.interface.interactive_api.experiment import TaskKeeper - - -@pytest.fixture() -def all_registered_tasks(): - """Return all registered tasks fixture.""" - return { - 'train': TrainTask( - name='train', - function_name='train_func', - ), - 'locally_tuned_model_validate': ValidateTask( - name='locally_tuned_model_validate', - function_name='validate', - apply_local=True, - ), - 'aggregated_model_validate': ValidateTask( - name='aggregated_model_validate', - function_name='validate', - ), - } - - -def test_define_task_assigner_all_tasks(all_registered_tasks): - """Test define_task_assigner if all task types are registered.""" - task_keeper = TaskKeeper() - task_keeper.get_registered_tasks = mock.Mock(return_value=all_registered_tasks) - rounds_to_train = 10 - task_assigner_fn = FLExperiment(None).define_task_assigner(task_keeper, rounds_to_train) - tasks_by_collaborator = task_assigner_fn(['one', 'two'], 1) - assert tasks_by_collaborator['one'] == list(all_registered_tasks.values()) - - -def test_define_task_assigner_val_tasks(): - """Test define_task_assigner if only validate types are registered.""" - task_keeper = TaskKeeper() - agg_task = ValidateTask( - name='aggregated_model_validate', - function_name='validate', - ) - task_keeper.get_registered_tasks = 
mock.Mock(return_value={ - 'aggregated_model_validate': agg_task - }) - rounds_to_train = 1 - task_assigner_fn = FLExperiment(None).define_task_assigner(task_keeper, rounds_to_train) - tasks_by_collaborator = task_assigner_fn(['one', 'two'], 1) - assert tasks_by_collaborator['one'] == [agg_task] - - -def test_define_task_assigner_exception_validate(): - """Test define_task_assigner if only validate tasks are registered and rounds more than 1.""" - task_keeper = TaskKeeper() - agg_task = ValidateTask( - name='aggregated_model_validate', - function_name='validate', - ) - task_keeper.get_registered_tasks = mock.Mock(return_value={ - 'aggregated_model_validate': agg_task - }) - rounds_to_train = 10 - with pytest.raises(Exception): - FLExperiment(None).define_task_assigner(task_keeper, rounds_to_train) - - -def test_define_task_assigner_exception_only_train(): - """Test define_task_assigner if only train task types are registered.""" - task_keeper = TaskKeeper() - train_task = TrainTask( - name='train', - function_name='train', - ) - task_keeper.get_registered_tasks = mock.Mock(return_value={ - 'train': train_task - }) - rounds_to_train = 10 - with pytest.raises(Exception): - FLExperiment(None).define_task_assigner(task_keeper, rounds_to_train) - - -def test_define_task_assigner_exception_no_tasks(): - """Test define_task_assigner if no tasks are registered.""" - task_keeper = TaskKeeper() - task_keeper.get_registered_tasks = mock.Mock(return_value={}) - rounds_to_train = 1 - with pytest.raises(Exception): - FLExperiment(None).define_task_assigner(task_keeper, rounds_to_train)
diff --git a/tests/openfl/transport/__init__.py b/tests/openfl/transport/__init__.py deleted file mode 100644 index e51592e007..0000000000 --- a/tests/openfl/transport/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""tests.openfl.transport package."""
diff --git a/tests/openfl/transport/grpc/__init__.py b/tests/openfl/transport/grpc/__init__.py deleted file mode 100644 index 2b941133b3..0000000000 --- a/tests/openfl/transport/grpc/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""tests.openfl.transport.grpc package."""
diff --git a/tests/openfl/transport/grpc/test_director_server.py b/tests/openfl/transport/grpc/test_director_server.py deleted file mode 100644 index 138443cfbc..0000000000 --- a/tests/openfl/transport/grpc/test_director_server.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""Director tests module.""" - -from pathlib import Path -from unittest import mock - -import pytest - -from openfl.component.director import Director -from openfl.transport import DirectorGRPCServer - - -@pytest.fixture -def insecure_director(): - """Initialize an insecure director server.""" - director = DirectorGRPCServer(director_cls=Director, tls=False) - - return director - - -@pytest.fixture -def secure_director(): - """Initialize a secure director server.""" - director = DirectorGRPCServer( - director_cls=Director, - root_certificate=Path('./cert/root_ca.crt').absolute(), - private_key=Path('./cert/localhost.key').absolute(), - certificate=Path('./cert/localhost.crt').absolute() - ) - return director - - -def test_fill_certs(insecure_director, secure_director): - """Test that _fill_certs fills the certificate params correctly.""" - assert insecure_director.root_certificate is None - assert
insecure_director.private_key is None - assert insecure_director.certificate is None - assert isinstance(secure_director.root_certificate, Path) - assert isinstance(secure_director.private_key, Path) - assert isinstance(secure_director.certificate, Path) - with pytest.raises(Exception): - secure_director._fill_certs('.', '.', None) - with pytest.raises(Exception): - secure_director._fill_certs('.', None, '.') - with pytest.raises(Exception): - secure_director._fill_certs(None, '.', '.') - secure_director._fill_certs('.', '.', '.') - - -def test_get_caller_tls(insecure_director): - """Test that get_caller works correctly with TLS.""" - insecure_director.tls = True - context = mock.Mock() - client_id = 'client_id' - context.auth_context = mock.Mock( - return_value={'x509_common_name': [client_id.encode('utf-8')]} - ) - result = insecure_director.get_caller(context) - assert result == client_id - - -def test_get_caller_no_tls(insecure_director): - """Test that get_caller works correctly without TLS.""" - context = mock.Mock() - client_id = 'client_id' - context.invocation_metadata.return_value = (('client_id', client_id),) - result = insecure_director.get_caller(context) - assert result == client_id - - -def test_get_caller_no_tls_no_client_id(insecure_director): - """Test that get_caller works correctly without TLS and without a client_id.""" - context = mock.Mock() - context.invocation_metadata = mock.Mock() - context.invocation_metadata.return_value = (('key', 'value'),) - default_client_id = '__default__' - result = insecure_director.get_caller(context) - assert result == default_client_id
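For reference on the caller-identification path exercised by the last two tests: with TLS the server takes the identity from the x509 common name in the auth context, while without TLS it reads the client_id key from the call's invocation metadata and falls back to '__default__' when the key is missing. A minimal client-side sketch of supplying that metadata, assuming the generated DirectorStub (the request type name is illustrative):

import grpc

from openfl.protocols import director_pb2, director_pb2_grpc

channel = grpc.insecure_channel('localhost:50051')
stub = director_pb2_grpc.DirectorStub(channel)
# Without TLS, get_caller() reads this ('client_id', ...) metadata pair
# on the server side; omitting it yields the '__default__' caller.
response = stub.GetDatasetInfo(
    director_pb2.GetDatasetInfoRequest(),
    metadata=(('client_id', 'frontend'),),
)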