diff --git a/.github/workflows/task_runner_e2e.yml b/.github/workflows/task_runner_e2e.yml index 7f7f904aa3..9603db81cf 100644 --- a/.github/workflows/task_runner_e2e.yml +++ b/.github/workflows/task_runner_e2e.yml @@ -1,3 +1,4 @@ +--- #--------------------------------------------------------------------------- # Workflow to run Task Runner end to end tests # Authors - Noopur, Payal Chaurasiya @@ -6,16 +7,16 @@ name: Task Runner E2E on: schedule: - - cron: '0 0 * * *' # Run every day at midnight + - cron: "0 0 * * *" # Run every day at midnight workflow_dispatch: inputs: num_rounds: - description: 'Number of rounds to train' + description: "Number of rounds to train" required: false default: "5" type: string num_collaborators: - description: 'Number of collaborators' + description: "Number of collaborators" required: false default: "2" type: string @@ -29,16 +30,16 @@ env: NUM_COLLABORATORS: ${{ inputs.num_collaborators || '2' }} jobs: - test_run: - name: tr + test: + name: tr_tls runs-on: ubuntu-22.04 timeout-minutes: 120 # 2 hours strategy: matrix: # There are open issues for some of the models, so excluding them for now: # model_name: [ "torch_cnn_mnist", "keras_cnn_mnist", "torch_cnn_histology" ] - model_name: [ "torch_cnn_mnist", "keras_cnn_mnist" ] - python_version: [ "3.8", "3.9", "3.10" ] + model_name: ["torch_cnn_mnist", "keras_cnn_mnist"] + python_version: ["3.8", "3.9", "3.10"] fail-fast: false # do not immediately fail if one of the combinations fail env: @@ -46,50 +47,115 @@ jobs: PYTHON_VERSION: ${{ matrix.python_version }} steps: - - name: Checkout OpenFL repository - id: checkout_openfl - uses: actions/checkout@v4.1.1 - with: - fetch-depth: 2 # needed for detecting changes - submodules: "true" - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Set up Python - id: setup_python - uses: actions/setup-python@v3 - with: - python-version: ${{ env.PYTHON_VERSION }} - - - name: Install dependencies - id: install_dependencies - run: | - python -m pip install 
--upgrade pip - pip install . - pip install -r test-requirements.txt - - - name: Run Task Runner E2E tests - id: run_task_runner_tests - run: | - python -m pytest -s tests/end_to_end/test_suites/task_runner_tests.py -m ${{ env.MODEL_NAME }} --num_rounds $NUM_ROUNDS --num_collaborators $NUM_COLLABORATORS --model_name ${{ env.MODEL_NAME }} - echo "Task runner end to end test run completed" - - - name: Print test summary # Print the test summary only if the tests were run - id: print_test_summary - if: steps.run_task_runner_tests.outcome == 'success' || steps.run_task_runner_tests.outcome == 'failure' - run: | - export PYTHONPATH="$PYTHONPATH:." - python tests/end_to_end/utils/summary_helper.py - echo "Test summary printed" - - - name: Tar files # Tar the test results only if the tests were run - id: tar_files - if: steps.run_task_runner_tests.outcome == 'success' || steps.run_task_runner_tests.outcome == 'failure' - run: tar -cvf result.tar results - - - name: Upload Artifacts # Upload the test results only if the tar was created - id: upload_artifacts - uses: actions/upload-artifact@v4 - if: steps.tar_files.outcome == 'success' - with: - name: task_runner_${{ env.MODEL_NAME }}_python${{ env.PYTHON_VERSION }}_${{ github.run_id }} - path: result.tar + - name: Checkout OpenFL repository + id: checkout_openfl + uses: actions/checkout@v4.1.1 + with: + fetch-depth: 2 # needed for detecting changes + submodules: "true" + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Python + id: setup_python + uses: actions/setup-python@v3 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install dependencies + id: install_dependencies + run: | + python -m pip install --upgrade pip + pip install . 
+ pip install -r test-requirements.txt + + - name: Run Task Runner E2E tests with TLS + id: run_tests + run: | + python -m pytest -s tests/end_to_end/test_suites/task_runner_tests.py -m ${{ env.MODEL_NAME }} --num_rounds $NUM_ROUNDS --num_collaborators $NUM_COLLABORATORS --model_name ${{ env.MODEL_NAME }} + echo "Task runner end to end test run completed" + + - name: Print test summary # Print the test summary only if the tests were run + id: print_test_summary + if: steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure' + run: | + export PYTHONPATH="$PYTHONPATH:." + python tests/end_to_end/utils/summary_helper.py + echo "Test summary printed" + + - name: Tar files # Tar the test results only if the tests were run + id: tar_files + if: steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure' + run: tar -cvf result.tar results + + - name: Upload Artifacts # Upload the test results only if the tar was created + id: upload_artifacts + uses: actions/upload-artifact@v4 + if: steps.tar_files.outcome == 'success' + with: + name: task_runner_tls_${{ env.MODEL_NAME }}_python${{ env.PYTHON_VERSION }}_${{ github.run_id }} + path: result.tar + + test_with_non_tls: + name: tr_non_tls + runs-on: ubuntu-22.04 + timeout-minutes: 120 # 2 hours + strategy: + matrix: + # Testing non TLS scenario only for torch_cnn_mnist model and python 3.10 + # If required, this can be extended to other models and python versions + model_name: ["torch_cnn_mnist"] + python_version: ["3.10"] + fail-fast: false # do not immediately fail if one of the combinations fail + + env: + MODEL_NAME: ${{ matrix.model_name }} + PYTHON_VERSION: ${{ matrix.python_version }} + + steps: + - name: Checkout OpenFL repository + id: checkout_openfl + uses: actions/checkout@v4.1.1 + with: + fetch-depth: 2 # needed for detecting changes + submodules: "true" + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Python + id: setup_python + uses: actions/setup-python@v3 + with: + 
python-version: ${{ env.PYTHON_VERSION }} + + - name: Install dependencies + id: install_dependencies + run: | + python -m pip install --upgrade pip + pip install . + pip install -r test-requirements.txt + + - name: Run Task Runner E2E tests without TLS + id: run_tests + run: | + python -m pytest -s tests/end_to_end/test_suites/task_runner_tests.py -m ${{ env.MODEL_NAME }} --num_rounds $NUM_ROUNDS --num_collaborators $NUM_COLLABORATORS --disable_tls + echo "Task runner end to end test run completed" + + - name: Print test summary # Print the test summary only if the tests were run + id: print_test_summary + if: steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure' + run: | + export PYTHONPATH="$PYTHONPATH:." + python tests/end_to_end/utils/summary_helper.py + echo "Test summary printed" + + - name: Tar files # Tar the test results only if the tests were run + id: tar_files + if: steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure' + run: tar -cvf result.tar results + + - name: Upload Artifacts # Upload the test results only if the tar was created + id: upload_artifacts + uses: actions/upload-artifact@v4 + if: steps.tar_files.outcome == 'success' + with: + name: task_runner_non_tls_${{ env.MODEL_NAME }}_python${{ env.PYTHON_VERSION }}_${{ github.run_id }} + path: result.tar diff --git a/tests/end_to_end/README.md b/tests/end_to_end/README.md index 3971b67986..ae725a170f 100644 --- a/tests/end_to_end/README.md +++ b/tests/end_to_end/README.md @@ -36,15 +36,24 @@ pip install -r test-requirements.txt To run a specific test case, use below command: ```sh -python -m pytest tests/end_to_end/test_suites/ -k -s +python -m pytest -s tests/end_to_end/test_suites/ -k ``` ** -s will ensure all the logs are printed on screen. Ignore, if not required. -To modify the number of collaborators, rounds to train and/or model name, use below parameters: -1. --num_collaborators -2. --num_rounds -3. 
--model_name +Below parameters are available for modification: + +1. --num_collaborators - to modify the number of collaborators +2. --num_rounds - to modify the number of rounds to train +3. --model_name - to use a specific model +4. --disable_tls - to disable TLS communication (by default it is enabled) +5. --disable_client_auth - to disable the client authentication (by default it is enabled) + +For example, to run Task runner with - torch_cnn_mnist model, 3 collaborators, 5 rounds and non-TLS scenario: + +```sh +python -m pytest -s tests/end_to_end/test_suites/task_runner_tests.py --num_rounds 5 --num_collaborators 3 --model_name torch_cnn_mnist --disable_tls +``` ### Output Structure diff --git a/tests/end_to_end/conftest.py b/tests/end_to_end/conftest.py index 3ccffb0e0f..d2c9c20f89 100644 --- a/tests/end_to_end/conftest.py +++ b/tests/end_to_end/conftest.py @@ -17,7 +17,7 @@ # Define a named tuple to store the objects for model owner, aggregator, and collaborators federation_fixture = collections.namedtuple( "federation_fixture", - "model_owner, aggregator, collaborators, model_name, workspace_path, results_dir", + "model_owner, aggregator, collaborators, model_name, disable_client_auth, disable_tls, workspace_path, results_dir", ) @@ -50,9 +50,18 @@ def pytest_addoption(parser): "--model_name", action="store", type=str, - default=constants.DEFAULT_MODEL_NAME, help="Model name", ) + parser.addoption( + "--disable_client_auth", + action="store_true", + help="Disable client authentication", + ) + parser.addoption( + "--disable_tls", + action="store_true", + help="Disable TLS for communication", + ) @pytest.fixture(scope="session", autouse=True) @@ -199,7 +208,7 @@ def pytest_sessionfinish(session, exitstatus): log.debug(f"Cleared .pytest_cache directory at {cache_dir}") -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def fx_federation(request, pytestconfig): """ Fixture for federation. 
This fixture is used to create the model owner, aggregator, and collaborators. @@ -211,18 +220,29 @@ def fx_federation(request, pytestconfig): Returns: federation_fixture: Named tuple containing the objects for model owner, aggregator, and collaborators - Note: As this is a module level fixture, thus no import is required at test level. + Note: As this is a function level fixture, thus no import is required at test level. """ - log.info("Fixture for federation setup using Task Runner API on single machine.") collaborators = [] agg_domain_name = "localhost" # Parse the command line arguments args = parse_arguments() - model_name = args.model_name + # Use the model name from the test case name if not provided as a command line argument + model_name = args.model_name if args.model_name else request.node.name.split("test_")[1] results_dir = args.results_dir or pytestconfig.getini("results_dir") num_collaborators = args.num_collaborators num_rounds = args.num_rounds + disable_client_auth = args.disable_client_auth + disable_tls = args.disable_tls + + log.info( + f"Running federation setup using Task Runner API on single machine with below configurations:\n" + f"\tNumber of collaborators: {num_collaborators}\n" + f"\tNumber of rounds: {num_rounds}\n" + f"\tModel name: {model_name}\n" + f"\tClient authentication: {not disable_client_auth}\n" + f"\tTLS: {not disable_tls}" + ) # Validate the model name and create the workspace name if not model_name.upper() in constants.ModelName._member_names_: @@ -238,26 +258,42 @@ def fx_federation(request, pytestconfig): log.error(f"Failed to create the workspace: {e}") raise e - # Modify and initialize the plan + # Modify the plan try: - model_owner.modify_plan(new_rounds=num_rounds, num_collaborators=num_collaborators) + model_owner.modify_plan( + new_rounds=num_rounds, + num_collaborators=num_collaborators, + disable_client_auth=disable_client_auth, + disable_tls=disable_tls, + ) except Exception as e: log.error(f"Failed to modify 
the plan: {e}") raise e + # For TLS enabled (default) scenario: when the workspace is certified, the collaborators are registered as well + # For TLS disabled scenario: collaborators need to be registered explicitly + if args.disable_tls: + log.info("Disabling TLS for communication") + try: + model_owner.register_collaborators(num_collaborators) + except Exception as e: + log.error(f"Failed to register the collaborators: {e}") + raise e + else: + log.info("Enabling TLS for communication") + try: + model_owner.certify_workspace() + except Exception as e: + log.error(f"Failed to certify the workspace: {e}") + raise e + + # Initialize the plan try: model_owner.initialize_plan(agg_domain_name=agg_domain_name) except Exception as e: log.error(f"Failed to initialize the plan: {e}") raise e - # Modify and initialize the plan - try: - model_owner.certify_workspace() - except Exception as e: - log.error(f"Failed to certify the workspace: {e}") - raise e - # Create the objects for aggregator and collaborators aggregator = participants.Aggregator( agg_domain_name=agg_domain_name, workspace_path=workspace_path @@ -269,6 +305,7 @@ def fx_federation(request, pytestconfig): data_directory_path=i + 1, workspace_path=workspace_path, ) + collaborator.create_collaborator() collaborators.append(collaborator) # Return the federation fixture @@ -277,6 +314,8 @@ def fx_federation(request, pytestconfig): aggregator=aggregator, collaborators=collaborators, model_name=model_name, + disable_client_auth=disable_client_auth, + disable_tls=disable_tls, workspace_path=workspace_path, results_dir=results_dir, ) diff --git a/tests/end_to_end/models/participants.py b/tests/end_to_end/models/participants.py index 5dc582a06c..5bde7f39ec 100644 --- a/tests/end_to_end/models/participants.py +++ b/tests/end_to_end/models/participants.py @@ -112,17 +112,18 @@ def certify_collaborator(self, collaborator_name): raise e return True - def modify_plan(self, new_rounds=None, num_collaborators=None): + def 
modify_plan(self, new_rounds=None, num_collaborators=None, disable_client_auth=False, disable_tls=False): """ Modify the plan to train the model Args: new_rounds (int): Number of rounds to train num_collaborators (int): Number of collaborators + disable_client_auth (bool): Disable client authentication + disable_tls (bool): Disable TLS communication Returns: bool: True if successful, else False """ self.plan_path = os.path.join(self.workspace_path, "plan", "plan.yaml") - log.info(f"Modifying the plan at {self.plan_path}") # Open the file and modify the entries self.rounds_to_train = new_rounds if new_rounds else self.rounds_to_train self.num_collaborators = num_collaborators if num_collaborators else self.num_collaborators @@ -132,13 +133,13 @@ def modify_plan(self, new_rounds=None, num_collaborators=None): data["aggregator"]["settings"]["rounds_to_train"] = int(self.rounds_to_train) data["data_loader"]["settings"]["collaborator_count"] = int(self.num_collaborators) + data["network"]["settings"]["disable_client_auth"] = disable_client_auth + data["network"]["settings"]["tls"] = not disable_tls with open(self.plan_path, "w+") as write_file: yaml.dump(data, write_file) - log.info( - f"Modified the plan to train the model for collaborators {self.num_collaborators} and {self.rounds_to_train} rounds" - ) + log.info(f"Modified the plan at {self.plan_path} with provided parameters.") return True def initialize_plan(self, agg_domain_name): @@ -180,6 +181,41 @@ def certify_workspace(self): raise e return True + def register_collaborators(self, num_collaborators=None): + """ + Register the collaborators + Args: + num_collaborators (int, Optional): Number of collaborators + Returns: + bool: True if successful, else False + """ + self.cols_path = os.path.join(self.workspace_path, "plan", "cols.yaml") + log.info(f"Registering the collaborators..") + self.num_collaborators = num_collaborators if num_collaborators else self.num_collaborators + + try: + # Straightforward writing 
to the yaml file is not recommended here + # As the file might contain spaces and tabs which can cause issues + with open(self.cols_path, "r", encoding="utf-8") as f: + doc = yaml.load(f, Loader=yaml.FullLoader) + + if "collaborators" not in doc.keys() or not doc["collaborators"]: + doc["collaborators"] = [] # Create empty list + + for i in range(self.num_collaborators): + col_name = "collaborator" + str(i+1) + doc["collaborators"].append(col_name) + with open(self.cols_path, "w", encoding="utf-8") as f: + yaml.dump(doc, f) + + log.info( + f"Successfully registered collaborators in {self.cols_path}" + ) + except Exception as e: + log.error(f"Failed to register the collaborators: {e}") + raise e + return True def certify_aggregator(self, agg_domain_name): """ Certify the aggregator request diff --git a/tests/end_to_end/test_suites/sample_tests.py b/tests/end_to_end/test_suites/sample_tests.py index 7c528277e8..a27bf76cbf 100644 --- a/tests/end_to_end/test_suites/sample_tests.py +++ b/tests/end_to_end/test_suites/sample_tests.py @@ -19,8 +19,8 @@ # 7. Start the federation using aggregator and given no of collaborators. # 8. Verify the completion of the federation run. -@pytest.mark.sample_model -def test_sample_model(fx_federation): +@pytest.mark.sample_model_name +def test_sample_model_name(fx_federation): """ Add a proper docstring here.
""" diff --git a/tests/end_to_end/test_suites/task_runner_tests.py b/tests/end_to_end/test_suites/task_runner_tests.py index a80c583acf..371fee8f08 100644 --- a/tests/end_to_end/test_suites/task_runner_tests.py +++ b/tests/end_to_end/test_suites/task_runner_tests.py @@ -17,7 +17,8 @@ def test_torch_cnn_mnist(fx_federation): log.info("Testing torch_cnn_mnist model") # Setup PKI for trusted communication within the federation - assert fed_helper.setup_pki(fx_federation), "Failed to setup PKI for trusted communication" + if not fx_federation.disable_tls: + assert fed_helper.setup_pki(fx_federation), "Failed to setup PKI for trusted communication" # Start the federation results = fed_helper.run_federation(fx_federation) @@ -31,7 +32,8 @@ def test_keras_cnn_mnist(fx_federation): log.info("Testing keras_cnn_mnist model") # Setup PKI for trusted communication within the federation - assert fed_helper.setup_pki(fx_federation), "Failed to setup PKI for trusted communication" + if not fx_federation.disable_tls: + assert fed_helper.setup_pki(fx_federation), "Failed to setup PKI for trusted communication" # Start the federation results = fed_helper.run_federation(fx_federation) @@ -48,7 +50,8 @@ def test_torch_cnn_histology(fx_federation): log.info("Testing torch_cnn_histology model") # Setup PKI for trusted communication within the federation - assert fed_helper.setup_pki(fx_federation), "Failed to setup PKI for trusted communication" + if not fx_federation.disable_tls: + assert fed_helper.setup_pki(fx_federation), "Failed to setup PKI for trusted communication" # Start the federation results = fed_helper.run_federation(fx_federation) diff --git a/tests/end_to_end/utils/conftest_helper.py b/tests/end_to_end/utils/conftest_helper.py index 490a3316db..b8d70fa7ba 100644 --- a/tests/end_to_end/utils/conftest_helper.py +++ b/tests/end_to_end/utils/conftest_helper.py @@ -18,16 +18,20 @@ def parse_arguments(): - num_collaborators (int, default=2): Number of collaborators - 
num_rounds (int, default=5): Number of rounds to train - model_name (str, default="torch_cnn_mnist"): Model name + - disable_client_auth (bool): Disable client authentication + - disable_tls (bool): Disable TLS for communication Raises: SystemExit: If the required arguments are not provided or if any argument parsing error occurs. """ try: parser = argparse.ArgumentParser(description="Provide the required arguments to run the tests") - parser.add_argument("--results_dir", type=str, required=False, help="Directory to store the results") + parser.add_argument("--results_dir", type=str, required=False, default="results", help="Directory to store the results") parser.add_argument("--num_collaborators", type=int, default=2, help="Number of collaborators") parser.add_argument("--num_rounds", type=int, default=5, help="Number of rounds to train") - parser.add_argument("--model_name", type=str, default="torch_cnn_mnist", help="Model name") + parser.add_argument("--model_name", type=str, help="Model name") + parser.add_argument("--disable_client_auth", action="store_true", help="Disable client authentication") + parser.add_argument("--disable_tls", action="store_true", help="Disable TLS for communication") args = parser.parse_known_args()[0] return args diff --git a/tests/end_to_end/utils/federation_helper.py b/tests/end_to_end/utils/federation_helper.py index 3cb091b7ce..1da1c68012 100644 --- a/tests/end_to_end/utils/federation_helper.py +++ b/tests/end_to_end/utils/federation_helper.py @@ -33,7 +33,6 @@ def setup_pki(fed_obj): for collaborator in fed_obj.collaborators: try: log.info(f"Performing operations for {collaborator.collaborator_name}") - collaborator.create_collaborator() collaborator.generate_sign_request() # Below step will add collaborator entries in cols.yaml file. fed_obj.model_owner.certify_collaborator(collaborator.collaborator_name)