diff --git a/.flake8.other b/.flake8.other new file mode 100644 index 000000000000..5582f3331a3c --- /dev/null +++ b/.flake8.other @@ -0,0 +1,8 @@ +[flake8] +select = + F541, # f-string without any placeholders + F841, # local variable 'x' is assigned to but never used + F401, # 'x' imported but unused + E741, # ambiguous variable name 'l' + F821, # undefined name 'x' + E266, # too many leading '#' for block comment \ No newline at end of file diff --git a/.flake8.speech b/.flake8.speech new file mode 100644 index 000000000000..5582f3331a3c --- /dev/null +++ b/.flake8.speech @@ -0,0 +1,8 @@ +[flake8] +select = + F541, # f-string without any placeholders + F841, # local variable 'x' is assigned to but never used + F401, # 'x' imported but unused + E741, # ambiguous variable name 'l' + F821, # undefined name 'x' + E266, # too many leading '#' for block comment \ No newline at end of file diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 67743dc0cbc0..b8e975bbaa89 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,4 @@ .github/ @pablo-garay @ko3n1g @thomasdhc @chtruong814 Dockerfile.ci @pablo-garay @ko3n1g @thomasdhc @chtruong814 +.pylintrc.* @pablo-garay @ko3n1g @thomasdhc @chtruong814 +.flake8.* @pablo-garay @ko3n1g @thomasdhc @chtruong814 \ No newline at end of file diff --git a/.github/workflows/_test_template.yml b/.github/workflows/_test_template.yml index 25753d3792e1..1f14716d7f42 100644 --- a/.github/workflows/_test_template.yml +++ b/.github/workflows/_test_template.yml @@ -108,7 +108,8 @@ jobs: echo "coverage_report=$coverage_report" >> "$GITHUB_OUTPUT" docker exec nemo_container_${{ github.run_id }} bash -c 'ls -al' - docker cp nemo_container_${{ github.run_id }}:/workspace/.coverage .coverage + docker cp nemo_container_${{ github.run_id }}:/workspace/coverage.xml coverage.xml + docker cp nemo_container_${{ github.run_id }}:/workspace/.coverage .coverage exit $EXIT_CODE @@ -117,7 +118,9 @@ jobs: if: inputs.SAVE_COVERAGE_REPORT == true with: name: ${{ steps.main.outputs.coverage_report }} - path: ${{ github.run_id }}/.coverage + path: | + ${{ github.run_id }}/coverage.xml + ${{ github.run_id }}/.coverage include-hidden-files: true - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main" diff --git a/.github/workflows/build-test-publish-wheel.yml b/.github/workflows/build-test-publish-wheel.yml index da940a94b638..a32668e80d7c 100644 --- a/.github/workflows/build-test-publish-wheel.yml +++ b/.github/workflows/build-test-publish-wheel.yml @@ -26,17 +26,11 @@ defaults: jobs: build-test-publish-wheel: - uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.7.0 + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.20.0 with: - image-name: nemo_container - dockerfile: Dockerfile.ci - image-label: nemo-core - build-args: | - IMAGE_LABEL=nemo-core - prune-filter-timerange: 24h dry-run: true python-package: nemo - container-workdir: /workspace + python-version: '3.10' environment: public secrets: TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 7cecdd513ae4..3bbbb8990cb7 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -17,8 +17,11 @@ on: branches: - 'main' - 'r**' + - 'weekly-bump' types: [ labeled ] - + push: + branches: + - main workflow_dispatch: inputs: test_to_run: @@ -37,25 +40,48 @@ jobs: runs-on: ubuntu-latest outputs: test_to_run: ${{ steps.test_to_run.outputs.main }} - 
all: ${{ steps.all.outputs.main }} - event_name: ${{ steps.github-event.outputs.main }} + env: + TESTS_TO_RUN: ${{ inputs.test_to_run }} + EVENT_NAME: ${{ github.event_name }} + HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }} steps: - - name: Parse test_to_run + - name: Checkout branch + uses: actions/checkout@v4 + + - name: Select tests to run id: test_to_run run: | - parsed_string=$(echo ${{ inputs.test_to_run || 'all' }} | jq -c --raw-input 'split(",")') + # For manual dispatch, we replace `all` with the actual job names + if [[ "$EVENT_NAME" == "workflow_dispatch" && "$TESTS_TO_RUN" == "all" ]]; then + TESTS_TO_RUN=$(cat .github/workflows/cicd-main.yml | yq '.jobs | [to_entries[] | .key] | join(",")') + + # For manual dispatch with provided list of tests, do nothing + elif [[ "$EVENT_NAME" == "workflow_dispatch" && "$TESTS_TO_RUN" != "all" ]]; then + TESTS_TO_RUN=$TESTS_TO_RUN + + # For correctly labeled PR, we replace `all` with the actual job names + elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" == "true" ]]; then + TESTS_TO_RUN=$(cat .github/workflows/cicd-main.yml | yq '.jobs | [to_entries[] | .key] | join(",")') + + # For incorrectly labeled PR, run no tests + elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" != "true" ]]; then + TESTS_TO_RUN="" + + # For push events, run only unit tests. This is so that we can generate coverage + # on branch `main`. + elif [[ "$EVENT_NAME" == "push" ]]; then + TESTS_TO_RUN=$(cat .github/workflows/cicd-main.yml | yq '.jobs | [to_entries[]] | [.[] | select(.key == "L0_Unit*") | .key] | join(",")') + + else + echo "Unsupported event_name $EVENT_NAME provided". + exit 1 + fi + + parsed_string=$(echo "$TESTS_TO_RUN" | jq -c --raw-input 'split(",")') echo "main=${parsed_string}" | tee -a "$GITHUB_OUTPUT" - - name: Parse all - id: all - run: | - echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT" - - name: Infer github event - id: github-event - run: | - echo "main=${{ github.event_name }}" | tee -a "$GITHUB_OUTPUT" cicd-test-container-build: - if: ${{ github.event.label.name == 'Run CICD' || needs.pre-flight.outputs.event_name == 'workflow_dispatch' }} + if: ${{ needs.pre-flight.outputs.test_to_run != '' }} uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_container.yml@v0.14.0 needs: pre-flight with: @@ -67,9 +93,9 @@ jobs: prune-filter-timerange: 24h cicd-import-tests: + if: ${{ needs.pre-flight.outputs.test_to_run != '' }} needs: [cicd-test-container-build, pre-flight] runs-on: self-hosted-azure-gpus-1 - if: ${{ github.event.label.name == 'Run CICD' || github.event_name == 'workflow_dispatch' }} steps: - name: Run some checks run: | @@ -89,111 +115,111 @@ jobs: L0_Unit_Tests_GPU_ASR: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_ASR') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_ASR') with: RUNNER: self-hosted-azure-gpus-1 TIMEOUT: 20 # TODO: remove this hack SCRIPT: | - python -c "from nemo.collections.asr.models import ASRModel" && NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + python -c "from nemo.collections.asr.models import ASRModel" && NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo 
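For context on the new selection flow: pre-flight now emits a JSON array of job names instead of the old boolean `all` output, and every job gates itself with `contains(fromJSON(needs.pre-flight.outputs.test_to_run), '<job name>')`. A minimal sketch of that data flow, assuming yq v4 and jq are available on the runner as the workflow expects (job names shown are illustrative):

    # Collect every job key defined in the workflow into a comma-separated string,
    # mirroring the yq expression used in the pre-flight step above.
    TESTS_TO_RUN=$(yq '.jobs | [to_entries[] | .key] | join(",")' .github/workflows/cicd-main.yml)

    # Convert the comma-separated string into a JSON array, e.g.
    # ["L0_Unit_Tests_GPU_ASR","L0_Unit_Tests_GPU_Audio",...]
    parsed_string=$(echo "$TESTS_TO_RUN" | jq -c --raw-input 'split(",")')
    echo "main=${parsed_string}" >> "$GITHUB_OUTPUT"

    # Downstream, each job's `if:` condition reduces to a membership test such as
    #   contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_ASR')
    # so an empty TESTS_TO_RUN (e.g. an unlabeled pull request) skips every job without
    # the removed `needs.pre-flight.outputs.all` check.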
L0_Unit_Tests_GPU_Audio: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Audio') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Audio') with: RUNNER: self-hosted-azure-gpus-1 TIMEOUT: 20 SAVE_COVERAGE_REPORT: true SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/collections/audio -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + NEMO_NUMBA_MINVER=0.53 pytest tests/collections/audio -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_GPU_Common: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Common') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Common') with: RUNNER: self-hosted-azure-gpus-1 SAVE_COVERAGE_REPORT: true SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/collections/common -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + NEMO_NUMBA_MINVER=0.53 pytest tests/collections/common -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_GPU_LLM: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_LLM') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_LLM') with: RUNNER: self-hosted-azure-gpus-1 SAVE_COVERAGE_REPORT: true SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/collections/llm -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + NEMO_NUMBA_MINVER=0.53 pytest tests/collections/llm -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_GPU_Multimodal: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Multimodal') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Multimodal') with: RUNNER: self-hosted-azure-gpus-1 SAVE_COVERAGE_REPORT: true SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/collections/multimodal -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + NEMO_NUMBA_MINVER=0.53 pytest tests/collections/multimodal -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_GPU_NLP: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_NLP') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_NLP') with: RUNNER: self-hosted-azure-gpus-1 SAVE_COVERAGE_REPORT: true SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/collections/nlp -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + NEMO_NUMBA_MINVER=0.53 pytest tests/collections/nlp -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_GPU_TTS: needs: [pre-flight, cicd-test-container-build] uses: 
./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_TTS') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_TTS') with: RUNNER: self-hosted-azure-gpus-1 SAVE_COVERAGE_REPORT: true SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/collections/tts -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + NEMO_NUMBA_MINVER=0.53 pytest tests/collections/tts -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo OPTIONAL_L0_Unit_Tests_GPU_Core: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_GPU_Core') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_GPU_Core') with: RUNNER: self-hosted-azure-gpus-1 TIMEOUT: 20 SAVE_COVERAGE_REPORT: true SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/core -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + NEMO_NUMBA_MINVER=0.53 pytest tests/core -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo IS_OPTIONAL: true L0_Unit_Tests_GPU_Hydra: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Hydra') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Hydra') with: RUNNER: self-hosted-azure-gpus-1 SAVE_COVERAGE_REPORT: true SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/hydra -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + NEMO_NUMBA_MINVER=0.53 pytest tests/hydra -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_GPU_Lightning: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Lightning') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Lightning') with: RUNNER: self-hosted-azure SAVE_COVERAGE_REPORT: true SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/lightning -m "not pleasefixme" --with_downloads --cov-report=term --cov=nemo + NEMO_NUMBA_MINVER=0.53 pytest tests/lightning -m "not pleasefixme" --with_downloads --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_GPU_Others: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Others') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Others') with: RUNNER: self-hosted-azure-gpus-1 SAVE_COVERAGE_REPORT: true @@ -211,117 +237,117 @@ jobs: --ignore=tests/hydra \ --ignore=tests/lightning \ --ignore=tests/utils \ - --cov-report=term --cov=nemo + --cov-report=term --cov-branch --cov-report=xml --cov=nemo # L0: CPU unit tests L0_Unit_Tests_CPU_ASR: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_ASR') || needs.pre-flight.outputs.all == 'true' + if: 
contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_ASR') with: RUNNER: self-hosted-azure-cpu TIMEOUT: 20 SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_Audio: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Audio') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Audio') with: RUNNER: self-hosted-azure-cpu SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/audio -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/audio -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_Common: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Common') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Common') with: RUNNER: self-hosted-azure-cpu TIMEOUT: 20 SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/common -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/common -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_LLM: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_LLM') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_LLM') with: RUNNER: self-hosted-azure-cpu SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/llm -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/llm -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_Multimodal: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Multimodal') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Multimodal') with: RUNNER: self-hosted-azure-cpu SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/multimodal -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" 
NEMO_NUMBA_MINVER=0.53 pytest tests/collections/multimodal -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_NLP: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_NLP') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_NLP') with: RUNNER: self-hosted-azure-cpu TIMEOUT: 20 SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/nlp -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/nlp -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_TTS: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_TTS') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_TTS') with: RUNNER: self-hosted-azure-cpu SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/tts -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/tts -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_Core: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Core') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Core') with: RUNNER: self-hosted-azure-cpu TIMEOUT: 20 SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/core tests/core_ptl -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/core tests/core_ptl -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_Hydra: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Hydra') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Hydra') with: RUNNER: self-hosted-azure-cpu SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/hydra -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/hydra -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_Lightning: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Lightning') || 
needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Lightning') with: RUNNER: self-hosted-azure-cpu SAVE_COVERAGE_REPORT: true SCRIPT: | - CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/lightning -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov=nemo + CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/lightning -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_CPU_Others: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Others') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Others') with: RUNNER: self-hosted-azure-cpu SAVE_COVERAGE_REPORT: true @@ -339,7 +365,7 @@ jobs: --ignore=tests/hydra \ --ignore=tests/lightning \ --ignore=tests/utils \ - --cov-report=term --cov=nemo + --cov-report=term --cov-branch --cov-report=xml --cov=nemo L0_Unit_Tests_Coverage: runs-on: self-hosted-azure-cpu @@ -367,26 +393,95 @@ jobs: - L0_Unit_Tests_CPU_Lightning - L0_Unit_Tests_CPU_Others steps: - - - name: Download artifacts + - name: Download coverage reports of current branch uses: actions/download-artifact@v4 with: - path: ${{ github.run_id }} - - - name: Combine + path: ${{ github.run_id }}/current + + - name: Get total coverage of current branch shell: bash -x -e -u -o pipefail {0} + if: always() + id: total-current-branch run: | - pip install coverage - cd ${{ github.run_id }} + cd ${{ github.run_id }}/current ls -al . ls -al coverage-*/ coverage combine --keep $(ls coverage-*/.coverage) coverage report + REPORT=$(coverage report) + TOTAL=$(echo "$REPORT" | grep "TOTAL" | awk '{print substr($NF, 1, length($NF)-1)}') + echo "main=${TOTAL}" | tee -a "$GITHUB_OUTPUT" + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5 + with: + directory: ${{ github.run_id }}/current + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + if: always() + with: + name: coverage-summary-${{ github.run_id }} + path: ${{ github.run_id }}/current/.coverage + include-hidden-files: true + + # Enable this after we have a first successful run on `main` + # - name: Get last successful run of main + # id: get_run_id + # env: + # GH_TOKEN: ${{ github.token }} + # run: | + # WORKFLOWS=$(curl -L \ + # -H "Accept: application/vnd.github+json" \ + # -H "Authorization: Bearer $GH_TOKEN" \ + # -H "X-GitHub-Api-Version: 2022-11-28" \ + # https://api.github.com/repos/NVIDIA/NeMo/actions/workflows) + + # WORKFLOW_ID=$(echo -E "$WORKFLOWS" | jq '.workflows[] | select(.path==".github/workflows/cicd-main.yml") | .id') + + # RUNS=$(curl -L \ + # -H "Accept: application/vnd.github+json" \ + # -H "Authorization: Bearer $GH_TOKEN" \ + # -H "X-GitHub-Api-Version: 2022-11-28" \ + # "https://api.github.com/repos/NVIDIA/NeMo/actions/workflows/$WORKFLOW_ID/runs?branch=main&status=success") + + # RUN_ID=$(echo -E "$RUNS" | jq '.workflow_runs[0].id') + + # echo "main=$RUN_ID" | tee -a "$GITHUB_OUTPUT" + + # - name: Download coverage summary of main branch + # uses: actions/download-artifact@v4 + # with: + # path: ${{ github.run_id }}/main + # run-id: ${{ steps.get_run_id.outputs.main }} + + # - name: Get total coverage of main branch + # shell: bash -x -e -u -o 
pipefail {0} + # if: always() + # id: total-main-branch + # run: | + # cd ${{ github.run_id }}/main + # coverage combine --keep $(ls coverage-summary-*/.coverage) + # coverage report + + # REPORT=$(coverage report) + # TOTAL=$(echo "$REPORT" | grep "TOTAL" | awk '{print substr($NF, 1, length($NF)-1)}') + # echo "main=${TOTAL}" | tee -a "$GITHUB_OUTPUT" + + # - name: Compare coverage change + # shell: bash -x -e -u -o pipefail {0} + # run: | + # TOTAL_MAIN_BRANCH=${{ steps.total-main-branch.outputs.main }} + # TOTAL_CURRENT_BRANCH=${{ steps.total-current-branch.outputs.main }} + + # test $TOTAL_CURRENT_BRANCH -ge $TOTAL_MAIN_BRANCH + L0_Setup_Test_Data_And_Models: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Setup_Test_Data_And_Models') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Setup_Test_Data_And_Models') with: RUNNER: self-hosted-azure SCRIPT: | @@ -398,7 +493,7 @@ jobs: L2_Community_LLM_Checkpoints_tests_Bert: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Bert') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Bert') with: RUNNER: self-hosted-azure SCRIPT: | @@ -409,7 +504,7 @@ jobs: L2_Community_LLM_Checkpoints_tests_Mamba2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Mamba2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Mamba2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -422,7 +517,7 @@ jobs: L2_Community_LLM_Checkpoints_tests_Llama: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Llama') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Llama') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -434,7 +529,7 @@ jobs: L2_Community_LLM_Checkpoints_tests_Llama3: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Llama3') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Llama3') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -446,7 +541,7 @@ jobs: L2_Community_LLM_Checkpoints_tests_StarCoder: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_StarCoder') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_StarCoder') with: RUNNER: self-hosted-azure SCRIPT: | @@ -458,7 +553,7 @@ jobs: L2_Community_LLM_Checkpoints_tests_Falcon: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: 
contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Falcon') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Falcon') with: RUNNER: self-hosted-azure SCRIPT: | @@ -470,7 +565,7 @@ jobs: L2_Community_vita_Checkpoints_tests_Llama3: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_vita_Checkpoints_tests_Llama3') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_vita_Checkpoints_tests_Llama3') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -516,7 +611,7 @@ jobs: L2_PTQ_Llama2_FP8: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_PTQ_Llama2_FP8') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_PTQ_Llama2_FP8') with: RUNNER: self-hosted-azure SCRIPT: | @@ -580,7 +675,7 @@ jobs: L2_Distill_Llama2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Distill_Llama2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Distill_Llama2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -607,7 +702,7 @@ jobs: L2_Prune_Width_Llama2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Prune_Width_Llama2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Prune_Width_Llama2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -628,7 +723,7 @@ jobs: L2_Prune_Depth_Llama2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Prune_Depth_Llama2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Prune_Depth_Llama2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -646,7 +741,7 @@ jobs: ASR_dev_run_Speech_to_Text: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -661,7 +756,7 @@ jobs: ASR_dev_run_Speech_to_Text_WPE_-_CitriNet: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text_WPE_-_CitriNet') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text_WPE_-_CitriNet') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -679,7 +774,7 @@ jobs: ASR_dev_run_Speech_Pre-training_-_CitriNet: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_Pre-training_-_CitriNet') || needs.pre-flight.outputs.all == 'true' + if: 
contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_Pre-training_-_CitriNet') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -695,7 +790,7 @@ jobs: ASR_dev_run_Speech_To_Text_Finetuning: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_To_Text_Finetuning') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_To_Text_Finetuning') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -713,7 +808,7 @@ jobs: ASR_dev_run_Speech_To_Text_HF_Finetuning: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_To_Text_HF_Finetuning') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_To_Text_HF_Finetuning') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: |- @@ -749,7 +844,7 @@ jobs: ASR_dev_run_Speech_to_Text_WPE_-_Conformer: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text_WPE_-_Conformer') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text_WPE_-_Conformer') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -770,7 +865,7 @@ jobs: ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -791,7 +886,7 @@ jobs: L2_Speech_to_Text_EMA: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_to_Text_EMA') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_to_Text_EMA') with: RUNNER: self-hosted-azure SCRIPT: | @@ -807,7 +902,7 @@ jobs: L2_Speech_to_Text_AED: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_to_Text_AED') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_to_Text_AED') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -847,7 +942,7 @@ jobs: L2_Speaker_dev_run_Speaker_Recognition: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Recognition') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Recognition') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -866,7 +961,7 @@ jobs: L2_Speaker_dev_run_Speaker_Diarization: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 
'L2_Speaker_dev_run_Speaker_Diarization') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Diarization') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -886,7 +981,7 @@ jobs: L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -901,7 +996,7 @@ jobs: L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference') with: RUNNER: self-hosted-azure SCRIPT: | @@ -913,7 +1008,7 @@ jobs: L2_Speaker_dev_run_Speech_to_Label: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speech_to_Label') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speech_to_Label') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -936,7 +1031,7 @@ jobs: L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference') with: RUNNER: self-hosted-azure SCRIPT: | @@ -954,7 +1049,7 @@ jobs: L2_Speaker_dev_run_Clustering_Diarizer_Inference: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Clustering_Diarizer_Inference') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Clustering_Diarizer_Inference') with: RUNNER: self-hosted-azure SCRIPT: | @@ -971,7 +1066,7 @@ jobs: L2_Speaker_dev_run_Neural_Diarizer_Inference: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Neural_Diarizer_Inference') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Neural_Diarizer_Inference') with: RUNNER: self-hosted-azure SCRIPT: | @@ -985,7 +1080,7 @@ jobs: L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 
'L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1001,7 +1096,7 @@ jobs: L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1018,7 +1113,7 @@ jobs: L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1043,7 +1138,7 @@ jobs: L2_ASR_Adapters_Linear_Adapters: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Adapters_Linear_Adapters') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Adapters_Linear_Adapters') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1062,7 +1157,7 @@ jobs: L2_ASR_Adapters_RelPos_MHA_Adapters: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Adapters_RelPos_MHA_Adapters') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Adapters_RelPos_MHA_Adapters') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1083,7 +1178,7 @@ jobs: L2_Speech_Estimate_Duration_Bins: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Estimate_Duration_Bins') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Estimate_Duration_Bins') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1115,7 +1210,7 @@ jobs: L2_Speech_Batch_Size_OOMptimizer: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Batch_Size_OOMptimizer') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Batch_Size_OOMptimizer') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1134,7 +1229,7 @@ jobs: L2_Speech_Batch_Size_OOMptimizer_Canary: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Batch_Size_OOMptimizer_Canary') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Batch_Size_OOMptimizer_Canary') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1147,7 +1242,7 @@ jobs: L2_Speech_Transcription_Speech_to_Text_Transcribe: needs: [pre-flight, cicd-test-container-build] uses: 
./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Speech_to_Text_Transcribe') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Speech_to_Text_Transcribe') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1161,7 +1256,7 @@ jobs: L2_Speech_Transcription_Canary_Transcribe_Full_Manifest: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_Full_Manifest') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_Full_Manifest') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1180,7 +1275,7 @@ jobs: L2_Speech_Transcription_Canary_Transcribe_With_Prompt: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_With_Prompt') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_With_Prompt') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1203,7 +1298,7 @@ jobs: L2_Speech_Transcription_Canary_Transcribe_Audio_Dir: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_Audio_Dir') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_Audio_Dir') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1223,7 +1318,7 @@ jobs: L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1242,7 +1337,7 @@ jobs: L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1262,7 +1357,7 @@ jobs: L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1316,7 +1411,7 @@ jobs: 
L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1343,7 +1438,7 @@ jobs: L2_Pretraining_BERT_pretraining_from_Text: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Pretraining_BERT_pretraining_from_Text') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Pretraining_BERT_pretraining_from_Text') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1373,7 +1468,7 @@ jobs: L2_Pretraining_BERT_from_Preprocessed: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Pretraining_BERT_from_Preprocessed') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Pretraining_BERT_from_Preprocessed') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1406,7 +1501,7 @@ jobs: L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1471,7 +1566,7 @@ jobs: L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1499,7 +1594,7 @@ jobs: L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1526,7 +1621,7 @@ jobs: L2_NMT_Attention_is_All_You_Need_Inference: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Inference') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Inference') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1542,7 +1637,7 @@ 
jobs: L2_NMT_Attention_is_All_You_Need_Finetuning: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Finetuning') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Finetuning') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1573,7 +1668,7 @@ jobs: L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -1602,7 +1697,7 @@ jobs: L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1621,7 +1716,7 @@ jobs: L2_Megatron_NMT_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_NMT_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_NMT_Training_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1719,7 +1814,7 @@ jobs: L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1787,7 +1882,7 @@ jobs: L2_Megatron_Core_Bert_Pretraining_and_Resume_Training: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Core_Bert_Pretraining_and_Resume_Training') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Core_Bert_Pretraining_and_Resume_Training') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1857,7 +1952,7 @@ jobs: L2_RAG_Pipeline_Indexing: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_RAG_Pipeline_Indexing') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_RAG_Pipeline_Indexing') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1875,7 +1970,7 @@ jobs: L2_RAG_Pipeline_Generating: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: 
contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_RAG_Pipeline_Generating') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_RAG_Pipeline_Generating') with: RUNNER: self-hosted-azure SCRIPT: | @@ -1893,7 +1988,7 @@ jobs: L2_Megatron_GPT_Pretraining_and_Resume_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Pretraining_and_Resume_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Pretraining_and_Resume_Training_TP2') with: RUNNER: self-hosted-azure-gpus-2-h100 SCRIPT: | @@ -2003,7 +2098,7 @@ jobs: L2_Megatron_GPT_Skip_Train: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Skip_Train') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Skip_Train') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2031,7 +2126,7 @@ jobs: L2_Megatron_GPT_with_Rope_Pretraining_and_Resume_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_Rope_Pretraining_and_Resume_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_Rope_Pretraining_and_Resume_Training_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2206,7 +2301,7 @@ jobs: L2_Megatron_LM_To_NeMo_Conversion: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_LM_To_NeMo_Conversion') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_LM_To_NeMo_Conversion') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2321,7 +2416,7 @@ jobs: L2_Megatron_GPT_with_ResetLR_Pretraining_and_Resume_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_ResetLR_Pretraining_and_Resume_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_ResetLR_Pretraining_and_Resume_Training_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2391,7 +2486,7 @@ jobs: L2_Megatron_GPT_with_Drop_Optimizer_States_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_Drop_Optimizer_States_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_Drop_Optimizer_States_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2429,7 +2524,7 @@ jobs: L2_Megatron_GPT_with_ALiBi_Pretraining_and_Resume_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_ALiBi_Pretraining_and_Resume_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: 
contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_ALiBi_Pretraining_and_Resume_Training_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2510,7 +2605,7 @@ jobs: L2_Megatron_GPT_with_KERPLE_Pretraining_and_Resume_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_KERPLE_Pretraining_and_Resume_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_with_KERPLE_Pretraining_and_Resume_Training_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2592,7 +2687,7 @@ jobs: OPTIONAL_L2_Megatron_GPT_Pretraining_and_Resume_Training_PP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'OPTIONAL_L2_Megatron_GPT_Pretraining_and_Resume_Training_PP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'OPTIONAL_L2_Megatron_GPT_Pretraining_and_Resume_Training_PP2') with: RUNNER: self-hosted-azure-gpus-2-h100 SCRIPT: | @@ -2704,7 +2799,7 @@ jobs: L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -2731,7 +2826,7 @@ jobs: L2_Megatron_GPT_Finetuning_PP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Finetuning_PP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Finetuning_PP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2798,7 +2893,7 @@ jobs: L2_Megatron_GPT_Finetuning_StarCoder_PP1: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Finetuning_StarCoder_PP1') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Finetuning_StarCoder_PP1') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -2829,7 +2924,7 @@ jobs: L2_Megatron_GPT_Reranker: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Reranker') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Reranker') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2852,7 +2947,7 @@ jobs: L2_Megatron_GPT_Embedding: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Embedding') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Embedding') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2890,7 +2985,7 @@ jobs: L2_Megatron_GPT_PEFT_Lora_PP2_O2: needs: [pre-flight, cicd-test-container-build] 
uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_PEFT_Lora_PP2_O2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_PEFT_Lora_PP2_O2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2939,7 +3034,7 @@ jobs: L2_Megatron_GPT_PEFT_Lora_TP2_O1: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_PEFT_Lora_TP2_O1') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_PEFT_Lora_TP2_O1') with: RUNNER: self-hosted-azure SCRIPT: | @@ -2985,7 +3080,7 @@ jobs: L2_Megatron_GPT_PEFT_Lora_TP2SP1: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_PEFT_Lora_TP2SP1') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_PEFT_Lora_TP2SP1') with: RUNNER: self-hosted-azure-gpus-2-h100 SCRIPT: | @@ -3039,7 +3134,7 @@ jobs: L2_Megatron_GPT_Eval: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Eval') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Eval') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3053,7 +3148,7 @@ jobs: L2_Megatron_GPT_Eval_PP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Eval_PP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_Eval_PP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3069,7 +3164,7 @@ jobs: L2_Megatron_GPT_SFT_Eval_inference_seq_len_greaterThan_training_seq_len: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_SFT_Eval_inference_seq_len_greaterThan_training_seq_len') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_GPT_SFT_Eval_inference_seq_len_greaterThan_training_seq_len') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3121,7 +3216,7 @@ jobs: L2_Megatron_Change_Partitions_Reduce_TP_Num_Partitions_-2_to_1-_and_PP_Num_Partitions_-1_to_2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Change_Partitions_Reduce_TP_Num_Partitions_-2_to_1-_and_PP_Num_Partitions_-1_to_2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Change_Partitions_Reduce_TP_Num_Partitions_-2_to_1-_and_PP_Num_Partitions_-1_to_2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3138,7 +3233,7 @@ jobs: L2_Megatron_Change_Partitions_Increase_TP_Num_Partitions_-2_to_4-_and_PP_Num_Partitions_-1_to_2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 
'L2_Megatron_Change_Partitions_Increase_TP_Num_Partitions_-2_to_4-_and_PP_Num_Partitions_-1_to_2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Change_Partitions_Increase_TP_Num_Partitions_-2_to_4-_and_PP_Num_Partitions_-1_to_2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3155,7 +3250,7 @@ jobs: L2_Megatron_Core_T5_Pretraining_and_Resume_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Core_T5_Pretraining_and_Resume_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Core_T5_Pretraining_and_Resume_Training_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3230,7 +3325,7 @@ jobs: L2_Megatron_T5_with_ALiBi_Pretraining_and_Resume_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_T5_with_ALiBi_Pretraining_and_Resume_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_T5_with_ALiBi_Pretraining_and_Resume_Training_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3325,7 +3420,7 @@ jobs: L2_Megatron_T5_with_KERPLE_Pretraining_and_Resume_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_T5_with_KERPLE_Pretraining_and_Resume_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_T5_with_KERPLE_Pretraining_and_Resume_Training_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3420,7 +3515,7 @@ jobs: OPTIONAL_L2_Megatron_T5_Pretraining_and_Resume_Training_PP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'OPTIONAL_L2_Megatron_T5_Pretraining_and_Resume_Training_PP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'OPTIONAL_L2_Megatron_T5_Pretraining_and_Resume_Training_PP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3490,7 +3585,7 @@ jobs: L2_Megatron_T5_w_Mixture_of_Expert_Pretraining: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_T5_w_Mixture_of_Expert_Pretraining') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_T5_w_Mixture_of_Expert_Pretraining') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3532,7 +3627,7 @@ jobs: L2_Megatron_UL2_Pretraining_and_Resume_Training_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_UL2_Pretraining_and_Resume_Training_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_UL2_Pretraining_and_Resume_Training_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3611,7 +3706,7 @@ jobs: L2_Megatron_Core_T5_Eval: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: 
contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Core_T5_Eval') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Core_T5_Eval') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3623,7 +3718,7 @@ jobs: L2_Megatron_Core_T5_PEFT_Lora_TP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Core_T5_PEFT_Lora_TP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Core_T5_PEFT_Lora_TP2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3671,7 +3766,7 @@ jobs: L2_VLM_HF_Transformer_PEFT: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3682,7 +3777,7 @@ jobs: L2_VLM_HF_Transformer_PEFT_FSDP: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT_FSDP') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT_FSDP') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3693,7 +3788,7 @@ jobs: L2_VLM_HF_Transformer_PEFT_4bit: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT_4bit') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT_4bit') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3704,7 +3799,7 @@ jobs: L2_VLM_HF_Transformer_SFT_FSDP2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_SFT_FSDP2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_SFT_FSDP2') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3715,7 +3810,7 @@ jobs: L2_HF_Transformer_PEFT: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3726,7 +3821,7 @@ jobs: L2_HF_Transformer_PEFT_nemorun: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_nemorun') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_nemorun') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3737,7 +3832,7 @@ jobs: L2_HF_Transformer_PEFT_2gpu: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_2gpu') || 
needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_2gpu') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3748,7 +3843,7 @@ jobs: L2_HF_Transformer_PEFT_2gpu_nemorun: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_2gpu_nemorun') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_2gpu_nemorun') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3759,7 +3854,7 @@ jobs: L2_HF_Transformer_SFT_2gpu: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3770,7 +3865,7 @@ jobs: L2_HF_Transformer_SFT_FSDP2_2gpu: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_FSDP2_2gpu') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_FSDP2_2gpu') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3781,7 +3876,7 @@ jobs: L2_HF_Transformer_PT_2gpu: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT_2gpu') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT_2gpu') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3792,7 +3887,7 @@ jobs: L2_HF_Transformer_SFT_2gpu_nemorun: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu_nemorun') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu_nemorun') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3803,7 +3898,7 @@ jobs: L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3814,7 +3909,7 @@ jobs: L2_HF_Transformer_PT_2gpu_nemorun: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT_2gpu_nemorun') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT_2gpu_nemorun') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3825,7 +3920,7 @@ jobs: L2_HF_Transformer_PT: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT') || needs.pre-flight.outputs.all == 'true' + if: 
contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3836,7 +3931,7 @@ jobs: L2_HF_Transformer_PT_nemorun: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT_nemorun') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT_nemorun') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3847,7 +3942,7 @@ jobs: L2_HF_Transformer_SFT: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3858,7 +3953,7 @@ jobs: L2_HF_Transformer_SFT_nemorun: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_nemorun') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_nemorun') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3869,7 +3964,7 @@ jobs: L2_HF_Transformer_SFT_TE_Acceleration: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_TE_Acceleration') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_TE_Acceleration') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3880,7 +3975,7 @@ jobs: L2_HF_Transformer_PT_TE_Acceleration: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT_TE_Acceleration') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PT_TE_Acceleration') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3888,11 +3983,23 @@ jobs: AFTER_SCRIPT: | rm -rf nemo_experiments + # L2: SpeechLM tests + L2_HF_Transformer_SpeechLM_SFT_2gpu: + needs: [pre-flight, cicd-test-container-build] + uses: ./.github/workflows/_test_template.yml + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SpeechLM_SFT_2gpu') || needs.pre-flight.outputs.all == 'true' + with: + RUNNER: self-hosted-azure + SCRIPT: | + TRANSFORMERS_OFFLINE=1 python tests/collections/speechlm/hf/sft.py --model /home/TestData/speechlm/whisper-small/ --max-steps 10 --devices 2 --strategy ddp + AFTER_SCRIPT: | + rm -rf nemo_experiments + # L2: Megatron Mock Data Generation L2_Megatron_Mock_Data_Generation_MockGPTDataset: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Mock_Data_Generation_MockGPTDataset') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Mock_Data_Generation_MockGPTDataset') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3908,7 +4015,7 @@ jobs: L2_Megatron_Mock_Data_Generation_MockT5Dataset: needs: [pre-flight, cicd-test-container-build] 
uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Mock_Data_Generation_MockT5Dataset') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_Mock_Data_Generation_MockT5Dataset') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3926,7 +4033,7 @@ jobs: L2_TTS_Fast_dev_runs_1_Tacotron_2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_Tacotron_2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_Tacotron_2') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -3952,7 +4059,7 @@ jobs: L2_TTS_Fast_dev_runs_1_WaveGlow: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_WaveGlow') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_WaveGlow') with: RUNNER: self-hosted-azure SCRIPT: | @@ -3974,7 +4081,7 @@ jobs: L2_TTS_Fast_dev_runs_1_FastPitch: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_FastPitch') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_FastPitch') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4048,7 +4155,7 @@ jobs: L2_TTS_Fast_dev_runs_1_Hifigan: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_Hifigan') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_Hifigan') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4100,7 +4207,7 @@ jobs: Speech_Checkpoints_tests: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'Speech_Checkpoints_tests') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'Speech_Checkpoints_tests') with: RUNNER: self-hosted-azure-gpus-1 TIMEOUT: 20 @@ -4116,7 +4223,7 @@ jobs: L2_Stable_Diffusion_Training: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Stable_Diffusion_Training') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Stable_Diffusion_Training') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -4166,7 +4273,7 @@ jobs: L2_NeMo_2_GPT_Pretraining_no_transformer_engine: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_Pretraining_no_transformer_engine') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_Pretraining_no_transformer_engine') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4199,7 +4306,7 @@ jobs: L2_NeMo_2_llama3_pretraining_recipe: needs: [pre-flight, cicd-test-container-build] uses: 
./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_llama3_pretraining_recipe') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_llama3_pretraining_recipe') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4227,7 +4334,7 @@ jobs: L2_NeMo_2_llama3_fault_tolerance_plugin: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_llama3_fault_tolerance_plugin') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_llama3_fault_tolerance_plugin') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4247,7 +4354,7 @@ jobs: L2_NeMo_2_llama3_straggler_detection: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_llama3_straggler_detection') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_llama3_straggler_detection') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4267,7 +4374,7 @@ jobs: L2_NeMo_2_GPT_DDP_Param_Parity_check: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_DDP_Param_Parity_check') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_DDP_Param_Parity_check') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4284,7 +4391,7 @@ jobs: L2_NeMo_2_SSM_Pretraining: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_SSM_Pretraining') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_SSM_Pretraining') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -4298,7 +4405,7 @@ jobs: L2_NeMo_2_SSM_Finetuning: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_SSM_Finetuning') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_SSM_Finetuning') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -4312,7 +4419,7 @@ jobs: L2_NeMo_2_HF_MODEL_IMPORT: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_HF_MODEL_IMPORT') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_HF_MODEL_IMPORT') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4325,7 +4432,7 @@ jobs: L2_NeMo_2_jit_callback: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_jit_callback') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_jit_callback') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4335,7 +4442,7 @@ jobs: L2_NeMo_2_T5_Pretraining: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 
'L2_NeMo_2_T5_Pretraining') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_T5_Pretraining') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4359,7 +4466,7 @@ jobs: L2_NeMo_2_T5_Finetuning: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_T5_Finetuning') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_T5_Finetuning') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4374,7 +4481,7 @@ jobs: L2_NeMo_2_T5_LoRA: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_T5_LoRA') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_T5_LoRA') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4390,7 +4497,7 @@ jobs: L2_NeMo_2_NEVA_MOCK_TRAINING: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_NEVA_MOCK_TRAINING') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_NEVA_MOCK_TRAINING') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4402,7 +4509,7 @@ jobs: L2_NeMo_2_NEVA_MOCK_PACKED_TRAINING: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_NEVA_MOCK_PACKED_TRAINING') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_NEVA_MOCK_PACKED_TRAINING') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4415,7 +4522,7 @@ jobs: L2_NeMo_2_MLLAMA_MOCK_TRAINING: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_MLLAMA_MOCK_TRAINING') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_MLLAMA_MOCK_TRAINING') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4428,7 +4535,7 @@ jobs: L2_NeMo_2_Mixtral_Pretraining: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mixtral_Pretraining') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mixtral_Pretraining') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4439,7 +4546,7 @@ jobs: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP1_MBS1') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP1_MBS1') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4468,7 +4575,7 @@ jobs: L2_NeMo_2_GPT_SFT_TP1PP1_MBS2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP1_MBS2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP1_MBS2') with: RUNNER: 
self-hosted-azure SCRIPT: | @@ -4497,7 +4604,7 @@ jobs: L2_NeMo_2_GPT_SFT_TP1PP2_MBS2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP2_MBS2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP2_MBS2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4526,7 +4633,7 @@ jobs: L2_NeMo_2_GPT_SFT_TP2PP1_MBS2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP2PP1_MBS2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP2PP1_MBS2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4555,7 +4662,7 @@ jobs: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4584,7 +4691,7 @@ jobs: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4613,7 +4720,7 @@ jobs: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4642,7 +4749,7 @@ jobs: L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4671,7 +4778,7 @@ jobs: L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4699,7 +4806,7 @@ jobs: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4727,7 +4834,7 @@ jobs: 
L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4755,7 +4862,7 @@ jobs: L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4782,7 +4889,7 @@ jobs: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4812,7 +4919,7 @@ jobs: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4826,7 +4933,7 @@ jobs: L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4841,7 +4948,7 @@ jobs: OPTIONAL_L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'OPTIONAL_L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'OPTIONAL_L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4857,7 +4964,7 @@ jobs: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4872,7 +4979,7 @@ jobs: L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1') with: RUNNER: self-hosted-azure 
SCRIPT: | @@ -4887,7 +4994,7 @@ jobs: L2_NEMO_2_LoRA_MERGE: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NEMO_2_LoRA_MERGE') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NEMO_2_LoRA_MERGE') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4899,7 +5006,7 @@ jobs: L2_NEMO_2_LoRA_Export: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NEMO_2_LoRA_Export') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NEMO_2_LoRA_Export') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -4911,7 +5018,7 @@ jobs: L2_NEMO_2_LoRA_Inference: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NEMO_2_LoRA_Inference') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NEMO_2_LoRA_Inference') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -4928,7 +5035,7 @@ jobs: L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4937,7 +5044,7 @@ jobs: L2_NeMo_2_PTQ_Llama2_FP8: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_PTQ_Llama2_FP8') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_PTQ_Llama2_FP8') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4952,7 +5059,7 @@ jobs: L2_NeMo_2_Export_In_Framework: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Export_In_Framework') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Export_In_Framework') with: RUNNER: self-hosted-azure SCRIPT: | @@ -4981,7 +5088,7 @@ jobs: L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING') with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -4996,7 +5103,7 @@ jobs: L2_NeMo_2_VLLM_EXPORT: needs: [pre-flight, cicd-test-container-build] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_VLLM_EXPORT') || needs.pre-flight.outputs.all == 'true' + if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_VLLM_EXPORT') with: RUNNER: self-hosted-azure SCRIPT: | @@ -5024,7 +5131,7 @@ jobs: rm -rf /tmp/vllm_from_nemo2 Nemo_CICD_Test: - needs: + needs: - pre-flight - cicd-import-tests @@ -5164,6 +5271,7 @@ jobs: - 
L2_HF_Transformer_PT_2gpu - L2_HF_Transformer_PT_2gpu_nemorun - L2_HF_Transformer_PT_TE_Acceleration + - L2_HF_Transformer_SpeechLM_SFT_2gpu - L2_NeMo_2_SSM_Pretraining - L2_NeMo_2_SSM_Finetuning - L2_NeMo_2_T5_Pretraining @@ -5212,7 +5320,7 @@ jobs: - L2_HF_Transformer_SFT_FSDP2_2gpu - L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2 - L2_NeMo_2_VLLM_EXPORT - if: always() + if: always() && github.event != 'push' runs-on: ubuntu-latest steps: - name: Evaluate conclusion diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 3730e0bcf955..b8d38b24b319 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -1,4 +1,4 @@ -name: Isort and Black Formatting; PyLint Docs check +name: Isort and Black Formatting # Incrementally reformat only changed files with black, all files with isort # # Replaces pre-commit.ci, since it reformats all the files. @@ -71,145 +71,3 @@ jobs: with: message: Apply isort and black reformatting commit: --signoff - - check_pylint: - name: "check_pylint (strict-mode: ${{ matrix.strict-mode }})" - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - env: - THRESHOLD: 1730937600 # On this date (2024/11/07) we decided to add Pylint. It shall only run in strict mode for files added past this date. For files prior to this date, we will only add a PR comment with PyLint's stdout. - strategy: - matrix: - strict-mode: ["true", "false"] - steps: - - name: Checkout branch - uses: actions/checkout@v4 - with: - # setup repository and ref for PRs, see - # https://github.com/EndBug/add-and-commit?tab=readme-ov-file#working-with-prs - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.ref }} - fetch-depth: 0 - - # https://github.com/tj-actions/changed-files - - name: Get changed files - id: changed-files - uses: tj-actions/changed-files@v44 - with: - files: | - **.py - - - name: Setup Python env - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: pylint - if: ${{ steps.changed-files.outputs.any_changed == 'true' && !contains( github.event.pull_request.labels.*.name, 'skip-docs') }} - id: pylint - env: - # only *.py files included - STRICT_MODE: ${{ matrix.strict-mode }} - CHANGED_FILES: "${{ steps.changed-files.outputs.all_changed_files }}" - run: | - pip install pylint - - FILTERED=() - for file in $CHANGED_FILES; do - DATE=$(git log --format=%ad --date=unix "$file" | tail -1) - - if [[ "$STRICT_MODE" == "true" ]]; then - if [[ "$DATE" -gt "$THRESHOLD" ]]; then - FILTERED+=("$file") - fi - else - if [[ "$DATE" -le "$THRESHOLD" ]]; then - FILTERED+=("$file") - fi - fi - done - - if [ ${#FILTERED[@]} -eq 0 ]; then - echo "No files to check." - exit 0 - fi - - echo "Will run on these files: - ${FILTERED[@]}" - - set +e - LOG=$(pylint ${FILTERED[@]}) - EXIT_CODE=$? - set -e - - set +x - echo "OUTPUT<> $GITHUB_ENV - echo "$LOG" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - echo "log=$LOG" - set -x - - echo "exit-code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT" - - if [[ "${{ matrix.strict-mode }}" == "true" ]]; then - HEADER="🚨 The following files must be fixed before merge!" - else - HEADER="🙏 The following files have warnings. In case you are familiar with these, please try helping us to improve the code base." 
- fi - echo "header=$HEADER" | tee -a "$GITHUB_OUTPUT" - - exit $([[ "$EXIT_CODE" -ne 0 && "$STRICT_MODE" == "true" ]] && echo $EXIT_CODE || echo 0) - - - name: Find Comment - if: ${{ always() }} - uses: peter-evans/find-comment@v3 - id: fc - with: - issue-number: ${{ github.event.number }} - body-includes: - - - name: Delete comment - if: ${{ always() && steps.fc.outputs.comment-id != '' }} - env: - GH_TOKEN: ${{ secrets.github_token }} - REPOSITORY: ${{ github.repository }} - COMMENT_ID: ${{ steps.fc.outputs.comment-id }} - run: | - curl -L \ - -X DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $GH_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/repos/$REPOSITORY/issues/comments/$COMMENT_ID - - - name: Add PR comment for PyLint - if: ${{ always() && steps.pylint.outputs.exit-code != '0' }} - uses: peter-evans/create-or-update-comment@v4 - with: - issue-number: ${{ github.event.number }} - body: | - - - beep boop 🤖: ${{ steps.pylint.outputs.header }} - - --- - - Your code was analyzed with PyLint. The following annotations have been identified: - - ``` - ${{ env.OUTPUT }} - ``` - - --- - - Mitigation guide: - - * Add sensible and useful docstrings to functions and methods - * For trivial methods like getter/setters, consider adding `# pylint: disable=C0116` inside the function itself - * To disable multiple functions/methods at once, put a `# pylint: disable=C0116` before the first and a `# pylint: enable=C0116` after the last. - - By applying these rules, we reduce the occurance of this message in future. - - Thank you for improving NeMo's documentation! diff --git a/.github/workflows/code-linting.yml b/.github/workflows/code-linting.yml new file mode 100644 index 000000000000..e8025e481fe4 --- /dev/null +++ b/.github/workflows/code-linting.yml @@ -0,0 +1,137 @@ +name: PyLint and flake8 linting + +on: + pull_request: + paths: + - '**.py' + types: [ opened, synchronize, reopened, labeled, unlabeled ] + +jobs: + linting: + name: 'Domain: ${{ matrix.domain }}' + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + domain: [speech, other] + env: + DOMAIN: ${{ matrix.domain }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Select filter + id: filter + run: | + if [[ "$DOMAIN" == "speech" ]]; then + FILTER=$(jq -crn '[ + "nemo/collections/asr/**", + "nemo/collections/tts/**", + "nemo/collections/audio/**", + "nemo/collections/multimodal/speech_llm/**", + "nemo/collections/speechlm/**" + ] | join(",")') + + else + FILTER=$(jq -crn '[ + "nemo/**", + "!nemo/collections/asr/**", + "!nemo/collections/tts/**", + "!nemo/collections/audio/**", + "!nemo/collections/multimodal/speech_llm/**", + "!nemo/collections/speechlm/**" + ] | join(",")') + fi + + echo "main=$FILTER" | tee -a "$GITHUB_OUTPUT" + + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v44 + with: + files: ${{ steps.filter.outputs.main }} + files_separator: "," + separator: " " + + - name: Run PyLint + id: pylint + env: + CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} + run: | + if [[ -z "$CHANGED_FILES" ]]; then + echo Nothing to lint. + echo "exit-code=0" | tee -a "$GITHUB_OUTPUT" + exit 0 + fi + + pip install pylint + set +e + pylint --output "pylintrc.$DOMAIN.txt" --rcfile ".pylintrc.$DOMAIN" ${CHANGED_FILES[@]} + echo "exit-code=$?" 
| tee -a "$GITHUB_OUTPUT" + + - name: Run flake8 + id: flake8 + env: + CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} + run: | + if [[ -z "$CHANGED_FILES" ]]; then + echo Nothing to lint. + echo "exit-code=0" | tee -a "$GITHUB_OUTPUT" + exit 0 + fi + + pip install flake8 + set +e + flake8 --output "flake8.$DOMAIN.txt" --config ".flake8.$DOMAIN" ${CHANGED_FILES[@]} + echo "exit-code=$?" | tee -a "$GITHUB_OUTPUT" + + - name: Summary + env: + PYLINT: ${{ steps.pylint.outputs.exit-code == 0 }} + FLAKE8: ${{ steps.flake8.outputs.exit-code == 0 }} + run: | + + if [[ "$PYLINT" != "true" ]]; then + echo "Pylint output:" | tee -a $GITHUB_STEP_SUMMARY + + echo '```' | tee -a $GITHUB_STEP_SUMMARY + cat pylintrc.$DOMAIN.txt | tee -a $GITHUB_STEP_SUMMARY + echo '```' | tee -a $GITHUB_STEP_SUMMARY + fi + + if [[ "$FLAKE8" != "true" ]]; then + echo "Flake8 output:" | tee -a $GITHUB_STEP_SUMMARY + + echo '```' | tee -a $GITHUB_STEP_SUMMARY + cat flake8.$DOMAIN.txt | tee -a $GITHUB_STEP_SUMMARY + echo '```' | tee -a $GITHUB_STEP_SUMMARY + fi + + if [[ "$PYLINT" != "true" || "$FLAKE8" != "true" ]]; then + echo "The following directories got scanned:" | tee -a $GITHUB_STEP_SUMMARY + + echo '```' | tee -a $GITHUB_STEP_SUMMARY + echo ${{ steps.filter.outputs.main }} | tee -a $GITHUB_STEP_SUMMARY + echo '```' | tee -a $GITHUB_STEP_SUMMARY + + exit 1 + fi + + Nemo_Linting_Test: + needs: linting + runs-on: ubuntu-latest + if: always() + steps: + - name: Main + env: + RESULTS: ${{ toJson(needs.linting) }} + run: | + RESULT=$(echo "$RESULTS" | jq -r '.result') + + if [[ "$RESULT" == "success" ]]; then + echo "All passed." + exit 0 + else + echo "Some linting domains failed." + exit 1 + fi \ No newline at end of file diff --git a/.github/workflows/monitor-vms.yml b/.github/workflows/monitor-vms.yml index 0bb54524847a..722a4720b0e9 100644 --- a/.github/workflows/monitor-vms.yml +++ b/.github/workflows/monitor-vms.yml @@ -4,10 +4,11 @@ on: schedule: - cron: 0/15 * * * * workflow_dispatch: - + jobs: pre-flight: runs-on: ubuntu-latest + if: github.repository_owner == 'NVIDIA' outputs: list-of-vms: ${{ steps.main.outputs.main }} environment: main @@ -25,13 +26,13 @@ jobs: MATRIX=$(echo $RUNNERS \ | jq -c '[ - .runners[] + .runners[] | select(.status == "online") | select(.name | contains("cpu") | not) | { - "vm": .name, + "vm": .name, "n_gpus": [ - .labels[] + .labels[] | select(.name | endswith("gpu")) | .name ][0][:1] } diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c1cd763a0501..de4ff6f1bdc3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -32,17 +32,11 @@ on: jobs: release: - uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.18.3 + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.20.1 with: release-ref: ${{ inputs.release-ref }} - image-name: nemo_container - dockerfile: Dockerfile.ci - image-label: nemo-core - build-args: | - IMAGE_LABEL=nemo-core - prune-filter-timerange: 24h python-package: nemo - container-workdir: /workspace + python-version: '3.10' library-name: Neural Modules dry-run: ${{ inputs.dry-run }} version-bump-branch: ${{ inputs.version-bump-branch }} diff --git a/.pylintrc b/.pylintrc.other similarity index 100% rename from .pylintrc rename to .pylintrc.other diff --git a/.pylintrc.speech b/.pylintrc.speech new file mode 100644 index 000000000000..9c8830567460 --- /dev/null +++ b/.pylintrc.speech @@ -0,0 +1,9 @@ +[MAIN] +ignore-paths=tests +max-line-length=119 + 
+[MESSAGES CONTROL] +disable=all + +enable=W0611 +# W0611: unused-import diff --git a/examples/speechlm/sft/hf.py b/examples/speechlm/sft/hf.py index 96e785dac97f..3a64ea62dcd3 100755 --- a/examples/speechlm/sft/hf.py +++ b/examples/speechlm/sft/hf.py @@ -27,6 +27,17 @@ class LhotseHfNeMoDataset(torch.utils.data.Dataset): + """Class for a speechLM dataset + + Args: + processor (AutoProcessor): the processor to use + tokenizer (AutoTokenizer): the tokenizer to use + decoder_mask_fill (int): Value to fill in decoder mask + + Returns: + pl.LightningDataModule: the dataset to train with. + """ + def __init__(self, processor, tokenizer, decoder_mask_fill=-100): super().__init__() self.processor = processor @@ -69,6 +80,7 @@ def __getitem__(self, cuts): # Models can be one of the supported ones by AutoModelForSpeechSeq2Seq such as # openai/whisper-large-v3 and facebook/s2t-small-librispeech-asr parser.add_argument('--model', default='openai/whisper-large-v3') + parser.add_argument('--data-path', type=str, required=True) parser.add_argument('--strategy', type=str, default='auto', choices=['auto', 'ddp', 'fsdp']) parser.add_argument('--devices', default=1) parser.add_argument('--accelerator', default='gpu', choices=['gpu']) @@ -83,7 +95,7 @@ def __getitem__(self, cuts): config = OmegaConf.create( { - "cuts_path": "/opt/checkpoints/lhotse/libri/libri-train-5.jsonl.gz", + "cuts_path": args.data_path, "sample_rate": 16000, "shuffle": True, "num_workers": 2, diff --git a/nemo/collections/audio/parts/submodules/flow.py b/nemo/collections/audio/parts/submodules/flow.py index 748d4c6c6d3b..56e77389b2e0 100644 --- a/nemo/collections/audio/parts/submodules/flow.py +++ b/nemo/collections/audio/parts/submodules/flow.py @@ -234,7 +234,7 @@ def forward( if state_length is not None: state = mask_sequence_tensor(state, state_length) - for t in time_steps: + for t in time_steps[:-1]: time = t * torch.ones(state.shape[0], device=state.device) if estimator_condition is None: diff --git a/nemo/collections/llm/__init__.py b/nemo/collections/llm/__init__.py index 5d558c2b451f..306484331fb7 100644 --- a/nemo/collections/llm/__init__.py +++ b/nemo/collections/llm/__init__.py @@ -36,6 +36,7 @@ from nemo.collections.llm.gpt.data import ( AlpacaDataModule, ChatDataModule, + CustomRetrievalDataModule, DollyDataModule, FineTuningDataModule, HFDatasetDataModule, @@ -91,7 +92,9 @@ Llama31Config405B, Llama32Config1B, Llama32Config3B, + Llama32EmbeddingConfig1B, LlamaConfig, + LlamaEmbeddingModel, LlamaModel, MaskedTokenLossReduction, MistralConfig7B, @@ -150,6 +153,7 @@ __all__ = [ "MockDataModule", "T5MockDataModule", + "CustomRetrievalDataModule", "GPTModel", "GPTConfig", "gpt_data_step", @@ -185,6 +189,8 @@ "Nemotron4Config15B", "Nemotron4Config340B", "NemotronConfig", + "LlamaEmbeddingModel", + "Llama32EmbeddingConfig1B", "Phi3Config", "Phi3ConfigMini", "Phi3Model", diff --git a/nemo/collections/llm/bert/loss.py b/nemo/collections/llm/bert/loss.py index 6fd34a4d3fa3..3bbbdfbd8e49 100644 --- a/nemo/collections/llm/bert/loss.py +++ b/nemo/collections/llm/bert/loss.py @@ -99,6 +99,89 @@ def reduce(self, losses_reduced_per_micro_batch) -> torch.Tensor: return torch.tensor(0.0, device=torch.cuda.current_device()) +class HardNegativeRankingLoss(MegatronLossReduction): + """ + This loss uses hard-negative samples. 
+ The difference between this loss and the default MultipleNegativesRankingLoss + from Sentence Transformers is that the latter shares the hard negatives + as negatives for all examples, whereas this loss uses hard negatives + exclusively for the example they are associated with. + """ + + def __init__( + self, + validation_step: bool = False, + val_drop_last: bool = True, + num_hard_negatives: int = 1, + scale: float = 50, + label_smoothing: float = 0.0, + ) -> None: + super().__init__() + self.validation_step = validation_step + self.val_drop_last = val_drop_last + self.num_hard_negatives = num_hard_negatives + self.scale = scale + self.cross_entropy_loss = nn.CrossEntropyLoss(label_smoothing=label_smoothing) + + def forward( + self, batch: Dict[str, torch.Tensor], forward_out: torch.Tensor + ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + from megatron.core import parallel_state + + cp_size = parallel_state.get_context_parallel_world_size() + if cp_size != 1: + raise NotImplementedError(f'CP is not supported for {self.__class__} yet.') + + num_tensors_per_example = 2 + self.num_hard_negatives # 1 query, 1 pos, num_hard_negatives negs + current_train_n_passages = 1 + self.num_hard_negatives + batch_size = forward_out.shape[0] // num_tensors_per_example + # Get Query, Key (Positives, Negatives) + # forward_out was chunked [(q1, k1), (q2, k2), ...] + chunks = forward_out.chunk(batch_size) + query = torch.stack([item[0] for item in chunks]) + key = torch.cat([item[1:] for item in chunks]) + + assert key.shape[0] % query.shape[0] == 0, '{} % {} > 0'.format(key.shape[0], query.shape[0]) + assert key.shape[0] / query.shape[0] == current_train_n_passages, '{} / {} != {}'.format( + key.shape[0], query.shape[0], current_train_n_passages + ) + query_shape = query.shape + repeated_query = query.repeat(1, 1, current_train_n_passages).reshape( + query_shape[0] * current_train_n_passages, query_shape[1] + ) + scores = torch.sum(repeated_query * key, dim=-1).reshape(query_shape[0], current_train_n_passages) + labels = torch.zeros(query_shape[0], dtype=torch.long, device=query.device) + scores *= self.scale + ce_loss = self.cross_entropy_loss(scores, labels) + reduced_loss = average_losses_across_data_parallel_group([ce_loss]) + return ce_loss, {"avg": reduced_loss} + + def reduce(self, losses_reduced_per_micro_batch) -> torch.Tensor: + """Taken from: https://github.com/NVIDIA/NeMo/blob/main + /nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py#L535-L552 .""" + if losses_reduced_per_micro_batch: + if "avg" in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced["avg"] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.concat(loss_tensors_list) + + return loss_tensor.mean() + + # Get the total loss since micro batch sizes are not uniform + loss_sum_tensors_list: List[torch.Tensor] = [ + loss_sum["loss_sum_and_ub_size"] + for loss_sum in losses_reduced_per_micro_batch + if loss_sum["loss_sum_and_ub_size"][1] > 0 + ] + loss_sum = ( + torch.vstack(loss_sum_tensors_list).sum(dim=0) + if len(loss_sum_tensors_list) > 0 + else torch.tensor([0.0, 0.0], device=torch.cuda.current_device()) + ) + return loss_sum + + return torch.tensor(0.0, device=torch.cuda.current_device()) + + +class BERTInBatchExclusiveHardNegativesRankingLoss(MegatronLossReduction): + """ + This loss uses in-batch negative samples + hard-negative samples.
diff --git a/nemo/collections/llm/gpt/data/__init__.py b/nemo/collections/llm/gpt/data/__init__.py index 89b5a3dc4b54..fd8935d9c11a 100644 --- a/nemo/collections/llm/gpt/data/__init__.py +++ b/nemo/collections/llm/gpt/data/__init__.py @@ -19,6 +19,7 @@ from nemo.collections.llm.gpt.data.hf_dataset import HFDatasetDataModule from nemo.collections.llm.gpt.data.mock import MockDataModule from nemo.collections.llm.gpt.data.pre_training import PreTrainingDataModule, build_pretraining_datamodule +from nemo.collections.llm.gpt.data.retrieval import CustomRetrievalDataModule from nemo.collections.llm.gpt.data.squad import SquadDataModule __all__ = [ @@ -31,4 +32,5 @@ "PreTrainingDataModule", "build_pretraining_datamodule", "SquadDataModule", + "CustomRetrievalDataModule", ] diff --git a/nemo/collections/llm/gpt/data/retrieval.py b/nemo/collections/llm/gpt/data/retrieval.py new file mode 100644 index 000000000000..058068e811e0 --- /dev/null +++ b/nemo/collections/llm/gpt/data/retrieval.py @@ -0,0 +1,110 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os.path +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from datasets import Dataset + +from nemo.collections.llm.bert.data.fine_tuning import FineTuningDataModule +from nemo.collections.llm.gpt.data.core import get_dataset_root +from nemo.utils import logging + +if TYPE_CHECKING: + from nemo.collections.common.tokenizers import TokenizerSpec + from nemo.collections.llm.gpt.data.packed_sequence import PackedSequenceSpecs + + +# Custom Retrieval Data Module loaded with json file +class CustomRetrievalDataModule(FineTuningDataModule): + """ """ + + def __init__( + self, + data_root: str, + dataset_identifier: str = "custom_retrieval_dataset", + seq_length: int = 2048, + tokenizer: Optional["TokenizerSpec"] = None, + micro_batch_size: int = 4, + global_batch_size: int = 8, + rampup_batch_size: Optional[List[int]] = None, + force_redownload: bool = False, + delete_raw: bool = True, + seed: int = 1234, + memmap_workers: int = 1, + num_workers: int = 8, + pin_memory: bool = True, + persistent_workers: bool = False, + packed_sequence_specs: Optional["PackedSequenceSpecs"] = None, + query_key: str = "question", + pos_doc_key: str = "pos_doc", + neg_doc_key: str = "neg_doc", + dataset_kwargs: Optional[Dict[str, Any]] = None, + ): + self.force_redownload = force_redownload + self.delete_raw = delete_raw + + assert packed_sequence_specs is None, "RetrievalDataModule does not support packed sequences." + assert os.path.exists(data_root), "Data root does not exist." 
+ self.query_key = query_key + self.pos_doc_key = pos_doc_key + self.neg_doc_key = neg_doc_key + self.unprocessed_root = data_root + super().__init__( + dataset_root=get_dataset_root(dataset_identifier), + seq_length=seq_length, + tokenizer=tokenizer, + micro_batch_size=micro_batch_size, + global_batch_size=global_batch_size, + rampup_batch_size=rampup_batch_size, + seed=seed, + memmap_workers=memmap_workers, + num_workers=num_workers, + pin_memory=pin_memory, + persistent_workers=persistent_workers, + dataset_kwargs=dataset_kwargs, + ) + + def prepare_data(self) -> None: + """Prepare data if not split already.""" + if not self.train_path.exists() or self.force_redownload: + self._preprocess_and_split_data() + super().prepare_data() + + def _preprocess_and_split_data(self, train_ratio: float = 0.95, val_ratio: float = 0.04): + logging.info(f"Preprocessing {self.__class__.__name__} to jsonl format and splitting...") + + test_ratio = 1 - train_ratio - val_ratio + save_splits = {} + dataset = Dataset.from_list(json.load(open(self.unprocessed_root, 'r'))) + split_dataset = dataset.train_test_split(test_size=val_ratio + test_ratio, seed=self.seed) + split_dataset2 = split_dataset['test'].train_test_split( + test_size=test_ratio / (val_ratio + test_ratio), seed=self.seed + ) + save_splits['training'] = split_dataset['train'] + save_splits['validation'] = split_dataset2['train'] + save_splits['test'] = split_dataset2['test'] + + for split_name, dataset in save_splits.items(): + output_file = self.dataset_root / f"{split_name}.jsonl" + with output_file.open("w", encoding="utf-8") as f: + for o in dataset: + # We only write one positive document for now + # All negative documents are written + pos_doc = o[self.pos_doc_key][0] if isinstance(o[self.pos_doc_key], list) else o[self.pos_doc_key] + neg_doc = o[self.neg_doc_key] if isinstance(o[self.neg_doc_key], list) else [o[self.neg_doc_key]] + f.write(json.dumps({"query": o[self.query_key], "pos_doc": pos_doc, "neg_doc": neg_doc}) + "\n") + + logging.info(f"{split_name} split saved to {output_file}") diff --git a/nemo/collections/llm/gpt/model/__init__.py b/nemo/collections/llm/gpt/model/__init__.py index 4e9448eaef2c..d9ab48e0ea51 100644 --- a/nemo/collections/llm/gpt/model/__init__.py +++ b/nemo/collections/llm/gpt/model/__init__.py @@ -64,6 +64,7 @@ LlamaConfig, LlamaModel, ) +from nemo.collections.llm.gpt.model.llama_embedding import Llama32EmbeddingConfig1B, LlamaEmbeddingModel from nemo.collections.llm.gpt.model.mistral import MistralConfig7B, MistralModel, MistralNeMoConfig12B from nemo.collections.llm.gpt.model.mixtral import ( MixtralConfig, @@ -145,6 +146,8 @@ "Nemotron3Config22B", "Nemotron4Config340B", "NemotronModel", + "LlamaEmbeddingModel", + "Llama32EmbeddingConfig1B", "Phi3Config", "Phi3ConfigMini", "Phi3Model", diff --git a/nemo/collections/llm/gpt/model/hf_llama_embedding.py b/nemo/collections/llm/gpt/model/hf_llama_embedding.py new file mode 100644 index 000000000000..ba89626ff45f --- /dev/null +++ b/nemo/collections/llm/gpt/model/hf_llama_embedding.py @@ -0,0 +1,190 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Tuple, Union + +import torch +from torch import Tensor +from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss +from transformers.cache_utils import Cache +from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask +from transformers.modeling_outputs import SequenceClassifierOutputWithPast +from transformers.models.llama.configuration_llama import LlamaConfig +from transformers.models.llama.modeling_llama import LlamaForSequenceClassification, LlamaModel +from transformers.utils import logging + +logger = logging.get_logger(__name__) + + +def pool(last_hidden_states: Tensor, attention_mask: Tensor, pool_type: str) -> Tensor: + """Pooling on last_hidden_states without pad tokens.""" + last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0) + + if pool_type == "avg": + emb = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None] + elif pool_type == "weighted_avg": + emb = last_hidden.sum(dim=1) + elif pool_type == "cls": + emb = last_hidden[:, 0] + elif pool_type == "last": + left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0] + if left_padding: + emb = last_hidden[:, -1] + else: + sequence_lengths = attention_mask.sum(dim=1) - 1 + batch_size = last_hidden.shape[0] + emb = last_hidden[torch.arange(batch_size, device=last_hidden.device), sequence_lengths] + else: + raise ValueError(f"pool_type {pool_type} not supported") + + return emb + + +class LlamaBidirectionalConfig(LlamaConfig): + """LLamaBidirectionalConfig for LlamaBidirectionalModel.""" + + model_type = "llama_bidirec" + + def __init__( + self, + pooling="avg", + temperature=1.0, + **kwargs, + ): + self.pooling = pooling + self.temperature = temperature + super().__init__( + **kwargs, + ) + + +class LlamaBidirectionalModel(LlamaModel): + """LlamaBidirectionalModel. + Attention has been adjusted to bidirectional. + """ + + config_class = LlamaBidirectionalConfig + + def __init__(self, config: LlamaConfig): + super().__init__(config) + for layer in self.layers: + layer.self_attn.is_causal = False + self.config._attn_implementation = "eager" + + def _update_causal_mask( + self, + attention_mask: torch.Tensor, + input_tensor: torch.Tensor, + cache_position: torch.Tensor, + past_key_values: Cache, + output_attentions: bool, + ): + # Generates bi-directional attention. 
+ causal_mask = _prepare_4d_attention_mask(attention_mask, input_tensor.dtype) + return causal_mask + + + class LlamaBidirectionalForSequenceClassification(LlamaForSequenceClassification): + """The LLaMa Model transformer with a sequence classification head on top (linear layer).""" + + config_class = LlamaBidirectionalConfig + + def __init__(self, config): + super().__init__(config) + # Releasing the parameters of LlamaModel + # created by parent LlamaForSequenceClassification + del self.model + + self.model = LlamaBidirectionalModel(config) + + # Initialize weights and apply final processing + self.post_init() + + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + transformer_outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + hidden_states = transformer_outputs[0] + + pooled_hidden_states = pool( + last_hidden_states=hidden_states, + attention_mask=attention_mask, + pool_type=self.config.pooling, + ) + + pooled_logits = self.score(pooled_hidden_states) + pooled_logits = pooled_logits / self.config.temperature + + loss = None + if labels is not None: + labels = labels.to(pooled_logits.device) + if self.config.problem_type is None: + if self.num_labels == 1: + self.config.problem_type = "regression" + elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int): + self.config.problem_type = "single_label_classification" + else: + self.config.problem_type = "multi_label_classification" + + if self.config.problem_type == "regression": + loss_fct = MSELoss() + if self.num_labels == 1: + loss = loss_fct(pooled_logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(pooled_logits, labels) + elif self.config.problem_type == "single_label_classification": + loss_fct = CrossEntropyLoss() + loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1)) + elif self.config.problem_type == "multi_label_classification": + loss_fct = BCEWithLogitsLoss() + loss = loss_fct(pooled_logits, labels) + if not return_dict: + output = (pooled_logits,) + transformer_outputs[1:] + return ((loss,) + output) if loss is not None else output + + return SequenceClassifierOutputWithPast( + loss=loss, + logits=pooled_logits, + past_key_values=transformer_outputs.past_key_values, + hidden_states=transformer_outputs.hidden_states, + attentions=transformer_outputs.attentions, + ) diff --git
a/nemo/collections/llm/gpt/model/llama_embedding.py b/nemo/collections/llm/gpt/model/llama_embedding.py new file mode 100644 index 000000000000..3d8edcc5121a --- /dev/null +++ b/nemo/collections/llm/gpt/model/llama_embedding.py @@ -0,0 +1,402 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, Annotated, Callable, Dict, Literal, Optional, Union + +import einops +import lightning.pytorch as L +import torch +import torch.nn.functional as F +from megatron.core import parallel_state +from megatron.core.transformer.enums import AttnMaskType +from megatron.core.transformer.spec_utils import ModuleSpec +from torch import Tensor, nn + +import nemo.collections.llm.gpt.model.base as GPTBase +from nemo.collections.llm.bert.loss import BERTInBatchExclusiveHardNegativesRankingLoss, HardNegativeRankingLoss +from nemo.collections.llm.gpt.model import GPTConfig +from nemo.collections.llm.gpt.model.llama import HFLlamaImporter, Llama32Config1B, LlamaConfig, LlamaModel +from nemo.collections.llm.utils import Config +from nemo.lightning import OptimizerModule, io +from nemo.lightning.pytorch.utils import dtype_from_hf +from nemo.utils import logging +from nemo.utils.import_utils import safe_import + +if TYPE_CHECKING: + from megatron.core.models.gpt.gpt_model import GPTModel as MCoreGPTModel + + from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec +_, HAVE_TE = safe_import("transformer_engine") + + +def _local_layer_spec(config: "GPTConfig") -> ModuleSpec: + gpt_layer_spec = GPTBase.local_layer_spec(config) + gpt_layer_spec.submodules.self_attention.params['attn_mask_type'] = AttnMaskType.padding + return gpt_layer_spec + + +def _transformer_engine_layer_spec(config: "GPTConfig") -> ModuleSpec: + gpt_layer_spec = GPTBase.transformer_engine_layer_spec(config) + gpt_layer_spec.submodules.self_attention.params['attn_mask_type'] = AttnMaskType.padding + return gpt_layer_spec + + +def get_nv_embedding_layer_spec(config): + """Customized Layer Spec for NV Embedding Llama Model. + Bidirectional attention is enabled instead of causal masking.
+ """ + if HAVE_TE: + return _transformer_engine_layer_spec(config) + else: + return _local_layer_spec(config) + + + def nv_embedding_data_step(dataloader_iter) -> Dict[str, torch.Tensor]: + """Setup NVEmbedding Llama Model dataloader batch.""" + batch = next(dataloader_iter) + + _batch: dict + if isinstance(batch, tuple) and len(batch) == 3: + _batch = batch[0] + else: + _batch = batch + + required_keys = set() + required_keys.add("attention_mask") + + if parallel_state.is_pipeline_first_stage(): + required_keys.add("input_ids") + required_keys.add("position_ids") + + _batch = {key: val.cuda(non_blocking=True) if key in required_keys else None for key, val in _batch.items()} + # slice batch along sequence dimension for context parallelism + output = GPTBase.get_batch_on_this_context_parallel_rank(_batch) + + return output + + + def nv_embedding_forward_step(model: L.LightningModule, batch: Dict[str, torch.Tensor]) -> torch.Tensor: + """ + This subsets the batch keys to the ones actually used by the forward pass of the model, + and then calls the model's forward pass. If "cu_seqlens" is defined in the batch, + then the packed sequence parameters are also passed to the model for forward pass efficiency. + """ + forward_args = { + "input_ids": batch["input_ids"], + "attention_mask": batch["attention_mask"], + "position_ids": batch["position_ids"], + } + emb = model.encode(**forward_args) + return emb + + + @dataclass + class Llama32EmbeddingConfig1B(Llama32Config1B): + """Llama3.2 Embedding 1B Config""" + + transformer_layer_spec: Union[ModuleSpec, Callable[["GPTConfig"], ModuleSpec]] = get_nv_embedding_layer_spec + forward_step_fn: Callable = nv_embedding_forward_step + data_step_fn: Callable = nv_embedding_data_step + + # Training Configs + truncation_method: Literal["left", "right"] = 'right' + num_hard_negatives: int = 4 + ce_loss_scale: float = 50 + label_smoothing: float = 0.0 + in_batch_negatives: bool = False + negative_sample_strategy: Literal["random", "first"] = 'first' + add_bos: bool = True + add_eos: bool = False + + def configure_model(self, tokenizer, pre_process=None, post_process=None) -> "MCoreGPTModel": + """Configure the NV Embedding Llama3.2 1B Model""" + model = super().configure_model(tokenizer, pre_process, post_process) + # post_process needs to be overwritten to False after model init because + # final_layernorm is still needed and it will only be initialized when post_process is True in Mcore. + # And for forward(), we do not want to run through output_layer thus setting post_process to False.
+ model.post_process = False + return model + + +def _average_pool(last_hidden_states: Tensor, attention_mask: Tensor): + """Average the hidden states on the non-masking tokens.""" + # [sq, b, h] -> [b, sq, h] + last_hidden_states = einops.rearrange(last_hidden_states, 's b h -> b s h') + last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0) + return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None] + + +class LlamaEmbeddingModel(LlamaModel): + """NV Embedding Llama Model""" + + def __init__( + self, + config: Annotated[Optional[LlamaConfig], Config[LlamaConfig]] = None, + optim: Optional[OptimizerModule] = None, + tokenizer: Optional["TokenizerSpec"] = None, + model_transform: Optional[Callable[[nn.Module], nn.Module]] = None, + ): + super().__init__(config or LlamaConfig(), optim=optim, tokenizer=tokenizer, model_transform=model_transform) + + @property + def dataset_kwargs(self): + """Getter for dataset_kwargs from model config""" + return { + 'num_hard_negatives': self.config.num_hard_negatives, + 'negative_sample_strategy': self.config.negative_sample_strategy, + 'add_bos': self.config.add_bos, + 'add_eos': self.config.add_eos, + } + + def encode( + self, + input_ids: torch.LongTensor, + position_ids: torch.LongTensor, + attention_mask: torch.LongTensor, + decoder_input: Optional[torch.Tensor] = None, + ): + """Generate the embedding for the inputs. + It runs the forward and apply average pooling on the last hidden states of the model. + """ + if attention_mask.ndim == 2: + # extend attention mask to [b, 1, 1, sq] + # Also convert attention mask to binary + extended_mask = attention_mask.unsqueeze(1).unsqueeze(1) < 0.5 + elif attention_mask.ndim == 4: + assert attention_mask.shape[1] == 1 and attention_mask.shape[2] == 1, "Attention mask shape incorrect" + extended_mask = attention_mask + # Squeeze attention mask to [b, sq] for averaging pooling later + + attention_mask = extended_mask.squeeze() < 0.5 + else: + raise ValueError("Attention_mask shape incorrect") + + output = self.forward( + input_ids=input_ids, + position_ids=position_ids, + attention_mask=extended_mask, + decoder_input=decoder_input, + ) + embeddings = _average_pool(output, attention_mask) + embeddings = F.normalize(embeddings, p=2, dim=1) + return embeddings + + @property + def training_loss_reduction(self) -> BERTInBatchExclusiveHardNegativesRankingLoss: # pylint: disable=C0115,C0116 + if not self._training_loss_reduction: + if self.config.in_batch_negatives: + loss_func = BERTInBatchExclusiveHardNegativesRankingLoss + else: + loss_func = HardNegativeRankingLoss + self._training_loss_reduction = loss_func( + validation_step=False, + num_hard_negatives=self.config.num_hard_negatives, + scale=self.config.ce_loss_scale, + label_smoothing=self.config.label_smoothing, + ) + + return self._training_loss_reduction + + @property + def validation_loss_reduction(self) -> BERTInBatchExclusiveHardNegativesRankingLoss: # pylint: disable=C0115,C0116 + if not self._validation_loss_reduction: + if self.config.in_batch_negatives: + loss_func = BERTInBatchExclusiveHardNegativesRankingLoss + else: + loss_func = HardNegativeRankingLoss + self._validation_loss_reduction = loss_func( + validation_step=True, + num_hard_negatives=self.config.num_hard_negatives, + scale=self.config.ce_loss_scale, + label_smoothing=self.config.label_smoothing, + ) + + return self._validation_loss_reduction + + +@io.model_importer(LlamaEmbeddingModel, "hf") +class LlamaEmbeddingImporter(HFLlamaImporter): + """HF 
Importer for Llama Embedding Model""" + + def init(self) -> LlamaEmbeddingModel: + return LlamaEmbeddingModel(self.config, tokenizer=self.tokenizer) + + @property + def config(self) -> Llama32Config1B: + # pylint : disable=C0116 + from transformers import LlamaConfig as HFLlamaConfig + + source = HFLlamaConfig.from_pretrained(str(self)) + + def make_vocab_size_divisible_by(vocab_size): + base = 128 + while vocab_size % base != 0: + base //= 2 + return base + + output = Llama32EmbeddingConfig1B( + num_layers=source.num_hidden_layers, + hidden_size=source.hidden_size, + ffn_hidden_size=source.intermediate_size, + num_attention_heads=source.num_attention_heads, + init_method_std=source.initializer_range, + layernorm_epsilon=source.rms_norm_eps, + num_query_groups=source.num_key_value_heads, + rotary_base=source.rope_theta, + gated_linear_unit=True, + make_vocab_size_divisible_by=make_vocab_size_divisible_by(source.vocab_size), + share_embeddings_and_output_weights=getattr(source, "tie_word_embeddings", False), + fp16=(dtype_from_hf(source) == torch.float16), + bf16=(dtype_from_hf(source) == torch.bfloat16), + params_dtype=dtype_from_hf(source), + ) + + return output + + +@io.model_exporter(LlamaEmbeddingModel, "hf") +class LlamaEmbeddingExporter(io.ModelConnector[LlamaEmbeddingModel, "LlamaBidirectionalModel"]): + """HF Exporter for NV Embedding Llama Model. + Note that NV Embedding LLama uses customized LlamaBidirectionalConfig config. + """ + + def init(self, dtype=torch.bfloat16) -> "LlamaForCausalLM": + from transformers.modeling_utils import no_init_weights + + from nemo.collections.llm.gpt.model.hf_llama_embedding import LlamaBidirectionalModel + + LlamaBidirectionalModel.register_for_auto_class("AutoModel") + with no_init_weights(True): + return LlamaBidirectionalModel._from_config(self.config, torch_dtype=dtype) + + def apply(self, output_path: Path) -> Path: + source, _ = self.nemo_load(str(self)) + source_dtype = source.module.embedding.word_embeddings.weight.dtype + target = self.init(source_dtype) + target = self.convert_state(source, target) + + target = target.cpu() + target.save_pretrained(output_path) + try: + tokenizer = self.tokenizer.tokenizer + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + tokenizer.padding_side = source.config.truncation_method + + tokenizer.save_pretrained(output_path) + except Exception: + logging.warning("Failed to save tokenizer") + + return output_path + + @property + def config(self): + """Get HF NV Embedding Llama Config.""" + source: LlamaConfig = io.load_context(str(self), subpath="model.config") + + from nemo.collections.llm.gpt.model.hf_llama_embedding import LlamaBidirectionalConfig + + LlamaBidirectionalConfig.register_for_auto_class("AutoConfig") + return LlamaBidirectionalConfig( + num_hidden_layers=source.num_layers, + hidden_size=source.hidden_size, + intermediate_size=source.ffn_hidden_size, + num_attention_heads=source.num_attention_heads, + max_position_embeddings=source.seq_length, + initializer_range=source.init_method_std, + rms_norm_eps=source.layernorm_epsilon, + num_key_value_heads=source.num_query_groups, + rope_theta=source.rotary_base, + vocab_size=self.tokenizer.vocab_size, + tie_word_embeddings=source.share_embeddings_and_output_weights, + ) + + def convert_state(self, source, target): + """Convert NeMo State dict to HF.""" + mapping = { + "decoder.layers.*.self_attention.linear_proj.weight": "layers.*.self_attn.o_proj.weight", + "decoder.layers.*.mlp.linear_fc2.weight": 
"layers.*.mlp.down_proj.weight", + "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "layers.*.input_layernorm.weight", + "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "layers.*.post_attention_layernorm.weight", + "decoder.final_layernorm.weight": "norm.weight", + } + transforms = [_export_qkv, _export_linear_fc1, _export_embedding] + + return io.apply_transforms( + source, + target, + mapping=mapping, + transforms=transforms, + ) + + @property + def tokenizer(self) -> "TokenizerSpec": + """Get NeMo Tokenizer""" + return io.load_context(str(self), subpath="model").tokenizer + + +@io.state_transform( + source_key="decoder.layers.*.self_attention.linear_qkv.weight", + target_key=( + "layers.*.self_attn.q_proj.weight", + "layers.*.self_attn.k_proj.weight", + "layers.*.self_attn.v_proj.weight", + ), +) +def _export_qkv(ctx: io.TransformCTX, linear_qkv): + megatron_config = ctx.source.config + + head_num = megatron_config.num_attention_heads + num_query_groups = megatron_config.num_query_groups + heads_per_group = head_num // num_query_groups + hidden_size = megatron_config.hidden_size + head_size = megatron_config.kv_channels + qkv_total_dim = head_num + 2 * num_query_groups + + linear_qkv = linear_qkv.reshape([qkv_total_dim, head_size, hidden_size]) + q_slice = torch.cat( + [ + torch.arange((heads_per_group + 2) * i, (heads_per_group + 2) * i + heads_per_group) + for i in range(num_query_groups) + ] + ) + k_slice = torch.arange(heads_per_group, qkv_total_dim, (heads_per_group + 2)) + v_slice = torch.arange(heads_per_group + 1, qkv_total_dim, (heads_per_group + 2)) + + q_proj = linear_qkv[q_slice].reshape(-1, hidden_size).cpu() + k_proj = linear_qkv[k_slice].reshape(-1, hidden_size).cpu() + v_proj = linear_qkv[v_slice].reshape(-1, hidden_size).cpu() + + return q_proj, k_proj, v_proj + + +@io.state_transform( + source_key="embedding.word_embeddings.weight", + target_key="embed_tokens.weight", +) +def _export_embedding(ctx: io.TransformCTX, embedding): + megatron_config = ctx.target.config + # prune padding. + return embedding[: megatron_config.vocab_size, :] + + +@io.state_transform( + source_key="decoder.layers.*.mlp.linear_fc1.weight", + target_key=("layers.*.mlp.gate_proj.weight", "layers.*.mlp.up_proj.weight"), +) +def _export_linear_fc1(linear_fc1): + gate_proj, up_proj = torch.chunk(linear_fc1, 2, dim=0) + + return gate_proj, up_proj diff --git a/nemo/collections/llm/inference/base.py b/nemo/collections/llm/inference/base.py index 0a87480f31d9..42e70d8c31d6 100644 --- a/nemo/collections/llm/inference/base.py +++ b/nemo/collections/llm/inference/base.py @@ -149,6 +149,7 @@ def _setup_trainer_and_restore_model(path: Path, trainer: nl.Trainer, model: pl. 
trainer.strategy._setup_optimizers = False trainer.ckpt_path = None trainer.strategy.connect(model) + model.trainer = trainer if trainer.strategy.launcher is not None: trainer.strategy.launcher.launch(lambda: None, trainer=trainer) trainer.strategy.setup_environment() diff --git a/nemo/collections/llm/recipes/__init__.py b/nemo/collections/llm/recipes/__init__.py index 09291e4165be..0892bb10f16b 100644 --- a/nemo/collections/llm/recipes/__init__.py +++ b/nemo/collections/llm/recipes/__init__.py @@ -38,6 +38,7 @@ llama31_405b, llama32_1b, llama32_3b, + llama_embedding_1b, mamba2_1_3b, mamba2_2_7b, mamba2_8b, diff --git a/nemo/collections/llm/recipes/llama_embedding_1b.py b/nemo/collections/llm/recipes/llama_embedding_1b.py new file mode 100644 index 000000000000..4a26fcc563d3 --- /dev/null +++ b/nemo/collections/llm/recipes/llama_embedding_1b.py @@ -0,0 +1,286 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Optional + +import lightning.pytorch as pl +import nemo_run as run +import torch +from lightning.pytorch.callbacks.callback import Callback +from megatron.core.distributed import DistributedDataParallelConfig + +from nemo import lightning as nl +from nemo.collections import llm +from nemo.collections.llm import Llama32EmbeddingConfig1B, LlamaEmbeddingModel +from nemo.collections.llm.api import finetune +from nemo.collections.llm.peft import PEFT_STR2CLS +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed +from nemo.lightning.pytorch.callbacks.garbage_collection import GarbageCollectionCallback +from nemo.lightning.pytorch.callbacks.megatron_comm_overlap import MegatronCommOverlapCallback +from nemo.utils.exp_manager import TimingCallback + +NAME = "nvembed_llama_1b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a NVEmbed Llama3.2 1B model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the NVEmbed Llama3.2 1B model. + + Examples: + CLI usage: + $ nemo llm pretrain model=nvembed_llama_1b ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + return run.Config(LlamaEmbeddingModel, config=run.Config(Llama32EmbeddingConfig1B)) + + +def trainer( + tensor_parallelism: int = 1, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 2, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 1168251, + callbacks: Optional[list[run.Config[Callback]]] = None, +) -> run.Config[nl.Trainer]: + """ + Configure the NeMo Lightning Trainer for NVEmbed Llama3.2 1B model. + + This function sets up the distributed training strategy and other training parameters. 
+ + Args: + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations. + + Returns: + run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer. + + Examples: + CLI usage: + $ nemo llm pretrain trainer=nvembed_llama_1b ... + + Python API usage: + >>> trainer_config = trainer(num_nodes=2, num_gpus_per_node=8) + >>> print(trainer_config) + + Note: + For more information on distributed training strategies, refer to the + NeMo documentation on multi-GPU and multi-node training. + """ + strategy = run.Config( + nl.MegatronStrategy, + tensor_model_parallel_size=tensor_parallelism, + pipeline_model_parallel_size=pipeline_parallelism, + pipeline_dtype=pipeline_parallelism_type, + virtual_pipeline_model_parallel_size=virtual_pipeline_parallelism, + context_parallel_size=context_parallelism, + sequence_parallel=sequence_parallelism, + gradient_as_bucket_view=True, + ckpt_async_save=True, + ckpt_parallel_load=True, + ddp=run.Config( + DistributedDataParallelConfig, + check_for_nan_in_grad=True, + grad_reduce_in_fp32=True, + overlap_grad_reduce=True, + overlap_param_gather=True, + average_in_collective=True, + ), + ) + + trainer = run.Config( + nl.Trainer, + accelerator="gpu", + accumulate_grad_batches=1, + callbacks=callbacks, + devices=num_gpus_per_node, + limit_test_batches=50, + limit_val_batches=32, + log_every_n_steps=10, + max_steps=max_steps, + num_nodes=num_nodes, + plugins=bf16_mixed(), + strategy=strategy, + use_distributed_sampler=False, + val_check_interval=2000, + ) + + return trainer + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + resume_path: str = "meta-llama/Llama-3.2-1B", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + micro_batch_size: int = 4, + global_batch_size: int = 64, + peft_scheme: Optional[str] = 'lora', + seq_length: Optional[int] = None, + packed_sequence: Optional[bool] = None, +) -> run.Partial: + """ + Create a fine-tuning recipe for NVEmbed Llama3.2 1B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + resume_path (str): Path to the Huggingface model or pretrained distributed checkpoint for resume + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + micro_batch_size (int): Size of micro batch. + global_batch_size (int): Size of global batch. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. + Allowed values: 'lora'/'dora'/'none'/None. + seq_length (int): Maximum number of tokens per microbatch. + packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given + maximum seq_length for better efficiency. 
pack sequence is not supported for embedding model training. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory nvembed_llama_1b + + Python API usage: + >>> recipe = finetune_recipe(name="nvembed_llama_1b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SPECTER dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + if seq_length is None: + seq_length = 512 + + assert packed_sequence is None, 'pack_sequence is not supported for Embedding model finetuning.' + recipe = default_finetune_recipe(model(), resume_path, dir, name, num_nodes, num_gpus_per_node, packed_sequence) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 1 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() in ['lora', 'dora']: + recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()]) + recipe.peft.dim = 8 + recipe.peft.alpha = 16 + recipe.optim.config.use_distributed_optimizer = False + + # some settings currently do not function correctly with LoRA + recipe.model.config.cross_entropy_loss_fusion = False + + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + + # Sequence length settings in the model and dataset must agree + recipe.model.config.seq_length = seq_length + # Use Specter Dataset as the default for finetuning + recipe.data = run.Config( + llm.SpecterDataModule, + seq_length=seq_length, + micro_batch_size=micro_batch_size, + global_batch_size=global_batch_size, + dataset_kwargs={ + 'num_hard_negatives': recipe.model.config.num_hard_negatives, + 'negative_sample_strategy': recipe.model.config.negative_sample_strategy, + 'add_bos': recipe.model.config.add_bos, + 'add_eos': recipe.model.config.add_eos, + }, + ) + + return recipe + + +def finetune_performance_optimizations( + recipe: run.Partial, + peft_scheme: str, +) -> run.Partial: + """ + Modify the given recipe to optimize settings for performance. + + This method enables performance optimizations that may not be suitable for all use cases. + Intended to build upon the standard fine-tuning recipe. + + Args: + recipe (run.Partial): Base fine-tuning recipe to which performance optimizations will be added + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. + Allowed values: 'lora'/'dora'/'none'/None. + + Returns: + run.Partial: Partial configuration for performance-optimized fine-tuning. + + Note: + Use this method with caution and only when you need maximum performance. + It may not be suitable for all hardware configurations or use cases. 
+ """ + recipe.trainer.strategy.tensor_model_parallel_size = 1 + + if not hasattr(recipe.trainer, "callbacks"): + recipe.trainer.callbacks = [] + + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.plugins.grad_reduce_in_fp32 = False + recipe.trainer.strategy.ddp = run.Config( + DistributedDataParallelConfig, + check_for_nan_in_grad=True, + grad_reduce_in_fp32=False, + overlap_grad_reduce=True, + overlap_param_gather=True, + average_in_collective=True, + ) + recipe.trainer.callbacks.append( + run.Config( + MegatronCommOverlapCallback, + tp_comm_overlap=False, + ) + ) + else: + recipe.peft.target_modules = ['linear_qkv'] + + recipe.trainer.callbacks.append(run.Config(TimingCallback)) + recipe.trainer.callbacks.append( + run.Config( + GarbageCollectionCallback, + 100, + 100, + ) + ) + + return recipe diff --git a/nemo/collections/nlp/data/information_retrieval/bert_embedding_dataset.py b/nemo/collections/nlp/data/information_retrieval/bert_embedding_dataset.py index 8bca618dce3d..0da7af6ed96d 100644 --- a/nemo/collections/nlp/data/information_retrieval/bert_embedding_dataset.py +++ b/nemo/collections/nlp/data/information_retrieval/bert_embedding_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. from random import choices, sample -from typing import Mapping, Optional +from typing import Literal, Mapping, Optional import datasets import numpy as np @@ -32,6 +32,10 @@ class BertEmbeddingDataset(Dataset): + """ + Embedding Dataset Class. + """ + def __init__( self, file_path: str, @@ -49,19 +53,28 @@ def __init__( special_tokens: Optional[Mapping[str, str]] = None, # special tokens, a dictory of {token_type: token} data_type: str = 'train', # train, query or doc num_hard_negatives: int = 4, + negative_sample_strategy: Literal["random", "first"] = 'first', ): """ file_path: Path to a JSONL dataset with (query,pos_doc,neg_doc) triplets in jsonl format. - tokenizer: Tokenizer for the dataset. Instance of a class that inherits TokenizerSpec (ex: YTTM, SentencePiece). - max_seq_length (int): maximum sequence length for each dataset examples. Examples will either be truncated to fit this length or dropped if they cannot be truncated. - min_seq_length (int): min length of each data example in the dataset. Data examples will be dropped if they do not meet the min length requirements. + tokenizer: Tokenizer for the dataset. Instance of a class that inherits TokenizerSpec + (ex: YTTM, SentencePiece). + max_seq_length (int): maximum sequence length for each dataset examples. + Examples will either be truncated to fit this length or dropped if they cannot be truncated. + min_seq_length (int): min length of each data example in the dataset. + Data examples will be dropped if they do not meet the min length requirements. add_bos (bool): Whether to add a beginning of sentence token to each data example add_eos (bool): Whether to add an end of sentence token to each data example seed: Random seed for data shuffling. - max_num_samples: Maximum number of samples to load. This can be > dataset length if you want to oversample data. If None, all samples will be loaded. - index_mapping_dir: Directory to save the index mapping to. If None, will write to the same folder as the dataset. + max_num_samples: Maximum number of samples to load. This can be > dataset length + if you want to oversample data. If None, all samples will be loaded. + index_mapping_dir: Directory to save the index mapping to. + If None, will write to the same folder as the dataset. 
truncation_method: Truncation from which position. Options: ['left', 'right'] - special_tokens: special tokens for the chat prompts, a dictionary of {token_type: token}. Default: {'system_turn_start': '', 'turn_start': '', 'label_start': '', 'end_of_turn': '\n', "end_of_name": "\n"} + special_tokens: special tokens for the chat prompts, a dictionary of {token_type: token}. + Default: {'system_turn_start': '', 'turn_start': '', + 'label_start': '', 'end_of_turn': '\n', "end_of_name": "\n"} + negative_sample_strategy: Strategy for negative samples. Options: ['random', 'first'] """ # TODO: lot of copy-paste from GPTSFDDataset, should refactor both to use a common base class (@adithyare) self.tokenizer = tokenizer @@ -75,6 +88,14 @@ def __init__( self.index_mapping_dir = index_mapping_dir self.virtual_tokens = virtual_tokens self.truncation_method = truncation_method + self.pad_token_id = self.tokenizer.pad_id if self.tokenizer.pad_id else self.tokenizer.eos_id + self.negative_sample_strategy = negative_sample_strategy + assert ( + truncation_method == 'left' or truncation_method == 'right' + ), 'truncation_method must be either "left" or "right"' + assert ( + negative_sample_strategy == 'random' or negative_sample_strategy == 'first' + ), 'negative_sample_strategy must be either "random" or "first"' if special_tokens is None: self.special_tokens = { "system_turn_start": "", @@ -98,6 +119,13 @@ def __init__( # Will be None after this call if `max_num_samples` is None self.samples_mapping = None self._build_samples_mapping() + logging.info( + f"Creating EmbeddingDataset with seed={self.seed},\n" + f"add_bos={self.add_bos}, add_eos={self.add_eos},\n" + f"max_seq_length={self.max_seq_length}, min_seq_length={self.min_seq_length},\n" + f"pad_token_id={self.pad_token_id}, negative_sample_strategy={self.negative_sample_strategy},\n" + f"num_hard_negatives={self.num_hard_negatives}." + ) def _build_samples_mapping(self): if self.max_num_samples is not None: @@ -169,8 +197,13 @@ def _process_example(self, example): # sample rest with replacement nd = nd + choices(example['neg_doc'], k=self.num_hard_negatives - len(example['neg_doc'])) else: - # sample without replacement - nd = sample(example['neg_doc'], k=self.num_hard_negatives) + if self.negative_sample_strategy == 'random': + # sample without replacement + # randomly pick self.num_hard_negatives negatives + nd = sample(example['neg_doc'], k=self.num_hard_negatives) + else: + # Choose the first self.num_hard_negatives samples + nd = example['neg_doc'][: self.num_hard_negatives] assert len(nd) == self.num_hard_negatives, "Error in sampling required number of hard negatives" nd = [self.tokenizer.text_to_ids("passage: " + ex.strip()) for ex in nd] @@ -228,27 +261,17 @@ def _maybe_cast_to_list(self, x): def _ceil_to_nearest(self, n, m): return (n + m - 1) // m * m - def _collate_item(self, item, max_length, pad_id): + def _collate_item(self, item, max_length): item = self._maybe_cast_to_list(item) - # max_length = max([len(x) for x in item]) if item else 0 - # here [0] should be tokenizer.pad_id - item = [x + [pad_id] * (max_length - len(x)) for x in item] + pad_id = self.pad_token_id + if self.truncation_method == 'left': + item = [[pad_id] * (max_length - len(x)) + x for x in item] + else: + item = [x + [pad_id] * (max_length - len(x)) for x in item] return item @torch.no_grad() - def _create_attention_mask(self, max_length): - """Create `attention_mask`. - Args: - input_ids: A 1D tensor that holds the indices of tokens.
- """ - # seq_length = len(input_ids) - # `attention_mask` has the shape of [1, seq_length, seq_length] - attention_mask = torch.tril(torch.ones((max_length, max_length))).unsqueeze(0) - attention_mask = attention_mask < 0.5 - return attention_mask - - @torch.no_grad() - def _create_attention_mask2(self, max_length, item_lengh): + def _create_attention_mask2(self, max_length, item_length): """Create `attention_mask`. Args: input_ids: A 1D tensor that holds the indices of tokens. @@ -256,10 +279,20 @@ def _create_attention_mask2(self, max_length, item_lengh): # seq_length = len(input_ids) # `attention_mask` has the shape of [1, seq_length, seq_length] attention_mask = torch.zeros(max_length) - attention_mask[:item_lengh] = 1 + if self.truncation_method == 'left': + # input ids: [pad] [pad] token token | + # attention mask: 0 0 1 1 + attention_mask[max_length - item_length :] = 1 + else: + # input ids: token token [pad] [pad] | + # attention mask: 1 1 0 0 + attention_mask[:item_length] = 1 return attention_mask - def collate_fn(self, batch): + def _collate_fn(self, batch): + """ + Collate query passage together + """ input_ids = [] metadata = [] lengths = [] @@ -295,7 +328,7 @@ def collate_fn(self, batch): attention_mask = torch.stack(attention_mask) position_ids = [list(range(max_length)) for _ in batch] position_ids = torch.LongTensor(position_ids) - input_ids = torch.LongTensor(self._collate_item(input_ids, max_length=max_length, pad_id=0)) + input_ids = torch.LongTensor(self._collate_item(input_ids, max_length=max_length)) lengths = torch.LongTensor(lengths) - 1 # subtract 1 to account for the eos token processed_batch = { @@ -303,6 +336,7 @@ def collate_fn(self, batch): 'token_type_ids': torch.zeros_like(input_ids), 'attention_mask': attention_mask, 'metadata': metadata, + 'position_ids': position_ids, } return processed_batch diff --git a/nemo/collections/nlp/models/language_modeling/megatron/falcon/falcon_decoder_layer.py b/nemo/collections/nlp/models/language_modeling/megatron/falcon/falcon_decoder_layer.py index 7c3f3c194f14..bc4d103285a7 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron/falcon/falcon_decoder_layer.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/falcon/falcon_decoder_layer.py @@ -111,6 +111,7 @@ def forward( attention_bias=None, inference_params=None, packed_seq_params=None, + sequence_len_offset=None, ): # hidden_states: [s, b, h] diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py index 1def214113ee..955bc659eb42 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py @@ -255,6 +255,7 @@ def forward( attention_bias=None, inference_params=None, packed_seq_params=None, # TODO: handle this + sequence_len_offset=None, # TODO: handle this ): # Use is_first_microbatch argument during CUDA graph capture. Use self.is_first_microbatch otherwise. 
hidden_states = super().forward( diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/mcore_mixins.py b/nemo/collections/nlp/modules/common/megatron/adapters/mcore_mixins.py index bba990750adb..e426212bcca6 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/mcore_mixins.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/mcore_mixins.py @@ -85,6 +85,7 @@ def forward( attention_bias: Tensor = None, inference_params: InferenceParams = None, packed_seq_params: PackedSeqParams = None, + sequence_len_offset=None, ): hidden_states = super().forward( hidden_states=hidden_states, @@ -234,6 +235,7 @@ def forward( rotary_pos_cos=None, rotary_pos_sin=None, attention_bias=None, + sequence_len_offset=None, ): # hidden_states: [sq, b, h] diff --git a/nemo/collections/vlm/mllama/model/vision.py b/nemo/collections/vlm/mllama/model/vision.py index bb58ad093cd6..6ecd51ecf29d 100644 --- a/nemo/collections/vlm/mllama/model/vision.py +++ b/nemo/collections/vlm/mllama/model/vision.py @@ -23,7 +23,6 @@ import torch.nn.functional as F from megatron.core import InferenceParams, parallel_state, tensor_parallel from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add - from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules from megatron.core.transformer.custom_layers.transformer_engine import ( @@ -494,6 +493,7 @@ def forward( attention_bias=None, inference_params=None, packed_seq_params=None, + sequence_len_offset=None, ): """Forward.""" # hidden_states: [s, b, h] diff --git a/nemo/lightning/pytorch/utils.py b/nemo/lightning/pytorch/utils.py index 8072f10d7a45..a1f774cd11bc 100644 --- a/nemo/lightning/pytorch/utils.py +++ b/nemo/lightning/pytorch/utils.py @@ -38,7 +38,7 @@ def dtype_from_str(dtype): assert isinstance(dtype, str) if dtype in ["float16", "fp16", "16", "16-mixed"]: return torch.float16 - elif dtype == ["bfloat16", "bf16-mixed"]: + elif dtype in ["bfloat16", "bf16-mixed"]: return torch.bfloat16 else: return torch.float32 @@ -62,11 +62,7 @@ def is_trainer_attached(model: pl.LightningModule): """ Returns true if trainer is attached to a model """ - try: - trainer = model.trainer - return True - except (AttributeError, RuntimeError): - return False + return hasattr(model, 'trainer') def get_huggingface_model_from_trainer(trainer: 'lightning.pytorch.Trainer') -> 'nn.Module': diff --git a/nemo/lightning/run/plugins.py b/nemo/lightning/run/plugins.py index 7f98ae8f85d2..664d8cd1961f 100644 --- a/nemo/lightning/run/plugins.py +++ b/nemo/lightning/run/plugins.py @@ -24,10 +24,9 @@ from lightning.pytorch.loggers import WandbLogger from nemo_run.core.serialization.yaml import YamlSerializer -from nemo.lightning.pytorch.callbacks import NsysCallback, PreemptionCallback +from nemo.lightning.pytorch.callbacks import MemoryProfileCallback, NsysCallback, PreemptionCallback from nemo.lightning.pytorch.strategies.megatron_strategy import MegatronStrategy from nemo.utils import logging - from nemo.utils.import_utils import safe_import res_module, HAVE_RES = safe_import('nvidia_resiliency_ext.ptl_resiliency') @@ -174,6 +173,34 @@ def setup(self, task: run.Partial | run.Script, executor: run.Executor): launcher.nsys_trace = self.nsys_trace or ["nvtx", "cuda"] +@dataclass(kw_only=True) +class MemoryProfilePlugin(run.Plugin): + """ + A plugin for memory profiling. + + The MemoryProfilePlugin allows you to profile a timeline of memory allocations during you run. 
+ The memory profiling plugin creates snapshots during the entire training. You can specify which ranks to run the profiling. + + Args: + dir (str): Directory to store the memory profile dump .pickle files + ranks (Optional[list[int]]): The ranks on which to run the memory profiling. If not specified, + profiling will be run on rank 0. + """ + + dir: str + ranks: Optional[list[int]] = None + + def setup(self, task: run.Partial | run.Script, executor: run.Executor): + if isinstance(task, run.Partial): + memprof_callback = run.Config( + MemoryProfileCallback, + dir=self.dir, + ranks=self.ranks or [0], + ) + callbacks: list[run.Config[Callback]] = [memprof_callback] # type: ignore + _merge_callbacks(task, callbacks=callbacks) + + @dataclass(kw_only=True) class WandbPlugin(run.Plugin): """ diff --git a/nemo/package_info.py b/nemo/package_info.py index 1d69deff96d3..01fac3e22139 100644 --- a/nemo/package_info.py +++ b/nemo/package_info.py @@ -16,7 +16,7 @@ MAJOR = 2 MINOR = 2 PATCH = 0 -PRE_RELEASE = 'rc0' +PRE_RELEASE = 'rc1' # Use the following formatting: (major, minor, patch, pre-release) VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE) diff --git a/reinstall.sh b/reinstall.sh index 030e6252a0c6..51b0749f6353 100755 --- a/reinstall.sh +++ b/reinstall.sh @@ -24,7 +24,7 @@ export NEMO_RUN_TAG=34259bd3e752fef94045a9a019e4aaf62bd11ce2 export APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c export CAUSAL_CONV_TAG=v1.2.2.post1 export MAMBA_TAG=v2.2.0 -export MCORE_TAG=4dc8977167d71f86bdec47a60a98e85c4cfa0031 +export MCORE_TAG=0e85db539cf16816ffced6e7dac644d91ffadc04 export NV_RESILIENCY_EXT_TAG=97aad77609d2e25ed38ac5c99f0c13f93c48464e if [ -n "${NVIDIA_PYTORCH_VERSION}" ]; then diff --git a/requirements/requirements_deploy.txt b/requirements/requirements_deploy.txt index a65b651a76c8..e557bdc08a01 100644 --- a/requirements/requirements_deploy.txt +++ b/requirements/requirements_deploy.txt @@ -3,4 +3,4 @@ nvidia-pytriton pydantic-settings tensorstore uvicorn -zarr +zarr>=2.18.2,<3.0.0 diff --git a/requirements/requirements_infer.txt b/requirements/requirements_infer.txt index 47daf571d26f..0a38d92555db 100644 --- a/requirements/requirements_infer.txt +++ b/requirements/requirements_infer.txt @@ -5,4 +5,4 @@ nvidia-pytriton pydantic-settings tensorstore uvicorn -zarr +zarr>=2.18.2,<3.0.0 diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index 6a0ae8adf66c..91005637c365 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -22,4 +22,4 @@ sacrebleu # manually install sacrebleu[ja] for Japanese support; MeCab is unsup sentence_transformers tensorstore tiktoken==0.7.0 -zarr +zarr>=2.18.2,<3.0.0 diff --git a/tests/collections/audio/test_audio_flowmatching.py b/tests/collections/audio/test_audio_flowmatching.py new file mode 100644 index 000000000000..baafc3d31076 --- /dev/null +++ b/tests/collections/audio/test_audio_flowmatching.py @@ -0,0 +1,54 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass + +import pytest +import torch + +from nemo.collections.audio.parts.submodules.flow import ConditionalFlowMatchingEulerSampler + +NUM_STEPS = [1, 5, 10, 20, 100] + + +@pytest.mark.parametrize("num_steps", NUM_STEPS) +def test_euler_sampler_nfe(num_steps): + """ + For this specific solver the number of steps should be equal to the number of function (estimator) evaluations + """ + + class IdentityEstimator(torch.nn.Module): + def forward(self, input, input_length, condition): + return input, input_length + + @dataclass + class ForwardCounterHook: + counter: int = 0 + + def __call__(self, *args, **kwargs): + self.counter += 1 + + estimator = IdentityEstimator() + counter_hook = ForwardCounterHook() + estimator.register_forward_hook(counter_hook) + + sampler = ConditionalFlowMatchingEulerSampler(estimator=estimator, num_steps=num_steps) + + b, c, d, l = 2, 3, 4, 5 + lengths = [5, 3] + init_state = torch.randn(b, c, d, l) + init_state_length = torch.LongTensor(lengths) + + sampler.forward(state=init_state, estimator_condition=None, state_length=init_state_length) + + assert counter_hook.counter == sampler.num_steps diff --git a/tests/collections/llm/megatron_mixtral_pretraining.py b/tests/collections/llm/megatron_mixtral_pretraining.py index 2a7b1fdfdad6..ee506474ab00 100644 --- a/tests/collections/llm/megatron_mixtral_pretraining.py +++ b/tests/collections/llm/megatron_mixtral_pretraining.py @@ -337,7 +337,15 @@ def main(args): for key, (shape, dtype, device) in expected_ckpt.items(): assert key in ckpt, f"Expected {key} to be in ckpt" assert isinstance(ckpt[key], torch.Tensor), f"Expected {key} to be a tensor" - assert ckpt[key].shape == shape, f"Expected {key} shapes to match {ckpt[key].shape} & {shape}" + + if len(shape) == 1 and key.startswith('optimizer.state'): + assert ckpt[key].shape == ( + 1, + shape[0], + ), f"Expected {key} shapes to match {ckpt[key].shape} & (1, {shape[0]})" + else: + assert ckpt[key].shape == shape, f"Expected {key} shapes to match {ckpt[key].shape} & {shape}" + assert ckpt[key].dtype == dtype, f"Expected {key} dtype to match {ckpt[key].dtype} & {dtype}" assert str(ckpt[key].device) == device, f"Expected {key} device to match {ckpt[key].device} & {device}" diff --git a/tests/collections/speechlm/hf/sft.py b/tests/collections/speechlm/hf/sft.py new file mode 100755 index 000000000000..41f626f10852 --- /dev/null +++ b/tests/collections/speechlm/hf/sft.py @@ -0,0 +1,129 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import fiddle as fdl +import torch +from lhotse.dataset.collation import collate_matrices, collate_vectors +from omegaconf import OmegaConf + +from nemo import lightning as nl +from nemo.collections import speechlm +from nemo.collections.common.data.lhotse import get_lhotse_dataloader_from_config +from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer +from nemo.collections.speechlm.models import HFAutoModelForSpeechSeq2Seq + +torch.set_float32_matmul_precision("medium") + + +class LhotseHfNeMoDataset(torch.utils.data.Dataset): + def __init__(self, processor, tokenizer, decoder_mask_fill=-100): + super().__init__() + self.processor = processor + self.tokenizer = tokenizer + self.decoder_mask_fill = decoder_mask_fill + + def __getitem__(self, cuts): + features = [] + for cut in cuts: + audio = cut.load_audio() + features.append( + self.processor( + audio, + sampling_rate=cut.sampling_rate, + return_tensors="pt", + text=cut.supervisions[0].text, + ) + ) + + input_features = collate_matrices(tensors=[f["input_features"].squeeze(0) for f in features]) + labels = collate_vectors(tensors=[c.supervisions[0].tokens for c in cuts]) + decoder_input_ids = labels[:, :-1] + decoder_input_ids = decoder_input_ids.masked_fill( + decoder_input_ids == self.decoder_mask_fill, self.tokenizer.pad_id + ) + labels = labels[:, 1:].reshape(-1) + + return { + "input_features": input_features, + "labels": labels, + "decoder_input_ids": decoder_input_ids, + } + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser() + + # Models can be one of the supported ones by AutoModelForSpeechSeq2Seq such as + # openai/whisper-large-v3 and facebook/s2t-small-librispeech-asr + parser.add_argument('--model', default='openai/whisper-large-v3') + parser.add_argument('--strategy', type=str, default='auto', choices=['auto', 'ddp', 'fsdp']) + parser.add_argument('--devices', default=1) + parser.add_argument('--accelerator', default='gpu', choices=['gpu']) + parser.add_argument('--max-steps', type=int, default=100) + parser.add_argument('--model-save-path', type=str, default=None) + args = parser.parse_args() + + model = HFAutoModelForSpeechSeq2Seq(model_name=args.model) + model = model.to(torch.float) + processor = model.processor + tokenizer = AutoTokenizer(args.model, include_special_tokens=True) + + config = OmegaConf.create( + { + "cuts_path": "/home/TestData/speechlm/lhotse/libri/libri-train-5.jsonl.gz", + "sample_rate": 16000, + "shuffle": True, + "num_workers": 2, + "batch_size": 4, + "shuffle_buffer_size": 100, + } + ) + + train_dataloader = get_lhotse_dataloader_from_config( + config, + global_rank=0, + world_size=1, + dataset=LhotseHfNeMoDataset( + processor=processor, + tokenizer=tokenizer, + ), + tokenizer=tokenizer, + ) + + speechlm.api.finetune( + model=model, + data=train_dataloader, + trainer=nl.Trainer( + devices=args.devices, + max_steps=args.max_steps, + accelerator=args.accelerator, + strategy=args.strategy, + precision="bf16-mixed", + log_every_n_steps=1, + limit_val_batches=0.0, + num_sanity_val_steps=0, + accumulate_grad_batches=10, + gradient_clip_val=0.5, + use_distributed_sampler=False, + callbacks=[], + logger=None, + ), + optim=fdl.build(speechlm.adam.pytorch_adam_with_flat_lr(lr=1e-5)), + log=None, + ) + + if args.model_save_path is not None: + model.save_pretrained(args.model_save_path)
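As a quick orientation for reviewers, the sketch below (not part of the diff) shows how the new pieces above are meant to compose: the llama_embedding_1b recipe builds a LoRA fine-tuning setup for LlamaEmbeddingModel, and CustomRetrievalDataModule can replace the default SpecterDataModule when training from a local JSON file of (question, pos_doc, neg_doc) records. The data path and batch sizes are illustrative assumptions, not values taken from this change.

import nemo_run as run

from nemo.collections.llm.gpt.data import CustomRetrievalDataModule
from nemo.collections.llm.recipes import llama_embedding_1b

# Default LoRA fine-tuning recipe for the 1B embedding model (defined in llama_embedding_1b.py above).
recipe = llama_embedding_1b.finetune_recipe(
    name="nvembed_llama_1b_finetune",
    resume_path="meta-llama/Llama-3.2-1B",
    num_nodes=1,
    num_gpus_per_node=8,
    peft_scheme="lora",
)

# Swap the default SpecterDataModule for the new CustomRetrievalDataModule.
# `data_root` must point at an existing JSON file whose records contain
# "question", "pos_doc" and "neg_doc" fields; the path here is a placeholder.
recipe.data = run.Config(
    CustomRetrievalDataModule,
    data_root="/data/retrieval/train.json",
    seq_length=512,
    micro_batch_size=4,
    global_batch_size=64,
    dataset_kwargs={
        "num_hard_negatives": recipe.model.config.num_hard_negatives,
        "negative_sample_strategy": recipe.model.config.negative_sample_strategy,
        "add_bos": recipe.model.config.add_bos,
        "add_eos": recipe.model.config.add_eos,
    },
)

if __name__ == "__main__":
    # Assumes a local nemo_run environment; adjust the executor for your cluster.
    run.run(recipe)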