Merge branch 'main' into athitten/inference_bug

Signed-off-by: Chen Cui <[email protected]>
NVIDIA · Feb 7, 2025 · 07a1725 · 07a1725
2 parents a2a9ba9 + 1446c89
commit 07a1725
Show file tree

Hide file tree

Showing 76 changed files with 4,876 additions and 403 deletions.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,14 +1,21 @@
+> [!IMPORTANT]  
+> The `Update branch` button must only be pressed on very rare occasions.
+> An outdated branch never blocks the merge of a PR.
+> Please reach out to the automation team before pressing that button.
+
 # What does this PR do ?
 
 Add a one-line overview of what this PR aims to accomplish.
 
 **Collection**: [Note which collection this PR will affect]
 
-# Changelog 
+# Changelog
+
 - Add specific line by line info of high level changes in this PR.
 
 # Usage
-* You can potentially add a usage example below
+
+- You can potentially add a usage example below
 
 ```python
 # Add a code snippet demonstrating how to use this 
@@ -23,25 +30,28 @@ To re-run CI remove and add the label again.
 To run CI on an untrusted fork, a NeMo user with write access must first click "Approve and run".
 
 # Before your PR is "Ready for review"
+
 **Pre checks**:
+
 - [ ] Make sure you read and followed [Contributor guidelines](https://github.com/NVIDIA/NeMo/blob/main/CONTRIBUTING.md)
 - [ ] Did you write any new necessary tests?
 - [ ] Did you add or update any necessary documentation?
 - [ ] Does the PR affect components that are optional to install? (Ex: Numba, Pynini, Apex etc)
   - [ ] Reviewer: Does the PR have correct import guards for all optional libraries?
 
 **PR Type**:
+
 - [ ] New Feature
 - [ ] Bugfix
 - [ ] Documentation
 
 If you haven't finished some of the above items you can still open a "Draft" PR.
 
-
 ## Who can review?
 
-Anyone in the community is free to review the PR once the checks have passed. 
+Anyone in the community is free to review the PR once the checks have passed.
 [Contributor guidelines](https://github.com/NVIDIA/NeMo/blob/main/CONTRIBUTING.md) contains specific people who can review PRs to various areas.
 
 # Additional Information
-* Related to # (issue)
+
+- Related to # (issue)
diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
@@ -5062,6 +5062,36 @@ jobs:
         rm -rf /tmp/nemo2_ckpt
         rm -rf /tmp/nemo2_ptq_engine
 
+  L2_NeMo_2_Distill_Llama3_TP1PP2:
+    needs: [pre-flight, cicd-test-container-build]
+    uses: ./.github/workflows/_test_template.yml
+    if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Distill_Llama3_TP1PP2') || needs.pre-flight.outputs.all == 'true'
+    with:
+      RUNNER: self-hosted-azure
+      SCRIPT: |
+        python tests/collections/llm/gpt_distillation.py \
+          --name nemo2_llama_distill \
+          --teacher_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
+          --student_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
+          --tp_size 1 \
+          --cp_size 1 \
+          --pp_size 2 \
+          --devices 2 \
+          --log_dir /tmp/distill_logs \
+          --max_steps 5 \
+          --gbs 4 \
+          --mbs 1 \
+          --data_paths 1.0 /home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \
+          --index_mapping_dir examples/nlp/language_modeling/gpt_index_mappings \
+          --seq_length 2048 \
+          --warmup_steps 1 \
+          --val_check_interval 5 \
+          --log_interval 5 \
+          --limit_val_batches 2 
+
+      AFTER_SCRIPT: |
+        rm -rf /tmp/distill_logs
+
   L2_NeMo_2_Export_In_Framework:
     needs: [pre-flight, cicd-test-container-build]
     uses: ./.github/workflows/_test_template.yml
@@ -5321,6 +5351,7 @@ jobs:
       - L2_Megatron_GPT_Reranker
       - L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact
       - L2_NeMo_2_PTQ_Llama2_FP8
+      - L2_NeMo_2_Distill_Llama3_TP1PP2
       - L2_NeMo_2_Export_In_Framework
       - L2_NeMo_2_jit_callback
       - L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING

diff --git a/.github/workflows/code-linting.yml b/.github/workflows/code-linting.yml
@@ -2,15 +2,15 @@ name: PyLint and flake8 linting
 
 on:
   pull_request:
-    types: [ opened, synchronize, reopened, labeled, unlabeled ]
-  
+    types: [opened, synchronize, reopened, labeled, unlabeled]
+
 jobs:
   linting:
     name: 'Domain: ${{ matrix.domain }}'
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      matrix: 
+      matrix:
         domain: [speech, other]
     env:
       DOMAIN: ${{ matrix.domain }}
@@ -32,25 +32,25 @@ jobs:
 
           else
             FILTER=$(jq -crn '[
-              "nemo/**",
+              "nemo/**/*.py",
               "!nemo/collections/asr/**/*.py",
               "!nemo/collections/tts/**/*.py",
               "!nemo/collections/audio/**/*.py",
               "!nemo/collections/multimodal/speech_llm/**/*.py",
               "!nemo/collections/speechlm/**/*.py"
             ] | join(",")')
           fi
-          
+
           echo "main=$FILTER" | tee -a "$GITHUB_OUTPUT"
 
       - name: Get changed files
         id: changed-files
         uses: tj-actions/changed-files@v44
         with:
           files: ${{ steps.filter.outputs.main }}
-          files_separator: ","
-          separator: " "
-    
+          files_separator: ','
+          separator: ' '
+
       - name: Run PyLint
         id: pylint
         env:
@@ -95,7 +95,7 @@ jobs:
           PYLINT: ${{ steps.pylint.outputs.exit-code == 0 }}
           FLAKE8: ${{ steps.flake8.outputs.exit-code == 0 }}
         run: |
-          
+
           if [[ "$PYLINT" != "true" ]]; then
             echo "Pylint output:" | tee -a $GITHUB_STEP_SUMMARY
 
@@ -139,4 +139,4 @@ jobs:
           else
             echo "Some linting domains failed."
             exit 1
-          fi
+          fi
diff --git a/docs/source/asr/speaker_diarization/api.rst b/docs/source/asr/speaker_diarization/api.rst
@@ -4,6 +4,7 @@ NeMo Speaker Diarization API
 
 Model Classes
 -------------
+
 .. autoclass:: nemo.collections.asr.models.ClusteringDiarizer
     :show-inheritance:
     :members:  
@@ -12,9 +13,18 @@ Model Classes
     :show-inheritance:
     :members: add_speaker_model_config, _init_segmentation_info, _init_speaker_model, setup_training_data, setup_validation_data, setup_test_data, get_ms_emb_seq, get_cluster_avg_embs_model, get_ms_mel_feat, forward, forward_infer, training_step, validation_step, compute_accuracies
 
+.. autoclass:: nemo.collections.asr.models.SortformerEncLabelModel
+    :show-inheritance:
+    :members: list_available_models, setup_training_data, setup_validation_data, setup_test_data, process_signal, forward, forward_infer, frontend_encoder, diarize, training_step, validation_step, multi_validation_epoch_end, _get_aux_train_evaluations, _get_aux_validation_evaluations, _init_loss_weights, _init_eval_metrics, _reset_train_metrics, _reset_valid_metrics, _setup_diarize_dataloader, _diarize_forward, _diarize_output_processing, test_batch, _get_aux_test_batch_evaluations, on_validation_epoch_end
+
 Mixins
 ------
-.. autoclass:: nemo.collections.asr.parts.mixins.mixins.DiarizationMixin
+
+.. autoclass:: nemo.collections.asr.parts.mixins.DiarizationMixin
     :show-inheritance:
     :members:
 
+.. autoclass:: nemo.collections.asr.parts.mixins.diarization.SpkDiarizationMixin
+    :show-inheritance:
+    :members: diarize, diarize_generator, _diarize_on_begin, _diarize_input_processing, _diarize_input_manifest_processing, _setup_diarize_dataloader, _diarize_forward, _diarize_output_processing, _diarize_on_end, _input_audio_to_rttm_processing, get_value_from_diarization_config
+