[MoE] Add type annotation for mixtral configs (#12126)
* add type annotation

Signed-off-by: gdeng <[email protected]>

* Update mixtral.py

Signed-off-by: Alexandros Koumparoulis <[email protected]>

---------

Signed-off-by: gdeng <[email protected]>
Signed-off-by: Alexandros Koumparoulis <[email protected]>
Co-authored-by: Alexandros Koumparoulis <[email protected]>
gdengk and akoumpa authored Feb 10, 2025
1 parent 3242c9e commit 2d27cc1
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions nemo/collections/llm/gpt/model/mixtral.py
@@ -72,10 +72,10 @@ class MixtralConfig(GPTConfig):
     params_dtype: torch.dtype = torch.bfloat16

     # fusions
-    apply_rope_fusion = True
-    bias_activation_fusion = True
-    bias_dropout_fusion = True
-    masked_softmax_fusion = False
+    apply_rope_fusion: bool = True
+    bias_activation_fusion: bool = True
+    bias_dropout_fusion: bool = True
+    masked_softmax_fusion: bool = False


 @dataclass
@@ -361,8 +361,8 @@ def config(self) -> "MixtralConfig":
             # transformer config
             num_attention_heads=source.num_attention_heads,
             num_key_value_heads=source.num_query_groups,
-            num_local_experts=config.num_moe_experts,
-            num_experts_per_tok=config.moe_router_topk,
+            num_local_experts=source.num_moe_experts,
+            num_experts_per_tok=source.moe_router_topk,
             # norm
             rms_norm_eps=source.layernorm_epsilon,
             # init
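
Why the first hunk matters: Python's dataclasses only turn annotated class attributes into fields, so the unannotated defaults were plain class attributes that could not be overridden through the generated constructor. A minimal sketch of that behavior, using hypothetical class names rather than the NeMo classes:

    from dataclasses import dataclass, fields

    @dataclass
    class WithoutAnnotation:
        apply_rope_fusion = True          # plain class attribute, not a dataclass field

    @dataclass
    class WithAnnotation:
        apply_rope_fusion: bool = True    # real dataclass field with a default

    print([f.name for f in fields(WithoutAnnotation)])  # []
    print([f.name for f in fields(WithAnnotation)])     # ['apply_rope_fusion']

    WithAnnotation(apply_rope_fusion=False)             # works
    # WithoutAnnotation(apply_rope_fusion=False)        # TypeError: unexpected keyword argument

The second hunk makes num_local_experts and num_experts_per_tok read from source, consistent with the other fields built from the source config in the same call.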
