Commit 9bb02f3

loss
Signed-off-by: Mayank Mishra <[email protected]>
mayank31398 committed Nov 4, 2024
1 parent 4de6193 commit 9bb02f3
Showing 1 changed file with 3 additions and 3 deletions.
dolomite_engine/model_wrapper/pretraining.py (6 changes: 3 additions & 3 deletions)
@@ -92,7 +92,7 @@ def __init__(
             assert not self.reset_attention_mask, "reset_attention_mask is not supported with pipeline parallelism"
             assert not self.reset_position_ids, "reset_position_ids is not supported with pipeline parallelism"
 
-    def forward(self, batch: dict, loss_multiplier: float = 1) -> dict:
+    def forward(self, batch: dict) -> dict:
         """forward function for a batch
 
         Args:
@@ -117,11 +117,11 @@ def forward(self, batch: dict, loss_multiplier: float = 1) -> dict:
 
         # without pipeline parallel, we compute the loss outside
         if not self.is_pipeline_parallel_enabled:
-            model_outputs = self.get_loss(model_outputs, labels, loss_multiplier=loss_multiplier)
+            model_outputs = self.get_loss(model_outputs, labels)
 
         return model_outputs
 
-    def get_loss(self, model_outputs, labels: torch.Tensor, loss_multiplier: float) -> torch.Tensor:
+    def get_loss(self, model_outputs, labels: torch.Tensor) -> torch.Tensor:
         if isinstance(model_outputs, torch.Tensor):
             logits = model_outputs
         else:
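For readers skimming the diff: the commit removes the loss_multiplier argument from forward and get_loss, so any scaling of the loss (for example, dividing by the number of gradient-accumulation micro-batches) has to be applied by the caller instead. The trainer is not part of this commit, so the sketch below is only an illustration of that caller-side pattern; train_step, micro_batches, and the assumption that forward returns the loss tensor on the non-pipeline-parallel path are hypothetical, not dolomite_engine APIs.

# Hypothetical caller-side sketch (not part of this commit): with loss_multiplier
# gone from forward()/get_loss(), the caller scales the loss itself, e.g. when
# accumulating gradients over several micro-batches.
import torch


def train_step(model_wrapper, micro_batches: list[dict], optimizer: torch.optim.Optimizer) -> float:
    optimizer.zero_grad()
    accumulated = 0.0
    for batch in micro_batches:
        # forward() now takes only the batch; on the non-pipeline-parallel path
        # it returns the loss computed by get_loss() (assumed here)
        loss = model_wrapper(batch)
        # caller-side scaling replaces the old loss_multiplier argument (assumed usage)
        scaled_loss = loss / len(micro_batches)
        scaled_loss.backward()
        accumulated += scaled_loss.item()
    optimizer.step()
    return accumulated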
