Commit 9bb02f3

loss
Signed-off-by: Mayank Mishra <[email protected]>
mayank31398 committed Nov 4, 2024
1 parent 4de6193 commit 9bb02f3
Showing 1 changed file with 3 additions and 3 deletions.
dolomite_engine/model_wrapper/pretraining.py (6 changes: 3 additions & 3 deletions)
@@ -92,7 +92,7 @@ def __init__(
             assert not self.reset_attention_mask, "reset_attention_mask is not supported with pipeline parallelism"
             assert not self.reset_position_ids, "reset_position_ids is not supported with pipeline parallelism"
 
-    def forward(self, batch: dict, loss_multiplier: float = 1) -> dict:
+    def forward(self, batch: dict) -> dict:
         """forward function for a batch
 
         Args:
@@ -117,11 +117,11 @@ def forward(self, batch: dict, loss_multiplier: float = 1) -> dict:
 
         # without pipeline parallel, we compute the loss outside
         if not self.is_pipeline_parallel_enabled:
-            model_outputs = self.get_loss(model_outputs, labels, loss_multiplier=loss_multiplier)
+            model_outputs = self.get_loss(model_outputs, labels)
 
         return model_outputs
 
-    def get_loss(self, model_outputs, labels: torch.Tensor, loss_multiplier: float) -> torch.Tensor:
+    def get_loss(self, model_outputs, labels: torch.Tensor) -> torch.Tensor:
         if isinstance(model_outputs, torch.Tensor):
             logits = model_outputs
         else:
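For readers skimming the diff: the commit removes the loss_multiplier argument from forward and get_loss, so any scaling of the loss (for example, dividing by the number of gradient-accumulation micro-batches) has to be applied by the caller instead. The trainer is not part of this commit, so the sketch below is only an illustration of that caller-side pattern; train_step, micro_batches, and the assumption that forward returns the loss tensor on the non-pipeline-parallel path are hypothetical, not dolomite_engine APIs.

# Hypothetical caller-side sketch (not part of this commit): with loss_multiplier
# gone from forward()/get_loss(), the caller scales the loss itself, e.g. when
# accumulating gradients over several micro-batches.
import torch


def train_step(model_wrapper, micro_batches: list[dict], optimizer: torch.optim.Optimizer) -> float:
    optimizer.zero_grad()
    accumulated = 0.0
    for batch in micro_batches:
        # forward() now takes only the batch; on the non-pipeline-parallel path
        # it returns the loss computed by get_loss() (assumed here)
        loss = model_wrapper(batch)
        # caller-side scaling replaces the old loss_multiplier argument (assumed usage)
        scaled_loss = loss / len(micro_batches)
        scaled_loss.backward()
        accumulated += scaled_loss.item()
    optimizer.step()
    return accumulated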
