Commit

Add tok/sec calculation to pretraining script
rasbt committed Oct 24, 2024
1 parent 32673d0 commit 9990730
Showing 2 changed files with 16 additions and 3 deletions.
17 changes: 15 additions & 2 deletions litgpt/pretrain.py
@@ -237,9 +237,22 @@ def main(
     # Save final checkpoint
     save_checkpoint(fabric, state, tokenizer_dir, out_dir / "final" / "lit_model.pth")
 
-    fabric.print(f"Training time: {(time.perf_counter()-train_time):.2f}s")
+    total_tokens = state["iter_num"] * train.micro_batch_size * model.max_seq_length * fabric.world_size
+
+    # Print formatted output
+    separator = "-" * 40
+    fabric.print(separator)
+    fabric.print("| Performance")
+    fabric.print(f"| - Total tokens  : {total_tokens:,}")
+    fabric.print(f"| - Training Time : {(time.perf_counter()-train_time):.2f} s")
+    fabric.print(f"| - Tok/sec       : {total_tokens / train_time:.2f} tok/s")
+    fabric.print("| " + "-" * 40)
+
     if fabric.device.type == "cuda":
-        fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB")
+        memory_used = torch.cuda.max_memory_allocated() / 1e9
+        fabric.print("| Memory Usage")
+        fabric.print(f"| - Memory Used   : {memory_used:.2f} GB")
+    fabric.print(separator)
 
 
 def fit(
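For reference, the reported throughput is just the total number of processed tokens divided by elapsed wall-clock seconds, with the token count summed across all ranks. A minimal standalone sketch of the same arithmetic (the function name and the sample numbers are illustrative, not part of the commit):

def tokens_per_sec(iter_num: int, micro_batch_size: int, max_seq_length: int,
                   world_size: int, elapsed_s: float) -> float:
    """Same formula as in the diff: tokens seen across all ranks per second."""
    total_tokens = iter_num * micro_batch_size * max_seq_length * world_size
    return total_tokens / elapsed_s

# Illustrative values: 1,000 iterations, micro-batch size 4, sequence length
# 2,048, 8 devices, 600 s of training -> 65,536,000 tokens, ~109,226.67 tok/s.
print(f"{tokens_per_sec(1000, 4, 2048, 8, 600.0):,.2f} tok/s")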
2 changes: 1 addition & 1 deletion litgpt/utils.py
@@ -782,7 +782,7 @@ def create_finetuning_performance_report(training_time, token_counts, device_type
         memory_used = torch.cuda.max_memory_allocated() / 1e9
         output += f"| Memory Usage \n"
         output += f"| - Memory Used   : {memory_used:.02f} GB \n"
-    output += "=======================================================\n"
+    output += "-------------------------------------------------------\n"
 
     return output
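With this change, the finetuning performance report closes with the same dashed rule as the new pretraining summary above. On a CUDA device the tail of the report would render roughly as follows (the memory figure is illustrative):

| Memory Usage
| - Memory Used   : 15.93 GB
-------------------------------------------------------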
