[Core] Avoid metrics log noise when idle

Prior to this change, there was always a log message every 10 seconds, even when the system is idle. The noise when the metrics are all zeros doesn't seem very interesting, so this change makes it so the log message is only emitted while active. It will log a message whenever metrics are non-zero, or when returning to idle from non-idle. Repeated messages during idle now go to DEBUG instead of INFO. Signed-off-by: Russell Bryant <[email protected]>
vllm-project · Oct 25, 2024 · 1a789ae · 1a789ae
1 parent 9645b9f
commit 1a789ae
Showing 1 changed file with 23 additions and 7 deletions.
diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py
@@ -335,6 +335,11 @@ def get_throughput(tracked_stats: List[int], now: float,
 class LoggingStatLogger(StatLoggerBase):
     """LoggingStatLogger is used in LLMEngine to log to Stdout."""
 
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.last_prompt_throughput: Optional[float] = None
+        self.last_generation_throughput: Optional[float] = None
+
     def log(self, stats: Stats) -> None:
         """Called by LLMEngine.
            Logs to Stdout every self.local_interval seconds."""
@@ -359,8 +364,14 @@ def log(self, stats: Stats) -> None:
                 now=stats.now,
                 last_log=self.last_local_log)
 
-            # Log to stdout.
-            logger.info(
+            log_fn = logger.info
+            if not any((prompt_throughput, generation_throughput,
+                        self.last_prompt_throughput,
+                        self.last_generation_throughput)):
+                # Avoid log noise on an idle production system
+                log_fn = logger.debug
+
+            log_fn(
                 "Avg prompt throughput: %.1f tokens/s, "
                 "Avg generation throughput: %.1f tokens/s, "
                 "Running: %d reqs, Swapped: %d reqs, "
@@ -386,11 +397,16 @@ def log(self, stats: Stats) -> None:
                     self._format_spec_decode_metrics_str(
                         self.spec_decode_metrics))
 
-            # Reset tracked stats for next interval.
-            self.num_prompt_tokens = []
-            self.num_generation_tokens = []
-            self.last_local_log = stats.now
-            self.spec_decode_metrics = None
+            self._reset(stats, prompt_throughput, generation_throughput)
+
+    def _reset(self, stats, prompt_throughput, generation_throughput) -> None:
+        # Reset tracked stats for next interval.
+        self.num_prompt_tokens = []
+        self.num_generation_tokens = []
+        self.last_local_log = stats.now
+        self.spec_decode_metrics = None
+        self.last_prompt_throughput = prompt_throughput
+        self.last_generation_throughput = generation_throughput
 
     def _format_spec_decode_metrics_str(
             self, metrics: "SpecDecodeWorkerMetrics") -> str: