Commit
[Bugfix] fix output parsing error for trtllm backend (vllm-project#4137)
Co-authored-by: Roger Wang <[email protected]>
elinx and ywang96 authored Apr 17, 2024
1 parent 8438e05 commit fe3b5bb
Showing 1 changed file with 1 addition and 1 deletion: benchmarks/backend_request_func.py
@@ -135,6 +135,7 @@ async def async_request_trt_llm(
                                               "data:")
 
                         data = json.loads(chunk)
+                        output.generated_text += data["text_output"]
                         timestamp = time.perf_counter()
                         # First token
                         if ttft == 0.0:
@@ -149,7 +150,6 @@ async def async_request_trt_llm(
                         most_recent_timestamp = timestamp
 
                     output.latency = most_recent_timestamp - st
-                    output.generated_text = json.loads(data)["text_output"]
                     output.success = True
 
                 else:
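
Why the change: before this commit, generated_text was assigned once after the streaming loop by calling json.loads(data), but at that point data is already the dict produced by json.loads(chunk) inside the loop, so the second parse fails (and would at best capture only the last chunk). The fix appends data["text_output"] as each chunk arrives. Below is a minimal, self-contained sketch of the corrected per-chunk accumulation, not the benchmark file itself; it assumes TRT-LLM streams SSE-style "data: {...}" chunks with a "text_output" field, as the diff shows, and the remove_prefix helper mirrors the one used in benchmarks/backend_request_func.py.

import json
from typing import List


def remove_prefix(text: str, prefix: str) -> str:
    # Equivalent of str.removeprefix for older Python versions.
    return text[len(prefix):] if text.startswith(prefix) else text


def parse_streamed_chunks(chunks: List[bytes]) -> str:
    """Accumulate generated text across all streamed TRT-LLM chunks."""
    generated_text = ""
    for chunk_bytes in chunks:
        chunk_bytes = chunk_bytes.strip()
        if not chunk_bytes:
            continue
        chunk = remove_prefix(chunk_bytes.decode("utf-8"), "data:")
        data = json.loads(chunk)  # `data` is now a dict, not a JSON string
        generated_text += data["text_output"]  # fix: append every chunk
    # The pre-fix code instead ran json.loads(data)["text_output"] after the
    # loop, which raises TypeError because `data` is already a parsed dict.
    return generated_text


if __name__ == "__main__":
    stream = [b'data: {"text_output": "Hello"}',
              b'data: {"text_output": ", world"}']
    print(parse_streamed_chunks(stream))  # -> Hello, world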
