diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 58dcc6167efa6..52386b8cd62b3 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -68,9 +68,13 @@ async def async_request_tgi( chunk_bytes = chunk_bytes.strip() if not chunk_bytes: continue + chunk_bytes = chunk_bytes.decode("utf-8") - chunk = remove_prefix(chunk_bytes.decode("utf-8"), - "data:") + #NOTE: Sometimes TGI returns a ping response without + # any data, we should skip it. + if chunk_bytes.startswith(":"): + continue + chunk = remove_prefix(chunk_bytes, "data:") data = json.loads(chunk) timestamp = time.perf_counter()