align vllm-ray response format to tgi response format (opea-project#452)
* align vllm-ray response format to tgi response format

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: changwangss <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
3 people authored and BaoHuiling committed Aug 13, 2024
1 parent 94ba02b commit 24f48e0
Showing 1 changed file with 4 additions and 22 deletions.
26 changes: 4 additions & 22 deletions comps/llms/text-generation/vllm-ray/llm.py
@@ -21,18 +21,6 @@
 from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice


-@traceable(run_type="tool")
-def post_process_text(text: str):
-    if text == " ":
-        return "data: @#$\n\n"
-    if text == "\n":
-        return "data: <br/>\n\n"
-    if text.isspace():
-        return None
-    new_text = text.replace(" ", "@#$")
-    return f"data: {new_text}\n\n"
-
-
 @register_microservice(
     name="opea_service@llm_vllm_ray",
     service_type=ServiceType.LLM,
@@ -56,19 +44,13 @@ def llm_generate(input: LLMParamsDoc):

     if input.streaming:

-        async def stream_generator():
+        def stream_generator():
             chat_response = ""
-            async for text in llm.astream(input.query):
+            for text in llm.stream(input.query):
                 text = text.content
                 chat_response += text
-                processed_text = post_process_text(text)
-                if text and processed_text:
-                    if "</s>" in text:
-                        res = text.split("</s>")[0]
-                        if res != "":
-                            yield res
-                        break
-                    yield processed_text
+                chunk_repr = repr(text.encode("utf-8"))
+                yield f"data: {chunk_repr}\n\n"
             print(f"[llm - chat_stream] stream response: {chat_response}")
             yield "data: [DONE]\n\n"
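For illustration, a minimal sketch (not part of the commit; format_chunk is a hypothetical helper) of what the new streaming branch emits per token: each token is UTF-8 encoded and its bytes repr is sent as a server-sent event, replacing the old "@#$" / "<br/>" substitutions with the format the commit aligns to the TGI microservice.

# Illustrative sketch only: format_chunk mirrors the new streaming branch
# in comps/llms/text-generation/vllm-ray/llm.py; it is not part of the diff.
def format_chunk(text: str) -> str:
    # Encode the token and emit its bytes repr as an SSE "data:" event,
    # e.g. " Hello" -> "data: b' Hello'\n\n".
    chunk_repr = repr(text.encode("utf-8"))
    return f"data: {chunk_repr}\n\n"


for token in [" Hello", ",", " world", "\n"]:
    print(format_chunk(token), end="")
print("data: [DONE]\n\n", end="")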
