Commit 1e63e5a

Merge branch 'opea-project:main' into main
siddhivelankar23 authored Aug 9, 2024
2 parents de3f844 + 8f0f2b0 commit 1e63e5a
Showing 29 changed files with 340 additions and 54 deletions.
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/asr-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: asr,whisper,whisper-gaudi
services:
asr:
build:
7 changes: 7 additions & 0 deletions .github/workflows/docker/compose/dataprep-compose.yaml
@@ -1,11 +1,18 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: dataprep,dataprep-qdrant
# others: dataprep-redis-llama-index,dataprep-on-ray-redis
services:
dataprep-redis:
build:
dockerfile: comps/dataprep/redis/langchain/docker/Dockerfile
image: ${REGISTRY}opea/dataprep-redis:${TAG:-latest}
dataprep-qdrant:
build:
dockerfile: comps/dataprep/qdrant/docker/Dockerfile
image: ${REGISTRY}opea/dataprep-qdrant:${TAG:-latest}
dataprep-redis-llama-index:
build:
dockerfile: comps/dataprep/redis/llama_index/docker/Dockerfile
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/embeddings-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: embedding-tei
services:
embedding-tei:
build:
15 changes: 15 additions & 0 deletions .github/workflows/docker/compose/guardrails-compose.yaml
@@ -0,0 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: guardrails-tgi
# others: guardrails-pii-detection
services:
guardrails-tgi:
build:
dockerfile: comps/guardrails/llama_guard/docker/Dockerfile
image: ${REGISTRY}opea/guardrails-tgi:${TAG:-latest}
guardrails-pii-detection:
build:
dockerfile: comps/guardrails/pii_detection/docker/Dockerfile
image: ${REGISTRY}opea/guardrails-pii-detection:${TAG:-latest}
13 changes: 13 additions & 0 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -2,11 +2,16 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: llm-tgi,llm-ollama,llm-docsum-tgi,llm-faqgen-tgi,llm-vllm,llm-vllm-ray
services:
llm-tgi:
build:
dockerfile: comps/llms/text-generation/tgi/Dockerfile
image: ${REGISTRY}opea/llm-tgi:${TAG:-latest}
llm-ollama:
build:
dockerfile: comps/llms/text-generation/ollama/Dockerfile
image: ${REGISTRY}opea/llm-ollama:${TAG:-latest}
llm-docsum-tgi:
build:
dockerfile: comps/llms/summarization/tgi/Dockerfile
@@ -15,3 +20,11 @@ services:
build:
dockerfile: comps/llms/faq-generation/tgi/Dockerfile
image: ${REGISTRY}opea/llm-faqgen-tgi:${TAG:-latest}
llm-vllm:
build:
dockerfile: comps/llms/text-generation/vllm/docker/Dockerfile.microservice
image: ${REGISTRY}opea/llm-vllm:${TAG:-latest}
llm-vllm-ray:
build:
dockerfile: comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice
image: ${REGISTRY}opea/llm-vllm-ray:${TAG:-latest}
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/reranks-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: reranking-tei
services:
reranking-tei:
build:
5 changes: 5 additions & 0 deletions .github/workflows/docker/compose/retrievers-compose.yaml
@@ -1,8 +1,13 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
services:
retriever-redis:
build:
dockerfile: comps/retrievers/langchain/redis/docker/Dockerfile
image: ${REGISTRY}opea/retriever-redis:${TAG:-latest}
retriever-qdrant:
build:
dockerfile: comps/retrievers/haystack/qdrant/docker/Dockerfile
image: ${REGISTRY}opea/retriever-qdrant:${TAG:-latest}
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/tts-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: reranking-tei
services:
asr:
build:
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/web_retrievers-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: web-retriever-chroma
services:
web-retriever-chroma:
build:
4 changes: 2 additions & 2 deletions .github/workflows/image-build-on-manual.yml
@@ -7,8 +7,8 @@ on:
workflow_dispatch:
inputs:
services:
default: "asr"
description: "List of services to build including [asr,dataprep,embeddings,llms,reranks,retrievers,tts,web_retrievers]"
default: "asr,dataprep"
description: "List of services to build including [asr,dataprep,embeddings,guardrails,llms,reranks,retrievers,tts,web_retrievers]"
required: true
type: string
tag:
3 changes: 2 additions & 1 deletion .github/workflows/image-build-on-schedule.yml
@@ -39,7 +39,8 @@ jobs:
if: ${{ needs.check-build.outputs.run_build == 'true' }}
strategy:
matrix:
service: ["asr", "dataprep", "embeddings", "llms", "reranks", "retrievers", "tts", "web_retrievers"]
service:
["asr", "dataprep", "embeddings", "guardrails", "llms", "reranks", "retrievers", "tts", "web_retrievers"]
node: ["docker-build-xeon", "docker-build-gaudi"]
runs-on: ${{ matrix.node }}
continue-on-error: true
1 change: 1 addition & 0 deletions .github/workflows/microservice-test.yml
@@ -43,6 +43,7 @@ jobs:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
service_path: ${{ matrix.service }}
hardware: ${{ matrix.hardware }}
run: |
1 change: 1 addition & 0 deletions comps/__init__.py
@@ -37,6 +37,7 @@
SearchQnAGateway,
AudioQnAGateway,
FaqGenGateway,
VisualQnAGateway,
)

# Telemetry
82 changes: 76 additions & 6 deletions comps/cores/mega/gateway.py
@@ -1,6 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import base64

import requests
from fastapi import Request
from fastapi.responses import StreamingResponse

@@ -75,6 +78,8 @@ def _handle_message(self, messages):
prompt = messages
else:
messages_dict = {}
system_prompt = ""
prompt = ""
for message in messages:
msg_role = message["role"]
if msg_role == "system":
@@ -84,20 +89,41 @@ def _handle_message(self, messages):
text = ""
text_list = [item["text"] for item in message["content"] if item["type"] == "text"]
text += "\n".join(text_list)
messages_dict[msg_role] = text
image_list = [
item["image_url"]["url"] for item in message["content"] if item["type"] == "image_url"
]
if image_list:
messages_dict[msg_role] = (text, image_list)
else:
messages_dict[msg_role] = text
else:
messages_dict[msg_role] = message["content"]
elif msg_role == "assistant":
messages_dict[msg_role] = message["content"]
else:
raise ValueError(f"Unknown role: {msg_role}")
prompt = system_prompt + "\n"
if system_prompt:
prompt = system_prompt + "\n"
images = []
for role, message in messages_dict.items():
if message:
prompt += role + ": " + message + "\n"
if isinstance(message, tuple):
text, image_list = message
if text:
prompt += role + ": " + text + "\n"
else:
prompt += role + ":"
for img in image_list:
response = requests.get(img)
images.append(base64.b64encode(response.content).decode("utf-8"))
else:
prompt += role + ":"
return prompt
if message:
prompt += role + ": " + message + "\n"
else:
prompt += role + ":"
if images:
return prompt, images
else:
return prompt


class ChatQnAGateway(Gateway):
@@ -439,3 +465,47 @@ async def handle_request(self, request: Request):
)
)
return ChatCompletionResponse(model="faqgen", choices=choices, usage=usage)


class VisualQnAGateway(Gateway):
def __init__(self, megaservice, host="0.0.0.0", port=8888):
super().__init__(
megaservice, host, port, str(MegaServiceEndpoint.VISUAL_QNA), ChatCompletionRequest, ChatCompletionResponse
)

async def handle_request(self, request: Request):
data = await request.json()
stream_opt = data.get("stream", False)
chat_request = ChatCompletionRequest.parse_obj(data)
prompt, images = self._handle_message(chat_request.messages)
parameters = LLMParams(
max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03,
streaming=stream_opt,
)
result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"prompt": prompt, "image": images[0]}, llm_parameters=parameters
)
for node, response in result_dict.items():
# Here it suppose the last microservice in the megaservice is LVM.
if (
isinstance(response, StreamingResponse)
and node == list(self.megaservice.services.keys())[-1]
and self.megaservice.services[node].service_type == ServiceType.LVM
):
return response
last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["text"]
choices = []
usage = UsageInfo()
choices.append(
ChatCompletionResponseChoice(
index=0,
message=ChatMessage(role="assistant", content=response),
finish_reason="stop",
)
)
return ChatCompletionResponse(model="visualqna", choices=choices, usage=usage)
35 changes: 18 additions & 17 deletions comps/cores/mega/micro_service.py
@@ -156,23 +156,24 @@ def register_microservice(
provider_endpoint: Optional[str] = None,
):
def decorator(func):
micro_service = MicroService(
name=name,
service_role=service_role,
service_type=service_type,
protocol=protocol,
host=host,
port=port,
ssl_keyfile=ssl_keyfile,
ssl_certfile=ssl_certfile,
endpoint=endpoint,
input_datatype=input_datatype,
output_datatype=output_datatype,
provider=provider,
provider_endpoint=provider_endpoint,
)
micro_service.app.router.add_api_route(endpoint, func, methods=["POST"])
opea_microservices[name] = micro_service
if name not in opea_microservices:
micro_service = MicroService(
name=name,
service_role=service_role,
service_type=service_type,
protocol=protocol,
host=host,
port=port,
ssl_keyfile=ssl_keyfile,
ssl_certfile=ssl_certfile,
endpoint=endpoint,
input_datatype=input_datatype,
output_datatype=output_datatype,
provider=provider,
provider_endpoint=provider_endpoint,
)
opea_microservices[name] = micro_service
opea_microservices[name].app.router.add_api_route(endpoint, func, methods=["POST"])
return func

return decorator
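
The refactor above makes `register_microservice` reuse an existing `MicroService` when the same `name` is registered again, so a later decoration only adds another route instead of replacing the service. A rough usage sketch under that assumption; the service name, port, and endpoints below are hypothetical, and `register_microservice` is assumed to be importable from the package root.

```python
from comps import register_microservice  # assumed public export

@register_microservice(name="opea_service@example", endpoint="/v1/example/run", host="0.0.0.0", port=9000)
def run(doc):
    # First decoration creates the MicroService and registers /v1/example/run.
    return doc

@register_microservice(name="opea_service@example", endpoint="/v1/example/status", host="0.0.0.0", port=9000)
def status(doc):
    # Second decoration with the same name reuses the existing app and
    # only adds the /v1/example/status route.
    return {"status": "ok"}
```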
7 changes: 5 additions & 2 deletions comps/cores/mega/orchestrator.py
@@ -47,7 +47,7 @@ async def schedule(self, initial_inputs: Dict, llm_parameters: LLMParams = LLMPa
timeout = aiohttp.ClientTimeout(total=1000)
async with aiohttp.ClientSession(trust_env=True, timeout=timeout) as session:
pending = {
asyncio.create_task(self.execute(session, node, initial_inputs, runtime_graph))
asyncio.create_task(self.execute(session, node, initial_inputs, runtime_graph, llm_parameters))
for node in self.ind_nodes()
}
ind_nodes = self.ind_nodes()
@@ -117,7 +117,10 @@ async def execute(
if inputs.get(field) != value:
inputs[field] = value

if self.services[cur_node].service_type == ServiceType.LLM and llm_parameters.streaming:
if (
self.services[cur_node].service_type == ServiceType.LLM
or self.services[cur_node].service_type == ServiceType.LVM
) and llm_parameters.streaming:
# Still leave to sync requests.post for StreamingResponse
response = requests.post(
url=endpoint, data=json.dumps(inputs), proxies={"http": None}, stream=True, timeout=1000
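
The effect of this change is that streaming responses are now proxied for vision-language (LVM) nodes as well as text-generation (LLM) nodes. A standalone sketch of the dispatch condition follows; the enum values are illustrative and do not reproduce the project's actual ServiceType definition.

```python
from enum import Enum

class ServiceType(Enum):
    # Illustrative subset; the real enum lives in the comps package.
    LLM = "llm"
    LVM = "lvm"
    EMBEDDING = "embedding"

def uses_streaming_path(service_type: ServiceType, streaming_requested: bool) -> bool:
    # After this commit both LLM and LVM microservices take the synchronous
    # requests.post(..., stream=True) branch when streaming is requested.
    return streaming_requested and service_type in (ServiceType.LLM, ServiceType.LVM)

assert uses_streaming_path(ServiceType.LVM, True)
assert not uses_streaming_path(ServiceType.EMBEDDING, True)
```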
6 changes: 6 additions & 0 deletions comps/cores/proto/docarray.py
@@ -143,3 +143,9 @@ class LVMDoc(BaseDoc):
image: str
prompt: str
max_new_tokens: conint(ge=0, le=1024) = 512
top_k: int = 10
top_p: float = 0.95
typical_p: float = 0.95
temperature: float = 0.01
repetition_penalty: float = 1.03
streaming: bool = False
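
With these additional fields, an LVM request document can carry the same generation controls that the gateway forwards. An illustrative instantiation is shown below; the image payload is a placeholder, and the import path simply mirrors the file changed above (assuming the package is installed).

```python
from comps.cores.proto.docarray import LVMDoc

doc = LVMDoc(
    image="<base64-encoded image bytes>",  # placeholder payload
    prompt="Describe the image.",
    max_new_tokens=512,
    top_k=10,
    top_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
    streaming=False,
)
```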
2 changes: 1 addition & 1 deletion comps/dataprep/milvus/config.py
@@ -12,7 +12,7 @@
MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530))
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus")

MOSEC_EMBEDDING_MODEL = "/root/bce-embedding-base_v1"
MOSEC_EMBEDDING_MODEL = os.environ.get("MOSEC_EMBEDDING_MODEL", "/root/bce-embedding-base_v1")
MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "")
os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT
os.environ["OPENAI_API_KEY"] = "Dummy key"
12 changes: 6 additions & 6 deletions comps/dataprep/redis/README.md
@@ -136,13 +136,13 @@ docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https
- option 1: Start single-process version (for 1-10 files processing)

```bash
docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-redis:latest
docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-redis:latest
```

- option 2: Start multi-process version (for >10 files processing)

```bash
docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest
docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest
```

## 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future)
@@ -250,7 +250,7 @@ To get uploaded file structures, use the following command:
```bash
curl -X POST \
-H "Content-Type: application/json" \
http://localhost:6008/v1/dataprep/get_file
http://localhost:6007/v1/dataprep/get_file
```

Then you will get the response JSON like this:
@@ -283,17 +283,17 @@ The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API.
curl -X POST \
-H "Content-Type: application/json" \
-d '{"file_path": "https://www.ces.tech/.txt"}' \
http://10.165.57.68:6009/v1/dataprep/delete_file
http://localhost:6007/v1/dataprep/delete_file

# delete file
curl -X POST \
-H "Content-Type: application/json" \
-d '{"file_path": "uploaded_file_1.txt"}' \
http://10.165.57.68:6009/v1/dataprep/delete_file
http://localhost:6007/v1/dataprep/delete_file

# delete all files and links
curl -X POST \
-H "Content-Type: application/json" \
-d '{"file_path": "all"}' \
http://10.165.57.68:6009/v1/dataprep/delete_file
http://localhost:6007/v1/dataprep/delete_file
```