Commit 1e63e5a

Merge branch 'opea-project:main' into main
siddhivelankar23 authored Aug 9, 2024
2 parents de3f844 + 8f0f2b0 commit 1e63e5a
Showing 29 changed files with 340 additions and 54 deletions.
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/asr-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: asr,whisper,whisper-gaudi
services:
asr:
build:
7 changes: 7 additions & 0 deletions .github/workflows/docker/compose/dataprep-compose.yaml
@@ -1,11 +1,18 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: dataprep,dataprep-qdrant
# others: dataprep-redis-llama-index,dataprep-on-ray-redis
services:
dataprep-redis:
build:
dockerfile: comps/dataprep/redis/langchain/docker/Dockerfile
image: ${REGISTRY}opea/dataprep-redis:${TAG:-latest}
dataprep-qdrant:
build:
dockerfile: comps/dataprep/qdrant/docker/Dockerfile
image: ${REGISTRY}opea/dataprep-qdrant:${TAG:-latest}
dataprep-redis-llama-index:
build:
dockerfile: comps/dataprep/redis/llama_index/docker/Dockerfile
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/embeddings-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: embedding-tei
services:
embedding-tei:
build:
15 changes: 15 additions & 0 deletions .github/workflows/docker/compose/guardrails-compose.yaml
@@ -0,0 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: guardrails-tgi
# others: guardrails-pii-detection
services:
guardrails-tgi:
build:
dockerfile: comps/guardrails/llama_guard/docker/Dockerfile
image: ${REGISTRY}opea/guardrails-tgi:${TAG:-latest}
guardrails-pii-detection:
build:
dockerfile: comps/guardrails/pii_detection/docker/Dockerfile
image: ${REGISTRY}opea/guardrails-pii-detection:${TAG:-latest}
13 changes: 13 additions & 0 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -2,11 +2,16 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: llm-tgi,llm-ollama,llm-docsum-tgi,llm-faqgen-tgi,llm-vllm,llm-vllm-ray
services:
llm-tgi:
build:
dockerfile: comps/llms/text-generation/tgi/Dockerfile
image: ${REGISTRY}opea/llm-tgi:${TAG:-latest}
llm-ollama:
build:
dockerfile: comps/llms/text-generation/ollama/Dockerfile
image: ${REGISTRY}opea/llm-ollama:${TAG:-latest}
llm-docsum-tgi:
build:
dockerfile: comps/llms/summarization/tgi/Dockerfile
@@ -15,3 +20,11 @@ services:
build:
dockerfile: comps/llms/faq-generation/tgi/Dockerfile
image: ${REGISTRY}opea/llm-faqgen-tgi:${TAG:-latest}
llm-vllm:
build:
dockerfile: comps/llms/text-generation/vllm/docker/Dockerfile.microservice
image: ${REGISTRY}opea/llm-vllm:${TAG:-latest}
llm-vllm-ray:
build:
dockerfile: comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice
image: ${REGISTRY}opea/llm-vllm-ray:${TAG:-latest}
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/reranks-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: reranking-tei
services:
reranking-tei:
build:
5 changes: 5 additions & 0 deletions .github/workflows/docker/compose/retrievers-compose.yaml
@@ -1,8 +1,13 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
services:
retriever-redis:
build:
dockerfile: comps/retrievers/langchain/redis/docker/Dockerfile
image: ${REGISTRY}opea/retriever-redis:${TAG:-latest}
retriever-qdrant:
build:
dockerfile: comps/retrievers/haystack/qdrant/docker/Dockerfile
image: ${REGISTRY}opea/retriever-qdrant:${TAG:-latest}
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/tts-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: reranking-tei
services:
asr:
build:
1 change: 1 addition & 0 deletions .github/workflows/docker/compose/web_retrievers-compose.yaml
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: web-retriever-chroma
services:
web-retriever-chroma:
build:
4 changes: 2 additions & 2 deletions .github/workflows/image-build-on-manual.yml
@@ -7,8 +7,8 @@ on:
workflow_dispatch:
inputs:
services:
default: "asr"
description: "List of services to build including [asr,dataprep,embeddings,llms,reranks,retrievers,tts,web_retrievers]"
default: "asr,dataprep"
description: "List of services to build including [asr,dataprep,embeddings,guardrails,llms,reranks,retrievers,tts,web_retrievers]"
required: true
type: string
tag:
3 changes: 2 additions & 1 deletion .github/workflows/image-build-on-schedule.yml
@@ -39,7 +39,8 @@ jobs:
if: ${{ needs.check-build.outputs.run_build == 'true' }}
strategy:
matrix:
service: ["asr", "dataprep", "embeddings", "llms", "reranks", "retrievers", "tts", "web_retrievers"]
service:
["asr", "dataprep", "embeddings", "guardrails", "llms", "reranks", "retrievers", "tts", "web_retrievers"]
node: ["docker-build-xeon", "docker-build-gaudi"]
runs-on: ${{ matrix.node }}
continue-on-error: true
1 change: 1 addition & 0 deletions .github/workflows/microservice-test.yml
@@ -43,6 +43,7 @@ jobs:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
service_path: ${{ matrix.service }}
hardware: ${{ matrix.hardware }}
run: |
1 change: 1 addition & 0 deletions comps/__init__.py
@@ -37,6 +37,7 @@
SearchQnAGateway,
AudioQnAGateway,
FaqGenGateway,
VisualQnAGateway,
)

# Telemetry
82 changes: 76 additions & 6 deletions comps/cores/mega/gateway.py
@@ -1,6 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import base64

import requests
from fastapi import Request
from fastapi.responses import StreamingResponse

@@ -75,6 +78,8 @@ def _handle_message(self, messages):
prompt = messages
else:
messages_dict = {}
system_prompt = ""
prompt = ""
for message in messages:
msg_role = message["role"]
if msg_role == "system":
@@ -84,20 +89,41 @@ def _handle_message(self, messages):
text = ""
text_list = [item["text"] for item in message["content"] if item["type"] == "text"]
text += "\n".join(text_list)
messages_dict[msg_role] = text
image_list = [
item["image_url"]["url"] for item in message["content"] if item["type"] == "image_url"
]
if image_list:
messages_dict[msg_role] = (text, image_list)
else:
messages_dict[msg_role] = text
else:
messages_dict[msg_role] = message["content"]
elif msg_role == "assistant":
messages_dict[msg_role] = message["content"]
else:
raise ValueError(f"Unknown role: {msg_role}")
prompt = system_prompt + "\n"
if system_prompt:
prompt = system_prompt + "\n"
images = []
for role, message in messages_dict.items():
if message:
prompt += role + ": " + message + "\n"
if isinstance(message, tuple):
text, image_list = message
if text:
prompt += role + ": " + text + "\n"
else:
prompt += role + ":"
for img in image_list:
response = requests.get(img)
images.append(base64.b64encode(response.content).decode("utf-8"))
else:
prompt += role + ":"
return prompt
if message:
prompt += role + ": " + message + "\n"
else:
prompt += role + ":"
if images:
return prompt, images
else:
return prompt


class ChatQnAGateway(Gateway):
@@ -439,3 +465,47 @@ async def handle_request(self, request: Request):
)
)
return ChatCompletionResponse(model="faqgen", choices=choices, usage=usage)


class VisualQnAGateway(Gateway):
def __init__(self, megaservice, host="0.0.0.0", port=8888):
super().__init__(
megaservice, host, port, str(MegaServiceEndpoint.VISUAL_QNA), ChatCompletionRequest, ChatCompletionResponse
)

async def handle_request(self, request: Request):
data = await request.json()
stream_opt = data.get("stream", False)
chat_request = ChatCompletionRequest.parse_obj(data)
prompt, images = self._handle_message(chat_request.messages)
parameters = LLMParams(
max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03,
streaming=stream_opt,
)
result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"prompt": prompt, "image": images[0]}, llm_parameters=parameters
)
for node, response in result_dict.items():
# Here it suppose the last microservice in the megaservice is LVM.
if (
isinstance(response, StreamingResponse)
and node == list(self.megaservice.services.keys())[-1]
and self.megaservice.services[node].service_type == ServiceType.LVM
):
return response
last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["text"]
choices = []
usage = UsageInfo()
choices.append(
ChatCompletionResponseChoice(
index=0,
message=ChatMessage(role="assistant", content=response),
finish_reason="stop",
)
)
return ChatCompletionResponse(model="visualqna", choices=choices, usage=usage)
35 changes: 18 additions & 17 deletions comps/cores/mega/micro_service.py
@@ -156,23 +156,24 @@ def register_microservice(
provider_endpoint: Optional[str] = None,
):
def decorator(func):
micro_service = MicroService(
name=name,
service_role=service_role,
service_type=service_type,
protocol=protocol,
host=host,
port=port,
ssl_keyfile=ssl_keyfile,
ssl_certfile=ssl_certfile,
endpoint=endpoint,
input_datatype=input_datatype,
output_datatype=output_datatype,
provider=provider,
provider_endpoint=provider_endpoint,
)
micro_service.app.router.add_api_route(endpoint, func, methods=["POST"])
opea_microservices[name] = micro_service
if name not in opea_microservices:
micro_service = MicroService(
name=name,
service_role=service_role,
service_type=service_type,
protocol=protocol,
host=host,
port=port,
ssl_keyfile=ssl_keyfile,
ssl_certfile=ssl_certfile,
endpoint=endpoint,
input_datatype=input_datatype,
output_datatype=output_datatype,
provider=provider,
provider_endpoint=provider_endpoint,
)
opea_microservices[name] = micro_service
opea_microservices[name].app.router.add_api_route(endpoint, func, methods=["POST"])
return func

return decorator
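
The refactor above makes `register_microservice` reuse an existing `MicroService` when the same `name` is registered again, so a later decoration only adds another route instead of replacing the service. A rough usage sketch under that assumption; the service name, port, and endpoints below are hypothetical, and `register_microservice` is assumed to be importable from the package root.

```python
from comps import register_microservice  # assumed public export

@register_microservice(name="opea_service@example", endpoint="/v1/example/run", host="0.0.0.0", port=9000)
def run(doc):
    # First decoration creates the MicroService and registers /v1/example/run.
    return doc

@register_microservice(name="opea_service@example", endpoint="/v1/example/status", host="0.0.0.0", port=9000)
def status(doc):
    # Second decoration with the same name reuses the existing app and
    # only adds the /v1/example/status route.
    return {"status": "ok"}
```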
7 changes: 5 additions & 2 deletions comps/cores/mega/orchestrator.py
@@ -47,7 +47,7 @@ async def schedule(self, initial_inputs: Dict, llm_parameters: LLMParams = LLMPa
timeout = aiohttp.ClientTimeout(total=1000)
async with aiohttp.ClientSession(trust_env=True, timeout=timeout) as session:
pending = {
asyncio.create_task(self.execute(session, node, initial_inputs, runtime_graph))
asyncio.create_task(self.execute(session, node, initial_inputs, runtime_graph, llm_parameters))
for node in self.ind_nodes()
}
ind_nodes = self.ind_nodes()
@@ -117,7 +117,10 @@ async def execute(
if inputs.get(field) != value:
inputs[field] = value

if self.services[cur_node].service_type == ServiceType.LLM and llm_parameters.streaming:
if (
self.services[cur_node].service_type == ServiceType.LLM
or self.services[cur_node].service_type == ServiceType.LVM
) and llm_parameters.streaming:
# Still leave to sync requests.post for StreamingResponse
response = requests.post(
url=endpoint, data=json.dumps(inputs), proxies={"http": None}, stream=True, timeout=1000
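
The effect of this change is that streaming responses are now proxied for vision-language (LVM) nodes as well as text-generation (LLM) nodes. A standalone sketch of the dispatch condition follows; the enum values are illustrative and do not reproduce the project's actual ServiceType definition.

```python
from enum import Enum

class ServiceType(Enum):
    # Illustrative subset; the real enum lives in the comps package.
    LLM = "llm"
    LVM = "lvm"
    EMBEDDING = "embedding"

def uses_streaming_path(service_type: ServiceType, streaming_requested: bool) -> bool:
    # After this commit both LLM and LVM microservices take the synchronous
    # requests.post(..., stream=True) branch when streaming is requested.
    return streaming_requested and service_type in (ServiceType.LLM, ServiceType.LVM)

assert uses_streaming_path(ServiceType.LVM, True)
assert not uses_streaming_path(ServiceType.EMBEDDING, True)
```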
6 changes: 6 additions & 0 deletions comps/cores/proto/docarray.py
@@ -143,3 +143,9 @@ class LVMDoc(BaseDoc):
image: str
prompt: str
max_new_tokens: conint(ge=0, le=1024) = 512
top_k: int = 10
top_p: float = 0.95
typical_p: float = 0.95
temperature: float = 0.01
repetition_penalty: float = 1.03
streaming: bool = False
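
With these additional fields, an LVM request document can carry the same generation controls that the gateway forwards. An illustrative instantiation is shown below; the image payload is a placeholder, and the import path simply mirrors the file changed above (assuming the package is installed).

```python
from comps.cores.proto.docarray import LVMDoc

doc = LVMDoc(
    image="<base64-encoded image bytes>",  # placeholder payload
    prompt="Describe the image.",
    max_new_tokens=512,
    top_k=10,
    top_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
    streaming=False,
)
```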
2 changes: 1 addition & 1 deletion comps/dataprep/milvus/config.py
@@ -12,7 +12,7 @@
MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530))
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus")

MOSEC_EMBEDDING_MODEL = "/root/bce-embedding-base_v1"
MOSEC_EMBEDDING_MODEL = os.environ.get("MOSEC_EMBEDDING_MODEL", "/root/bce-embedding-base_v1")
MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "")
os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT
os.environ["OPENAI_API_KEY"] = "Dummy key"
12 changes: 6 additions & 6 deletions comps/dataprep/redis/README.md
@@ -136,13 +136,13 @@ docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https
- option 1: Start single-process version (for 1-10 files processing)

```bash
docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-redis:latest
docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-redis:latest
```

- option 2: Start multi-process version (for >10 files processing)

```bash
docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest
docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest
```

## 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future)
@@ -250,7 +250,7 @@ To get uploaded file structures, use the following command:
```bash
curl -X POST \
-H "Content-Type: application/json" \
http://localhost:6008/v1/dataprep/get_file
http://localhost:6007/v1/dataprep/get_file
```

Then you will get the response JSON like this:
@@ -283,17 +283,17 @@ The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API.
curl -X POST \
-H "Content-Type: application/json" \
-d '{"file_path": "https://www.ces.tech/.txt"}' \
http://10.165.57.68:6009/v1/dataprep/delete_file
http://localhost:6007/v1/dataprep/delete_file

# delete file
curl -X POST \
-H "Content-Type: application/json" \
-d '{"file_path": "uploaded_file_1.txt"}' \
http://10.165.57.68:6009/v1/dataprep/delete_file
http://localhost:6007/v1/dataprep/delete_file

# delete all files and links
curl -X POST \
-H "Content-Type: application/json" \
-d '{"file_path": "all"}' \
http://10.165.57.68:6009/v1/dataprep/delete_file
http://localhost:6007/v1/dataprep/delete_file
```