Add video-llama LVM microservice under lvms (opea-project#495)
Signed-off-by: BaoHuiling <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Signed-off-by: srinarayan-srikanthan <[email protected]>
2 people authored and srinarayan-srikanthan committed Sep 1, 2024
1 parent 389fb61 commit bb01c19
Showing 18 changed files with 812 additions and 1 deletion.
1 change: 1 addition & 0 deletions comps/__init__.py
@@ -18,6 +18,7 @@
RAGASScores,
GraphDoc,
LVMDoc,
LVMVideoDoc,
)

# Constants
10 changes: 9 additions & 1 deletion comps/cores/proto/docarray.py
@@ -5,7 +5,7 @@

import numpy as np
from docarray import BaseDoc, DocList
from docarray.documents import AudioDoc
from docarray.documents import AudioDoc, VideoDoc
from docarray.typing import AudioUrl
from pydantic import Field, conint, conlist, field_validator

@@ -171,3 +171,11 @@ class LVMDoc(BaseDoc):
temperature: float = 0.01
repetition_penalty: float = 1.03
streaming: bool = False


class LVMVideoDoc(BaseDoc):
video_url: str
chunk_start: float
chunk_duration: float
prompt: str
max_new_tokens: conint(ge=0, le=1024) = 512
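
The new `LVMVideoDoc` is a pydantic-based docarray `BaseDoc`, so its field constraints are checked at construction time. Below is a minimal sketch (illustrative only, not part of this commit) of building a document and triggering the `conint(ge=0, le=1024)` bound on `max_new_tokens`:

```python
# Illustrative sketch, not part of the commit: LVMVideoDoc field validation.
from pydantic import ValidationError

from comps import LVMVideoDoc  # exported via the comps/__init__.py change above

doc = LVMVideoDoc(
    video_url="https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4",
    chunk_start=0.0,
    chunk_duration=9.0,
    prompt="What is the person doing?",
)
print(doc.max_new_tokens)  # 512, the declared default

try:
    LVMVideoDoc(
        video_url="clip.mp4",
        chunk_start=0.0,
        chunk_duration=9.0,
        prompt="hi",
        max_new_tokens=4096,  # violates conint(ge=0, le=1024)
    )
except ValidationError as err:
    print(err)
```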
18 changes: 18 additions & 0 deletions comps/lvms/video-llama/Dockerfile
@@ -0,0 +1,18 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

# Set environment variables
ENV LANG=en_US.UTF-8

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/comps/lvms/video-llama/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/lvms/video-llama

ENTRYPOINT ["python", "lvm.py"]
70 changes: 70 additions & 0 deletions comps/lvms/video-llama/README.md
@@ -0,0 +1,70 @@
# LVM Microservice

This is a Docker-based microservice that runs Video-Llama as a Large Vision Model (LVM). It uses Llama-2-7b-chat-hf for conversations grounded in video content and supports Intel Xeon CPUs.

# 🚀 1. Start Microservice with Docker

## 1.1 Build Images

```bash
cd GenAIComps
# Video-Llama Server Image
docker build --no-cache -t opea/video-llama-lvm-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/server/docker/Dockerfile .
# LVM Service Image
docker build --no-cache -t opea/lvm-video-llama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/Dockerfile .
```

## 1.2 Start Video-Llama and LVM Services

For the very first run, please follow the steps below:

```bash
# prepare environment variables
export ip_address=$(hostname -I | awk '{print $1}')
export no_proxy=$no_proxy,${ip_address}
export LVM_ENDPOINT=http://${ip_address}:9009
# Start service
docker compose -f comps/lvms/video-llama/docker_compose.yaml up -d
# it should take about 1.5 hours for the model to download in the video-llama server, assuming a maximum download speed of 100 Mbps
until docker logs video-llama-lvm-server 2>&1 | grep -q "Uvicorn running on"; do
sleep 5m
done
```
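
If you prefer to wait from a script instead of polling the container logs, a minimal readiness check (illustrative only, not part of this commit) can simply wait until the Video-Llama server's port 9009 accepts TCP connections, which only happens once Uvicorn is up:

```python
# Illustrative only: block until the video-llama server (port 9009) is reachable.
import socket
import time


def wait_for_port(host: str, port: int, interval_s: float = 300.0) -> None:
    """Retry a TCP connection until the server is listening."""
    while True:
        try:
            with socket.create_connection((host, port), timeout=5):
                return
        except OSError:
            time.sleep(interval_s)  # the first model download may take ~1.5 hours


wait_for_port("localhost", 9009)
print("video-llama server is up")
```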

If you have run the microservice before, it is recommended to keep the downloaded model so it is not re-downloaded on every run. To do this, modify the following configuration:

```yaml
# comps/lvms/video-llama/docker_compose.yaml
services:
lvm-video-llama:
...
environment:
llm_download: "False" # avoid download
```

# ✅ 2. Test

```bash
# use curl
export ip_address=$(hostname -I | awk '{print $1}')
## check video-llama
http_proxy="" curl -X POST "http://${ip_address}:9009/generate?video_url=https%3A%2F%2Fgithub.com%2FDAMO-NLP-SG%2FVideo-LLaMA%2Fraw%2Fmain%2Fexamples%2Fsilence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" -H "accept: */*" -d ''

## check lvm
http_proxy="" curl -X POST http://${ip_address}:9000/v1/lvm -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 9,"prompt":"What is the person doing?","max_new_tokens": 150}' -H 'Content-Type: application/json'

# or use python
export ip_address=$(hostname -I | awk '{print $1}')
python comps/lvms/video-llama/check_lvm.py
```

# ♻️ 3. Clean

```bash
# remove the container
cid=$(docker ps -aq --filter "name=video-llama")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
# remove the model volume (suggested: keep it to avoid re-downloading the model on each run)
if docker volume ls | grep -q video-llama-model; then docker volume rm video-llama_video-llama-model; fi

```
50 changes: 50 additions & 0 deletions comps/lvms/video-llama/check_lvm.py
@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import datetime
import json
import os

import requests

ip_address = os.getenv("ip_address")
####### video-llama request ########
print("video-llama request")
api_url = f"http://${ip_address}:9009/generate"
content = {
"video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4",
"start": 0.0,
"duration": 9,
"prompt": "What is the person doing?",
"max_new_tokens": 150,
}

start = datetime.datetime.now()
with requests.post(api_url, params=content, stream=True) as response:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
print(chunk.decode("utf-8"), end="", flush=True) # Flush to ensure immediate output

end = datetime.datetime.now()
print(f"\nTotal time: {end - start}")

####### lvm request ########
print("lvm request")
api_url = f"http://${ip_address}:9000/v1/lvm"
headers = {"Content-Type": "application/json"}
data = {
"video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4",
"chunk_start": 0,
"chunk_duration": 9,
"prompt": "what is the person doing",
"max_new_tokens": 150,
}

start = datetime.datetime.now()
with requests.post(api_url, headers=headers, data=json.dumps(data), stream=True) as response:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
print(chunk.decode("utf-8"), end="", flush=True) # Flush to ensure immediate output

end = datetime.datetime.now()
print(f"\nTotal time: {end - start}")
40 changes: 40 additions & 0 deletions comps/lvms/video-llama/docker_compose.yaml
@@ -0,0 +1,40 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3"
services:
lvm-video-llama:
image: opea/video-llama-lvm-server:latest
container_name: video-llama-lvm-server
ports:
- "9009:9009"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
llm_download: "True"
volumes:
- "/home/$USER/.cache:/home/user/.cache" # RECOMMENDED: use local cache to avoid download
- video-llama-model:/home/user/model
restart: unless-stopped

lvm:
image: opea/lvm-video-llama:latest
container_name: lvm-video-llama
ports:
- "9000:9000"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
LVM_ENDPOINT: ${LVM_ENDPOINT}
restart: unless-stopped
depends_on:
- lvm-video-llama
networks:
default:
driver: bridge
volumes:
video-llama-model:
80 changes: 80 additions & 0 deletions comps/lvms/video-llama/lvm.py
@@ -0,0 +1,80 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


# import json
import logging
import os

import requests
from fastapi import HTTPException
from fastapi.responses import StreamingResponse

from comps import LVMVideoDoc, ServiceType, opea_microservices, register_microservice, register_statistics

# import time


logging.basicConfig(level=logging.INFO)


@register_microservice(
name="opea_service@lvm",
service_type=ServiceType.LVM,
endpoint="/v1/lvm",
host="0.0.0.0",
port=9000,
input_datatype=LVMVideoDoc,
output_datatype=StreamingResponse,
)
@register_statistics(names=["opea_service@lvm"])
async def lvm(input: LVMVideoDoc):
"""This function handles the LVM microservice, which generates text based on a video URL, start time, duration, prompt, and maximum new tokens.
Parameters:
input (LVMVideoDoc): The input containing the video URL, start time, duration, prompt, and maximum new tokens.
Returns:
StreamingResponse: A streaming response containing the generated text in text/event-stream format, or a JSON error response if the upstream API responds with an error.
"""
logging.info("[lvm] Received input")

video_url = input.video_url
chunk_start = input.chunk_start
chunk_duration = input.chunk_duration
prompt = input.prompt
max_new_tokens = input.max_new_tokens

params = {
"video_url": video_url,
"start": chunk_start,
"duration": chunk_duration,
"prompt": prompt,
"max_new_tokens": max_new_tokens,
}
logging.info(f"[lvm] Params: {params}")

response = requests.post(url=f"{lvm_endpoint}/generate", params=params, proxies={"http": None}, stream=True)
logging.info(f"[lvm] Response status code: {response.status_code}")
if response.status_code == 200:

def streamer():
yield f"{{'video_url': '{video_url}', 'chunk_start': {chunk_start}, 'chunk_duration': {chunk_duration}}}\n".encode(
"utf-8"
)
for chunk in response.iter_content(chunk_size=8192):
if chunk:
yield chunk
logging.info(f"[llm - chat_stream] Streaming: {chunk}")
logging.info("[llm - chat_stream] stream response finished")

return StreamingResponse(streamer(), media_type="text/event-stream")
else:
logging.error(f"[lvm] Error: {response.text}")
raise HTTPException(status_code=500, detail="The upstream API responded with an error.")


if __name__ == "__main__":
lvm_endpoint = os.getenv("LVM_ENDPOINT")

opea_microservices["opea_service@lvm"].start()
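
For reference, here is a minimal client sketch (not part of this commit) that consumes the streaming response produced by `lvm.py` above, assuming the LVM service is reachable on port 9000 of the local host as in `docker_compose.yaml`; the first streamed line echoes the chunk metadata and the remaining chunks are the generated text:

```python
# Illustrative client sketch for the /v1/lvm streaming endpoint above.
import requests

payload = {
    "video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4",
    "chunk_start": 0,
    "chunk_duration": 9,
    "prompt": "What is the person doing?",
    "max_new_tokens": 150,
}

with requests.post("http://localhost:9000/v1/lvm", json=payload, stream=True) as resp:
    resp.raise_for_status()
    lines = resp.iter_lines(decode_unicode=True)
    metadata = next(lines)        # "{'video_url': ..., 'chunk_start': 0, 'chunk_duration': 9}"
    generated = "\n".join(lines)  # the rest of the stream is the model output

print(metadata)
print(generated)
```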
11 changes: 11 additions & 0 deletions comps/lvms/video-llama/requirements.txt
@@ -0,0 +1,11 @@
datasets
docarray
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
prometheus-fastapi-instrumentator
pydub
shortuuid
uvicorn
Binary file not shown.
38 changes: 38 additions & 0 deletions comps/lvms/video-llama/server/docker/Dockerfile
@@ -0,0 +1,38 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.9-slim

ENV LANG=C.UTF-8

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
git git-lfs && \
git lfs install

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user:user /home/user/
RUN mkdir /home/user/model && chown user:user -R /home/user/model

USER user

COPY --chown=user:user comps /home/user/comps
WORKDIR /home/user/comps/lvms/video-llama/server

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/lvms/video-llama/server/requirements.txt

ARG VIDEO_LLAMA_REPO=https://github.com/DAMO-NLP-SG/Video-LLaMA.git
ARG VIDEO_LLAMA_COMMIT=0adb19e
RUN tar -xvf video-llama.patch.tar && \
git clone ${VIDEO_LLAMA_REPO} Video-LLaMA && \
cd Video-LLaMA && git checkout ${VIDEO_LLAMA_COMMIT} && \
git apply --whitespace=fix ../video-llama.patch && \
mv video_llama ../ && \
cd ../ && rm -rf Video-LLaMA


ENV PYTHONPATH=/home/user


ENTRYPOINT ["bash", "start.sh"]
25 changes: 25 additions & 0 deletions comps/lvms/video-llama/server/docker/docker_compose_vllama.yaml
@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3"
services:
lvm-video-llama:
image: opea/video-llama-lvm-server:latest
container_name: video-llama-lvm-server
ports:
- "9009:9009"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
llm_download: "True"
volumes:
- "/home/$USER/.cache:/home/user/.cache" # RECOMMENDED: use cache to avoid download
- video-llama-model:/home/user/model
restart: unless-stopped
networks:
default:
driver: bridge
volumes:
video-llama-model: