Merge pull request #1 from amazeeio/dev
update main
Schnitzel authored Aug 22, 2023
2 parents 4753dfd + a697ed6 commit afddde6
Showing 11 changed files with 58 additions and 55 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1 +1,2 @@
ENV
data
7 changes: 1 addition & 6 deletions .lagoon.yml
@@ -1,6 +1 @@
docker-compose-yaml: docker-compose.yml
environments:
  main:
    routes:
      - llama2-api:
          - llama2.marco-test6.amazeeio.review
docker-compose-yaml: docker-compose.gpu.yml
16 changes: 3 additions & 13 deletions Dockerfile
@@ -2,7 +2,7 @@
FROM python:3-slim-bullseye
ENV MODEL=WizardLM-13B-V1.2

# Update and upgrade the existing packages
# Update and upgrade the existing packages
RUN apt-get update && apt-get upgrade -y && apt-get install -y \
python3 \
python3-pip \
@@ -22,23 +22,13 @@ RUN rm -rf /var/lib/apt/lists/*
# Set a working directory for better clarity
WORKDIR /app

COPY ./amazee_server.sh /app/amazee_server.sh
COPY ./start-llama2.sh /app/start-llama2.sh
COPY ./hug_model.py /app/hug_model.py
COPY ./fix-permissions.sh /app/fix-permissions.sh
RUN chmod +x /app/fix-permissions.sh

RUN mkdir -p /data \
&& /app/fix-permissions.sh /data \
&& /app/fix-permissions.sh /app

# Make the server start script executable
RUN chmod +x /app/amazee_server.sh

# Set environment variable for the host
ENV HOST=0.0.0.0

# Expose a port for the server
EXPOSE 8000

# Run the server start script
CMD ["/bin/sh", "/app/amazee_server.sh"]
CMD ["/app/start-llama2.sh"]
7 changes: 1 addition & 6 deletions README.md
@@ -3,12 +3,7 @@ lagoon going llama2

# local run on mac M1
```sh
docker build --platform linux/amd64 .
docker run -it --platform linux/amd64
```

```sh
docker run --rm -it --platform linux/amd64 -v '/Users/marco/Downloads:/data' -p '8000:8000' $(docker build --platform linux/amd64 -q .)
docker-compose up -d
```

# curl test openai api
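The curl example itself is truncated in this view. Against llama-cpp-python's OpenAI-compatible server, a smoke test along these lines should work once the container is up (a sketch — the endpoint paths follow the OpenAI API shape and the payload is illustrative, not taken from this repo):

```sh
# List the single model served by llama-cpp-python (OpenAI-compatible endpoint)
curl http://localhost:8000/v1/models

# Minimal chat completion request; prompt and parameters are placeholders
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Say hello"}], "max_tokens": 64}'
```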
24 changes: 24 additions & 0 deletions docker-compose.gpu.yml
@@ -0,0 +1,24 @@
version: '2'
services:
  llama2-api:
    build:
      context: .
      dockerfile: gpu.Dockerfile
    labels:
      lagoon.type: basic-persistent
      lagoon.persistent: /data
      lagoon.service.port: 8000
      lagoon.autogeneratedroute: true
      lagoon.gpu: true
    volumes:
      - ./data:/data:delegated
    ports:
      - "8000:8000"
  ui:
    build:
      context: .
      dockerfile: ui.Dockerfile
    labels:
      lagoon.type: node
    ports:
      - "3000:3000"
15 changes: 12 additions & 3 deletions docker-compose.yml
@@ -3,14 +3,23 @@ services:
  llama2-api:
    build:
      context: .
      dockerfile: gpu.Dockerfile
      dockerfile: Dockerfile
    labels:
      lagoon.type: basic-persistent
      lagoon.persistent: /data
      lagoon.service.port: 8000
      lagoon.autogeneratedroute: true
      lagoon.gpu: true
    volumes:
      - ./app:/app:delegated
      - ./data:/data:delegated
    ports:
      - "8000:8000"
    platform: linux/amd64
  ui:
    user: '10000'
    build:
      context: .
      dockerfile: ui.Dockerfile
    labels:
      lagoon.type: node
    ports:
      - "3000:3000"
5 changes: 0 additions & 5 deletions fix-permissions.sh

This file was deleted.

24 changes: 7 additions & 17 deletions gpu.Dockerfile
@@ -1,13 +1,15 @@
ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}
ENV MODEL=WizardLM-13B-V1.2

# Install the package
RUN apt-get update && apt-get upgrade -y \
&& apt-get install -y git build-essential \
python3 python3-pip gcc wget \
ocl-icd-opencl-dev opencl-headers clinfo \
libclblast-dev libopenblas-dev \
&& mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
&& mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \
&& rm -rf /var/lib/apt/lists/*

ENV CUDA_DOCKER_ARCH=all
ENV LLAMA_CUBLAS=1
@@ -16,26 +18,14 @@ RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fa

RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python

# Run the server
ENV MODEL=vicuna-13B-v1.5-16K-GGML

# Clean up apt cache
RUN rm -rf /var/lib/apt/lists/*

# Set a working directory for better clarity
WORKDIR /app

COPY ./amazee_server.sh /app/amazee_server.sh
COPY ./start-llama2.sh /app/start-llama2.sh
COPY ./hug_model.py /app/hug_model.py
COPY ./fix-permissions.sh /app/fix-permissions.sh
RUN chmod +x /app/fix-permissions.sh

RUN mkdir -p /data \
&& /app/fix-permissions.sh /data \
&& /app/fix-permissions.sh /app

# Make the server start script executable
RUN chmod +x /app/amazee_server.sh
# Tell LLAMA_CPP that we want to offload layers to the GPU
ENV LLAMA_CPP_ARGS="--n_gpu_layers=43"

# Set environment variable for the host
ENV HOST=0.0.0.0
@@ -44,4 +34,4 @@ ENV HOST=0.0.0.0
EXPOSE 8000

# Run the server start script
CMD ["/bin/sh", "/app/amazee_server.sh"]
CMD ["/app/start-llama2.sh"]
7 changes: 3 additions & 4 deletions hug_model.py
@@ -6,7 +6,6 @@

def symlink_model(data_dir, model_path):
    # Creating a symbolic link from destination to "model.bin"
    data_dir = '.'
    model_bin = os.path.join(data_dir, "model.bin")
    if os.path.isfile(model_bin):
        os.remove(model_bin) # remove the existing link if any
@@ -52,7 +51,7 @@ def download_file(url, destination, params):
print('.', end='', flush=True)
total_downloaded = 0
print("\nDownload complete.")

symlink_model(params['datadir'], destination)
else:
print(f"Download failed with status code {response.status_code}")
@@ -76,7 +75,7 @@ def get_user_choice(model_list):
print("Invalid input. Please enter a number corresponding to a model.")
except IndexError:
print("Invalid choice. Index out of range.")

return None

def main():
@@ -96,7 +95,7 @@ def main():
help='HuggingFace model repository filename substring match')
parser.add_argument('-d', '--datadir', type=str, default='/data',
help='Data directory to store HuggingFace models')

# Parse the arguments
args = parser.parse_args()

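start-llama2.sh (below) drives hug_model.py at container start, but it can also be run by hand; based on the flags used there and the arguments visible above, an invocation like this should download a quantized file and symlink it to model.bin in the data directory (a sketch — the exact flag semantics beyond what the diff shows are assumed):

```sh
# Sketch: fetch a q5_1 quantised file for the configured model into ./data
# -s / -f: substring matches against the HuggingFace repo and filename, -d: data directory
python3 hug_model.py -s WizardLM-13B-V1.2 -f "q5_1" -d ./data

# symlink_model() should leave a model.bin symlink next to the download
ls -l ./data/model.bin
```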
2 changes: 1 addition & 1 deletion amazee_server.sh → start-llama2.sh
100644 → 100755
@@ -5,4 +5,4 @@ ulimit -l unlimited
echo "Model: ${MODEL}"

python3 hug_model.py -s ${MODEL} -f "q5_1"
python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=43
python3 -B -m llama_cpp.server --model /data/model.bin ${LLAMA_CPP_ARGS}
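Moving the hard-coded --n_gpu_layers=43 into LLAMA_CPP_ARGS means the GPU offload (or any other llama_cpp.server flag) can be changed per environment without rebuilding the image; for example (a sketch — the flag value and image tag are illustrative):

```sh
# Sketch: override the server arguments at run time instead of baking them in
docker run --rm --gpus all -p 8000:8000 \
  -v "$(pwd)/data:/data" \
  -e LLAMA_CPP_ARGS="--n_gpu_layers=35" \
  llama2-api-gpu   # hypothetical tag for an image built from gpu.Dockerfile
```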
5 changes: 5 additions & 0 deletions ui.Dockerfile
@@ -0,0 +1,5 @@
FROM ghcr.io/mckaywrigley/chatbot-ui:main

ENV OPENAI_API_KEY=not-needed \
OPENAI_API_HOST=http://llama2-api:8000 \
NPM_CONFIG_CACHE=/tmp
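OPENAI_API_HOST points chatbot-ui at the llama2-api service over the compose network, and OPENAI_API_KEY only needs to be non-empty since the local server is not expected to validate it. Run on its own, the same image can be pointed at any compatible endpoint (a sketch — host.docker.internal assumes Docker Desktop):

```sh
# Sketch: run the UI standalone against an API reachable on the host
docker run --rm -p 3000:3000 \
  -e OPENAI_API_KEY=not-needed \
  -e OPENAI_API_HOST=http://host.docker.internal:8000 \
  $(docker build -q -f ui.Dockerfile .)
```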
