Merge pull request #1 from amazeeio/dev
update main
Schnitzel authored Aug 22, 2023
2 parents 4753dfd + a697ed6 commit afddde6
Showing 11 changed files with 58 additions and 55 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1 +1,2 @@
ENV
data
7 changes: 1 addition & 6 deletions .lagoon.yml
@@ -1,6 +1 @@
docker-compose-yaml: docker-compose.yml
environments:
  main:
    routes:
      - llama2-api:
          - llama2.marco-test6.amazeeio.review
docker-compose-yaml: docker-compose.gpu.yml
16 changes: 3 additions & 13 deletions Dockerfile
@@ -2,7 +2,7 @@
FROM python:3-slim-bullseye
ENV MODEL=WizardLM-13B-V1.2

# Update and upgrade the existing packages
# Update and upgrade the existing packages
RUN apt-get update && apt-get upgrade -y && apt-get install -y \
python3 \
python3-pip \
@@ -22,23 +22,13 @@ RUN rm -rf /var/lib/apt/lists/*
# Set a working directory for better clarity
WORKDIR /app

COPY ./amazee_server.sh /app/amazee_server.sh
COPY ./start-llama2.sh /app/start-llama2.sh
COPY ./hug_model.py /app/hug_model.py
COPY ./fix-permissions.sh /app/fix-permissions.sh
RUN chmod +x /app/fix-permissions.sh

RUN mkdir -p /data \
&& /app/fix-permissions.sh /data \
&& /app/fix-permissions.sh /app

# Make the server start script executable
RUN chmod +x /app/amazee_server.sh

# Set environment variable for the host
ENV HOST=0.0.0.0

# Expose a port for the server
EXPOSE 8000

# Run the server start script
CMD ["/bin/sh", "/app/amazee_server.sh"]
CMD ["/app/start-llama2.sh"]
7 changes: 1 addition & 6 deletions README.md
@@ -3,12 +3,7 @@ lagoon going llama2

# local run on mac M1
```sh
docker build --platform linux/amd64 .
docker run -it --platform linux/amd64
```

```sh
docker run --rm -it --platform linux/amd64 -v '/Users/marco/Downloads:/data' -p '8000:8000' $(docker build --platform linux/amd64 -q .)
docker-compose up -d
```

# curl test openai api
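The curl example itself is truncated in this view. Against llama-cpp-python's OpenAI-compatible server, a smoke test along these lines should work once the container is up (a sketch — the endpoint paths follow the OpenAI API shape and the payload is illustrative, not taken from this repo):

```sh
# List the single model served by llama-cpp-python (OpenAI-compatible endpoint)
curl http://localhost:8000/v1/models

# Minimal chat completion request; prompt and parameters are placeholders
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Say hello"}], "max_tokens": 64}'
```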
24 changes: 24 additions & 0 deletions docker-compose.gpu.yml
@@ -0,0 +1,24 @@
version: '2'
services:
  llama2-api:
    build:
      context: .
      dockerfile: gpu.Dockerfile
    labels:
      lagoon.type: basic-persistent
      lagoon.persistent: /data
      lagoon.service.port: 8000
      lagoon.autogeneratedroute: true
      lagoon.gpu: true
    volumes:
      - ./data:/data:delegated
    ports:
      - "8000:8000"
  ui:
    build:
      context: .
      dockerfile: ui.Dockerfile
    labels:
      lagoon.type: node
    ports:
      - "3000:3000"
15 changes: 12 additions & 3 deletions docker-compose.yml
@@ -3,14 +3,23 @@ services:
  llama2-api:
    build:
      context: .
      dockerfile: gpu.Dockerfile
      dockerfile: Dockerfile
    labels:
      lagoon.type: basic-persistent
      lagoon.persistent: /data
      lagoon.service.port: 8000
      lagoon.autogeneratedroute: true
      lagoon.gpu: true
    volumes:
      - ./app:/app:delegated
      - ./data:/data:delegated
    ports:
      - "8000:8000"
    platform: linux/amd64
  ui:
    user: '10000'
    build:
      context: .
      dockerfile: ui.Dockerfile
    labels:
      lagoon.type: node
    ports:
      - "3000:3000"
5 changes: 0 additions & 5 deletions fix-permissions.sh

This file was deleted.

24 changes: 7 additions & 17 deletions gpu.Dockerfile
@@ -1,13 +1,15 @@
ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}
ENV MODEL=WizardLM-13B-V1.2

# Install the package
RUN apt-get update && apt-get upgrade -y \
&& apt-get install -y git build-essential \
python3 python3-pip gcc wget \
ocl-icd-opencl-dev opencl-headers clinfo \
libclblast-dev libopenblas-dev \
&& mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
&& mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \
&& rm -rf /var/lib/apt/lists/*

ENV CUDA_DOCKER_ARCH=all
ENV LLAMA_CUBLAS=1
@@ -16,26 +18,14 @@ RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fa

RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python

# Run the server
ENV MODEL=vicuna-13B-v1.5-16K-GGML

# Clean up apt cache
RUN rm -rf /var/lib/apt/lists/*

# Set a working directory for better clarity
WORKDIR /app

COPY ./amazee_server.sh /app/amazee_server.sh
COPY ./start-llama2.sh /app/start-llama2.sh
COPY ./hug_model.py /app/hug_model.py
COPY ./fix-permissions.sh /app/fix-permissions.sh
RUN chmod +x /app/fix-permissions.sh

RUN mkdir -p /data \
&& /app/fix-permissions.sh /data \
&& /app/fix-permissions.sh /app

# Make the server start script executable
RUN chmod +x /app/amazee_server.sh
# Tell LLAMA_CPP that we want to offload layers to the GPU
ENV LLAMA_CPP_ARGS="--n_gpu_layers=43"

# Set environment variable for the host
ENV HOST=0.0.0.0
@@ -44,4 +34,4 @@ ENV HOST=0.0.0.0
EXPOSE 8000

# Run the server start script
CMD ["/bin/sh", "/app/amazee_server.sh"]
CMD ["/app/start-llama2.sh"]
7 changes: 3 additions & 4 deletions hug_model.py
@@ -6,7 +6,6 @@

def symlink_model(data_dir, model_path):
    # Creating a symbolic link from destination to "model.bin"
    data_dir = '.'
    model_bin = os.path.join(data_dir, "model.bin")
    if os.path.isfile(model_bin):
        os.remove(model_bin) # remove the existing link if any
@@ -52,7 +51,7 @@ def download_file(url, destination, params):
print('.', end='', flush=True)
total_downloaded = 0
print("\nDownload complete.")

symlink_model(params['datadir'], destination)
else:
print(f"Download failed with status code {response.status_code}")
@@ -76,7 +75,7 @@ def get_user_choice(model_list):
print("Invalid input. Please enter a number corresponding to a model.")
except IndexError:
print("Invalid choice. Index out of range.")

return None

def main():
@@ -96,7 +95,7 @@ def main():
help='HuggingFace model repository filename substring match')
parser.add_argument('-d', '--datadir', type=str, default='/data',
help='Data directory to store HuggingFace models')

# Parse the arguments
args = parser.parse_args()

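start-llama2.sh (below) drives hug_model.py at container start, but it can also be run by hand; based on the flags used there and the arguments visible above, an invocation like this should download a quantized file and symlink it to model.bin in the data directory (a sketch — the exact flag semantics beyond what the diff shows are assumed):

```sh
# Sketch: fetch a q5_1 quantised file for the configured model into ./data
# -s / -f: substring matches against the HuggingFace repo and filename, -d: data directory
python3 hug_model.py -s WizardLM-13B-V1.2 -f "q5_1" -d ./data

# symlink_model() should leave a model.bin symlink next to the download
ls -l ./data/model.bin
```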
2 changes: 1 addition & 1 deletion amazee_server.sh → start-llama2.sh
100644 → 100755
@@ -5,4 +5,4 @@ ulimit -l unlimited
echo "Model: ${MODEL}"

python3 hug_model.py -s ${MODEL} -f "q5_1"
python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=43
python3 -B -m llama_cpp.server --model /data/model.bin ${LLAMA_CPP_ARGS}
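Moving the hard-coded --n_gpu_layers=43 into LLAMA_CPP_ARGS means the GPU offload (or any other llama_cpp.server flag) can be changed per environment without rebuilding the image; for example (a sketch — the flag value and image tag are illustrative):

```sh
# Sketch: override the server arguments at run time instead of baking them in
docker run --rm --gpus all -p 8000:8000 \
  -v "$(pwd)/data:/data" \
  -e LLAMA_CPP_ARGS="--n_gpu_layers=35" \
  llama2-api-gpu   # hypothetical tag for an image built from gpu.Dockerfile
```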
5 changes: 5 additions & 0 deletions ui.Dockerfile
@@ -0,0 +1,5 @@
FROM ghcr.io/mckaywrigley/chatbot-ui:main

ENV OPENAI_API_KEY=not-needed \
OPENAI_API_HOST=http://llama2-api:8000 \
NPM_CONFIG_CACHE=/tmp
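OPENAI_API_HOST points chatbot-ui at the llama2-api service over the compose network, and OPENAI_API_KEY only needs to be non-empty since the local server is not expected to validate it. Run on its own, the same image can be pointed at any compatible endpoint (a sketch — host.docker.internal assumes Docker Desktop):

```sh
# Sketch: run the UI standalone against an API reachable on the host
docker run --rm -p 3000:3000 \
  -e OPENAI_API_KEY=not-needed \
  -e OPENAI_API_HOST=http://host.docker.internal:8000 \
  $(docker build -q -f ui.Dockerfile .)
```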
