#---
# name: llama_cpp:builder
# group: llm
# config: config.py
# depends: [cuda, cudnn, cmake, python, numpy, huggingface_hub]
# requires: '>=34.1.0'
# test: test_version.py
# docs: docs.md
#---
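# The header above is package metadata parsed by the jetson-containers build
# system (package name, group, dependency chain, version requirement, and test
# script). A typical build invocation (a sketch, assuming the standard
# jetson-containers CLI; the package name comes from the header above):
#   jetson-containers build llama_cpp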
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
ARG CUDA_ARCHITECTURES \
LLAMA_CPP_PYTHON_REPO \
LLAMA_CPP_PYTHON_BRANCH \
LLAMA_CPP_PYTHON_DIR="/opt/llama-cpp-python"
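# Standalone build example (a sketch - every value below is an illustrative
# assumption: 87 is the compute capability of Jetson AGX Orin, and
# abetlen/llama-cpp-python is the upstream bindings repo):
#   docker build -f Dockerfile.builder \
#     --build-arg BASE_IMAGE=<your CUDA base image> \
#     --build-arg CUDA_ARCHITECTURES=87 \
#     --build-arg LLAMA_CPP_PYTHON_REPO=abetlen/llama-cpp-python \
#     --build-arg LLAMA_CPP_PYTHON_BRANCH=main \
#     .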
# the llama-cpp-python bindings vendor llama.cpp as a git submodule - build against that pinned version for consistency
# fetching the branch ref first busts the Docker layer cache whenever the upstream branch moves
ADD https://api.github.com/repos/${LLAMA_CPP_PYTHON_REPO}/git/refs/heads/${LLAMA_CPP_PYTHON_BRANCH} /tmp/llama_cpp_python_version.json
RUN set -ex \
&& git clone --branch=${LLAMA_CPP_PYTHON_BRANCH} --depth=1 --recursive https://github.com/${LLAMA_CPP_PYTHON_REPO} ${LLAMA_CPP_PYTHON_DIR} \
&& ln -s "$LLAMA_CPP_PYTHON_DIR/vendor/llama.cpp" "$LLAMA_CPP_PYTHON_DIR/llama.cpp" \
\
# build C++ libraries \
&& mkdir -p "$LLAMA_CPP_PYTHON_DIR/vendor/llama.cpp/build" \
&& cmake \
-B "$LLAMA_CPP_PYTHON_DIR/vendor/llama.cpp/build" \
-S "$LLAMA_CPP_PYTHON_DIR/vendor/llama.cpp" \
-DLLAMA_CUBLAS=on \
-DLLAMA_CUDA_F16=1 \
-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES} \
&& cmake \
--build "$LLAMA_CPP_PYTHON_DIR/vendor/llama.cpp/build" \
--config Release \
--parallel $(nproc) \
&& ln -s $LLAMA_CPP_PYTHON_DIR/vendor/llama.cpp/build/bin $LLAMA_CPP_PYTHON_DIR/llama.cpp/bin \
\
# build Python bindings \
&& CMAKE_ARGS="-DLLAMA_CUBLAS=on -DLLAMA_CUDA_F16=1 -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}" FORCE_CMAKE=1 \
pip3 wheel --wheel-dir=/opt --verbose "$LLAMA_CPP_PYTHON_DIR" \
\
# install the wheel, along with the server dependencies \
# that python3 -m llama_cpp.server needs but does not declare (uvicorn et al.) \
&& pip3 install --no-cache-dir --verbose \
/opt/llama_cpp_python*.whl \
typing-extensions \
uvicorn \
anyio \
starlette \
sse-starlette \
starlette-context \
fastapi \
pydantic-settings
# add the benchmark script alongside the compiled llama.cpp binaries
COPY benchmark.py ${LLAMA_CPP_PYTHON_DIR}/llama.cpp/bin/benchmark.py
# sanity check: the wheel installed, the module imports, and the server entrypoint runs
RUN set -ex \
&& pip3 show llama-cpp-python | grep llama \
&& python3 -c 'import llama_cpp' \
&& python3 -m llama_cpp.server --help
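# Example of serving a model with the installed bindings (illustrative only -
# the model path is an assumption; any GGUF model file works):
#   python3 -m llama_cpp.server --model /data/models/model.gguf \
#     --n_gpu_layers -1 --host 0.0.0.0 --port 8000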