-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Xiaodong Ye <[email protected]>
- Loading branch information
1 parent
32b28a6
commit f91d6ec
Showing
9 changed files
with
370 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
name: Docker

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

on:
  schedule:
    - cron: '21 19 * * *'
  push:
    branches: [ "main" ]
    # Publish semver tags as releases.
    tags: [ 'v*.*.*' ]
  pull_request:
    branches: [ "main" ]

env:
  # Use docker.io for Docker Hub if empty
  REGISTRY: ghcr.io
  # github.repository as <account>/<repo>
  # XXX: Updated
  IMAGE_NAME: yeahdongcn/litellm-proxy

jobs:
  build:

    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write

    steps:
      # Pinned to a full commit SHA for consistency with the other actions
      # in this workflow (supply-chain hardening).
      - name: Checkout repository
        uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0

      # Install the cosign tool except on PR
      # https://github.com/sigstore/cosign-installer
      - name: Install cosign
        if: github.event_name != 'pull_request'
        uses: sigstore/cosign-installer@6e04d228eb30da1757ee4e1dd75a0ec73a653e06 #v3.1.1
        with:
          cosign-release: 'v2.1.1'

      # XXX: Updated
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      # Set up BuildKit Docker container builder to be able to build
      # multi-platform images and export cache
      # https://github.com/docker/setup-buildx-action
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into registry ${{ env.REGISTRY }}
        if: github.event_name != 'pull_request'
        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Extract metadata (tags, labels) for Docker
      # https://github.com/docker/metadata-action
      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0
        with:
          # XXX: Updated
          context: ./docker/
          platforms: linux/amd64,linux/arm64
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Sign the resulting Docker image digest except on PRs.
      # This will only write to the public Rekor transparency log when the Docker
      # repository is public to avoid leaking data. If you would like to publish
      # transparency data even for private images, pass --force to cosign below.
      # https://github.com/sigstore/cosign
      - name: Sign the published Docker image
        if: ${{ github.event_name != 'pull_request' }}
        env:
          # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable
          TAGS: ${{ steps.meta.outputs.tags }}
          DIGEST: ${{ steps.build-and-push.outputs.digest }}
        # This step uses the identity token to provision an ephemeral certificate
        # against the sigstore community Fulcio instance.
        run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
ollama.pid | ||
bin/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,52 @@ | ||
# MacAI | ||
# OllamaStack | ||
|
||
Set up the [Ollama](https://github.com/jmorganca/ollama) stack on macOS.
|
||
- [OllamaStack](#ollamastack) | ||
- [Prerequisites](#prerequisites) | ||
- [Diagram](#diagram) | ||
- [Quickstart](#quickstart) | ||
- [Demo](#demo) | ||
|
||
## Prerequisites | ||
* A [Metal capable](https://support.apple.com/en-us/102894) Mac device. | ||
* [Mods](https://github.com/charmbracelet/mods): AI for the command line, built for pipelines. | ||
* [Docker](https://www.docker.com/products/docker-desktop): The fastest way to containerize applications. | ||
|
||
## Diagram | ||
|
||
```mermaid | ||
graph LR; | ||
subgraph Host | ||
subgraph CLI | ||
B(Mods) | ||
end | ||
subgraph Server | ||
C(Ollama) | ||
D[Metal] | ||
end | ||
end | ||
subgraph Container | ||
E(LiteLLM Proxy) | ||
F(Ollama Web UI) | ||
end | ||
A(User) --> |Terminal|B; | ||
A --> |Browser|F; | ||
B --> |OpenAI API|E; | ||
E --> |REST API|C; | ||
F --> |REST API|C; | ||
C-. Link .-> D; | ||
``` | ||
|
||
## Quickstart | ||
|
||
```bash | ||
$ git clone https://github.com/yeahdongcn/OllamaStack.git | ||
$ cd OllamaStack | ||
$ ./start.sh | ||
$ ./stop.sh | ||
``` | ||
|
||
### Demo | ||
|
||
![633462](https://github.com/yeahdongcn/OllamaStack/assets/2831050/1290b08a-6636-493e-8ad4-edcb18971198) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Runtime image
ARG LITELLM_RUNTIME_IMAGE=python:3.11-alpine

# Runtime stage — `AS` (uppercase) is the conventional stage-name keyword casing.
FROM $LITELLM_RUNTIME_IMAGE AS runtime

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir 'litellm[proxy]'

EXPOSE 8000/tcp

# Set your entrypoint and command
ENTRYPOINT ["litellm"]
CMD ["--port", "8000"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Docker image reference shared by the build and push targets.
IMG := r0ckstar/litellm-proxy

# Build the proxy image from the Dockerfile in the current directory.
.PHONY: docker-build
docker-build:
	docker build -t $(IMG) .

# Push the image; depends on docker-build so the pushed tag is fresh.
.PHONY: docker-push
docker-push: docker-build
	docker push $(IMG)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# LiteLLM proxy configuration (mounted at /config.yaml by start.sh).
router_settings:
  routing_strategy: least-busy

model_list:
  - model_name: ollama
    litellm_params:
      model: ollama/llama2
      # host.docker.internal resolves to the macOS host from inside the container.
      api_base: http://host.docker.internal:11434
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
|
||
# Default model (gpt-3.5-turbo, gpt-4, ggml-gpt4all-j...). | ||
default-model: ollama | ||
# Text to append when using the -f flag. | ||
format-text: Format the response as markdown without enclosing backticks. | ||
# Ask for the response to be formatted as markdown unless otherwise set. | ||
format: false | ||
# Render output as raw text when connected to a TTY. | ||
raw: false | ||
# Quiet mode (hide the spinner while loading). | ||
quiet: false | ||
# Temperature (randomness) of results, from 0.0 to 2.0. | ||
temp: 1.0 | ||
# TopP, an alternative to temperature that narrows response, from 0.0 to 1.0. | ||
topp: 1.0 | ||
# Turn off the client-side limit on the size of the input into the model. | ||
no-limit: false | ||
# Include the prompt from the arguments in the response. | ||
include-prompt-args: false | ||
# Include the prompt from the arguments and stdin, truncate stdin to specified number of lines. | ||
include-prompt: 0 | ||
# Maximum number of times to retry API calls. | ||
max-retries: 5 | ||
# Your desired level of fanciness. | ||
fanciness: 10 | ||
# Text to show while generating. | ||
status-text: Generating | ||
# Default character limit on input to model. | ||
max-input-chars: 12250 | ||
# Maximum number of tokens in response. | ||
# max-tokens: 100 | ||
# Aliases and endpoints for OpenAI compatible REST API. | ||
apis: | ||
openai: | ||
base-url: https://api.openai.com/v1 | ||
api-key-env: OPENAI_API_KEY | ||
models: | ||
gpt-4: | ||
aliases: ["4"] | ||
max-input-chars: 24500 | ||
fallback: gpt-3.5-turbo | ||
gpt-4-32k: | ||
aliases: ["32k"] | ||
max-input-chars: 98000 | ||
fallback: gpt-4 | ||
gpt-3.5-turbo: | ||
aliases: ["35t"] | ||
max-input-chars: 12250 | ||
fallback: gpt-3.5 | ||
gpt-3.5-turbo-16k: | ||
aliases: ["35t16k"] | ||
max-input-chars: 44500 | ||
fallback: gpt-3.5 | ||
gpt-3.5: | ||
aliases: ["35"] | ||
max-input-chars: 12250 | ||
fallback: | ||
localai: | ||
      # This "localai" entry actually points at the LiteLLM proxy container
      # started by start.sh (OpenAI-compatible API on port 8000).
base-url: http://localhost:8000 | ||
api-key: "IGNORED" | ||
models: | ||
ollama: | ||
aliases: ["local"] | ||
max-input-chars: 12250 | ||
fallback: | ||
azure: | ||
# Set to 'azure-ad' to use Active Directory | ||
# Azure OpenAI setup: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource | ||
base-url: https://YOUR_RESOURCE_NAME.openai.azure.com | ||
api-key-env: AZURE_OPENAI_KEY | ||
models: | ||
gpt-4: | ||
aliases: ["az4"] | ||
max-input-chars: 24500 | ||
fallback: gpt-35-turbo | ||
gpt-35-turbo: | ||
aliases: ["az35t"] | ||
max-input-chars: 12250 | ||
fallback: gpt-35 | ||
gpt-35: | ||
aliases: ["az35"] | ||
max-input-chars: 12250 | ||
fallback: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/usr/bin/env bash

# Initialize variables:
LAST_SPINNER_PID=""
OLLAMA_BINARY="$(pwd)/bin/ollama"
OLLAMA_PID="$(pwd)/ollama.pid"

# Make sure any in-flight spinner is stopped however the script exits.
function exit_trap {
    kill_spinner
}
trap exit_trap EXIT
|
||
# Draw a spinner so the user knows something is happening.
# Loops forever; callers background it and stop it via kill_spinner.
function spinner {
    local delay=0.1
    local spinstr='/-\|'
    printf "..."
    # `while true` — the original `while [ true ]` only tested that the
    # string "true" is non-empty, which looped forever by accident.
    while true; do
        local temp=${spinstr#?}
        printf "[%c]" "$spinstr"
        local spinstr=$temp${spinstr%"$temp"}
        sleep "$delay"
        printf "\b\b\b"
    done
}

# Stop the most recently started spinner (if any) and print "done".
function kill_spinner {
    if [ -n "$LAST_SPINNER_PID" ]; then
        kill >/dev/null 2>&1 "$LAST_SPINNER_PID"
        wait "$LAST_SPINNER_PID" 2>/dev/null
        printf "\b\b\bdone\n"
        LAST_SPINNER_PID=""
    fi
}

# Print a status message and start a spinner after it.
# echo_summary "something to say"
function echo_summary {
    kill_spinner
    # Quote "$@" so the caller's arguments keep their spacing.
    echo -n -e "$@"
    spinner &
    LAST_SPINNER_PID=$!
}
|
||
# Create dir from OLLAMA_BINARY (quoted: path may contain spaces).
mkdir -p "$(dirname "$OLLAMA_BINARY")"

# Download ollama — only announce (and spin) when a download actually
# happens; the original printed "Downloading..." even when the binary
# was already present and the download was skipped.
if [ ! -f "$OLLAMA_BINARY" ]; then
    echo_summary "Downloading ollama to $(dirname "$OLLAMA_BINARY")"
    curl -sL https://api.github.com/repos/jmorganca/ollama/releases/latest |
        grep "browser_download_url.*ollama-darwin" |
        cut -d : -f 2,3 |
        tr -d \" |
        wget -O "$OLLAMA_BINARY" -qi -
fi
kill_spinner

chmod +x "$OLLAMA_BINARY"
"$OLLAMA_BINARY" serve >/dev/null 2>&1 &
echo $! >"$OLLAMA_PID"
echo "ollama started"

docker rm -f litellm-proxy >/dev/null 2>&1
docker run --mount type=bind,source="$(pwd)"/litellm-config.yaml,target=/config.yaml,readonly \
    -p 8000:8000 --add-host=host.docker.internal:host-gateway \
    -d --name litellm-proxy ghcr.io/yeahdongcn/litellm-proxy:main --drop_params --config /config.yaml
echo "litellm started"
|
||
read -rp "Do you want to use predefined mods config? y/n [n]: " USE_MODS
if [[ $USE_MODS =~ ^[Yy]$ ]]; then
    # Only back up an existing config — the unconditional `mv` errored
    # on first-time installs where no mods.yml exists yet.
    if [ -f ~/Library/Application\ Support/mods/mods.yml ]; then
        mv ~/Library/Application\ Support/mods/mods.yml ~/Library/Application\ Support/mods/mods.yml.backup
    fi
    mkdir -p ~/Library/Application\ Support/mods
    cp mods.yml ~/Library/Application\ Support/mods/mods.yml
fi

read -rp "Do you want to start web UI? y/n [n]: " USE_WEBUI
if [[ $USE_WEBUI =~ ^[Yy]$ ]]; then
    mkdir -p ollama-webui
    docker rm -f ollama-webui >/dev/null 2>&1
    docker run --pull always -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v ollama-webui:/app/backend/data \
        --name ollama-webui --restart always ghcr.io/ollama-webui/ollama-webui:main
    sleep 5
    open http://localhost:3000
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/usr/bin/env bash

# Initialize variables:
OLLAMA_PID="$(pwd)/ollama.pid"

# Only kill ollama when start.sh recorded a PID; the unguarded, unquoted
# `kill -9 $(cat $OLLAMA_PID)` errored noisily when the file was missing.
# Remove the pid file afterwards so a stale PID is never reused.
if [ -f "$OLLAMA_PID" ]; then
    kill -9 "$(cat "$OLLAMA_PID")" 2>/dev/null
    rm -f "$OLLAMA_PID"
fi
echo "Stopped ollama"
docker rm -f litellm-proxy >/dev/null 2>&1
echo "Stopped litellm-proxy"
docker rm -f ollama-webui >/dev/null 2>&1
echo "Stopped ollama-webui"