Commit: Add everything

Signed-off-by: Xiaodong Ye <[email protected]>
yeahdongcn committed Jan 30, 2024
1 parent 32b28a6 commit f91d6ec
Showing 9 changed files with 370 additions and 1 deletion.
105 changes: 105 additions & 0 deletions .github/workflows/docker-publish.yml
@@ -0,0 +1,105 @@
name: Docker

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

on:
  schedule:
    - cron: '21 19 * * *'
  push:
    branches: [ "main" ]
    # Publish semver tags as releases.
    tags: [ 'v*.*.*' ]
  pull_request:
    branches: [ "main" ]

env:
  # Use docker.io for Docker Hub if empty
  REGISTRY: ghcr.io
  # github.repository as <account>/<repo>
  # XXX: Updated
  IMAGE_NAME: yeahdongcn/litellm-proxy


jobs:
  build:

    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # Install the cosign tool except on PR
      # https://github.com/sigstore/cosign-installer
      - name: Install cosign
        if: github.event_name != 'pull_request'
        uses: sigstore/cosign-installer@6e04d228eb30da1757ee4e1dd75a0ec73a653e06 #v3.1.1
        with:
          cosign-release: 'v2.1.1'

      # XXX: Updated
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      # Set up BuildKit Docker container builder to be able to build
      # multi-platform images and export cache
      # https://github.com/docker/setup-buildx-action
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into registry ${{ env.REGISTRY }}
        if: github.event_name != 'pull_request'
        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Extract metadata (tags, labels) for Docker
      # https://github.com/docker/metadata-action
      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0
        with:
          # XXX: Updated
          context: ./docker/
          platforms: linux/amd64,linux/arm64
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Sign the resulting Docker image digest except on PRs.
      # This will only write to the public Rekor transparency log when the Docker
      # repository is public to avoid leaking data. If you would like to publish
      # transparency data even for private images, pass --force to cosign below.
      # https://github.com/sigstore/cosign
      - name: Sign the published Docker image
        if: ${{ github.event_name != 'pull_request' }}
        env:
          # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable
          TAGS: ${{ steps.meta.outputs.tags }}
          DIGEST: ${{ steps.build-and-push.outputs.digest }}
        # This step uses the identity token to provision an ephemeral certificate
        # against the sigstore community Fulcio instance.
        run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
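
A quick way to confirm the keyless signature after a push is cosign verify. A minimal sketch, assuming the image was signed by this workflow via GitHub Actions OIDC (the identity regexp and tag below are illustrative, not taken from the repo):

```bash
# Verify the signature written by the workflow's cosign step.
# The certificate identity/issuer values are assumptions based on
# keyless signing from GitHub Actions; adjust them to your repository.
cosign verify ghcr.io/yeahdongcn/litellm-proxy:main \
  --certificate-identity-regexp 'https://github.com/yeahdongcn/OllamaStack/.*' \
  --certificate-oidc-issuer https://token.actions.githubusercontent.com
```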
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
ollama.pid
bin/
53 changes: 52 additions & 1 deletion README.md
@@ -1 +1,52 @@
-# MacAI
+# OllamaStack

Set up the [Ollama](https://github.com/jmorganca/ollama) stack on macOS.

- [OllamaStack](#ollamastack)
  - [Prerequisites](#prerequisites)
  - [Diagram](#diagram)
  - [Quickstart](#quickstart)
  - [Demo](#demo)

## Prerequisites
* A [Metal-capable](https://support.apple.com/en-us/102894) Mac device.
* [Mods](https://github.com/charmbracelet/mods): AI for the command line, built for pipelines.
* [Docker](https://www.docker.com/products/docker-desktop): The fastest way to containerize applications.

## Diagram

```mermaid
graph LR;
    subgraph Host
        subgraph CLI
            B(Mods)
        end
        subgraph Server
            C(Ollama)
            D[Metal]
        end
    end
    subgraph Container
        E(LiteLLM Proxy)
        F(Ollama Web UI)
    end
    A(User) --> |Terminal|B;
    A --> |Browser|F;
    B --> |OpenAI API|E;
    E --> |REST API|C;
    F --> |REST API|C;
    C-. Link .-> D;
```

## Quickstart

```bash
$ git clone https://github.com/yeahdongcn/OllamaStack.git
$ cd OllamaStack
$ ./start.sh   # launch Ollama, the LiteLLM proxy, and (optionally) the web UI
$ ./stop.sh    # tear everything down when you are finished
```

### Demo

![633462](https://github.com/yeahdongcn/OllamaStack/assets/2831050/1290b08a-6636-493e-8ad4-edcb18971198)
13 changes: 13 additions & 0 deletions docker/Dockerfile
@@ -0,0 +1,13 @@
# Runtime image
ARG LITELLM_RUNTIME_IMAGE=python:3.11-alpine

# Runtime stage
FROM $LITELLM_RUNTIME_IMAGE AS runtime

RUN pip install 'litellm[proxy]'

EXPOSE 8000/tcp

# Set your entrypoint and command
ENTRYPOINT ["litellm"]
CMD ["--port", "8000"]
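
For local experimentation the image can also be built and run by hand. A minimal sketch (the `dev` tag is illustrative; the config mount mirrors how start.sh runs the published image):

```bash
# Build the proxy image from the docker/ build context (tag is illustrative).
docker build -t litellm-proxy:dev ./docker/

# Run it with the repository's LiteLLM config mounted read-only,
# the same way start.sh runs the published ghcr.io image.
docker run --rm -p 8000:8000 \
  --mount type=bind,source="$(pwd)"/litellm-config.yaml,target=/config.yaml,readonly \
  litellm-proxy:dev --config /config.yaml
```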
9 changes: 9 additions & 0 deletions docker/Makefile
@@ -0,0 +1,9 @@
IMG := r0ckstar/litellm-proxy

.PHONY: docker-build
docker-build:
	docker build -t ${IMG} .

.PHONY: docker-push
docker-push: docker-build
	docker push ${IMG}
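
Since the Makefile lives in docker/, it can be driven from the repository root with make -C; overriding IMG is optional:

```bash
# Build, or build and push, the image via the Makefile in docker/.
make -C docker docker-build
make -C docker docker-push IMG=yourname/litellm-proxy  # optional IMG override
```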
8 changes: 8 additions & 0 deletions litellm-config.yaml
@@ -0,0 +1,8 @@
router_settings:
  routing_strategy: "least-busy"

model_list:
  - model_name: ollama
    litellm_params:
      model: "ollama/llama2"
      api_base: http://host.docker.internal:11434
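
Once the proxy is up (see start.sh below), the model_name above is what goes in the model field of an OpenAI-style request. A quick smoke test, assuming the proxy is listening on localhost:8000 as start.sh configures it:

```bash
# Send an OpenAI-compatible chat completion through the LiteLLM proxy.
curl http://localhost:8000/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "ollama",
        "messages": [{"role": "user", "content": "Say hello in one sentence."}]
      }'
```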
84 changes: 84 additions & 0 deletions mods.yml
@@ -0,0 +1,84 @@

# Default model (gpt-3.5-turbo, gpt-4, ggml-gpt4all-j...).
default-model: ollama
# Text to append when using the -f flag.
format-text: Format the response as markdown without enclosing backticks.
# Ask for the response to be formatted as markdown unless otherwise set.
format: false
# Render output as raw text when connected to a TTY.
raw: false
# Quiet mode (hide the spinner while loading).
quiet: false
# Temperature (randomness) of results, from 0.0 to 2.0.
temp: 1.0
# TopP, an alternative to temperature that narrows response, from 0.0 to 1.0.
topp: 1.0
# Turn off the client-side limit on the size of the input into the model.
no-limit: false
# Include the prompt from the arguments in the response.
include-prompt-args: false
# Include the prompt from the arguments and stdin, truncate stdin to specified number of lines.
include-prompt: 0
# Maximum number of times to retry API calls.
max-retries: 5
# Your desired level of fanciness.
fanciness: 10
# Text to show while generating.
status-text: Generating
# Default character limit on input to model.
max-input-chars: 12250
# Maximum number of tokens in response.
# max-tokens: 100
# Aliases and endpoints for OpenAI compatible REST API.
apis:
  openai:
    base-url: https://api.openai.com/v1
    api-key-env: OPENAI_API_KEY
    models:
      gpt-4:
        aliases: ["4"]
        max-input-chars: 24500
        fallback: gpt-3.5-turbo
      gpt-4-32k:
        aliases: ["32k"]
        max-input-chars: 98000
        fallback: gpt-4
      gpt-3.5-turbo:
        aliases: ["35t"]
        max-input-chars: 12250
        fallback: gpt-3.5
      gpt-3.5-turbo-16k:
        aliases: ["35t16k"]
        max-input-chars: 44500
        fallback: gpt-3.5
      gpt-3.5:
        aliases: ["35"]
        max-input-chars: 12250
        fallback:
  localai:
    # LocalAI setup instructions: https://github.com/go-skynet/LocalAI#example-use-gpt4all-j-model
    base-url: http://localhost:8000
    api-key: "IGNORED"
    models:
      ollama:
        aliases: ["local"]
        max-input-chars: 12250
        fallback:
  azure:
    # Set to 'azure-ad' to use Active Directory
    # Azure OpenAI setup: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource
    base-url: https://YOUR_RESOURCE_NAME.openai.azure.com
    api-key-env: AZURE_OPENAI_KEY
    models:
      gpt-4:
        aliases: ["az4"]
        max-input-chars: 24500
        fallback: gpt-35-turbo
      gpt-35-turbo:
        aliases: ["az35t"]
        max-input-chars: 12250
        fallback: gpt-35
      gpt-35:
        aliases: ["az35"]
        max-input-chars: 12250
        fallback:
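
With this file copied into ~/Library/Application Support/mods/ (start.sh offers to do this), the ollama entry above is the default model, so plain mods invocations route through the LiteLLM proxy. The prompts below are illustrative:

```bash
# Ask the default model (the "ollama" entry above).
mods "Explain what a reverse proxy does in one paragraph."

# Or select it explicitly by its alias and pipe input through it.
git diff | mods -m local "Summarize this diff as a commit message."
```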
86 changes: 86 additions & 0 deletions start.sh
@@ -0,0 +1,86 @@
#!/usr/bin/env bash

# Initialize variables:
LAST_SPINNER_PID=""
OLLAMA_BINARY="$(pwd)/bin/ollama"
OLLAMA_PID="$(pwd)/ollama.pid"

# Kill background processes on exit
trap exit_trap EXIT
function exit_trap {
    # Kill the last spinner process
    kill_spinner
}

# Draw a spinner so the user knows something is happening
function spinner {
    local delay=0.1
    local spinstr='/-\|'
    printf "..."
    while true; do
        local temp=${spinstr#?}
        printf "[%c]" "$spinstr"
        spinstr=$temp${spinstr%"$temp"}
        sleep "$delay"
        printf "\b\b\b"
    done
}

function kill_spinner {
    if [ -n "$LAST_SPINNER_PID" ]; then
        kill >/dev/null 2>&1 "$LAST_SPINNER_PID"
        wait "$LAST_SPINNER_PID" 2>/dev/null
        printf "\b\b\bdone\n"
        LAST_SPINNER_PID=""
    fi
}

# Echo text to stdout, then keep a spinner running until the next step
# echo_summary "something to say"
function echo_summary {
    kill_spinner
    echo -n -e "$@"
    spinner &
    LAST_SPINNER_PID=$!
}

# Create the directory that will hold OLLAMA_BINARY
mkdir -p "$(dirname "$OLLAMA_BINARY")"

# Download ollama if we don't already have it
echo_summary "Downloading ollama to $(dirname "$OLLAMA_BINARY")"
if [ ! -f "$OLLAMA_BINARY" ]; then
    # Resolve the browser_download_url of the ollama-darwin asset
    # from the latest GitHub release, then fetch it with wget.
    curl -sL https://api.github.com/repos/jmorganca/ollama/releases/latest |
        grep "browser_download_url.*ollama-darwin" |
        cut -d : -f 2,3 |
        tr -d \" |
        wget -O "$OLLAMA_BINARY" -qi -
fi
kill_spinner

chmod +x "$OLLAMA_BINARY"
"$OLLAMA_BINARY" serve >/dev/null 2>&1 &
echo $! >"$OLLAMA_PID"
echo "ollama started"

docker rm -f litellm-proxy >/dev/null 2>&1
docker run --mount type=bind,source="$(pwd)"/litellm-config.yaml,target=/config.yaml,readonly \
    -p 8000:8000 --add-host=host.docker.internal:host-gateway \
    -d --name litellm-proxy ghcr.io/yeahdongcn/litellm-proxy:main --drop_params --config /config.yaml
echo "litellm started"

read -rp "Do you want to use the predefined mods config? y/n [n]: " USE_MODS
if [[ $USE_MODS =~ ^[Yy]$ ]]; then
    # Back up any existing mods config before replacing it
    if [ -f ~/Library/Application\ Support/mods/mods.yml ]; then
        mv ~/Library/Application\ Support/mods/mods.yml ~/Library/Application\ Support/mods/mods.yml.backup
    fi
    cp mods.yml ~/Library/Application\ Support/mods/mods.yml
fi

read -rp "Do you want to start the web UI? y/n [n]: " USE_WEBUI
if [[ $USE_WEBUI =~ ^[Yy]$ ]]; then
    mkdir -p ollama-webui
    docker rm -f ollama-webui >/dev/null 2>&1
    docker run --pull always -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v ollama-webui:/app/backend/data \
        --name ollama-webui --restart always ghcr.io/ollama-webui/ollama-webui:main
    sleep 5
    open http://localhost:3000
fi
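
After start.sh finishes, each piece can be smoke-tested on its own. A rough sketch: /api/tags is Ollama's model-listing endpoint, while the proxy's /health path is an assumption based on LiteLLM's proxy docs:

```bash
# The native Ollama server should list its local models.
curl -s http://localhost:11434/api/tags

# The LiteLLM proxy container should respond on port 8000.
curl -s http://localhost:8000/health

# The web UI, if started, is at http://localhost:3000.
```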
11 changes: 11 additions & 0 deletions stop.sh
@@ -0,0 +1,11 @@
#!/usr/bin/env bash

# Initialize variables:
OLLAMA_PID="$(pwd)/ollama.pid"

# Stop the native ollama server, if its pid file exists
if [ -f "$OLLAMA_PID" ]; then
    kill -9 "$(cat "$OLLAMA_PID")"
    rm -f "$OLLAMA_PID"
fi
echo "Stopped ollama"
docker rm -f litellm-proxy >/dev/null 2>&1
echo "Stopped litellm-proxy"
docker rm -f ollama-webui >/dev/null 2>&1
echo "Stopped ollama-webui"
