From f91d6ec8550495332b42b4edeb4dc026a16435f6 Mon Sep 17 00:00:00 2001
From: Xiaodong Ye
Date: Tue, 30 Jan 2024 08:57:57 +0800
Subject: [PATCH] Add everything

Signed-off-by: Xiaodong Ye
---
 .github/workflows/docker-publish.yml | 105 +++++++++++++++++++++++++++
 .gitignore                           |   2 +
 README.md                            |  53 +++++++++++++-
 docker/Dockerfile                    |  13 ++++
 docker/Makefile                      |   9 +++
 litellm-config.yaml                  |   8 ++
 mods.yml                             |  84 +++++++++++++++++++++
 start.sh                             |  86 ++++++++++++++++++++++
 stop.sh                              |  11 +++
 9 files changed, 370 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/docker-publish.yml
 create mode 100644 .gitignore
 create mode 100644 docker/Dockerfile
 create mode 100644 docker/Makefile
 create mode 100644 litellm-config.yaml
 create mode 100644 mods.yml
 create mode 100755 start.sh
 create mode 100755 stop.sh

diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 0000000..67dc76f
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,105 @@
+name: Docker
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+on:
+  schedule:
+    - cron: '21 19 * * *'
+  push:
+    branches: [ "main" ]
+    # Publish semver tags as releases.
+    tags: [ 'v*.*.*' ]
+  pull_request:
+    branches: [ "main" ]
+
+env:
+  # Use docker.io for Docker Hub if empty
+  REGISTRY: ghcr.io
+  # github.repository as <account>/<repo>
+  # XXX: Updated
+  IMAGE_NAME: yeahdongcn/litellm-proxy
+
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      # This is used to complete the identity challenge
+      # with sigstore/fulcio when running outside of PRs.
+      id-token: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      # Install the cosign tool except on PR
+      # https://github.com/sigstore/cosign-installer
+      - name: Install cosign
+        if: github.event_name != 'pull_request'
+        uses: sigstore/cosign-installer@6e04d228eb30da1757ee4e1dd75a0ec73a653e06 # v3.1.1
+        with:
+          cosign-release: 'v2.1.1'
+
+      # XXX: Updated
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      # Set up BuildKit Docker container builder to be able to build
+      # multi-platform images and export cache
+      # https://github.com/docker/setup-buildx-action
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0
+
+      # Login against a Docker registry except on PR
+      # https://github.com/docker/login-action
+      - name: Log into registry ${{ env.REGISTRY }}
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Extract metadata (tags, labels) for Docker
+      # https://github.com/docker/metadata-action
+      - name: Extract Docker metadata
+        id: meta
+        uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      # Build and push Docker image with Buildx (don't push on PR)
+      # https://github.com/docker/build-push-action
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0
+        with:
+          # XXX: Updated
+          context: ./docker/
+          platforms: linux/amd64,linux/arm64
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      # Sign the resulting Docker image digest except on PRs.
+      # This will only write to the public Rekor transparency log when the Docker
+      # repository is public to avoid leaking data. If you would like to publish
+      # transparency data even for private images, pass --force to cosign below.
+      # https://github.com/sigstore/cosign
+      - name: Sign the published Docker image
+        if: ${{ github.event_name != 'pull_request' }}
+        env:
+          # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable
+          TAGS: ${{ steps.meta.outputs.tags }}
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        # This step uses the identity token to provision an ephemeral certificate
+        # against the sigstore community Fulcio instance.
+        run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
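The signing step above is keyless: the workflow's OIDC identity token is exchanged for a short-lived Fulcio certificate, so no signing key lives in the repository. As a minimal sketch of how a consumer could check the result (the identity regexp is an assumption based on this repository's owner; the `:main` tag matches the image that start.sh pulls):

```bash
# Verify the keyless cosign signature against the Rekor transparency log.
# --certificate-identity-regexp below is an assumed pattern; tighten it to
# the exact workflow URL of your fork if you publish elsewhere.
cosign verify \
  --certificate-oidc-issuer https://token.actions.githubusercontent.com \
  --certificate-identity-regexp 'https://github.com/yeahdongcn/.*' \
  ghcr.io/yeahdongcn/litellm-proxy:main
```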
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2602da2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+ollama.pid
+bin/
diff --git a/README.md b/README.md
index c11e958..187c92c 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,52 @@
-# MacAI
\ No newline at end of file
+# OllamaStack
+
+Set up an [Ollama](https://github.com/jmorganca/ollama) stack on macOS.
+
+- [OllamaStack](#ollamastack)
+  - [Prerequisites](#prerequisites)
+  - [Diagram](#diagram)
+  - [Quickstart](#quickstart)
+    - [Demo](#demo)
+
+## Prerequisites
+* A [Metal-capable](https://support.apple.com/en-us/102894) Mac device.
+* [Mods](https://github.com/charmbracelet/mods): AI for the command line, built for pipelines.
+* [Docker](https://www.docker.com/products/docker-desktop): The fastest way to containerize applications.
+
+## Diagram
+
+```mermaid
+graph LR;
+    subgraph Host
+        subgraph CLI
+            B(Mods)
+        end
+        subgraph Server
+            C(Ollama)
+            D[Metal]
+        end
+    end
+    subgraph Container
+        E(LiteLLM Proxy)
+        F(Ollama Web UI)
+    end
+    A(User) --> |Terminal|B;
+    A --> |Browser|F;
+    B --> |OpenAI API|E;
+    E --> |REST API|C;
+    F --> |REST API|C;
+    C-. Link .-> D;
+```
+
+## Quickstart
+
+```bash
+$ git clone https://github.com/yeahdongcn/OllamaStack.git
+$ cd OllamaStack
+$ ./start.sh
+$ ./stop.sh
+```
+
+### Demo
+
+![633462](https://github.com/yeahdongcn/OllamaStack/assets/2831050/1290b08a-6636-493e-8ad4-edcb18971198)
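Since Mods speaks the OpenAI API to the LiteLLM proxy, it drops into ordinary shell pipelines once the stack is running. A small usage sketch (the prompts are illustrative; `ollama` is the default model alias configured in mods.yml below):

```bash
# Ask the local model a one-off question (uses default-model: ollama).
mods "explain what Metal acceleration buys Ollama on Apple Silicon"

# Built for pipelines: feed command output through the local model.
git log --oneline -20 | mods -m ollama "summarize these commits"
```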
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..33f3788
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,13 @@
+# Runtime image
+ARG LITELLM_RUNTIME_IMAGE=python:3.11-alpine
+
+# Runtime stage
+FROM $LITELLM_RUNTIME_IMAGE AS runtime
+
+RUN pip install 'litellm[proxy]'
+
+EXPOSE 8000/tcp
+
+# Set your entrypoint and command
+ENTRYPOINT ["litellm"]
+CMD ["--port", "8000"]
\ No newline at end of file
diff --git a/docker/Makefile b/docker/Makefile
new file mode 100644
index 0000000..e87111d
--- /dev/null
+++ b/docker/Makefile
@@ -0,0 +1,9 @@
+IMG := r0ckstar/litellm-proxy
+
+.PHONY: docker-build
+docker-build:
+	docker build -t ${IMG} .
+
+.PHONY: docker-push
+docker-push: docker-build
+	docker push ${IMG}
diff --git a/litellm-config.yaml b/litellm-config.yaml
new file mode 100644
index 0000000..59be6cb
--- /dev/null
+++ b/litellm-config.yaml
@@ -0,0 +1,8 @@
+router_settings:
+  routing_strategy: "least-busy"
+
+model_list:
+  - model_name: ollama
+    litellm_params:
+      model: "ollama/llama2"
+      api_base: http://host.docker.internal:11434
\ No newline at end of file
diff --git a/mods.yml b/mods.yml
new file mode 100644
index 0000000..09bc87a
--- /dev/null
+++ b/mods.yml
@@ -0,0 +1,84 @@
+
+# Default model (gpt-3.5-turbo, gpt-4, ggml-gpt4all-j...).
+default-model: ollama
+# Text to append when using the -f flag.
+format-text: Format the response as markdown without enclosing backticks.
+# Ask for the response to be formatted as markdown unless otherwise set.
+format: false
+# Render output as raw text when connected to a TTY.
+raw: false
+# Quiet mode (hide the spinner while loading).
+quiet: false
+# Temperature (randomness) of results, from 0.0 to 2.0.
+temp: 1.0
+# TopP, an alternative to temperature that narrows response, from 0.0 to 1.0.
+topp: 1.0
+# Turn off the client-side limit on the size of the input into the model.
+no-limit: false
+# Include the prompt from the arguments in the response.
+include-prompt-args: false
+# Include the prompt from the arguments and stdin, truncate stdin to specified number of lines.
+include-prompt: 0
+# Maximum number of times to retry API calls.
+max-retries: 5
+# Your desired level of fanciness.
+fanciness: 10
+# Text to show while generating.
+status-text: Generating
+# Default character limit on input to model.
+max-input-chars: 12250
+# Maximum number of tokens in response.
+# max-tokens: 100
+# Aliases and endpoints for OpenAI compatible REST API.
+apis:
+  openai:
+    base-url: https://api.openai.com/v1
+    api-key-env: OPENAI_API_KEY
+    models:
+      gpt-4:
+        aliases: ["4"]
+        max-input-chars: 24500
+        fallback: gpt-3.5-turbo
+      gpt-4-32k:
+        aliases: ["32k"]
+        max-input-chars: 98000
+        fallback: gpt-4
+      gpt-3.5-turbo:
+        aliases: ["35t"]
+        max-input-chars: 12250
+        fallback: gpt-3.5
+      gpt-3.5-turbo-16k:
+        aliases: ["35t16k"]
+        max-input-chars: 44500
+        fallback: gpt-3.5
+      gpt-3.5:
+        aliases: ["35"]
+        max-input-chars: 12250
+        fallback:
+  localai:
+    # LocalAI setup instructions: https://github.com/go-skynet/LocalAI#example-use-gpt4all-j-model
+    base-url: http://localhost:8000
+    api-key: "IGNORED"
+    models:
+      ollama:
+        aliases: ["local"]
+        max-input-chars: 12250
+        fallback:
+  azure:
+    # Set to 'azure-ad' to use Active Directory
+    # Azure OpenAI setup: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource
+    base-url: https://YOUR_RESOURCE_NAME.openai.azure.com
+    api-key-env: AZURE_OPENAI_KEY
+    models:
+      gpt-4:
+        aliases: ["az4"]
+        max-input-chars: 24500
+        fallback: gpt-35-turbo
+      gpt-35-turbo:
+        aliases: ["az35t"]
+        max-input-chars: 12250
+        fallback: gpt-35
+      gpt-35:
+        aliases: ["az35"]
+        max-input-chars: 12250
+        fallback:
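litellm-config.yaml above registers a single `ollama` model behind an OpenAI-compatible endpoint on port 8000, and mods.yml's `localai` entry points at that same port. The proxy can also be exercised directly — a sketch assuming the stack from start.sh below is already running:

```bash
# Send an OpenAI-style chat completion to the LiteLLM proxy; "ollama" is
# the model_name from litellm-config.yaml, and the proxy forwards the
# request to the Ollama server via host.docker.internal:11434.
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "ollama", "messages": [{"role": "user", "content": "Say hello in five words."}]}'
```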
diff --git a/start.sh b/start.sh
new file mode 100755
index 0000000..f5b3071
--- /dev/null
+++ b/start.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+# Initialize variables:
+LAST_SPINNER_PID=""
+OLLAMA_BINARY="$(pwd)/bin/ollama"
+OLLAMA_PID="$(pwd)/ollama.pid"
+
+# Kill background processes on exit
+trap exit_trap EXIT
+function exit_trap {
+    # Kill the last spinner process
+    kill_spinner
+}
+
+# Draw a spinner so the user knows something is happening
+function spinner {
+    local delay=0.1
+    local spinstr='/-\|'
+    printf "..."
+    while true; do
+        local temp=${spinstr#?}
+        printf "[%c]" "$spinstr"
+        spinstr=$temp${spinstr%"$temp"}
+        sleep "$delay"
+        printf "\b\b\b"
+    done
+}
+
+function kill_spinner {
+    if [ -n "$LAST_SPINNER_PID" ]; then
+        kill "$LAST_SPINNER_PID" >/dev/null 2>&1
+        wait "$LAST_SPINNER_PID" 2>/dev/null
+        printf "\b\b\bdone\n"
+        LAST_SPINNER_PID=""
+    fi
+}
+
+# Echo text to stdout and start a spinner behind it
+# echo_summary "something to say"
+function echo_summary {
+    kill_spinner
+    echo -n -e "$@"
+    spinner &
+    LAST_SPINNER_PID=$!
+}
+
+# Create the directory that will hold OLLAMA_BINARY
+mkdir -p "$(dirname "$OLLAMA_BINARY")"
+
+# Download ollama (skipped if the binary is already present)
+if [ ! -f "$OLLAMA_BINARY" ]; then
+    echo_summary "Downloading ollama to $(dirname "$OLLAMA_BINARY")"
+    curl -sL https://api.github.com/repos/jmorganca/ollama/releases/latest |
+        grep "browser_download_url.*ollama-darwin" |
+        cut -d : -f 2,3 |
+        tr -d \" |
+        wget -O "$OLLAMA_BINARY" -qi -
+    kill_spinner
+fi
+
+chmod +x "$OLLAMA_BINARY"
+"$OLLAMA_BINARY" serve >/dev/null 2>&1 &
+echo $! >"$OLLAMA_PID"
+echo "ollama started"
+
+docker rm -f litellm-proxy >/dev/null 2>&1
+docker run --mount type=bind,source="$(pwd)"/litellm-config.yaml,target=/config.yaml,readonly \
+    -p 8000:8000 --add-host=host.docker.internal:host-gateway \
+    -d --name litellm-proxy ghcr.io/yeahdongcn/litellm-proxy:main --drop_params --config /config.yaml
+echo "litellm started"
+
+read -rp "Do you want to use the predefined mods config? y/n [n]: " USE_MODS
+if [[ $USE_MODS =~ ^[Yy]$ ]]; then
+    if [ -f ~/Library/Application\ Support/mods/mods.yml ]; then
+        mv ~/Library/Application\ Support/mods/mods.yml ~/Library/Application\ Support/mods/mods.yml.backup
+    fi
+    cp mods.yml ~/Library/Application\ Support/mods/mods.yml
+fi
+
+read -rp "Do you want to start the web UI? y/n [n]: " USE_WEBUI
+if [[ $USE_WEBUI =~ ^[Yy]$ ]]; then
+    mkdir -p ollama-webui
+    docker rm -f ollama-webui >/dev/null 2>&1
+    docker run --pull always -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v ollama-webui:/app/backend/data \
+        --name ollama-webui --restart always ghcr.io/ollama-webui/ollama-webui:main
+    sleep 5
+    open http://localhost:3000
+fi
diff --git a/stop.sh b/stop.sh
new file mode 100755
index 0000000..03067c1
--- /dev/null
+++ b/stop.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# Initialize variables:
+OLLAMA_PID="$(pwd)/ollama.pid"
+
+kill -9 "$(cat "$OLLAMA_PID")" && rm -f "$OLLAMA_PID"
+echo "Stopped ollama"
+docker rm -f litellm-proxy >/dev/null 2>&1
+echo "Stopped litellm-proxy"
+docker rm -f ollama-webui >/dev/null 2>&1
+echo "Stopped ollama-webui"
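After `./start.sh` completes, a quick smoke test can confirm each layer is up (a sketch; the endpoints follow from the port mappings above):

```bash
# Ollama's native REST API on the host.
curl -s http://localhost:11434/api/tags

# The OpenAI-compatible model list served by the LiteLLM proxy.
curl -s http://localhost:8000/v1/models

# The web UI container, if it was started.
docker ps --filter name=ollama-webui --format '{{.Names}}: {{.Status}}'
```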