From f91d6ec8550495332b42b4edeb4dc026a16435f6 Mon Sep 17 00:00:00 2001
From: Xiaodong Ye
Date: Tue, 30 Jan 2024 08:57:57 +0800
Subject: [PATCH] Add everything

Signed-off-by: Xiaodong Ye
---
 .github/workflows/docker-publish.yml | 105 +++++++++++++++++++++++++++
 .gitignore                           |   2 +
 README.md                            |  53 +++++++++++++-
 docker/Dockerfile                    |  13 ++++
 docker/Makefile                      |   9 +++
 litellm-config.yaml                  |   8 ++
 mods.yml                             |  84 +++++++++++++++++++++
 start.sh                             |  86 ++++++++++++++++++++++
 stop.sh                              |  11 +++
 9 files changed, 370 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/docker-publish.yml
 create mode 100644 .gitignore
 create mode 100644 docker/Dockerfile
 create mode 100644 docker/Makefile
 create mode 100644 litellm-config.yaml
 create mode 100644 mods.yml
 create mode 100755 start.sh
 create mode 100755 stop.sh

diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 0000000..67dc76f
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,105 @@
+name: Docker
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+on:
+  schedule:
+    - cron: '21 19 * * *'
+  push:
+    branches: [ "main" ]
+    # Publish semver tags as releases.
+    tags: [ 'v*.*.*' ]
+  pull_request:
+    branches: [ "main" ]
+
+env:
+  # Use docker.io for Docker Hub if empty
+  REGISTRY: ghcr.io
+  # github.repository as <account>/<repo>
+  # XXX: Updated
+  IMAGE_NAME: yeahdongcn/litellm-proxy
+
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      # This is used to complete the identity challenge
+      # with sigstore/fulcio when running outside of PRs.
+      id-token: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      # Install the cosign tool except on PR
+      # https://github.com/sigstore/cosign-installer
+      - name: Install cosign
+        if: github.event_name != 'pull_request'
+        uses: sigstore/cosign-installer@6e04d228eb30da1757ee4e1dd75a0ec73a653e06 # v3.1.1
+        with:
+          cosign-release: 'v2.1.1'
+
+      # XXX: Updated
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      # Set up BuildKit Docker container builder to be able to build
+      # multi-platform images and export cache
+      # https://github.com/docker/setup-buildx-action
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0
+
+      # Login against a Docker registry except on PR
+      # https://github.com/docker/login-action
+      - name: Log into registry ${{ env.REGISTRY }}
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Extract metadata (tags, labels) for Docker
+      # https://github.com/docker/metadata-action
+      - name: Extract Docker metadata
+        id: meta
+        uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      # Build and push Docker image with Buildx (don't push on PR)
+      # https://github.com/docker/build-push-action
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0
+        with:
+          # XXX: Updated
+          context: ./docker/
+          platforms: linux/amd64,linux/arm64
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      # Sign the resulting Docker image digest except on PRs.
+      # This will only write to the public Rekor transparency log when the Docker
+      # repository is public to avoid leaking data. If you would like to publish
+      # transparency data even for private images, pass --force to cosign below.
+      # https://github.com/sigstore/cosign
+      - name: Sign the published Docker image
+        if: ${{ github.event_name != 'pull_request' }}
+        env:
+          # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable
+          TAGS: ${{ steps.meta.outputs.tags }}
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        # This step uses the identity token to provision an ephemeral certificate
+        # against the sigstore community Fulcio instance.
+        run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
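The signing step above is keyless: the workflow's OIDC identity token is exchanged for a short-lived Fulcio certificate, so no signing key lives in the repository. As a minimal sketch of how a consumer could check the result (the identity regexp is an assumption based on this repository's owner; the `:main` tag matches the image that start.sh pulls):

```bash
# Verify the keyless cosign signature against the Rekor transparency log.
# --certificate-identity-regexp below is an assumed pattern; tighten it to
# the exact workflow URL of your fork if you publish elsewhere.
cosign verify \
  --certificate-oidc-issuer https://token.actions.githubusercontent.com \
  --certificate-identity-regexp 'https://github.com/yeahdongcn/.*' \
  ghcr.io/yeahdongcn/litellm-proxy:main
```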
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2602da2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+ollama.pid
+bin/
diff --git a/README.md b/README.md
index c11e958..187c92c 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,52 @@
-# MacAI
\ No newline at end of file
+# OllamaStack
+
+Set up an [Ollama](https://github.com/jmorganca/ollama) stack on macOS.
+
+- [OllamaStack](#ollamastack)
+  - [Prerequisites](#prerequisites)
+  - [Diagram](#diagram)
+  - [Quickstart](#quickstart)
+    - [Demo](#demo)
+
+## Prerequisites
+* A [Metal-capable](https://support.apple.com/en-us/102894) Mac device.
+* [Mods](https://github.com/charmbracelet/mods): AI for the command line, built for pipelines.
+* [Docker](https://www.docker.com/products/docker-desktop): The fastest way to containerize applications.
+
+## Diagram
+
+```mermaid
+graph LR;
+    subgraph Host
+        subgraph CLI
+            B(Mods)
+        end
+        subgraph Server
+            C(Ollama)
+            D[Metal]
+        end
+    end
+    subgraph Container
+        E(LiteLLM Proxy)
+        F(Ollama Web UI)
+    end
+    A(User) --> |Terminal|B;
+    A --> |Browser|F;
+    B --> |OpenAI API|E;
+    E --> |REST API|C;
+    F --> |REST API|C;
+    C-. Link .-> D;
+```
+
+## Quickstart
+
+```bash
+$ git clone https://github.com/yeahdongcn/OllamaStack.git
+$ cd OllamaStack
+$ ./start.sh
+$ ./stop.sh
+```
+
+### Demo
+
+![633462](https://github.com/yeahdongcn/OllamaStack/assets/2831050/1290b08a-6636-493e-8ad4-edcb18971198)
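Since Mods speaks the OpenAI API to the LiteLLM proxy, it drops into ordinary shell pipelines once the stack is running. A small usage sketch (the prompts are illustrative; `ollama` is the default model alias configured in mods.yml below):

```bash
# Ask the local model a one-off question (uses default-model: ollama).
mods "explain what Metal acceleration buys Ollama on Apple Silicon"

# Built for pipelines: feed command output through the local model.
git log --oneline -20 | mods -m ollama "summarize these commits"
```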
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..33f3788
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,13 @@
+# Runtime image
+ARG LITELLM_RUNTIME_IMAGE=python:3.11-alpine
+
+# Runtime stage
+FROM $LITELLM_RUNTIME_IMAGE AS runtime
+
+RUN pip install 'litellm[proxy]'
+
+EXPOSE 8000/tcp
+
+# Set your entrypoint and command
+ENTRYPOINT ["litellm"]
+CMD ["--port", "8000"]
\ No newline at end of file
diff --git a/docker/Makefile b/docker/Makefile
new file mode 100644
index 0000000..e87111d
--- /dev/null
+++ b/docker/Makefile
@@ -0,0 +1,9 @@
+IMG := r0ckstar/litellm-proxy
+
+.PHONY: docker-build
+docker-build:
+	docker build -t ${IMG} .
+
+.PHONY: docker-push
+docker-push: docker-build
+	docker push ${IMG}
diff --git a/litellm-config.yaml b/litellm-config.yaml
new file mode 100644
index 0000000..59be6cb
--- /dev/null
+++ b/litellm-config.yaml
@@ -0,0 +1,8 @@
+router_settings:
+  routing_strategy: "least-busy"
+
+model_list:
+  - model_name: ollama
+    litellm_params:
+      model: "ollama/llama2"
+      api_base: http://host.docker.internal:11434
\ No newline at end of file
diff --git a/mods.yml b/mods.yml
new file mode 100644
index 0000000..09bc87a
--- /dev/null
+++ b/mods.yml
@@ -0,0 +1,84 @@
+
+# Default model (gpt-3.5-turbo, gpt-4, ggml-gpt4all-j...).
+default-model: ollama
+# Text to append when using the -f flag.
+format-text: Format the response as markdown without enclosing backticks.
+# Ask for the response to be formatted as markdown unless otherwise set.
+format: false
+# Render output as raw text when connected to a TTY.
+raw: false
+# Quiet mode (hide the spinner while loading).
+quiet: false
+# Temperature (randomness) of results, from 0.0 to 2.0.
+temp: 1.0
+# TopP, an alternative to temperature that narrows response, from 0.0 to 1.0.
+topp: 1.0
+# Turn off the client-side limit on the size of the input into the model.
+no-limit: false
+# Include the prompt from the arguments in the response.
+include-prompt-args: false
+# Include the prompt from the arguments and stdin, truncate stdin to specified number of lines.
+include-prompt: 0
+# Maximum number of times to retry API calls.
+max-retries: 5
+# Your desired level of fanciness.
+fanciness: 10
+# Text to show while generating.
+status-text: Generating
+# Default character limit on input to model.
+max-input-chars: 12250
+# Maximum number of tokens in response.
+# max-tokens: 100
+# Aliases and endpoints for OpenAI compatible REST API.
+apis:
+  openai:
+    base-url: https://api.openai.com/v1
+    api-key-env: OPENAI_API_KEY
+    models:
+      gpt-4:
+        aliases: ["4"]
+        max-input-chars: 24500
+        fallback: gpt-3.5-turbo
+      gpt-4-32k:
+        aliases: ["32k"]
+        max-input-chars: 98000
+        fallback: gpt-4
+      gpt-3.5-turbo:
+        aliases: ["35t"]
+        max-input-chars: 12250
+        fallback: gpt-3.5
+      gpt-3.5-turbo-16k:
+        aliases: ["35t16k"]
+        max-input-chars: 44500
+        fallback: gpt-3.5
+      gpt-3.5:
+        aliases: ["35"]
+        max-input-chars: 12250
+        fallback:
+  localai:
+    # LocalAI setup instructions: https://github.com/go-skynet/LocalAI#example-use-gpt4all-j-model
+    base-url: http://localhost:8000
+    api-key: "IGNORED"
+    models:
+      ollama:
+        aliases: ["local"]
+        max-input-chars: 12250
+        fallback:
+  azure:
+    # Set to 'azure-ad' to use Active Directory
+    # Azure OpenAI setup: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource
+    base-url: https://YOUR_RESOURCE_NAME.openai.azure.com
+    api-key-env: AZURE_OPENAI_KEY
+    models:
+      gpt-4:
+        aliases: ["az4"]
+        max-input-chars: 24500
+        fallback: gpt-35-turbo
+      gpt-35-turbo:
+        aliases: ["az35t"]
+        max-input-chars: 12250
+        fallback: gpt-35
+      gpt-35:
+        aliases: ["az35"]
+        max-input-chars: 12250
+        fallback:
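litellm-config.yaml above registers a single `ollama` model behind an OpenAI-compatible endpoint on port 8000, and mods.yml's `localai` entry points at that same port. The proxy can also be exercised directly — a sketch assuming the stack from start.sh below is already running:

```bash
# Send an OpenAI-style chat completion to the LiteLLM proxy; "ollama" is
# the model_name from litellm-config.yaml, and the proxy forwards the
# request to the Ollama server via host.docker.internal:11434.
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "ollama", "messages": [{"role": "user", "content": "Say hello in five words."}]}'
```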
diff --git a/start.sh b/start.sh
new file mode 100755
index 0000000..f5b3071
--- /dev/null
+++ b/start.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+# Initialize variables:
+LAST_SPINNER_PID=""
+OLLAMA_BINARY="$(pwd)/bin/ollama"
+OLLAMA_PID="$(pwd)/ollama.pid"
+
+# Kill background processes on exit
+trap exit_trap EXIT
+function exit_trap {
+    # Kill the last spinner process
+    kill_spinner
+}
+
+# Draw a spinner so the user knows something is happening
+function spinner {
+    local delay=0.1
+    local spinstr='/-\|'
+    printf "..."
+    while true; do
+        local temp=${spinstr#?}
+        printf "[%c]" "$spinstr"
+        spinstr=$temp${spinstr%"$temp"}
+        sleep "$delay"
+        printf "\b\b\b"
+    done
+}
+
+function kill_spinner {
+    if [ -n "$LAST_SPINNER_PID" ]; then
+        kill "$LAST_SPINNER_PID" >/dev/null 2>&1
+        wait "$LAST_SPINNER_PID" 2>/dev/null
+        printf "\b\b\bdone\n"
+        LAST_SPINNER_PID=""
+    fi
+}
+
+# Echo text to stdout and start a spinner behind it
+# echo_summary "something to say"
+function echo_summary {
+    kill_spinner
+    echo -n -e "$@"
+    spinner &
+    LAST_SPINNER_PID=$!
+}
+
+# Create the directory that will hold OLLAMA_BINARY
+mkdir -p "$(dirname "$OLLAMA_BINARY")"
+
+# Download ollama (skipped if the binary is already present)
+if [ ! -f "$OLLAMA_BINARY" ]; then
+    echo_summary "Downloading ollama to $(dirname "$OLLAMA_BINARY")"
+    curl -sL https://api.github.com/repos/jmorganca/ollama/releases/latest |
+        grep "browser_download_url.*ollama-darwin" |
+        cut -d : -f 2,3 |
+        tr -d \" |
+        wget -O "$OLLAMA_BINARY" -qi -
+    kill_spinner
+fi
+
+chmod +x "$OLLAMA_BINARY"
+"$OLLAMA_BINARY" serve >/dev/null 2>&1 &
+echo $! >"$OLLAMA_PID"
+echo "ollama started"
+
+docker rm -f litellm-proxy >/dev/null 2>&1
+docker run --mount type=bind,source="$(pwd)"/litellm-config.yaml,target=/config.yaml,readonly \
+    -p 8000:8000 --add-host=host.docker.internal:host-gateway \
+    -d --name litellm-proxy ghcr.io/yeahdongcn/litellm-proxy:main --drop_params --config /config.yaml
+echo "litellm started"
+
+read -rp "Do you want to use the predefined mods config? y/n [n]: " USE_MODS
+if [[ $USE_MODS =~ ^[Yy]$ ]]; then
+    if [ -f ~/Library/Application\ Support/mods/mods.yml ]; then
+        mv ~/Library/Application\ Support/mods/mods.yml ~/Library/Application\ Support/mods/mods.yml.backup
+    fi
+    cp mods.yml ~/Library/Application\ Support/mods/mods.yml
+fi
+
+read -rp "Do you want to start the web UI? y/n [n]: " USE_WEBUI
+if [[ $USE_WEBUI =~ ^[Yy]$ ]]; then
+    mkdir -p ollama-webui
+    docker rm -f ollama-webui >/dev/null 2>&1
+    docker run --pull always -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v ollama-webui:/app/backend/data \
+        --name ollama-webui --restart always ghcr.io/ollama-webui/ollama-webui:main
+    sleep 5
+    open http://localhost:3000
+fi
diff --git a/stop.sh b/stop.sh
new file mode 100755
index 0000000..03067c1
--- /dev/null
+++ b/stop.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# Initialize variables:
+OLLAMA_PID="$(pwd)/ollama.pid"
+
+kill -9 "$(cat "$OLLAMA_PID")" && rm -f "$OLLAMA_PID"
+echo "Stopped ollama"
+docker rm -f litellm-proxy >/dev/null 2>&1
+echo "Stopped litellm-proxy"
+docker rm -f ollama-webui >/dev/null 2>&1
+echo "Stopped ollama-webui"
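After `./start.sh` completes, a quick smoke test can confirm each layer is up (a sketch; the endpoints follow from the port mappings above):

```bash
# Ollama's native REST API on the host.
curl -s http://localhost:11434/api/tags

# The OpenAI-compatible model list served by the LiteLLM proxy.
curl -s http://localhost:8000/v1/models

# The web UI container, if it was started.
docker ps --filter name=ollama-webui --format '{{.Names}}: {{.Status}}'
```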