Merge branch 'zylon-ai:main' into streaming-choice-feature
basicbloke authored Sep 24, 2024
2 parents 77c9160 + fa3c306 commit 41bfe23
Showing 11 changed files with 177 additions and 20 deletions.
6 changes: 3 additions & 3 deletions Dockerfile.llamacpp-cpu
@@ -1,6 +1,6 @@
### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
### You will run into a segfault in mac
-FROM python:3.11.6-slim-bookworm as base
+FROM python:3.11.6-slim-bookworm AS base

# Install poetry
RUN pip install pipx
@@ -20,14 +20,14 @@ RUN apt update && apt install -y \
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true

-FROM base as dependencies
+FROM base AS dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./

ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
RUN poetry install --no-root --extras "${POETRY_EXTRAS}"

-FROM base as app
+FROM base AS app

ENV PYTHONUNBUFFERED=1
ENV PORT=8080
6 changes: 3 additions & 3 deletions Dockerfile.ollama
@@ -1,4 +1,4 @@
-FROM python:3.11.6-slim-bookworm as base
+FROM python:3.11.6-slim-bookworm AS base

# Install poetry
RUN pip install pipx
@@ -10,14 +10,14 @@ ENV PATH=".venv/bin/:$PATH"
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true

-FROM base as dependencies
+FROM base AS dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./

ARG POETRY_EXTRAS="ui vector-stores-qdrant llms-ollama embeddings-ollama"
RUN poetry install --no-root --extras "${POETRY_EXTRAS}"

-FROM base as app
+FROM base AS app
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
ENV APP_ENV=prod
12 changes: 8 additions & 4 deletions docker-compose.yaml
@@ -8,11 +8,12 @@ services:
  # This service builds from an external Dockerfile and runs the Ollama mode.
  private-gpt-ollama:
    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version
+    user: root
    build:
      context: .
      dockerfile: Dockerfile.ollama
    volumes:
-      - ./local_data/:/home/worker/app/local_data
+      - ./local_data:/home/worker/app/local_data
    ports:
      - "8001:8001"
    environment:
@@ -27,11 +28,14 @@ services:
      - ollama-cpu
      - ollama-cuda
      - ollama-api
+    depends_on:
+      - ollama

  # Private-GPT service for the local mode
  # This service builds from a local Dockerfile and runs the application in local mode.
  private-gpt-llamacpp-cpu:
    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-llamacpp-cpu # x-release-please-version
+    user: root
    build:
      context: .
      dockerfile: Dockerfile.llamacpp-cpu
@@ -44,7 +48,7 @@
    environment:
      PORT: 8001
      PGPT_PROFILES: local
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN:-}
    profiles:
      - llamacpp-cpu

@@ -57,7 +61,7 @@
  ollama:
    image: traefik:v2.10
    ports:
-      - "8081:8080"
+      - "11434:11434"
    command:
      - "--providers.file.filename=/etc/router.yml"
      - "--log.level=ERROR"
@@ -98,4 +102,4 @@ services:
              count: 1
              capabilities: [gpu]
    profiles:
-      - ollama-cuda
\ No newline at end of file
+      - ollama-cuda
79 changes: 75 additions & 4 deletions poetry.lock

Some generated files are not rendered by default.

17 changes: 17 additions & 0 deletions private_gpt/components/embedding/embedding_component.py
@@ -144,6 +144,23 @@ def __init__(self, settings: Settings) -> None:
                    api_key=settings.gemini.api_key,
                    model_name=settings.gemini.embedding_model,
                )
+            case "mistralai":
+                try:
+                    from llama_index.embeddings.mistralai import (  # type: ignore
+                        MistralAIEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Mistral dependencies not found, install with `poetry install --extras embeddings-mistral`"
+                    ) from e
+
+                api_key = settings.openai.api_key
+                model = settings.openai.embedding_model
+
+                self.embedding_model = MistralAIEmbedding(
+                    api_key=api_key,
+                    model=model,
+                )
            case "mock":
                # Not a random number, is the dimensionality used by
                # the default embedding model
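
For context, a minimal standalone sketch of the llama-index wrapper this new case wires in (not part of the diff; the API key is a placeholder, and note that the merged code reads the key and model from settings.openai, so a Mistral key would have to be supplied through that settings block):

# Hedged sketch: embed a single string with llama-index's Mistral wrapper.
from llama_index.embeddings.mistralai import MistralAIEmbedding

embedding_model = MistralAIEmbedding(
    api_key="your-mistral-api-key",  # assumption: a valid Mistral API key
)  # the wrapper defaults to the "mistral-embed" model
vector = embedding_model.get_text_embedding("Hello, PrivateGPT!")
print(len(vector))  # dimensionality of the returned embedding
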
13 changes: 12 additions & 1 deletion private_gpt/settings/settings.py
@@ -197,7 +197,14 @@ class HuggingFaceSettings(BaseModel):

class EmbeddingSettings(BaseModel):
    mode: Literal[
-        "huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock", "gemini"
+        "huggingface",
+        "openai",
+        "azopenai",
+        "sagemaker",
+        "ollama",
+        "mock",
+        "gemini",
+        "mistralai",
    ]
    ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field(
        "simple",
@@ -350,6 +357,10 @@ class AzureOpenAISettings(BaseModel):
class UISettings(BaseModel):
    enabled: bool
    path: str
+    default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field(
+        "RAG",
+        description="The default mode.",
+    )
    default_chat_system_prompt: str = Field(
        None,
        description="The default system prompt to use for the chat mode.",
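
The Literal gives validation for free. A self-contained sketch (the class is trimmed to the fields visible in the diff) of how pydantic rejects an out-of-range default_mode:

from typing import Literal

from pydantic import BaseModel, Field, ValidationError

class UISettings(BaseModel):  # trimmed to the fields shown in the diff
    enabled: bool
    path: str
    default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field(
        "RAG",
        description="The default mode.",
    )

print(UISettings(enabled=True, path="/").default_mode)  # -> RAG
try:
    UISettings(enabled=True, path="/", default_mode="Chat")
except ValidationError:
    print("rejected: 'Chat' is not one of the four allowed modes")
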
9 changes: 6 additions & 3 deletions private_gpt/ui/ui.py
@@ -107,8 +107,11 @@ def __init__(
        self._selected_filename = None

        # Initialize system prompt based on default mode
-        self.mode = MODES[0]
-        self._system_prompt = self._get_default_system_prompt(self.mode)
+        default_mode_map = {mode.value: mode for mode in Modes}
+        self._default_mode = default_mode_map.get(
+            settings().ui.default_mode, Modes.RAG_MODE
+        )
+        self._system_prompt = self._get_default_system_prompt(self._default_mode)

        # Initialize default response style: Streaming
        self.response_style = STYLES[0]
@@ -425,7 +428,7 @@ def _build_ui_blocks(self) -> gr.Blocks:

            with gr.Row(equal_height=False):
                with gr.Column(scale=3):
-                    default_mode = MODES[0]
+                    default_mode = self._default_mode
                    mode = gr.Radio(
                        [mode.value for mode in MODES],
                        label="Mode",
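
The new lookup means an unrecognized default_mode in settings degrades gracefully instead of crashing the UI. A hedged sketch of the fallback in isolation (the Modes member names are assumptions; the string values come from the Literal in settings.py):

from enum import Enum

class Modes(str, Enum):  # assumed member names; values match the settings Literal
    RAG_MODE = "RAG"
    SEARCH_MODE = "Search"
    BASIC_CHAT_MODE = "Basic"
    SUMMARIZE_MODE = "Summarize"

default_mode_map = {mode.value: mode for mode in Modes}
print(default_mode_map.get("Search", Modes.RAG_MODE))  # Modes.SEARCH_MODE
print(default_mode_map.get("bogus", Modes.RAG_MODE))   # falls back to Modes.RAG_MODE
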
19 changes: 17 additions & 2 deletions private_gpt/utils/ollama.py
@@ -3,24 +3,39 @@
from collections.abc import Iterator, Mapping
from typing import Any

+from httpx import ConnectError
from tqdm import tqdm  # type: ignore

+from private_gpt.utils.retry import retry
+
try:
-    from ollama import Client  # type: ignore
+    from ollama import Client, ResponseError  # type: ignore
except ImportError as e:
    raise ImportError(
        "Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`"
    ) from e

logger = logging.getLogger(__name__)

+_MAX_RETRIES = 5
+_JITTER = (3.0, 10.0)
+
+
+@retry(
+    is_async=False,
+    exceptions=(ConnectError, ResponseError),
+    tries=_MAX_RETRIES,
+    jitter=_JITTER,
+    logger=logger,
+)
def check_connection(client: Client) -> bool:
    try:
        client.list()
        return True
+    except (ConnectError, ResponseError) as e:
+        raise e
    except Exception as e:
-        logger.error(f"Failed to connect to Ollama: {e!s}")
+        logger.error(f"Failed to connect to Ollama: {type(e).__name__}: {e!s}")
        return False


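
In practice, the decorator means a transient ConnectError or ResponseError (for example, an Ollama container that is still starting) is retried up to 5 times with 3–10 s of jitter before check_connection gives up. A hedged usage sketch (the host URL is an assumption; 11434 is Ollama's default port, matching the compose change above):

from ollama import Client

from private_gpt.utils.ollama import check_connection

client = Client(host="http://localhost:11434")  # assumption: local Ollama endpoint
if not check_connection(client):
    raise SystemExit("Ollama is not reachable after retries")
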
31 changes: 31 additions & 0 deletions private_gpt/utils/retry.py
@@ -0,0 +1,31 @@
+import logging
+from collections.abc import Callable
+from typing import Any
+
+from retry_async import retry as retry_untyped  # type: ignore
+
+retry_logger = logging.getLogger(__name__)
+
+
+def retry(
+    exceptions: Any = Exception,
+    *,
+    is_async: bool = False,
+    tries: int = -1,
+    delay: float = 0,
+    max_delay: float | None = None,
+    backoff: float = 1,
+    jitter: float | tuple[float, float] = 0,
+    logger: logging.Logger = retry_logger,
+) -> Callable[..., Any]:
+    wrapped = retry_untyped(
+        exceptions=exceptions,
+        is_async=is_async,
+        tries=tries,
+        delay=delay,
+        max_delay=max_delay,
+        backoff=backoff,
+        jitter=jitter,
+        logger=logger,
+    )
+    return wrapped  # type: ignore
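
A minimal sketch of the new typed wrapper in use (it assumes the retry_async package is installed; the flaky function is hypothetical): the decorated call is retried on ValueError until it succeeds.

import logging

from private_gpt.utils.retry import retry

logging.basicConfig()
logger = logging.getLogger(__name__)

_attempts = 0

@retry(exceptions=(ValueError,), tries=3, delay=0.1, logger=logger)
def flaky() -> str:  # hypothetical function that fails twice, then succeeds
    global _attempts
    _attempts += 1
    if _attempts < 3:
        raise ValueError("transient failure")
    return "ok"

print(flaky())  # succeeds on the third attempt
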
