From 7c7399243f97ab1b97a1385242d3d63bac0755fe Mon Sep 17 00:00:00 2001
From: Alexis VIALARET
Date: Wed, 21 Feb 2024 16:51:37 +0100
Subject: [PATCH 1/2] doc: readme

---
 README.md | 43 +++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 8e7544d..2c154a1 100644
--- a/README.md
+++ b/README.md
@@ -1,21 +1,35 @@
 # skaff-rag-accelerator
-
-This is a starter kit to deploy a modularizable RAG locally or on the cloud (or across multiple clouds)
+This is a starter kit to prototype locally, deploy on any cloud, and industrialize a Retrieval-Augmented Generation (RAG) service.

 ## Features

 - A configurable RAG setup based around Langchain ([Check out the configuration cookbook here](https://artefactory.github.io/skaff-rag-accelerator/cookbook/))
 - `RAG` and `RagConfig` python classes to help you set things up
-- A REST API based on FastAPI to provide easy access to the RAG as a web backend
-- A demo Streamlit to serve as a basic working frontend (not production grade)
+- A REST API based on Langserve + FastAPI to provide easy access to the RAG as a web backend
+- A demo Streamlit to serve as a basic working frontend
 - A document loader for the RAG
-- Secure user authentication
-- User feedback collection
-- Streamed responses
+- Optional plugins for secure user authentication and session management

 ## Quickstart

+This quickstart will guide you through the steps to serve a RAG fully locally. You will run the API backend and frontend on your machine, which should allow you to run your first queries against the RAG.
+
+For this example, we will be using the `tinyllama` LLM, the `BAAI/bge-base-en-v1.5` embedding model, and Chroma for the vector store. This keeps the setup fully local and independent of any external API (and thus free). However, the relevance of answers will not be impressive.
+
+Duration: ~15 minutes.
+
+### Pre-requisites
+
+- Ollama, to serve the LLM locally ([Download and install](https://ollama.com/))
+- A few GB of disk space to host the models
+- Tested with python 3.11 (may work with other versions)
+
+Start the LLM server:
+```shell
+ollama run tinyllama
+```
+
 In a fresh env:
 ```shell
 pip install -r requirements.txt
 ```
You will need to set some env vars, either in a .env file at the project root, o
 ```shell
 export PYTHONPATH=.
 export ADMIN_MODE=1
-export OPENAI_API_KEY="xxx" # API key used to query the LLM
-export EMBEDDING_API_KEY="xxx" # API key used to query the embedding model
-export DATABASE_URL="sqlite:///$(pwd)/database/db.sqlite3" # For local developement only. You will need a real, cloud-based SQL database URL for prod.
 ```

-Start the backend server locally
+Start the backend server locally:
 ```shell
 python -m uvicorn backend.main:app
 ```

 Start the frontend demo
 ```shell
-python -m streamlit run frontend/app.py
+python -m streamlit run frontend/front.py
 ```

 You should then be able to login and chat to the bot:

-![](docs/login_and_chat.gif)
+![](login_and_chat.gif)

 Right now the RAG does not have any document loaded, let's add a sample:
 ```shell
 python data_sample/add_data_sample_to_rag.py
 ```

-The RAG now has access to thn information from your loaded documents:
+The RAG now has access to the information from your loaded documents:

-![](docs/query_with_knowledge.gif)
+![](query_with_knowledge.gif)

 ## Documentation

-To learn how to configure the RAG, take a look at the documentation
+To dive deeper under the hood, take a look at the documentation
 [On github pages](https://artefactory.github.io/skaff-rag-accelerator/)

From a9e591a49c08aa5dfbd88c4601fbc804a2751cf0 Mon Sep 17 00:00:00 2001
From: Alexis VIALARET
Date: Wed, 21 Feb 2024 16:51:46 +0100
Subject: [PATCH 2/2] chore: linting

---
 backend/__init__.py | 2 +-
 .../insecure_authentication.py | 3 ++-
 .../secure_authentcation/secure_authentication.py | 9 +++++----
 backend/api_plugins/sessions/sessions.py | 4 ++--
 docs/backend/backend.md | 4 ++--
 docs/backend/plugins/conversational_rag_plugin.md | 2 +-
 docs/backend/plugins/user_based_sessions.md | 2 +-
 docs/backend/rag_ragconfig.md | 2 +-
 docs/database.md | 4 ++--
 docs/frontend.md | 2 +-
 docs/getting_started.md | 4 ++--
 frontend/__init__.py | 2 +-
 frontend/front.py | 12 +++++++-----
 frontend/lib/backend_interface.py | 3 ++-
 frontend/lib/basic_chat.py | 1 +
 frontend/lib/session_chat.py | 2 +-
 frontend/lib/streamlit_helpers.py | 3 ++-
 17 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/backend/__init__.py b/backend/__init__.py
index c566a40..2631c46 100644
--- a/backend/__init__.py
+++ b/backend/__init__.py
@@ -12,4 +12,4 @@
 ALGORITHM = os.getenv("ALGORITHM", "HS256")

 # If the API runs in admin mode, it will allow the creation of new users
-ADMIN_MODE = bool(int(os.getenv("ADMIN_MODE", False)))
\ No newline at end of file
+ADMIN_MODE = bool(int(os.getenv("ADMIN_MODE", False)))
diff --git a/backend/api_plugins/insecure_authentication/insecure_authentication.py b/backend/api_plugins/insecure_authentication/insecure_authentication.py
index 1dd0a3e..0c3d60e 100644
--- a/backend/api_plugins/insecure_authentication/insecure_authentication.py
+++ b/backend/api_plugins/insecure_authentication/insecure_authentication.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+
 from fastapi import Depends, HTTPException, Response, status

 from backend.api_plugins.lib.user_management import (
@@ -56,7 +57,7 @@ async def login(email: str) -> dict:
     @app.get("/user/me")
     async def user_me(current_user: User = Depends(get_current_user)) -> User:
         return current_user
-    
+

     @app.get("/user")
     async def user_root() -> dict:
diff --git a/backend/api_plugins/secure_authentcation/secure_authentication.py b/backend/api_plugins/secure_authentcation/secure_authentication.py
index 99327d7..ea03f7d 100644
--- a/backend/api_plugins/secure_authentcation/secure_authentication.py
+++ b/backend/api_plugins/secure_authentcation/secure_authentication.py
@@ -1,10 +1,11 @@
 from pathlib import Path
 from typing import List
+
 from fastapi import Depends, HTTPException, Response, status
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
 from jose import JWTError, jwt

-from backend import ADMIN_MODE 
+from backend import ADMIN_MODE
 from backend.api_plugins.lib.user_management import (
     ALGORITHM,
     SECRET_KEY,
@@ -23,7 +24,7 @@ def authentication_routes(app, dependencies=List[Depends]):
     from backend.database import Database
     with Database() as connection:
         connection.run_script(Path(__file__).parent / "users_tables.sql")
-    
+
     oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/user/login")

     async def get_current_user(token: str = Depends(oauth2_scheme)) -> User:
@@ -96,8 +97,8 @@ async def login(form_data: OAuth2PasswordRequestForm = Depends()) -> dict:
     @app.get("/user/me")
     async def user_me(current_user: User = Depends(get_current_user)) -> User:
         return current_user
-    
-    
+
+
     @app.get("/user")
     async def user_root() -> dict:
         return Response("User management routes are enabled.", status_code=200)
diff --git a/backend/api_plugins/sessions/sessions.py b/backend/api_plugins/sessions/sessions.py
index 703d5c8..2649bd4 100644
--- a/backend/api_plugins/sessions/sessions.py
+++ b/backend/api_plugins/sessions/sessions.py
@@ -1,5 +1,5 @@
-from datetime import datetime
 import json
+from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Sequence
 from uuid import uuid4
@@ -67,7 +67,7 @@ async def chat(session_id: str, current_user: User=authentication, dependencies=
         )
         messages.append(message)
         return {"chat_id": session_id, "messages": [message.dict() for message in messages]}
-    
+

     @app.get("/session")
     async def session_root(current_user: User=authentication, dependencies=dependencies) -> dict:
         return Response("Sessions management routes are enabled.", status_code=200)
diff --git a/docs/backend/backend.md b/docs/backend/backend.md
index 83b09da..304325a 100644
--- a/docs/backend/backend.md
+++ b/docs/backend/backend.md
@@ -1,4 +1,4 @@
-The backend provides a REST API to abstract RAG functionalities. The core embarks just enough to query your indexed documents. 
+The backend provides a REST API to abstract RAG functionalities. The core embarks just enough to query your indexed documents.

 More advanced features (authentication, user sessions, ...) can be enabled through [plugins](plugins/plugins.md).

@@ -39,4 +39,4 @@ By going to the API documentation (http://0.0.0.0:8000/docs if serving locally)

 You can also query your RAG using the Langserve playground at http://0.0.0.0:8000/playground. It should look like this:

-![base_playground.png](base_playground.png)
\ No newline at end of file
+![base_playground.png](base_playground.png)
diff --git a/docs/backend/plugins/conversational_rag_plugin.md b/docs/backend/plugins/conversational_rag_plugin.md
index 370c46d..a7e088f 100644
--- a/docs/backend/plugins/conversational_rag_plugin.md
+++ b/docs/backend/plugins/conversational_rag_plugin.md
@@ -25,5 +25,5 @@ We have added two things here:
 We will now have new session management routes available in the API:
 ![sessions_api.png](sessions_api.png)

-And also, the playground now takes a `SESSION ID` configuration: 
+And also, the playground now takes a `SESSION ID` configuration:
 ![sessions_playground.png](sessions_playground.png)
diff --git a/docs/backend/plugins/user_based_sessions.md b/docs/backend/plugins/user_based_sessions.md
index d0b2a6e..14892ae 100644
--- a/docs/backend/plugins/user_based_sessions.md
+++ b/docs/backend/plugins/user_based_sessions.md
@@ -20,4 +20,4 @@ add_routes(app, chain, dependencies=[auth])
 Here our authentication plugin is injected in both the sessions and core routes.
 With this setup, all calls will need to be authenticated with a bearer token that the API provides after a sucessful login. Notice the locks pictograms on every route. These indicate the routes are protected by our authentication scheme. You can still query your RAG using this interface by first login through the `Authorize` button. The Langserve playground does not support this however, and is not usable anymore.

-![sec_auth_api.png](sec_auth_api.png)
\ No newline at end of file
+![sec_auth_api.png](sec_auth_api.png)
diff --git a/docs/backend/rag_ragconfig.md b/docs/backend/rag_ragconfig.md
index 4cdf716..5006443 100644
--- a/docs/backend/rag_ragconfig.md
+++ b/docs/backend/rag_ragconfig.md
@@ -86,4 +86,4 @@ llm_config = LLMConfig(

 ### Extending the `RAGConfig`

-See: [How to extend the RAGConfig](../cookbook/extend_ragconfig.md)
\ No newline at end of file
+See: [How to extend the RAGConfig](../cookbook/extend_ragconfig.md)
diff --git a/docs/database.md b/docs/database.md
index 5111a62..099d466 100644
--- a/docs/database.md
+++ b/docs/database.md
@@ -16,7 +16,7 @@ This section focuses on architecture descision making. you will find implementat

 - This is the very beginning of the project

-SQLite is the default option. It is a minimalist SQL database stored as a single `.sqlite` file. This is suitable for local development and prototyping, but not for industrialization. 
+SQLite is the default option. It is a minimalist SQL database stored as a single `.sqlite` file. This is suitable for local development and prototyping, but not for industrialization.

 As the data is only persisted locally, this also means you can not easily share it with the rest of the dev team in your project. If that is something you need, consider using cloud-based Postgres or MySQL backends.

@@ -68,4 +68,4 @@ with Database() as connection:

 ### Database data model

-The minimal database for the RAG only has one table, `message_history`. It is meant to be extended by plugins to add functionalities as they are needed. See the the [plugins documentation](backend/plugins/plugins.md) for more info.
\ No newline at end of file
+The minimal database for the RAG only has one table, `message_history`. It is meant to be extended by plugins to add functionalities as they are needed. See the [plugins documentation](backend/plugins/plugins.md) for more info.
diff --git a/docs/frontend.md b/docs/frontend.md
index 2e6ac6a..4c15c0a 100644
--- a/docs/frontend.md
+++ b/docs/frontend.md
@@ -6,4 +6,4 @@ The frontend is the end user facing part. It reaches out to the backend ONLY thr

 !!! success ""
     As you work on this repo, it is advisable to keep the front and back decoupled. You can consider the `backend` and `frontend` folders to be two different, standalone repos.

-    You may have code that looks like it would fit well in a `commons` directory and be used by both. In that case, prefer integrating it in the backend and making it available to the frontend via the API. You can also just duplicate the code if it's small enough.
\ No newline at end of file
+    You may have code that looks like it would fit well in a `commons` directory and be used by both. In that case, prefer integrating it in the backend and making it available to the frontend via the API. You can also just duplicate the code if it's small enough.
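The database documentation touched above notes that the minimal schema only holds a `message_history` table and is meant to be extended by plugins. A rough sketch of that pattern, mirroring how the secure authentication plugin runs its `users_tables.sql` script in the hunks above; the `feedback_tables.sql` file name is an illustrative assumption, not part of the repo:

```python
# Hypothetical sketch of a plugin extending the database schema, following the same
# pattern as the secure authentication plugin shown earlier in this patch.
from pathlib import Path

from backend.database import Database

with Database() as connection:
    # "feedback_tables.sql" is a placeholder: it would hold the CREATE TABLE
    # statements for whatever the plugin needs to persist.
    connection.run_script(Path(__file__).parent / "feedback_tables.sql")
```
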
diff --git a/docs/getting_started.md b/docs/getting_started.md
index dfda7e3..beb56cd 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -14,7 +14,7 @@ This is a starter kit to deploy a modularizable RAG locally or on the cloud (or

 ## Quickstart

-This quickstart will guide you through the steps to serve a RAG fully locally. You will run the API backend and frontend on your machine, which should allow you to run your first queries against the RAG. 
+This quickstart will guide you through the steps to serve a RAG fully locally. You will run the API backend and frontend on your machine, which should allow you to run your first queries against the RAG.

 For this exemple, we will be using the `tinyllama` LLM, the `BAAI/bge-base-en-v1.5` embedding model, and Chroma for the vector store. This allows this setup to be fully local, and independent of any external API (and thus, free). However, the relevance of answers will not be impressive.

@@ -93,4 +93,4 @@ The structure of the repo mirrors this architecture.
 ![](backend_archi_short.png)

 The RAG itself does the following:
-![](backend/RAG.png)
\ No newline at end of file
+![](backend/RAG.png)
diff --git a/frontend/__init__.py b/frontend/__init__.py
index 7bcf356..da70b57 100644
--- a/frontend/__init__.py
+++ b/frontend/__init__.py
@@ -8,4 +8,4 @@
 ASSETS_PATH = Path(__file__).parent / "assets"

 # If the API runs in admin mode, it will allow the creation of new users
-ADMIN_MODE = bool(int(os.getenv("ADMIN_MODE", False)))
\ No newline at end of file
+ADMIN_MODE = bool(int(os.getenv("ADMIN_MODE", False)))
diff --git a/frontend/front.py b/frontend/front.py
index 0d6cd8f..cea1b6f 100644
--- a/frontend/front.py
+++ b/frontend/front.py
@@ -1,11 +1,13 @@
-from langserve import RemoteRunnable
-
 import streamlit as st
+from langserve import RemoteRunnable
 from PIL import Image

-from frontend.lib.auth import authentication_page, create_session
-from frontend.lib.backend_interface import backend_supports_auth, backend_supports_sessions
 from frontend import ASSETS_PATH, BACKEND_URL
+from frontend.lib.auth import authentication_page, create_session
+from frontend.lib.backend_interface import (
+    backend_supports_auth,
+    backend_supports_sessions,
+)
 from frontend.lib.basic_chat import basic_chat
 from frontend.lib.session_chat import session_chat
 from frontend.lib.sidebar import sidebar
@@ -31,7 +33,7 @@ def application_header():

     # The session is used to make requests to the backend. It helps with the handling of cookies, auth, and other session data
     initialize_state_variable("session", value=create_session())
-    # The chain is our RAG that will be used to answer questions. 
+    # The chain is our RAG that will be used to answer questions.
     # Langserve's RemoteRunnable allows us to work as if the RAG was local, but it's actually running on the backend
     initialize_state_variable("chain", value=RemoteRunnable(BACKEND_URL))

diff --git a/frontend/lib/backend_interface.py b/frontend/lib/backend_interface.py
index 986296e..19af143 100644
--- a/frontend/lib/backend_interface.py
+++ b/frontend/lib/backend_interface.py
@@ -1,4 +1,5 @@
 from urllib.parse import urljoin
+
 import streamlit as st
 from requests.sessions import Session

@@ -44,4 +45,4 @@ def request(self, method, url, *args, **kwargs):
         if url.startswith("/"):
             url = url[1:]
         url = urljoin(self.base_url, url)
-        return super().request(method, url, *args, **kwargs)
\ No newline at end of file
+        return super().request(method, url, *args, **kwargs)
diff --git a/frontend/lib/basic_chat.py b/frontend/lib/basic_chat.py
index fc90252..51c3738 100644
--- a/frontend/lib/basic_chat.py
+++ b/frontend/lib/basic_chat.py
@@ -1,5 +1,6 @@
 import streamlit as st

+
 def basic_chat():
     user_question = st.chat_input("Say something")

diff --git a/frontend/lib/session_chat.py b/frontend/lib/session_chat.py
index e2fc251..3f20969 100644
--- a/frontend/lib/session_chat.py
+++ b/frontend/lib/session_chat.py
@@ -49,7 +49,7 @@ def session_chat():
         for chunk in response:
             full_response += chunk.content
             placeholder.write(full_response)
-    
+

     bot_message = Message("assistant", full_response, session_id)
     st.session_state["messages"].append(bot_message)
diff --git a/frontend/lib/streamlit_helpers.py b/frontend/lib/streamlit_helpers.py
index abca98a..c1e8769 100644
--- a/frontend/lib/streamlit_helpers.py
+++ b/frontend/lib/streamlit_helpers.py
@@ -1,5 +1,6 @@
 import streamlit as st

+
 def initialize_state_variable(name: str, value):
     if name not in st.session_state:
-        st.session_state[name] = value
\ No newline at end of file
+        st.session_state[name] = value
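Since the backend serves the RAG through Langserve, the chain can also be queried outside the Streamlit frontend. A minimal sketch, assuming the quickstart backend is running on `http://localhost:8000` without the authentication plugin and that the chain accepts a plain string question (as the frontend code above suggests):

```python
# Minimal sketch: query the RAG backend with Langserve's client, the same way
# frontend/front.py wraps it via RemoteRunnable(BACKEND_URL).
from langserve import RemoteRunnable

chain = RemoteRunnable("http://localhost:8000")  # assumes the local quickstart backend

answer = ""
for chunk in chain.stream("What do the loaded documents say?"):
    # As in frontend/lib/session_chat.py, each streamed chunk exposes its text via .content
    answer += chunk.content

print(answer)
```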