Commit

Merge branch 'av/v2'

AlexisVLRT committed Feb 21, 2024
2 parents 284f96d + a9e591a commit f3e932c
Showing 18 changed files with 61 additions and 43 deletions.
43 changes: 27 additions & 16 deletions README.md
@@ -1,21 +1,35 @@
# skaff-rag-accelerator


This is a starter kit to deploy a modularizable RAG locally or on the cloud (or across multiple clouds)
This is a starter kit to prototype locally, deploy on any cloud, and industrialize a Retrieval-Augmented Generation (RAG) service.

## Features

- A configurable RAG setup based around Langchain ([Check out the configuration cookbook here](https://artefactory.github.io/skaff-rag-accelerator/cookbook/))
- `RAG` and `RagConfig` python classes to help you set things up
- A REST API based on FastAPI to provide easy access to the RAG as a web backend
- A demo Streamlit to serve as a basic working frontend (not production grade)
- A REST API based on Langserve + FastAPI to provide easy access to the RAG as a web backend
- A demo Streamlit to serve as a basic working frontend
- A document loader for the RAG
- Secure user authentication
- User feedback collection
- Streamed responses
- Optional plugins for secure user authentication and session management

## Quickstart

This quickstart will guide you through the steps to serve a RAG fully locally. You will run the API backend and frontend on your machine, which should allow you to run your first queries against the RAG.

For this example, we will be using the `tinyllama` LLM, the `BAAI/bge-base-en-v1.5` embedding model, and Chroma for the vector store. This keeps the setup fully local and independent of any external API (and thus free). However, the relevance of answers will not be impressive.

Duration: ~15 minutes.

### Pre-requisites

- Ollama, to serve the LLM locally ([Download and install](https://ollama.com/))
- A few GB of disk space to host the models
- Tested with python 3.11 (may work with other versions)

Start the LLM server:
```shell
ollama run tinyllama
```

In a fresh env:
```shell
pip install -r requirements.txt
@@ -25,37 +39,34 @@ You will need to set some env vars, either in a .env file at the project root, o
```shell
export PYTHONPATH=.
export ADMIN_MODE=1
export OPENAI_API_KEY="xxx" # API key used to query the LLM
export EMBEDDING_API_KEY="xxx" # API key used to query the embedding model
export DATABASE_URL="sqlite:///$(pwd)/database/db.sqlite3" # For local developement only. You will need a real, cloud-based SQL database URL for prod.
```
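The variables above can either be exported or placed in a `.env` file at the project root. The project presumably relies on a library such as python-dotenv for this; as an illustration of what parsing such a file involves (this is a sketch, not the repo's code):

```python
import os
import tempfile

def load_dotenv_minimal(path: str) -> None:
    """Naive .env parser: KEY=VALUE lines, '#' comment lines skipped."""
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            # Strip optional surrounding double quotes from the value
            os.environ[key.strip()] = value.strip().strip('"')

# Write a sample .env and load it
with tempfile.NamedTemporaryFile("w", suffix=".env", delete=False) as f:
    f.write('# local dev settings\n'
            'ADMIN_MODE=1\n'
            'DATABASE_URL="sqlite:///./database/db.sqlite3"\n')
    env_path = f.name

load_dotenv_minimal(env_path)
print(os.environ["ADMIN_MODE"])    # 1
print(os.environ["DATABASE_URL"])  # sqlite:///./database/db.sqlite3
```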

Start the backend server locally
Start the backend server locally:
```shell
python -m uvicorn backend.main:app
```

Start the frontend demo
```shell
python -m streamlit run frontend/app.py
python -m streamlit run frontend/front.py
```

You should then be able to login and chat to the bot:

![](docs/login_and_chat.gif)
![](login_and_chat.gif)

Right now the RAG does not have any document loaded, let's add a sample:
```shell
python data_sample/add_data_sample_to_rag.py
```

The RAG now has access to thn information from your loaded documents:
The RAG now has access to the information from your loaded documents:

![](docs/query_with_knowledge.gif)
![](query_with_knowledge.gif)

## Documentation

To learn how to configure the RAG, take a look at the documentation
To take a deep dive under the hood, take a look at the documentation

[On github pages](https://artefactory.github.io/skaff-rag-accelerator/)

2 changes: 1 addition & 1 deletion backend/__init__.py
@@ -12,4 +12,4 @@
ALGORITHM = os.getenv("ALGORITHM", "HS256")

# If the API runs in admin mode, it will allow the creation of new users
ADMIN_MODE = bool(int(os.getenv("ADMIN_MODE", False)))
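The `bool(int(...))` chain above is what makes the `ADMIN_MODE=1` / `ADMIN_MODE=0` exports from the quickstart work. A quick illustration of its behavior (note that non-numeric values such as `true` would raise a `ValueError`):

```python
import os

def parse_admin_mode() -> bool:
    # Same expression as in backend/__init__.py: the env var arrives as a
    # string ("1" or "0"), int() converts it, bool() maps 0 -> False.
    # When the var is unset, int(False) == 0, so the flag defaults to False.
    return bool(int(os.getenv("ADMIN_MODE", False)))

os.environ["ADMIN_MODE"] = "1"
print(parse_admin_mode())  # True

os.environ["ADMIN_MODE"] = "0"
print(parse_admin_mode())  # False

del os.environ["ADMIN_MODE"]
print(parse_admin_mode())  # False
```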
@@ -1,4 +1,5 @@
from pathlib import Path

from fastapi import Depends, HTTPException, Response, status

from backend.api_plugins.lib.user_management import (
@@ -56,7 +57,7 @@ async def login(email: str) -> dict:
@app.get("/user/me")
async def user_me(current_user: User = Depends(get_current_user)) -> User:
return current_user


@app.get("/user")
async def user_root() -> dict:
@@ -1,10 +1,11 @@
from pathlib import Path
from typing import List

from fastapi import Depends, HTTPException, Response, status
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
from jose import JWTError, jwt
from backend import ADMIN_MODE

from backend import ADMIN_MODE
from backend.api_plugins.lib.user_management import (
ALGORITHM,
SECRET_KEY,
@@ -23,7 +24,7 @@ def authentication_routes(app, dependencies=List[Depends]):
from backend.database import Database
with Database() as connection:
connection.run_script(Path(__file__).parent / "users_tables.sql")

oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/user/login")

async def get_current_user(token: str = Depends(oauth2_scheme)) -> User:
@@ -96,8 +97,8 @@ async def login(form_data: OAuth2PasswordRequestForm = Depends()) -> dict:
@app.get("/user/me")
async def user_me(current_user: User = Depends(get_current_user)) -> User:
return current_user


@app.get("/user")
async def user_root() -> dict:
return Response("User management routes are enabled.", status_code=200)
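The bearer tokens issued by the `/user/login` route above are HS256 JWTs, signed with the `SECRET_KEY` and `ALGORITHM` seen earlier (the backend uses `python-jose` for this). As a rough, stdlib-only sketch of what HS256 signing involves — not the library calls the backend actually makes:

```python
import base64
import hashlib
import hmac
import json

def b64url(data: bytes) -> str:
    # JWT uses unpadded URL-safe base64
    return base64.urlsafe_b64encode(data).rstrip(b"=").decode()

def sign_hs256(payload: dict, secret: str) -> str:
    """Build the three dot-separated JWT segments: header.payload.signature."""
    header = b64url(json.dumps({"alg": "HS256", "typ": "JWT"}).encode())
    body = b64url(json.dumps(payload).encode())
    signing_input = f"{header}.{body}".encode()
    sig = hmac.new(secret.encode(), signing_input, hashlib.sha256).digest()
    return f"{header}.{body}.{b64url(sig)}"

token = sign_hs256({"sub": "user@example.com"}, secret="not-a-real-secret")
print(token.count("."))  # 2
```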
4 changes: 2 additions & 2 deletions backend/api_plugins/sessions/sessions.py
@@ -1,5 +1,5 @@
from datetime import datetime
import json
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Sequence
from uuid import uuid4
@@ -67,7 +67,7 @@ async def chat(session_id: str, current_user: User=authentication, dependencies=
)
messages.append(message)
return {"chat_id": session_id, "messages": [message.dict() for message in messages]}

@app.get("/session")
async def session_root(current_user: User=authentication, dependencies=dependencies) -> dict:
return Response("Sessions management routes are enabled.", status_code=200)
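The `/chat` route shown in this hunk accumulates `Message` objects and returns them as plain dicts keyed by session id. A sketch of that payload's shape, with a hypothetical `Message` dataclass standing in for the backend's own model (field names here are assumptions):

```python
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from uuid import uuid4

@dataclass
class Message:
    # Hypothetical stand-in for the backend's Message model
    sender: str
    content: str
    session_id: str
    timestamp: str

session_id = str(uuid4())  # sessions.py also generates ids with uuid4
messages = [
    Message("user", "What is RAG?", session_id,
            datetime.now(timezone.utc).isoformat()),
]
# Same shape as the route's return value
payload = {"chat_id": session_id, "messages": [asdict(m) for m in messages]}
print(payload["messages"][0]["sender"])  # user
```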
4 changes: 2 additions & 2 deletions docs/backend/backend.md
@@ -1,4 +1,4 @@
The backend provides a REST API to abstract RAG functionalities. The core includes just enough to query your indexed documents.

More advanced features (authentication, user sessions, ...) can be enabled through [plugins](plugins/plugins.md).

@@ -39,4 +39,4 @@ By going to the API documentation (http://0.0.0.0:8000/docs if serving locally)

You can also query your RAG using the Langserve playground at http://0.0.0.0:8000/playground. It should look like this:

![base_playground.png](base_playground.png)
2 changes: 1 addition & 1 deletion docs/backend/plugins/conversational_rag_plugin.md
@@ -25,5 +25,5 @@ We have added two things here:
We will now have new session management routes available in the API:
![sessions_api.png](sessions_api.png)

The playground also now takes a `SESSION ID` configuration:
![sessions_playground.png](sessions_playground.png)
2 changes: 1 addition & 1 deletion docs/backend/plugins/user_based_sessions.md
@@ -20,4 +20,4 @@ add_routes(app, chain, dependencies=[auth])
Here our authentication plugin is injected in both the sessions and core routes. With this setup, all calls will need to be authenticated with a bearer token that the API provides after a successful login.

Notice the lock pictograms on every route. These indicate the routes are protected by our authentication scheme. You can still query your RAG through this interface by first logging in via the `Authorize` button. The Langserve playground does not support this, however, and is no longer usable.
![sec_auth_api.png](sec_auth_api.png)
2 changes: 1 addition & 1 deletion docs/backend/rag_ragconfig.md
Expand Up @@ -86,4 +86,4 @@ llm_config = LLMConfig(

### Extending the `RAGConfig`

See: [How to extend the RAGConfig](../cookbook/extend_ragconfig.md)
4 changes: 2 additions & 2 deletions docs/database.md
@@ -16,7 +16,7 @@ This section focuses on architecture decision making. You will find implementat
- This is the very beginning of the project


SQLite is the default option. It is a minimalist SQL database stored as a single `.sqlite` file. This is suitable for local development and prototyping, but not for industrialization.

As the data is only persisted locally, this also means you can not easily share it with the rest of the dev team in your project. If that is something you need, consider using cloud-based Postgres or MySQL backends.

@@ -68,4 +68,4 @@ with Database() as connection:

### Database data model

The minimal database for the RAG only has one table, `message_history`. It is meant to be extended by plugins to add functionalities as they are needed. See the [plugins documentation](backend/plugins/plugins.md) for more info.
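Since the core schema is just this one `message_history` table, you can mimic it in a throwaway experiment with the stdlib `sqlite3` module. The column names below are illustrative assumptions, not the repo's actual DDL:

```python
import sqlite3

# In-memory database standing in for the local .sqlite file
conn = sqlite3.connect(":memory:")
conn.execute(
    """CREATE TABLE message_history (
        id INTEGER PRIMARY KEY,
        session_id TEXT,
        sender TEXT,
        content TEXT,
        timestamp TEXT
    )"""
)
conn.execute(
    "INSERT INTO message_history (session_id, sender, content, timestamp) "
    "VALUES (?, ?, ?, ?)",
    ("abc123", "user", "Hello", "2024-02-21T00:00:00Z"),
)
rows = conn.execute("SELECT sender, content FROM message_history").fetchall()
print(rows)  # [('user', 'Hello')]
```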
2 changes: 1 addition & 1 deletion docs/frontend.md
@@ -6,4 +6,4 @@ The frontend is the end user facing part. It reaches out to the backend ONLY thr
!!! success ""
As you work on this repo, it is advisable to keep the front and back decoupled. You can consider the `backend` and `frontend` folders to be two different, standalone repos.

You may have code that looks like it would fit well in a `commons` directory and be used by both. In that case, prefer integrating it in the backend and making it available to the frontend via the API. You can also just duplicate the code if it's small enough.
4 changes: 2 additions & 2 deletions docs/getting_started.md
@@ -14,7 +14,7 @@ This is a starter kit to deploy a modularizable RAG locally or on the cloud (or

## Quickstart

This quickstart will guide you through the steps to serve a RAG fully locally. You will run the API backend and frontend on your machine, which should allow you to run your first queries against the RAG.

For this example, we will be using the `tinyllama` LLM, the `BAAI/bge-base-en-v1.5` embedding model, and Chroma for the vector store. This keeps the setup fully local and independent of any external API (and thus free). However, the relevance of answers will not be impressive.

@@ -93,4 +93,4 @@ The structure of the repo mirrors this architecture.
![](backend_archi_short.png)

The RAG itself does the following:
![](backend/RAG.png)
![](backend/RAG.png)
2 changes: 1 addition & 1 deletion frontend/__init__.py
@@ -8,4 +8,4 @@
ASSETS_PATH = Path(__file__).parent / "assets"

# If the API runs in admin mode, it will allow the creation of new users
ADMIN_MODE = bool(int(os.getenv("ADMIN_MODE", False)))
12 changes: 7 additions & 5 deletions frontend/front.py
@@ -1,11 +1,13 @@
from langserve import RemoteRunnable

import streamlit as st
from langserve import RemoteRunnable
from PIL import Image
from frontend.lib.auth import authentication_page, create_session
from frontend.lib.backend_interface import backend_supports_auth, backend_supports_sessions

from frontend import ASSETS_PATH, BACKEND_URL
from frontend.lib.auth import authentication_page, create_session
from frontend.lib.backend_interface import (
backend_supports_auth,
backend_supports_sessions,
)
from frontend.lib.basic_chat import basic_chat
from frontend.lib.session_chat import session_chat
from frontend.lib.sidebar import sidebar
@@ -31,7 +33,7 @@ def application_header():
# The session is used to make requests to the backend. It helps with the handling of cookies, auth, and other session data
initialize_state_variable("session", value=create_session())

# The chain is our RAG that will be used to answer questions.
# Langserve's RemoteRunnable allows us to work as if the RAG was local, but it's actually running on the backend
initialize_state_variable("chain", value=RemoteRunnable(BACKEND_URL))

3 changes: 2 additions & 1 deletion frontend/lib/backend_interface.py
@@ -1,4 +1,5 @@
from urllib.parse import urljoin

import streamlit as st
from requests.sessions import Session

@@ -44,4 +45,4 @@ def request(self, method, url, *args, **kwargs):
if url.startswith("/"):
url = url[1:]
url = urljoin(self.base_url, url)
return super().request(method, url, *args, **kwargs)
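The leading-slash stripping in this `request()` override matters because `urljoin` treats an absolute path as replacing everything after the host. A quick demonstration of the two behaviors:

```python
from urllib.parse import urljoin

base_url = "http://localhost:8000/api/"

# An absolute path discards the /api/ prefix of the base URL entirely:
absolute = urljoin(base_url, "/user/login")
print(absolute)  # http://localhost:8000/user/login

# A relative path is appended under the base, which is what the
# session wrapper wants after stripping the leading slash:
relative = urljoin(base_url, "user/login")
print(relative)  # http://localhost:8000/api/user/login
```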
1 change: 1 addition & 0 deletions frontend/lib/basic_chat.py
@@ -1,5 +1,6 @@
import streamlit as st


def basic_chat():
user_question = st.chat_input("Say something")

2 changes: 1 addition & 1 deletion frontend/lib/session_chat.py
@@ -49,7 +49,7 @@ def session_chat():
for chunk in response:
full_response += chunk.content
placeholder.write(full_response)

bot_message = Message("assistant", full_response, session_id)
st.session_state["messages"].append(bot_message)

3 changes: 2 additions & 1 deletion frontend/lib/streamlit_helpers.py
@@ -1,5 +1,6 @@
import streamlit as st


def initialize_state_variable(name: str, value):
if name not in st.session_state:
st.session_state[name] = value
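`initialize_state_variable` is an idempotent "set default" for Streamlit's session state: it seeds a value on the first run but never clobbers it on later reruns. The same pattern with a plain dict standing in for `st.session_state` (for illustration only):

```python
state: dict = {}  # stand-in for st.session_state

def initialize_state_variable(name: str, value) -> None:
    # Only seed the value on the first run; later reruns keep whatever
    # the session has already stored under `name`.
    if name not in state:
        state[name] = value

initialize_state_variable("counter", 0)
state["counter"] = 5                     # user interaction mutates it
initialize_state_variable("counter", 0)  # a rerun does NOT reset it
print(state["counter"])  # 5
```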
