diff --git a/README.md b/README.md
index 6e5c1f3..78c83ba 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # 📚 Local RAG
 
-![local-rag-logo](logo.png)
+![local-rag-demo](demo.gif)
 
 ![GitHub commit activity](https://img.shields.io/github/commit-activity/t/jonfairbanks/local-rag)
 ![GitHub last commit](https://img.shields.io/github/last-commit/jonfairbanks/local-rag)
@@ -46,10 +46,9 @@ Docker:
 - [ ] Chat Mode
     - [x] top_k
     - [x] chunk_size
-    - [x] chunk_overlap
-- [ ] Allow Switching of Embedding Model & Settings
+    - [ ] chunk_overlap
+- [x] Allow Switching of Embedding Model & Settings
 - [x] Delete Files after Index Created/Failed
-- [ ] Ability to Remove Files from Index
 - [ ] Function to Handle GitHub Repo Ingestion
 - [ ] Support for JSON Files
 - [x] Show Loaders in UI (File Uploads, Conversions, ...)
@@ -64,7 +63,7 @@ Docker:
 - [ ] Refreshing the page loses all state (expected Streamlit behavior; need to implement local-storage)
 - [x] Files can be uploaded before Ollama config is set, leading to embedding errors
 - [ ] Assuming Ollama is hosted on localhost, Models are automatically loaded and selected, but the dropdown does not render the selected option
-- [ ] Upon sending a Chat message, the File Processing expander appears to re-run itself
+- [ ] Upon sending a Chat message, the File Processing expander appears to re-run itself (seems something is not using state correctly)
 
 ### Resources
 - [Ollama](https://ollama.com/)
@@ -72,4 +71,5 @@ Docker:
 - [Streamlit](https://docs.streamlit.io/library/api-reference)
 - [Ollama w/ Llama-Index](https://docs.llamaindex.ai/en/stable/examples/llm/ollama.html)
 - [RAG w/ Llama-Index](https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/)
-- [Llama-Index Chat Engine](https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_context.html)
\ No newline at end of file
+- [Llama-Index Chat Engine](https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_context.html)
+- [PoC Notebook](https://github.com/fairbanksio/notebooks/blob/main/llm/local/github-rag-prep.ipynb)
\ No newline at end of file
diff --git a/components/tabs/file_upload.py b/components/tabs/file_upload.py
index 168925f..ed30d53 100644
--- a/components/tabs/file_upload.py
+++ b/components/tabs/file_upload.py
@@ -73,7 +73,7 @@ def file_upload():
                 print("Using default embedding model...")
                 hf_embedding_model = "BAAI/bge-large-en-v1.5"
 
-            if embedding_model == "Best (Salesforce/SFR-Embedding-Mistral)":
+            if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)":
                 print("Using the Salesforce embedding model; RIP yer VRAM...")
                 hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"
 
diff --git a/components/tabs/settings.py b/components/tabs/settings.py
index cd6bfc7..8ab2418 100644
--- a/components/tabs/settings.py
+++ b/components/tabs/settings.py
@@ -57,7 +57,7 @@ def settings():
             "Model",
             [
                 "Default (bge-large-en-v1.5)",
-                "Best (Salesforce/SFR-Embedding-Mistral)",
+                "Large (Salesforce/SFR-Embedding-Mistral)",
                 "Other",
             ],
             key="embedding_model",
@@ -79,13 +79,6 @@ def settings():
             placeholder="1024",
             value=st.session_state["chunk_size"],
         )
-        st.text_input(
-            "Chunk Overlap",
-            help="`chunk_overlap` sets the overlap between consecutive document chunks. It prevents loss of information at chunk boundaries. For instance, a value of 20 means a 20-token overlap. Adjusting this parameter affects the precision and generality of the calculated embeddings.",
-            key="chunk_overlap",
-            placeholder="20",
-            value=st.session_state["chunk_overlap"],
-        )
 
         st.subheader("Export Data")
         export_data_settings = st.container(border=True)
diff --git a/demo.gif b/demo.gif
new file mode 100644
index 0000000..b03e51f
Binary files /dev/null and b/demo.gif differ
diff --git a/utils/llama_index.py b/utils/llama_index.py
index 08034ab..7b82b51 100644
--- a/utils/llama_index.py
+++ b/utils/llama_index.py
@@ -43,7 +43,7 @@ def create_service_context(
         llm=llm,
         system_prompt=system_prompt,
         embed_model=formatted_embed_model,
-        chunk_size=chunk_size,
+        chunk_size=int(chunk_size),
     )
 
     # Note: this may be redundant since service_context is returned
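
Why the `chunk_size=int(chunk_size)` change in `utils/llama_index.py` is needed: the value originates from the `st.text_input` field in `components/tabs/settings.py`, and Streamlit text inputs return strings, so session state holds "1024" rather than 1024 by the time `create_service_context` runs. A minimal sketch of the coercion, assuming string input from the settings form (the helper name is hypothetical, not part of this patch):

    def coerce_chunk_size(raw, default=1024):
        """st.text_input returns str; llama-index needs an int token count."""
        try:
            return int(raw)
        except (TypeError, ValueError):
            # Empty field or non-numeric input falls back to the placeholder default
            return default

    assert coerce_chunk_size("1024") == 1024  # normal form input
    assert coerce_chunk_size("") == 1024      # empty text box
    assert coerce_chunk_size(None) == 1024    # key missing from session state

Note the patch itself uses a bare `int(chunk_size)`, which raises ValueError on non-numeric input; the fallback above is one way the cast could be hardened.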