diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..796a0f7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,41 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, attach screenshots to help explain your problem. **A screenshot of your Settings > Advanced > Application State section can greatly aid in troubleshooting.** + +**Logs** +If applicable, attach log files to help explain your problem. **Reproducing your issue and sharing a copy of your `local-rag.log` can greatly aid in troubleshooting.** + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..11fc491 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. 
+ +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/Pipfile.lock b/Pipfile.lock index e053d10..179c905 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -967,12 +967,12 @@ }, "llama-index": { "hashes": [ - "sha256:796dc3a540f4e531d0f027702ebcb7bb89d5c42a94e77413a06b0c228ea66654", - "sha256:e42bcb00a41f2e60fad3da2d286e53ba0d3f963f94741e7bffd9600e80208413" + "sha256:0a6193365fcaa8cc2ce4c146e9c544aaf93d5672cdc34c4c9d86309e685a2356", + "sha256:1b9a113056f948178caa5f7b77a524b934d3efd4497a3832974ccc94851a8f88" ], "index": "pypi", "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.10.20" + "version": "==0.10.22" }, "llama-index-agent-openai": { "hashes": [ @@ -984,19 +984,19 @@ }, "llama-index-cli": { "hashes": [ - "sha256:1d1d0d2f7b1752a62270438654557d8325f7e8921e45a5b1cc22e6f06ba62600", - "sha256:3dd9d2f2cd84fed3d18a322df9dc389b19dc7a8048fa4bfcac1d901ccc59d1cd" + "sha256:44bc32af6d9bc0b523ad284f24fa1ec17288491243fe6d7c7b4770b3245dbb84", + "sha256:5de707e125aa877d70c61da70cc44fea72a9f7adb77f413b51f73b1deffdd750" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.1.10" + "version": "==0.1.11" }, "llama-index-core": { "hashes": [ - "sha256:6d05e53271250dd340ea75f0395d9529eea6788546c1fe9020dc7597fa47907b", - "sha256:d7d83a2f89269a3abc64663ef122ab6b132d6fbc1d32b5f0ba2909c4e8c6c55a" + "sha256:7a3a715a4a27c349e0241d477d6b6767c19b95b150116b924e6862325f33484b", + "sha256:7bc36bd39e2bd46291389f818da311e638043d22226130e84d0425906d79502c" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.10.20.post2" + "version": "==0.10.22" }, "llama-index-embeddings-huggingface": { "hashes": [ @@ -1009,11 +1009,11 @@ }, "llama-index-embeddings-openai": { "hashes": [ - 
"sha256:f12f0ef6f92211efe1a022a97bb68fc8731c93bd20df3b0567dba69c610033db", - "sha256:f8b2dded0718e9f57c08ce352d186941e6acf7de414c64219210b66f7a6d6d2d" + "sha256:6023925ed1487b0688323d21711efbf8880e82ed3b87ef413255c3dc63a2f2fe", + "sha256:c71cc9820680c4cedfc9845dc87b94f6851d1ccce1e486fc91298f8fa8d9f27d" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.1.6" + "version": "==0.1.7" }, "llama-index-indices-managed-llama-cloud": { "hashes": [ @@ -1669,11 +1669,11 @@ }, "openai": { "hashes": [ - "sha256:1fab5dd623cdc0c7c6e7da5d8d11fa6900f94191c2dfb6510d7eac33195fa175", - "sha256:f9322b0bf3b82bbd06930fad535369a023f35a3a96d3ef0b827644a15d7aae97" + "sha256:a48b3c4d635b603952189ac5a0c0c9b06c025b80eb2900396939f02bb2104ac3", + "sha256:e5642f7c02cf21994b08477d7bb2c1e46d8f335d72c26f0396c5f89b15b5b153" ], "markers": "python_full_version >= '3.7.1'", - "version": "==1.14.1" + "version": "==1.14.2" }, "outcome": { "hashes": [ @@ -2078,52 +2078,46 @@ }, "pymupdf": { "hashes": [ - "sha256:05e672ed3e82caca7ef02a88ace30130b1dd392a1190f03b2b58ffe7aa331400", - "sha256:0bbb0cf6593e53524f3fc26fb5e6ead17c02c64791caec7c4afe61b677dedf80", - "sha256:2dfc9e010669ae92fade6fb72aaea49ebe3b8dcd7ee4dcbbe50115abcaa4d3fe", - "sha256:2eb701247d8e685a24e45899d1175f01a3ce5fc792a4431c91fbb68633b29298", - "sha256:3f876533aa7f9a94bcd9a0225ce72571b7808260903fec1d95c120bc842fb52d", - "sha256:42ad2b819b90ce1947e11b90ec5085889df0a2e3aa0207bc97ecacfc6157cabc", - "sha256:52df831d46beb9ff494f5fba3e5d069af6d81f49abf6b6e799ee01f4f8fa6799", - "sha256:5ef4360f20015673c20cf59b7e19afc97168795188c584254ed3778cde43ce77", - "sha256:645a05321aecc8c45739f71f0eb574ce33138d19189582ffa5241fea3a8e2549", - "sha256:6577e2f473625e2d0df5f5a3bf1e4519e94ae749733cc9937994d1b256687bfa", - "sha256:734ee380b3abd038602be79114194a3cb74ac102b7c943bcb333104575922c50", - "sha256:73fce034f2afea886a59ead2d0caedf27e2b2a8558b5da16d0286882e0b1eb82", - 
"sha256:879e7f5ad35709d8760ab6103c3d5dac8ab8043a856ab3653fd324af7358ee87", - "sha256:92b3c4dd4d0491d495f333be2d41f4e1c155a409bc9d04b5ff29655dccbf4655", - "sha256:97b40bb22e3056874634617a90e0ed24a5172cf71791b9e25d1d91c6743bc567", - "sha256:99607649f89a02bba7d8ebe96e2410664316adc95e9337f7dfeff6a154f93049", - "sha256:9f7f4ef99dd8ac97fb0b852efa3dcbee515798078b6c79a6a13c7b1e7c5d41a4", - "sha256:a217689ede18cc6991b4e6a78afee8a440b3075d53b9dec4ba5ef7487d4547e9", - "sha256:a904261b317b761b0aa2bd2c1f6cd25d25aa4258be67a90c02a878efc5dca649", - "sha256:b22f8d854f8196ad5b20308c1cebad3d5189ed9f0988acbafa043947ea7e6c55", - "sha256:b3de8618b7cb5b36db611083840b3bcf09b11a893e2d8262f4e042102c7e65de", - "sha256:ba9a54552c7afb9ec85432c765e2fa9a81413acfaa7d70db7c9b528297749e5b", - "sha256:bb42d4b8407b4de7cb58c28f01449f16f32a6daed88afb41108f1aeb3552bdd4", - "sha256:c40d044411615e6f0baa7d3d933b3032cf97e168c7fa77d1be8a46008c109aee", - "sha256:cc0f794e3466bc96b5bf79d42fbc1551428751e3fef38ebc10ac70396b676144", - "sha256:d7cd88842b2e7f4c71eef4d87c98c35646b80b60e6375392d7ce40e519261f59", - "sha256:deee96c2fd415ded7b5070d8d5b2c60679aee6ed0e28ac0d2cb998060d835c2c", - "sha256:e2804a64bb57da414781e312fb0561f6be67658ad57ed4a73dce008b23fc70a6", - "sha256:f25aafd3e7fb9d7761a22acf2b67d704f04cc36d4dc33a3773f0eb3f4ec3606f", - "sha256:fab8833559bc47ab26ce736f915b8fc1dd37c108049b90396f7cd5e1004d7593", - "sha256:fbe1a3255b2cd0d769b2da2c4efdd0c0f30d4961a1aac02c0f75cf951b337aa4" + "sha256:08bb534a046d7492ab7cf726ef9aa01a14791e53922ffc2a341fa617709434f2", + "sha256:160a3310f33fda1c0cfaed82d4e22a2aca960ebf5c6919982032727973e42830", + "sha256:2d46cd6535f25ffeb6261d389b932fa6359193a12de3633e200504898d48c27d", + "sha256:37160eb301e017ec67bb63b1c6f52eae2c90bd1159f6a6b2ec469c3e69d55f74", + "sha256:4395b420477620be4fc90567deb20f17eda5e9757e2ca95f7bc3854d2a6713cc", + "sha256:490d10c85defec873bf33a54eea1e8cc637927c7efeaff3570b812d7c65256f7", + "sha256:4e92d2895eb55b5475572bda167bb6d3c5b7757ba0b6beee0456ca0d3db852b2", 
+ "sha256:65fc88a23804b83b9390016d377d9350dece167e349140de93769618858ccf8d", + "sha256:6731cc7ef76d972220bd1bb50d5b67720de2038312be23806045bcc5f9675951", + "sha256:750908f95771fa0fcdbc690f6aae7e0031ff002c5ea343f12930e42da73e5c8b", + "sha256:82ff0a4ed3a27de95726db1f10744c2865212eed2a28e3fd19a081b9c247028d", + "sha256:8db27eca7f6aa2c5aa84278cc9961a0183e8aca6d7210a5648658816ea9601bf", + "sha256:9354c2654512390d261bad37a90168de0cb954be4e9b3d55073a67e8ca07f7f8", + "sha256:963759f1a2722d25d08e79e00db696e4f5342675bed3b2f2129f03a8d4c41b77", + "sha256:96bcecd0a33b2de6954c4a3c677719cd1d1f36c1fe7dc4e229e06177aef8bdb7", + "sha256:986b234751e734da1b4f983fd270fa595258781abc25e26d409d96439136c41c", + "sha256:9e9945d1af3ec6deff4c5d61edc63b9c68d49c2212df1104614e2ab173b1d158", + "sha256:af2d8ba47851f2a5a2f7592453792a03cbcd705e40512e9aeb199edd7bcce886", + "sha256:b6811b09af1ddb93229066f7acf183f6aeeeec4bf9c2290ff81fbeebbc5a4f79", + "sha256:bfc953361277cafa38e5bb93edd2b7c6c0c4284f137cea5847efe730759fe0d2", + "sha256:d193319e3850f4025dc1e3c8a6a0b03683668353aacf660d434668be51e3e464", + "sha256:de1aa7825f3333dfbff26e88f9cd37491a625b783b8b4780a14e5f70ab6d9853", + "sha256:e72b7ab4b2dfffe38ceed1e577ffaaa2e34117d87fc716b0238a6f2a12670fe4", + "sha256:ee1188a8d9bf9dbf21aab8229c99472dd47af315a71753452210f40cff744a7b", + "sha256:f428210b2fc7e0094dbcd62acc15554cb3ee9778a3429bf2d04850cfbab227fb", + "sha256:fc4b7a212b9f3216bb32c1146340efe5282c1519f7250e52ccd9dedcfd04df5d" ], "markers": "python_version >= '3.8'", - "version": "==1.23.26" + "version": "==1.24.0" }, "pymupdfb": { "hashes": [ - "sha256:01016dd33220cef4ecaf929d09fd27a584dc3ec3e5c9f4112dfe63613ea35135", - "sha256:3ffa713ad18e816e584c8a5f569995c32d22f8ac76ab6e4a61f2d2983c4b73d9", - "sha256:7c9c157281fdee9f296e666a323307dbf74cb38f017921bb131fa7bfcd39c2bd", - "sha256:9085a1e2fbf16f2820f9f7ad3d25e85f81d9b9eb0409110c1670d4cf5a27a678", - "sha256:cf50e814db91f2a2325219302fbac229a23682c372cf8232aabd51ea3f18210e", - 
"sha256:d00e372452845aea624659c302d25e935052269fd3aafe26948301576d6f2ee8" + "sha256:051e043ada55ecf03cae28b9990ec53b975a69995a0f177caedc9b3bf85d2d22", + "sha256:113e424b534a73a00dfaf2407beab3e9c35bfe406f77cfa66a43cf5f87bafef6", + "sha256:3e368ce2a8935881965343a7b87565b532a1787a3dc8f5580980dfb8b91d0c39", + "sha256:5af4e14171efd5e85b82ce2ae94caaebae9f4314103fc9af62be99537e21562e", + "sha256:871e100637fd64c76356656ca4122f4d355906aa25173997959ccaf39413c8d4" ], "markers": "python_version >= '3.8'", - "version": "==1.23.22" + "version": "==1.24.0" }, "pyparsing": { "hashes": [ @@ -3213,7 +3207,7 @@ "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475", "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb" ], - "markers": "python_version >= '3.8'", + "markers": "python_version >= '3.8' and sys_platform != 'darwin'", "version": "==4.10.0" }, "typing-inspect": { diff --git a/components/page_state.py b/components/page_state.py index 0022905..e187a3a 100644 --- a/components/page_state.py +++ b/components/page_state.py @@ -76,9 +76,6 @@ def set_initial_state(): if "query_engine" not in st.session_state: st.session_state["query_engine"] = None - if "service_context" not in st.session_state: - st.session_state["service_context"] = None - if "chat_mode" not in st.session_state: st.session_state["chat_mode"] = "compact" @@ -107,4 +104,4 @@ def set_initial_state(): st.session_state["chunk_size"] = 1024 if "chunk_overlap" not in st.session_state: - st.session_state["chunk_overlap"] = 20 + st.session_state["chunk_overlap"] = 200 diff --git a/components/tabs/github_repo.py b/components/tabs/github_repo.py index a54606b..3b98e18 100644 --- a/components/tabs/github_repo.py +++ b/components/tabs/github_repo.py @@ -1,11 +1,7 @@ -import os -import shutil - import streamlit as st import utils.helpers as func -import utils.ollama as ollama -import utils.llama_index as llama_index +import utils.rag_pipeline as rag import utils.logs as logs @@ -29,116 
+25,13 @@ def github_repo(): with st.spinner("Processing..."): if repo_processed is True: - error = None - - ###################################### - # Create Llama-Index service-context # - # to use local LLMs and embeddings # - ###################################### - - try: - llm = ollama.create_ollama_llm( - st.session_state["selected_model"], - st.session_state["ollama_endpoint"], - ) - st.session_state["llm"] = llm - st.caption("✔️ LLM Initialized") - - # resp = llm.complete("Hello!") - # print(resp) - except Exception as err: - logs.log.error(f"Failed to setup LLM: {err}") - error = err - st.exception(error) - st.stop() - - #################################### - # Determine embedding model to use # - #################################### - - embedding_model = st.session_state["embedding_model"] - hf_embedding_model = None - - if embedding_model == None: - hf_embedding_model = "BAAI/bge-large-en-v1.5" - - if embedding_model == "Default (bge-large-en-v1.5)": - hf_embedding_model = "BAAI/bge-large-en-v1.5" - - if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)": - hf_embedding_model = "Salesforce/SFR-Embedding-Mistral" - - if embedding_model == "Other": - hf_embedding_model = st.session_state["other_embedding_model"] - - try: - llama_index.create_service_context( - st.session_state["llm"], - st.session_state["system_prompt"], - hf_embedding_model, - st.session_state["chunk_size"], - # st.session_state["chunk_overlap"], - ) - st.caption("✔️ Context Created") - except Exception as err: - logs.log.error(f"Setting up Service Context failed: {err}") - error = err - st.exception(error) - st.stop() - - ####################################### - # Load files from the data/ directory # - ####################################### - - try: - save_dir = os.getcwd() + "/data" - documents = llama_index.load_documents(save_dir) - st.session_state["documents"] = documents - st.caption("✔️ Processed File Data") - except Exception as err: - logs.log.error(f"Document 
Load Error: {err}") - error = err - st.exception(error) - st.stop() - - ########################################### - # Create an index from ingested documents # - ########################################### - - try: - llama_index.create_query_engine( - st.session_state["documents"], - st.session_state["service_context"], - ) - st.caption("✔️ Created File Index") - except Exception as err: - logs.log.error(f"Index Creation Error: {err}") - error = err - st.exception(error) - st.stop() - - ##################### - # Remove data files # - ##################### - - try: - save_dir = os.getcwd() + "/data" - shutil.rmtree(save_dir) - st.caption("✔️ Removed Temp Files") - except Exception as err: - logs.log.error(f"Failed to delete data files: {err}") - error = err - st.exception(error) - st.stop() - - ##################### - # Show Final Status # - ##################### - + # Initiate the RAG pipeline, providing documents to be saved on disk if necessary + error = rag.rag_pipeline() + if error is not None: st.exception(error) else: - st.write("Your files are ready. Let's chat! 😎") + st.write("Your files are ready. Let's chat! 😎") # TODO: This should be a button. else: st.text_input( diff --git a/components/tabs/local_files.py b/components/tabs/local_files.py index 8ea175d..d8e6adc 100644 --- a/components/tabs/local_files.py +++ b/components/tabs/local_files.py @@ -53,4 +53,4 @@ def local_files(): if error is not None: st.exception(error) else: - st.write("Your files are ready. Let's chat! 😎") + st.write("Your files are ready. Let's chat! 😎") # TODO: This should be a button. diff --git a/components/tabs/settings.py b/components/tabs/settings.py index da80d58..7f72b20 100644 --- a/components/tabs/settings.py +++ b/components/tabs/settings.py @@ -94,7 +94,7 @@ def settings(): "Chunk Overlap", help="The amount of overlap between two consecutive chunks. 
A higher overlap value helps maintain continuity and context across chunks.", key="chunk_overlap", - placeholder="20", + placeholder="200", value=st.session_state["chunk_overlap"], ) diff --git a/components/tabs/website.py b/components/tabs/website.py index a97ca38..6ea114d 100644 --- a/components/tabs/website.py +++ b/components/tabs/website.py @@ -16,36 +16,6 @@ def ensure_https(url): def website(): - # if st.session_state["selected_model"] is not None: - # st.text_input( - # "Clone a website", - # placeholder="https://ollama.com/blog", - # ) - # st.button( - # "Copy", - # ) - # else: - # st.text_input( - # "Clone a website", - # placeholder="https://ollama.com/blog", - # disabled=True, - # ) - # st.button( - # "Copy", - # disabled=True, - # ) - - # css_example = ''' - # I'm importing the font-awesome icons as a stylesheet! - # - - # - # - # - # ''' - - # st.write(css_example, unsafe_allow_html=True) - st.write("Enter a Website") col1, col2 = st.columns([1, 0.2]) with col1: @@ -82,4 +52,4 @@ def website(): if error is not None: st.exception(error) else: - st.write("Site processing completed. Let's chat! 😎") + st.write("Site processing completed. Let's chat! 😎") # TODO: This should be a button. diff --git a/docs/todo.md b/docs/todo.md index 65dfef1..3fd83af 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -18,14 +18,14 @@ Although not final, items are generally sorted from highest to lowest priority. - [x] Export Data (Chat History, ...) - [x] Docker Support - [x] Windows Support -- [ ] Extract Metadata and Load into Index +- [x] Extract Metadata and Load into Index - [x] Faster Document Embeddings (Cuda, Batch Size, ...) - [ ] Swap to OpenAI compatible endpoints - [ ] Allow Usage of Ollama hosted embeddings - [ ] Enable support for additional LLM backends - [ ] Local AI - [ ] TabbyAPI -- [ ] Remove File Type Limitations for Uploads +- [ ] Remove File Type Limitations for Uploads? 
### User Experience @@ -34,7 +34,7 @@ Although not final, items are generally sorted from highest to lowest priority. - [x] About Tab in Sidebar w/ Resources - [x] Enable Caching - [ ] Allow Users to Set LLM Settings - - [ ] System Prompt (needs more work) + - [x] System Prompt - [x] Chat Mode - [ ] Temperature - [x] top_k @@ -67,4 +67,5 @@ Although not final, items are generally sorted from highest to lowest priority. ### Other - [ ] Investigate [R2R](https://github.com/SciPhi-AI/R2R) backend support/migration -- [ ] ROCm Support -- Wanted: AMD Testers! 🔍🔴 \ No newline at end of file +- [ ] ROCm Support -- Wanted: AMD Testers! 🔍🔴 +- [ ] Improved Windows / Windows + Docker Support \ No newline at end of file diff --git a/main.py b/main.py index cabe030..92586da 100644 --- a/main.py +++ b/main.py @@ -12,7 +12,7 @@ def generate_welcome_message(msg): for char in msg: - time.sleep(0.025) # This is blocking :( + time.sleep(0.025) # TODO: Find a better way -- This is blocking :( yield char diff --git a/utils/llama_index.py b/utils/llama_index.py index 81bb145..2bcecb1 100644 --- a/utils/llama_index.py +++ b/utils/llama_index.py @@ -4,7 +4,6 @@ import utils.logs as logs -from torch import cuda from llama_index.embeddings.huggingface import HuggingFaceEmbedding # This is not used but required by llama-index and must be set FIRST @@ -13,11 +12,11 @@ from llama_index.core import ( VectorStoreIndex, SimpleDirectoryReader, - ServiceContext, - set_global_service_context, + Settings, ) + ################################### # # Setup Embedding Model @@ -44,74 +43,25 @@ def setup_embedding_model( Notes: The `device` parameter can be set to 'cpu' or 'cuda' to specify the device to use for the embedding computations. If 'cuda' is used and CUDA is available, the embedding model will be run on the GPU. Otherwise, it will be run on the CPU. 
""" - device = "cpu" if not cuda.is_available() else "cuda" - logs.log.info(f"Using {device} to generate embeddings") - embed_model = HuggingFaceEmbedding( - model_name=model, - # embed_batch_size=25, // TODO: Turning this on creates chaos, but has the potential to improve performance - device=device, - ) - logs.log.info(f"Embedding model created successfully") - return embed_model - - -################################### -# -# Create Service Context -# -################################### - -# TODO: Migrate to LlamaIndex.Settings: https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/service_context_migration.html - - -def create_service_context( - llm, # TODO: Determine type - system_prompt: str = None, - embed_model: str = "BAAI/bge-large-en-v1.5", - chunk_size: int = 1024, # Llama-Index default is 1024 - chunk_overlap: int = 200, # Llama-Index default is 200 -): - """ - Creates a service context for the Llama language model. - - Args: - llm (tbd): The Ollama language model to use. - system_prompt (str): An optional string that can be used as the system prompt when generating text. If no system prompt is passed, the default value will be used. - embed_model (str): The name of the embedding model to use. Can also be a path to a saved embedding model. - chunk_size (int): The size of each chunk of text to generate. Defaults to 1024. - chunk_overlap (int): The amount of overlap between adjacent chunks of text. Defaults to 200. - - Returns: - A ServiceContext instance, configured with the specified Llama model, system prompt, and embedding model. - - Raises: - ValueError: If the specified Llama model is not a valid Llama model. - ValueError: If the specified embed_model is not a valid embedding model. 
+ try: + from torch import cuda + device = "cpu" if not cuda.is_available() else "cuda" + except: + device = "cpu" + finally: + logs.log.info(f"Using {device} to generate embeddings") - Notes: - The `embed_model` parameter can be set to a path to a saved embedding model, or to a string representing the name of the embedding model to use. If the `embed_model` parameter is set to a path, it will be loaded and used to create the service context. Otherwise, it will be created using the specified name. - The `chunk_size` and `chunk_overlap` parameters can be adjusted to control how much text is generated in each chunk and how much overlap there is between chunks. - """ - formatted_embed_model = f"local:{embed_model}" try: - embedding_model = setup_embedding_model(embed_model) - service_context = ServiceContext.from_defaults( - llm=llm, - system_prompt=system_prompt, - embed_model=embedding_model, - chunk_size=int(chunk_size), - # chunk_overlap=int(chunk_overlap), + Settings.embed_model = HuggingFaceEmbedding( + model_name=model, + device=device, ) - logs.log.info(f"Service Context created successfully") - st.session_state["service_context"] = service_context - - # Note: this may be redundant since service_context is returned - set_global_service_context(service_context) - return service_context - except Exception as e: - logs.log.error(f"Failed to create service_context: {e}") - Exception(f"Failed to create service_context: {e}") # TODO: Redundant? 
+ logs.log.info(f"Embedding model created successfully") + + return + except Exception as err: + print(f"Failed to setup the embedding model: {err}") ################################### @@ -144,6 +94,7 @@ def load_documents(data_dir: str): return documents except Exception as err: logs.log.error(f"Error creating data index: {err}") + raise Exception(f"Error creating data index: {err}") finally: for file in os.scandir(data_dir): if file.is_file() and not file.name.startswith( @@ -161,13 +112,12 @@ def load_documents(data_dir: str): @st.cache_data(show_spinner=False) -def create_index(_documents, _service_context): +def create_index(_documents): """ Creates an index from the provided documents and service context. Args: documents (list[str]): A list of strings representing the content of the documents to be indexed. - service_context (ServiceContext): The service context to use when creating the index. Returns: An instance of `VectorStoreIndex`, containing the indexed data. @@ -176,12 +126,12 @@ def create_index(_documents, _service_context): Exception: If there is an error creating the index. Notes: - The `documents` parameter should be a list of strings representing the content of the documents to be indexed. The `service_context` parameter should be an instance of `ServiceContext`, providing information about the Llama model and other configuration settings for the index. + The `documents` parameter should be a list of strings representing the content of the documents to be indexed. 
""" try: index = VectorStoreIndex.from_documents( - documents=_documents, service_context=_service_context, show_progress=True + documents=_documents, show_progress=True ) logs.log.info("Index created from loaded documents successfully") @@ -189,7 +139,7 @@ def create_index(_documents, _service_context): return index except Exception as err: logs.log.error(f"Index creation failed: {err}") - return False + raise Exception(f"Index creation failed: {err}") ################################### @@ -199,14 +149,13 @@ def create_index(_documents, _service_context): ################################### -@st.cache_resource(show_spinner=False) -def create_query_engine(_documents, _service_context): +# @st.cache_resource(show_spinner=False) +def create_query_engine(_documents): """ Creates a query engine from the provided documents and service context. Args: documents (list[str]): A list of strings representing the content of the documents to be indexed. - service_context (ServiceContext): The service context to use when creating the index. Returns: An instance of `QueryEngine`, containing the indexed data and allowing for querying of the data using a variety of parameters. @@ -215,19 +164,17 @@ def create_query_engine(_documents, _service_context): Exception: If there is an error creating the query engine. Notes: - The `documents` parameter should be a list of strings representing the content of the documents to be indexed. The `service_context` parameter should be an instance of `ServiceContext`, providing information about the Llama model and other configuration settings for the index. + The `documents` parameter should be a list of strings representing the content of the documents to be indexed. - This function uses the `create_index` function to create an index from the provided documents and service context, and then creates a query engine from the resulting index. 
The `query_engine` parameter is used to specify the parameters of the query engine, including the number of top-ranked items to return (`similarity_top_k`), the response mode (`response_mode`), and the service context (`service_context`). + This function uses the `create_index` function to create an index from the provided documents, and then creates a query engine from the resulting index. The query engine is configured with the number of top-ranked items to return (`similarity_top_k`) and the response mode (`response_mode`), both read from `st.session_state`. """ try: - index = create_index(_documents, _service_context) + index = create_index(_documents) query_engine = index.as_query_engine( similarity_top_k=st.session_state["top_k"], response_mode=st.session_state["chat_mode"], - service_context=_service_context, streaming=True, - # verbose=True, # Broken? ) st.session_state["query_engine"] = query_engine @@ -237,3 +184,4 @@ def create_query_engine(_documents, _service_context): return query_engine except Exception as e: logs.log.error(f"Error when creating Query Engine: {e}") + raise Exception(f"Error when creating Query Engine: {e}") diff --git a/utils/ollama.py b/utils/ollama.py index f61f61a..d2f72b7 100644 --- a/utils/ollama.py +++ b/utils/ollama.py @@ -9,6 +9,7 @@ os.environ["OPENAI_API_KEY"] = "sk-abc123" from llama_index.llms.ollama import Ollama +from llama_index.core import Settings from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine ################################### @@ -98,7 +99,7 @@ def get_models(): @st.cache_data(show_spinner=False) -def create_ollama_llm(model: str, base_url: str, request_timeout: int = 60) -> Ollama: +def create_ollama_llm(model: str, base_url: str, system_prompt: str = None, request_timeout: int = 60) -> Ollama: """ Create an instance of the Ollama language model. 
@@ -111,9 +112,9 @@ def create_ollama_llm(model: str, base_url: str, request_timeout: int = 60) -> O - llm: An instance of the Ollama language model with the specified configuration. """ try: - llm = Ollama(model=model, base_url=base_url, request_timeout=request_timeout) + Settings.llm = Ollama(model=model, base_url=base_url, system_prompt=system_prompt, request_timeout=request_timeout) logs.log.info("Ollama LLM instance created successfully") - return llm + return Settings.llm except Exception as e: logs.log.error(f"Error creating Ollama language model: {e}") return None @@ -138,7 +139,10 @@ def chat(prompt: str): """ try: - llm = create_ollama_llm() + llm = create_ollama_llm( + st.session_state["selected_model"], + st.session_state["ollama_endpoint"], + ) stream = llm.stream_complete(prompt) for chunk in stream: yield chunk.delta @@ -184,7 +188,8 @@ def context_chat(prompt: str, query_engine: RetrieverQueryEngine): try: stream = query_engine.query(prompt) for text in stream.response_gen: - yield text + # print(str(text), end="", flush=True) + yield str(text) except Exception as err: logs.log.error(f"Ollama chat stream error: {err}") return diff --git a/utils/rag_pipeline.py b/utils/rag_pipeline.py index 80b3d42..c38bb0d 100644 --- a/utils/rag_pipeline.py +++ b/utils/rag_pipeline.py @@ -59,6 +59,7 @@ def rag_pipeline(uploaded_files: list = None): llm = ollama.create_ollama_llm( st.session_state["selected_model"], st.session_state["ollama_endpoint"], + st.session_state["system_prompt"], ) st.session_state["llm"] = llm st.caption("✔️ LLM Initialized") @@ -91,16 +92,12 @@ def rag_pipeline(uploaded_files: list = None): hf_embedding_model = st.session_state["other_embedding_model"] try: - llama_index.create_service_context( - st.session_state["llm"], - st.session_state["system_prompt"], + llama_index.setup_embedding_model( hf_embedding_model, - st.session_state["chunk_size"], - # st.session_state["chunk_overlap"], ) - st.caption("✔️ Context Created") + st.caption("✔️ 
Embedding Model Created") except Exception as err: - logs.log.error(f"Setting up Service Context failed: {str(err)}") + logs.log.error(f"Setting up Embedding Model failed: {str(err)}") error = err st.exception(error) st.stop() @@ -135,7 +132,6 @@ def rag_pipeline(uploaded_files: list = None): try: llama_index.create_query_engine( st.session_state["documents"], - st.session_state["service_context"], ) st.caption("✔️ Created File Index") except Exception as err: