diff --git a/Pipfile b/Pipfile
index 939b1e0..fe83c57 100644
--- a/Pipfile
+++ b/Pipfile
@@ -11,6 +11,7 @@
 llama-index-llms-ollama = "*"
 llama-index-embeddings-huggingface = "*"
 pycryptodome = "*"
 nbconvert = "*"
+pyexiftool = "*"
 
 [dev-packages]
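Note on the new dependency: pyexiftool is only a thin wrapper around the external ExifTool program. The exiftool binary must be installed separately (e.g. libimage-exiftool-perl on Debian/Ubuntu, or `brew install exiftool` on macOS) and be discoverable on PATH, otherwise ExifToolHelper will fail to start; pyexiftool 0.5.x documents a minimum ExifTool version of 12.15.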
"version": "==0.10.12" + "version": "==0.10.14" }, "llama-index-embeddings-huggingface": { "hashes": [ - "sha256:e3059b76110e56a184e4c87186d30c4c336a5465eca34caa0a4cddcad96d36a4", - "sha256:fbb0fde5547fdd808a39ede4036033187e7d0ce1205a4873c479e55c237bdf45" + "sha256:042d249d91039bc4a531711c0c81ebf4f5c921de98629d2d342979bc4511a639", + "sha256:9c80539f3cbbd7191c219e2cda154b1a7151aa912196bc537c16f40e18e4187c" ], "index": "pypi", "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.1.3" + "version": "==0.1.4" }, "llama-index-embeddings-openai": { "hashes": [ @@ -1004,11 +1004,11 @@ }, "llama-index-readers-file": { "hashes": [ - "sha256:1b3c8252f81fb8893b56a193d5404a24667075bf19f8bb6e65e4877bd7f3d98e", - "sha256:e72147722b7a15cf00a9cb96215ddf06b1f6b22b0801c158931fee71a5f124fd" + "sha256:d9fc0ca84926d04bd757c57fe87841cd9dbc2606aab5f2ce927deec14aaa1a74", + "sha256:f583bd90353a0c0985213af02c97aa2f2f22e702d4311fe719de91382c9ad8dd" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.1.5" + "version": "==0.1.6" }, "llama-index-readers-llama-parse": { "hashes": [ @@ -1020,19 +1020,19 @@ }, "llama-index-vector-stores-chroma": { "hashes": [ - "sha256:7364f2a3f8a51b83d350da39da7e7046704cfa9c848ebe8fd1c6cb39ad4878f9", - "sha256:f475a450431ee4d9b2915ba9da2112dfdfacaee1ea220b8603720be1c116786c" + "sha256:40692f8bcc4b44d4a28b6ed578bad71fbc33ce5d95220c29b00e5ba7ab00d8a0", + "sha256:5e6ed1bc0b0e4c54a030b7ec95cc19015af4a8a22d3c37deb66f76b017d54b14" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.1.4" + "version": "==0.1.5" }, "llama-parse": { "hashes": [ - "sha256:5a30569c390ab9089dad66cf2a8c967f8c21d77641deec0a922672df4e16cfa3", - "sha256:b667c78d4c32fc5d0561e6e3ca6c53648a6701b436f21d0d252cd46774927660" + "sha256:736a80e4fc5970b9cbef1048171908021ebd26be43f07b806889f0d1bb3875fe", + "sha256:8e6e7a0986ad30cb82c5c67a29b7e2c3892620dd2a422afc909654a9d0f1c82c" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.3.4" + "version": "==0.3.5" }, "llamaindex-py-client": { "hashes": [ @@ -1118,11 +1118,11 @@ }, "marshmallow": { "hashes": [ - "sha256:4c1daff273513dc5eb24b219a8035559dc573c8f322558ef85f5438ddd1236dd", - "sha256:c21d4b98fee747c130e6bc8f45c4b3199ea66bc00c12ee1f639f0aeca034d5e9" + "sha256:20f53be28c6e374a711a16165fb22a8dc6003e3f7cda1285e3ca777b9193885b", + "sha256:e7997f83571c7fd476042c2c188e4ee8a78900ca5e74bd9c8097afa56624e9bd" ], "markers": "python_version >= '3.8'", - "version": "==3.20.2" + "version": "==3.21.0" }, "mdurl": { "hashes": [ @@ -1577,11 +1577,11 @@ }, "openai": { "hashes": [ - "sha256:99c5d257d09ea6533d689d1cc77caa0ac679fa21efef8893d8b0832a86877f1b", - "sha256:a54002c814e05222e413664f651b5916714e4700d041d5cf5724d3ae1a3e3481" + "sha256:5769b62abd02f350a8dd1a3a242d8972c947860654466171d60fb0972ae0a41c", + "sha256:ff6c6b3bc7327e715e4b3592a923a5a1c7519ff5dd764a83d69f633d49e77a7b" ], "markers": "python_full_version >= '3.7.1'", - "version": "==1.12.0" + "version": "==1.13.3" }, "opentelemetry-api": { "hashes": [ @@ -2018,10 +2018,10 @@ }, "pydantic": { "hashes": [ - "sha256:37a5432e54b12fecaa1049c5195f3d860a10e01bdfd24f1840ef14bd0d3aeab3", - "sha256:a09be1c3d28f3abe37f8a78af58284b236a92ce520105ddc91a6d29ea1176ba7" + "sha256:72c6034df47f46ccdf81869fddb81aade68056003900a8724a4f160700016a2a", + "sha256:e07805c4c7f5c6826e33a1d4c9d47950d7eaf34868e2690f8594d2e30241f11f" ], - "version": "==2.6.2" + "version": "==2.6.3" }, "pydantic-core": { 
"hashes": [ @@ -2116,6 +2116,15 @@ "markers": "python_version >= '3.7'", "version": "==0.8.1b0" }, + "pyexiftool": { + "hashes": [ + "sha256:22a972c1c212d1ad5f61916fded5057333dcc48fb8e42eed12d2ff9665b367ae", + "sha256:ac7d7836d2bf373f20aa558528f6b2222c4c0d896ed28c951a3ff8e6cec05a87" + ], + "index": "pypi", + "markers": "python_version >= '3.6'", + "version": "==0.5.6" + }, "pygments": { "hashes": [ "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c", @@ -2498,11 +2507,11 @@ }, "rich": { "hashes": [ - "sha256:5cb5123b5cf9ee70584244246816e9114227e0b98ad9176eede6ad54bf5403fa", - "sha256:6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235" + "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222", + "sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432" ], "markers": "python_full_version >= '3.7.0'", - "version": "==13.7.0" + "version": "==13.7.1" }, "rpds-py": { "hashes": [ diff --git a/components/page_config.py b/components/page_config.py index 7ae9e86..812b5dc 100644 --- a/components/page_config.py +++ b/components/page_config.py @@ -6,5 +6,10 @@ def set_page_config(): page_title="Local RAG", page_icon="📚", layout="wide", - initial_sidebar_state="expanded", + initial_sidebar_state=st.session_state['sidebar_state'], + menu_items={ + 'Get Help': 'https://github.com/jonfairbanks/local-rag/discussions', + 'Report a bug': "https://github.com/jonfairbanks/local-rag/issues", + 'About': None + } ) diff --git a/components/page_state.py b/components/page_state.py index 7097792..61ca562 100644 --- a/components/page_state.py +++ b/components/page_state.py @@ -11,6 +11,9 @@ def set_initial_state(): # General # ########### + if 'sidebar_state' not in st.session_state: + st.session_state['sidebar_state'] = 'expanded' + if "ollama_endpoint" not in st.session_state: st.session_state["ollama_endpoint"] = "http://localhost:11434" diff --git a/components/tabs/github_repo.py b/components/tabs/github_repo.py index f9821fc..a3b9d0c 100644 --- a/components/tabs/github_repo.py +++ b/components/tabs/github_repo.py @@ -57,19 +57,15 @@ def github_repo(): hf_embedding_model = None if embedding_model == None: - # logs.log.info("No embedding model set; using defaults...") hf_embedding_model = "BAAI/bge-large-en-v1.5" if embedding_model == "Default (bge-large-en-v1.5)": - # logs.log.info("Using default embedding model...") hf_embedding_model = "BAAI/bge-large-en-v1.5" if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)": - # logs.log.info("Using the Salesforce embedding model; RIP yer VRAM...") hf_embedding_model = "Salesforce/SFR-Embedding-Mistral" if embedding_model == "Other": - # logs.log.info("Using a user-provided embedding model...") hf_embedding_model = st.session_state["other_embedding_model"] try: diff --git a/main.py b/main.py index 19d0493..fdec4f4 100644 --- a/main.py +++ b/main.py @@ -15,14 +15,13 @@ def generate_welcome_message(msg): time.sleep(0.025) # This is blocking :( yield char +### Setup Initial State +set_initial_state() ### Page Setup set_page_config() set_page_header() -### Setup Initial State -set_initial_state() - for msg in st.session_state["messages"]: st.chat_message(msg["role"]).write(msg["content"]) # st.chat_message(msg["role"]).write_stream(generate_welcome_message(msg['content'])) diff --git a/utils/helpers.py b/utils/helpers.py index 8ffb007..3adc818 100644 --- a/utils/helpers.py +++ b/utils/helpers.py @@ -1,9 +1,12 @@ import os +import json import requests import subprocess import streamlit as st +from 
diff --git a/utils/helpers.py b/utils/helpers.py
index 8ffb007..3adc818 100644
--- a/utils/helpers.py
+++ b/utils/helpers.py
@@ -1,9 +1,12 @@
 import os
+import json
 import requests
 import subprocess
 
 import streamlit as st
 
+from exiftool import ExifToolHelper
+
 import utils.logs as logs
 
 ###################################
@@ -24,10 +27,12 @@ def save_uploaded_file(uploaded_file: bytes, save_dir: str):
     try:
         if not os.path.exists(save_dir):
             os.makedirs(save_dir)
+            logs.log.info(f"Directory {save_dir} did not exist; creating it")
         with open(os.path.join(save_dir, uploaded_file.name), "wb") as f:
             f.write(uploaded_file.getbuffer())
+        logs.log.info(f"Upload {uploaded_file.name} saved to disk")
     except Exception as e:
-        logs.log.info(f"Error saving upload to disk: {e}")
+        logs.log.error(f"Error saving upload to disk: {e}")
 
 
 ###################################
@@ -75,3 +80,20 @@ def clone_github_repo(repo: str):
     else:
         Exception(f"Failed to process GitHub repo {st.session_state['github_repo']}")
         return False
+
+
+###################################
+#
+# Extract File Metadata
+#
+###################################
+
+
+def get_file_metadata(file_path):
+    """Returns a JSON string of metadata for the specified file."""
+    try:
+        with ExifToolHelper() as et:
+            for d in et.get_metadata(file_path):
+                return json.dumps(d, indent=2)
+    except Exception as e:
+        logs.log.error(f"Error extracting file metadata: {e}")
\ No newline at end of file
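For reference, ExifToolHelper.get_metadata() accepts a single path or a list of paths and returns one dict per file, which is why get_file_metadata() returns inside the first loop iteration. A minimal usage sketch (the file path is hypothetical):

    from utils.helpers import get_file_metadata

    # Returns a pretty-printed JSON string, or None when extraction
    # fails (e.g. if the exiftool binary is missing from PATH).
    meta = get_file_metadata("data/uploads/report.pdf")
    if meta is not None:
        print(meta)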
""" client = ollama.Client(host=host) + logs.log.info("Ollama chat client created successfully") return client @@ -51,6 +52,7 @@ def get_models(): models = [] for model in data["models"]: models.append(model["name"]) + logs.log.info("Ollama models loaded successuflly") st.session_state["ollama_models"] = models return models @@ -77,6 +79,7 @@ def create_ollama_llm(model: str, base_url: str, request_timeout: int = 60) -> O """ try: llm = Ollama(model=model, base_url=base_url, request_timeout=request_timeout) + logs.log.info("Ollama LLM instance created successfully") return llm except Exception as e: logs.log.error(f"Error creating Ollama language model: {e}")