
Commit

Merge pull request #20 from jonfairbanks/develop
More Logging
jonfairbanks authored Feb 29, 2024
2 parents 4b6f623 + 5ef2918 commit 86045bb
Showing 9 changed files with 99 additions and 55 deletions.
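
Since the commit's theme is additional logging through utils.logs, note that the logs module itself is not touched by this diff. Below is a minimal sketch of the logger interface the new logs.log.info() / logs.log.error() calls assume; the module contents are an assumption, only the public name "log" comes from the calls in the diff.

import logging

# Hypothetical reconstruction of utils/logs.py for context; not part of this commit.
log = logging.getLogger("local-rag")
log.setLevel(logging.INFO)

_handler = logging.StreamHandler()
_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
log.addHandler(_handler)
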
1 change: 1 addition & 0 deletions Pipfile
@@ -11,6 +11,7 @@ llama-index-llms-ollama = "*"
llama-index-embeddings-huggingface = "*"
pycryptodome = "*"
nbconvert = "*"
pyexiftool = "*"

[dev-packages]

97 changes: 53 additions & 44 deletions Pipfile.lock

Some generated files are not rendered by default.

7 changes: 6 additions & 1 deletion components/page_config.py
@@ -6,5 +6,10 @@ def set_page_config():
page_title="Local RAG",
page_icon="📚",
layout="wide",
initial_sidebar_state="expanded",
initial_sidebar_state=st.session_state['sidebar_state'],
menu_items={
'Get Help': 'https://github.com/jonfairbanks/local-rag/discussions',
'Report a bug': "https://github.com/jonfairbanks/local-rag/issues",
'About': None
}
)
3 changes: 3 additions & 0 deletions components/page_state.py
@@ -11,6 +11,9 @@ def set_initial_state():
# General #
###########

if 'sidebar_state' not in st.session_state:
st.session_state['sidebar_state'] = 'expanded'

if "ollama_endpoint" not in st.session_state:
st.session_state["ollama_endpoint"] = "http://localhost:11434"

4 changes: 0 additions & 4 deletions components/tabs/github_repo.py
@@ -57,19 +57,15 @@ def github_repo():
hf_embedding_model = None

if embedding_model == None:
# logs.log.info("No embedding model set; using defaults...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Default (bge-large-en-v1.5)":
# logs.log.info("Using default embedding model...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)":
# logs.log.info("Using the Salesforce embedding model; RIP yer VRAM...")
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"

if embedding_model == "Other":
# logs.log.info("Using a user-provided embedding model...")
hf_embedding_model = st.session_state["other_embedding_model"]

try:
5 changes: 2 additions & 3 deletions main.py
@@ -15,14 +15,13 @@ def generate_welcome_message(msg):
time.sleep(0.025) # This is blocking :(
yield char

### Setup Initial State
set_initial_state()

### Page Setup
set_page_config()
set_page_header()

### Setup Initial State
set_initial_state()

for msg in st.session_state["messages"]:
st.chat_message(msg["role"]).write(msg["content"])
# st.chat_message(msg["role"]).write_stream(generate_welcome_message(msg['content']))
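
Taken together, the page_state.py, page_config.py, and main.py changes above make the sidebar's initial state session-driven, which is why set_initial_state() now has to run before set_page_config(). A minimal standalone sketch of the resulting flow (Streamlit API as used in the diff; the inlined layout is illustrative only):

import streamlit as st

# Seed the session value first (what set_initial_state() now does)...
if "sidebar_state" not in st.session_state:
    st.session_state["sidebar_state"] = "expanded"

# ...so that page configuration can read it (what set_page_config() does).
st.set_page_config(
    page_title="Local RAG",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state=st.session_state["sidebar_state"],
    menu_items={
        "Get Help": "https://github.com/jonfairbanks/local-rag/discussions",
        "Report a bug": "https://github.com/jonfairbanks/local-rag/issues",
        "About": None,
    },
)
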
24 changes: 23 additions & 1 deletion utils/helpers.py
@@ -1,9 +1,12 @@
import os
import json
import requests
import subprocess

import streamlit as st

from exiftool import ExifToolHelper

import utils.logs as logs

###################################
@@ -24,10 +27,12 @@ def save_uploaded_file(uploaded_file: bytes, save_dir: str):
try:
if not os.path.exists(save_dir):
os.makedirs(save_dir)
logs.log.info(f"Directory {save_dir} did not exist so creating it")
with open(os.path.join(save_dir, uploaded_file.name), "wb") as f:
f.write(uploaded_file.getbuffer())
logs.log.info(f"Upload {uploaded_file.name} saved to disk")
except Exception as e:
logs.log.info(f"Error saving upload to disk: {e}")
logs.log.error(f"Error saving upload to disk: {e}")


###################################
@@ -75,3 +80,20 @@ def clone_github_repo(repo: str):
else:
Exception(f"Failed to process GitHub repo {st.session_state['github_repo']}")
return False


###################################
#
# Extract File Metadata
#
###################################


def get_file_metadata(file_path):
"""Returns a dictionary containing various metadata for the specified file."""
try:
with ExifToolHelper() as et:
for d in et.get_metadata(file_path):
return json.dumps(d, indent=2)
except Exception:
pass
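
A quick usage sketch for the new helper. Note that pyexiftool is a wrapper around the external exiftool binary, which must be installed separately; the file path below is hypothetical:

from utils.helpers import get_file_metadata

# Returns a pretty-printed JSON string of ExifTool tags, or None if no
# metadata is found or extraction fails (the silent except above).
metadata = get_file_metadata("data/example.pdf")
if metadata:
    print(metadata)
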
10 changes: 8 additions & 2 deletions utils/llama_index.py
@@ -52,6 +52,7 @@ def create_service_context(
chunk_size=int(chunk_size),
# chunk_overlap=int(chunk_overlap),
)
logs.log.info(f"Service Context created successfully")
st.session_state["service_context"] = service_context
# Note: this may be redundant since service_context is returned
set_global_service_context(service_context)
@@ -83,15 +84,16 @@ def load_documents(data_dir: str):
try:
files = SimpleDirectoryReader(input_dir=data_dir, recursive=True)
documents = files.load_data(files)
# logs.log.info(f"Loaded {len(documents):,} documents")
logs.log.info(f"Loaded {len(documents):,} documents from files")
return documents
except Exception as err:
logs.log.error(f"Error creating data index: {err}")
return None
finally:
for file in os.scandir(data_dir):
if file.is_file() and not file.name.startswith("."):
if file.is_file() and not file.name.startswith(".gitkeep"): # TODO: Confirm syntax here
os.remove(file.path)
logs.log.info(f"Document loading complete; removing local file(s)")


###################################
@@ -126,12 +128,16 @@ def create_query_engine(_documents, _service_context):
documents=_documents, service_context=_service_context, show_progress=True
)

logs.log.info("Index created from loaded documents successfully")

query_engine = index.as_query_engine(
similarity_top_k=st.session_state["top_k"],
service_context=_service_context,
streaming=True,
)

logs.log.info("Query Engine created successfully")

st.session_state["query_engine"] = query_engine

return query_engine
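
On the TODO flagged in load_documents above: startswith(".gitkeep") keeps any file whose name begins with ".gitkeep" but now deletes other dotfiles, whereas the previous startswith(".") check skipped every dotfile. If the intent is simply to preserve the .gitkeep placeholder while clearing uploaded files, an exact-name comparison is unambiguous. A sketch of that alternative (a suggestion, not part of the commit; the function name is hypothetical):

import os

def clear_uploads(data_dir: str) -> None:
    """Remove uploaded files from data_dir while keeping the .gitkeep placeholder."""
    for entry in os.scandir(data_dir):
        if entry.is_file() and entry.name != ".gitkeep":
            os.remove(entry.path)
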