Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More Logging #20

Merged
merged 3 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ llama-index-llms-ollama = "*"
llama-index-embeddings-huggingface = "*"
pycryptodome = "*"
nbconvert = "*"
pyexiftool = "*"

[dev-packages]

Expand Down
97 changes: 53 additions & 44 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion components/page_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,10 @@ def set_page_config():
page_title="Local RAG",
page_icon="📚",
layout="wide",
initial_sidebar_state="expanded",
initial_sidebar_state=st.session_state['sidebar_state'],
menu_items={
'Get Help': 'https://github.com/jonfairbanks/local-rag/discussions',
'Report a bug': "https://github.com/jonfairbanks/local-rag/issues",
'About': None
}
)
3 changes: 3 additions & 0 deletions components/page_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ def set_initial_state():
# General #
###########

if 'sidebar_state' not in st.session_state:
st.session_state['sidebar_state'] = 'expanded'

if "ollama_endpoint" not in st.session_state:
st.session_state["ollama_endpoint"] = "http://localhost:11434"

Expand Down
4 changes: 0 additions & 4 deletions components/tabs/github_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,15 @@ def github_repo():
hf_embedding_model = None

if embedding_model == None:
# logs.log.info("No embedding model set; using defaults...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Default (bge-large-en-v1.5)":
# logs.log.info("Using default embedding model...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)":
# logs.log.info("Using the Salesforce embedding model; RIP yer VRAM...")
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"

if embedding_model == "Other":
# logs.log.info("Using a user-provided embedding model...")
hf_embedding_model = st.session_state["other_embedding_model"]

try:
Expand Down
5 changes: 2 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@ def generate_welcome_message(msg):
time.sleep(0.025) # This is blocking :(
yield char

### Setup Initial State
set_initial_state()

### Page Setup
set_page_config()
set_page_header()

### Setup Initial State
set_initial_state()

for msg in st.session_state["messages"]:
st.chat_message(msg["role"]).write(msg["content"])
# st.chat_message(msg["role"]).write_stream(generate_welcome_message(msg['content']))
Expand Down
24 changes: 23 additions & 1 deletion utils/helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import os
import json
import requests
import subprocess

import streamlit as st

from exiftool import ExifToolHelper

import utils.logs as logs

###################################
Expand All @@ -24,10 +27,12 @@ def save_uploaded_file(uploaded_file: bytes, save_dir: str):
try:
if not os.path.exists(save_dir):
os.makedirs(save_dir)
logs.log.info(f"Directory {save_dir} did not exist so creating it")
with open(os.path.join(save_dir, uploaded_file.name), "wb") as f:
f.write(uploaded_file.getbuffer())
logs.log.info(f"Upload {uploaded_file.name} saved to disk")
except Exception as e:
logs.log.info(f"Error saving upload to disk: {e}")
logs.log.error(f"Error saving upload to disk: {e}")


###################################
Expand Down Expand Up @@ -75,3 +80,20 @@ def clone_github_repo(repo: str):
else:
Exception(f"Failed to process GitHub repo {st.session_state['github_repo']}")
return False


###################################
#
# Extract File Metadata
#
###################################


def get_file_metadata(file_path):
    """Return file metadata extracted via ExifTool as a JSON string.

    Parameters:
        file_path: Path to the file whose metadata should be read.

    Returns:
        A pretty-printed JSON string of the first metadata record for
        the file, or None if metadata extraction fails for any reason.
        (Note: the return value is a JSON *string*, not a dict.)
    """
    try:
        with ExifToolHelper() as et:
            # get_metadata() yields one record per input path; we pass a
            # single path, so the first record is the one we want.
            for d in et.get_metadata(file_path):
                return json.dumps(d, indent=2)
    except Exception as err:
        # Metadata is best-effort — log the failure instead of silently
        # swallowing it (matches the error-logging style used elsewhere
        # in this module), and fall through to return None.
        logs.log.error(f"Error extracting metadata from {file_path}: {err}")
    return None
10 changes: 8 additions & 2 deletions utils/llama_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def create_service_context(
chunk_size=int(chunk_size),
# chunk_overlap=int(chunk_overlap),
)
logs.log.info(f"Service Context created successfully")
st.session_state["service_context"] = service_context
# Note: this may be redundant since service_context is returned
set_global_service_context(service_context)
Expand Down Expand Up @@ -83,15 +84,16 @@ def load_documents(data_dir: str):
try:
files = SimpleDirectoryReader(input_dir=data_dir, recursive=True)
documents = files.load_data(files)
# logs.log.info(f"Loaded {len(documents):,} documents")
logs.log.info(f"Loaded {len(documents):,} documents from files")
return documents
except Exception as err:
logs.log.error(f"Error creating data index: {err}")
return None
finally:
for file in os.scandir(data_dir):
if file.is_file() and not file.name.startswith("."):
if file.is_file() and not file.name.startswith(".gitkeep"): # TODO: Confirm syntax here
os.remove(file.path)
logs.log.info(f"Document loading complete; removing local file(s)")


###################################
Expand Down Expand Up @@ -126,12 +128,16 @@ def create_query_engine(_documents, _service_context):
documents=_documents, service_context=_service_context, show_progress=True
)

logs.log.info("Index created from loaded documents successfully")

query_engine = index.as_query_engine(
similarity_top_k=st.session_state["top_k"],
service_context=_service_context,
streaming=True,
)

logs.log.info("Query Engine created successfully")

st.session_state["query_engine"] = query_engine

return query_engine
Expand Down
Loading