diff --git a/.gitignore b/.gitignore index e1948427..21d4e6e0 100644 Binary files a/.gitignore and b/.gitignore differ diff --git a/App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py b/App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py index 0ea424d7..a1738798 100644 --- a/App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py +++ b/App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py @@ -259,7 +259,7 @@ def run_geval(transcript: str, summary: str, api_key: str, api_name: str = None, def create_geval_tab(): - with gr.Tab("G-Eval"): + with gr.Tab("G-Eval", id="g-eval"): gr.Markdown("# G-Eval Summarization Evaluation") with gr.Row(): with gr.Column(): diff --git a/App_Function_Libraries/DB/RAG_QA_Chat_DB.py b/App_Function_Libraries/DB/RAG_QA_Chat_DB.py new file mode 100644 index 00000000..4e93789d --- /dev/null +++ b/App_Function_Libraries/DB/RAG_QA_Chat_DB.py @@ -0,0 +1,484 @@ +# RAG_QA_Chat_DB.py +# Description: This file contains the database operations for the RAG QA Chat + Notes system. 
+# +# Imports +import configparser +import logging +import re +import sqlite3 +import uuid +from contextlib import contextmanager +from datetime import datetime + +from App_Function_Libraries.Utils.Utils import get_project_relative_path, get_database_path + +# +# External Imports +# +# Local Imports +# +######################################################################################################################## +# +# Functions: +# Construct the path to the config file +config_path = get_project_relative_path('Config_Files/config.txt') + +# Read the config file +config = configparser.ConfigParser() +config.read(config_path) + +# Get the SQLite path from the config, or use the default if not specified +if config.has_section('Database') and config.has_option('Database', 'rag_qa_db_path'): + rag_qa_db_path = config.get('Database', 'rag_qa_db_path') +else: + rag_qa_db_path = get_database_path('RAG_QA_Chat.db') + +print(f"RAG QA Chat Database path: {rag_qa_db_path}") + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Database schema +SCHEMA_SQL = ''' +-- Table for storing chat messages +CREATE TABLE IF NOT EXISTS rag_qa_chats ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + conversation_id TEXT NOT NULL, + timestamp DATETIME NOT NULL, + role TEXT NOT NULL, + content TEXT NOT NULL +); + +-- Table for storing conversation metadata +CREATE TABLE IF NOT EXISTS conversation_metadata ( + conversation_id TEXT PRIMARY KEY, + created_at DATETIME NOT NULL, + last_updated DATETIME NOT NULL, + title TEXT NOT NULL +); + +-- Table for storing keywords +CREATE TABLE IF NOT EXISTS rag_qa_keywords ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + keyword TEXT NOT NULL UNIQUE +); + +-- Table for linking keywords to conversations +CREATE TABLE IF NOT EXISTS rag_qa_conversation_keywords ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + conversation_id TEXT NOT NULL, + keyword_id INTEGER NOT NULL, + FOREIGN KEY (conversation_id) REFERENCES 
conversation_metadata(conversation_id), + FOREIGN KEY (keyword_id) REFERENCES rag_qa_keywords(id) +); + +-- Table for storing keyword collections +CREATE TABLE IF NOT EXISTS rag_qa_keyword_collections ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + parent_id INTEGER, + FOREIGN KEY (parent_id) REFERENCES rag_qa_keyword_collections(id) +); + +-- Table for linking keywords to collections +CREATE TABLE IF NOT EXISTS rag_qa_collection_keywords ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + collection_id INTEGER NOT NULL, + keyword_id INTEGER NOT NULL, + FOREIGN KEY (collection_id) REFERENCES rag_qa_keyword_collections(id), + FOREIGN KEY (keyword_id) REFERENCES rag_qa_keywords(id) +); + +-- Table for storing notes +CREATE TABLE IF NOT EXISTS rag_qa_notes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + conversation_id TEXT NOT NULL, + content TEXT NOT NULL, + timestamp DATETIME NOT NULL, + FOREIGN KEY (conversation_id) REFERENCES conversation_metadata(conversation_id) +); + +-- Table for linking notes to keywords +CREATE TABLE IF NOT EXISTS rag_qa_note_keywords ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + note_id INTEGER NOT NULL, + keyword_id INTEGER NOT NULL, + FOREIGN KEY (note_id) REFERENCES rag_qa_notes(id), + FOREIGN KEY (keyword_id) REFERENCES rag_qa_keywords(id) +); + +-- Indexes for improved query performance +CREATE INDEX IF NOT EXISTS idx_rag_qa_chats_conversation_id ON rag_qa_chats(conversation_id); +CREATE INDEX IF NOT EXISTS idx_rag_qa_chats_timestamp ON rag_qa_chats(timestamp); +CREATE INDEX IF NOT EXISTS idx_rag_qa_keywords_keyword ON rag_qa_keywords(keyword); +CREATE INDEX IF NOT EXISTS idx_rag_qa_conversation_keywords_conversation_id ON rag_qa_conversation_keywords(conversation_id); +CREATE INDEX IF NOT EXISTS idx_rag_qa_conversation_keywords_keyword_id ON rag_qa_conversation_keywords(keyword_id); +CREATE INDEX IF NOT EXISTS idx_rag_qa_keyword_collections_parent_id ON rag_qa_keyword_collections(parent_id); +CREATE INDEX IF NOT EXISTS 
idx_rag_qa_collection_keywords_collection_id ON rag_qa_collection_keywords(collection_id); +CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_keyword_id ON rag_qa_collection_keywords(keyword_id); + +-- Full-text search virtual table for chat content +CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_chats_fts USING fts5(conversation_id, timestamp, role, content); + +-- Trigger to keep the FTS table up to date +CREATE TRIGGER IF NOT EXISTS rag_qa_chats_ai AFTER INSERT ON rag_qa_chats BEGIN + INSERT INTO rag_qa_chats_fts(conversation_id, timestamp, role, content) VALUES (new.conversation_id, new.timestamp, new.role, new.content); +END; +''' + +# Database connection management +@contextmanager +def get_db_connection(): + conn = sqlite3.connect(rag_qa_db_path) + try: + yield conn + finally: + conn.close() + +@contextmanager +def transaction(): + with get_db_connection() as conn: + try: + yield conn + conn.commit() + except Exception: + conn.rollback() + raise + +def execute_query(query, params=None, conn=None): + if conn: + cursor = conn.cursor() + if params: + cursor.execute(query, params) + else: + cursor.execute(query) + return cursor.fetchall() + else: + with get_db_connection() as conn: + cursor = conn.cursor() + if params: + cursor.execute(query, params) + else: + cursor.execute(query) + conn.commit() + return cursor.fetchall() + +def create_tables(): + with get_db_connection() as conn: + conn.executescript(SCHEMA_SQL) + logger.info("All RAG QA Chat tables created successfully") + +# Initialize the database +create_tables() + +# Input validation +def validate_keyword(keyword): + if not isinstance(keyword, str): + raise ValueError("Keyword must be a string") + if not keyword.strip(): + raise ValueError("Keyword cannot be empty or just whitespace") + if len(keyword) > 100: + raise ValueError("Keyword is too long (max 100 characters)") + if not re.match(r'^[a-zA-Z0-9\s\-_]+$', keyword): + raise ValueError("Keyword contains invalid characters") + return 
keyword.strip() + +def validate_collection_name(name): + if not isinstance(name, str): + raise ValueError("Collection name must be a string") + if not name.strip(): + raise ValueError("Collection name cannot be empty or just whitespace") + if len(name) > 100: + raise ValueError("Collection name is too long (max 100 characters)") + if not re.match(r'^[a-zA-Z0-9\s\-_]+$', name): + raise ValueError("Collection name contains invalid characters") + return name.strip() + +# Core functions +def add_keyword(keyword, conn=None): + try: + validated_keyword = validate_keyword(keyword) + query = "INSERT OR IGNORE INTO rag_qa_keywords (keyword) VALUES (?)" + execute_query(query, (validated_keyword,), conn) + logger.info(f"Keyword '{validated_keyword}' added successfully") + except ValueError as e: + logger.error(f"Invalid keyword: {e}") + raise + except Exception as e: + logger.error(f"Error adding keyword '{keyword}': {e}") + raise + +def create_keyword_collection(name, parent_id=None): + try: + validated_name = validate_collection_name(name) + query = "INSERT INTO rag_qa_keyword_collections (name, parent_id) VALUES (?, ?)" + execute_query(query, (validated_name, parent_id)) + logger.info(f"Keyword collection '{validated_name}' created successfully") + except ValueError as e: + logger.error(f"Invalid collection name: {e}") + raise + except Exception as e: + logger.error(f"Error creating keyword collection '{name}': {e}") + raise + +def add_keyword_to_collection(collection_name, keyword): + try: + validated_collection_name = validate_collection_name(collection_name) + validated_keyword = validate_keyword(keyword) + + with transaction() as conn: + add_keyword(validated_keyword, conn) + + query = ''' + INSERT INTO rag_qa_collection_keywords (collection_id, keyword_id) + SELECT c.id, k.id + FROM rag_qa_keyword_collections c, rag_qa_keywords k + WHERE c.name = ? AND k.keyword = ? 
+ ''' + execute_query(query, (validated_collection_name, validated_keyword), conn) + + logger.info(f"Keyword '{validated_keyword}' added to collection '{validated_collection_name}' successfully") + except ValueError as e: + logger.error(f"Invalid input: {e}") + raise + except Exception as e: + logger.error(f"Error adding keyword '{keyword}' to collection '{collection_name}': {e}") + raise + +def add_keywords_to_conversation(conversation_id, keywords): + if not isinstance(keywords, (list, tuple)): + raise ValueError("Keywords must be a list or tuple") + try: + with transaction() as conn: + for keyword in keywords: + validated_keyword = validate_keyword(keyword) + add_keyword(validated_keyword, conn) + + query = ''' + INSERT INTO rag_qa_conversation_keywords (conversation_id, keyword_id) + SELECT ?, id FROM rag_qa_keywords WHERE keyword = ? + ''' + execute_query(query, (conversation_id, validated_keyword), conn) + + logger.info(f"Keywords added to conversation '{conversation_id}' successfully") + except ValueError as e: + logger.error(f"Invalid keyword: {e}") + raise + except Exception as e: + logger.error(f"Error adding keywords to conversation '{conversation_id}': {e}") + raise + +def get_keywords_for_conversation(conversation_id): + try: + query = ''' + SELECT k.keyword + FROM rag_qa_keywords k + JOIN rag_qa_conversation_keywords ck ON k.id = ck.keyword_id + WHERE ck.conversation_id = ? + ''' + result = execute_query(query, (conversation_id,)) + keywords = [row[0] for row in result] + logger.info(f"Retrieved {len(keywords)} keywords for conversation '{conversation_id}'") + return keywords + except Exception as e: + logger.error(f"Error getting keywords for conversation '{conversation_id}': {e}") + raise + +def get_keywords_for_collection(collection_name): + try: + query = ''' + SELECT k.keyword + FROM rag_qa_keywords k + JOIN rag_qa_collection_keywords ck ON k.id = ck.keyword_id + JOIN rag_qa_keyword_collections c ON ck.collection_id = c.id + WHERE c.name = ? 
+ ''' + result = execute_query(query, (collection_name,)) + keywords = [row[0] for row in result] + logger.info(f"Retrieved {len(keywords)} keywords for collection '{collection_name}'") + return keywords + except Exception as e: + logger.error(f"Error getting keywords for collection '{collection_name}': {e}") + raise + +def save_notes(conversation_id, content): + """Save notes to the database.""" + try: + query = "INSERT INTO rag_qa_notes (conversation_id, content, timestamp) VALUES (?, ?, ?)" + timestamp = datetime.now().isoformat() + execute_query(query, (conversation_id, content, timestamp)) + logger.info(f"Notes saved for conversation '{conversation_id}'") + except Exception as e: + logger.error(f"Error saving notes for conversation '{conversation_id}': {e}") + raise + +def get_notes(conversation_id): + """Retrieve notes for a given conversation.""" + try: + query = "SELECT content FROM rag_qa_notes WHERE conversation_id = ?" + result = execute_query(query, (conversation_id,)) + notes = [row[0] for row in result] + logger.info(f"Retrieved {len(notes)} notes for conversation '{conversation_id}'") + return notes + except Exception as e: + logger.error(f"Error getting notes for conversation '{conversation_id}': {e}") + raise + +def clear_notes(conversation_id): + """Clear all notes for a given conversation.""" + try: + query = "DELETE FROM rag_qa_notes WHERE conversation_id = ?" + execute_query(query, (conversation_id,)) + logger.info(f"Cleared notes for conversation '{conversation_id}'") + except Exception as e: + logger.error(f"Error clearing notes for conversation '{conversation_id}': {e}") + raise + +def add_keywords_to_note(note_id, keywords): + """Associate keywords with a note.""" + try: + with transaction() as conn: + for keyword in keywords: + validated_keyword = validate_keyword(keyword) + add_keyword(validated_keyword, conn) + + # Retrieve the keyword ID + query = "SELECT id FROM rag_qa_keywords WHERE keyword = ?" 
+ result = execute_query(query, (validated_keyword,), conn) + if result: + keyword_id = result[0][0] + else: + raise Exception(f"Keyword '{validated_keyword}' not found after insertion") + + # Link the note and keyword + query = "INSERT INTO rag_qa_note_keywords (note_id, keyword_id) VALUES (?, ?)" + execute_query(query, (note_id, keyword_id), conn) + + logger.info(f"Keywords added to note ID '{note_id}' successfully") + except Exception as e: + logger.error(f"Error adding keywords to note ID '{note_id}': {e}") + raise + +def get_keywords_for_note(note_id): + """Retrieve keywords associated with a given note.""" + try: + query = ''' + SELECT k.keyword + FROM rag_qa_keywords k + JOIN rag_qa_note_keywords nk ON k.id = nk.keyword_id + WHERE nk.note_id = ? + ''' + result = execute_query(query, (note_id,)) + keywords = [row[0] for row in result] + logger.info(f"Retrieved {len(keywords)} keywords for note ID '{note_id}'") + return keywords + except Exception as e: + logger.error(f"Error getting keywords for note ID '{note_id}': {e}") + raise + +def clear_keywords_from_note(note_id): + """Clear all keywords from a given note.""" + try: + query = "DELETE FROM rag_qa_note_keywords WHERE note_id = ?" + execute_query(query, (note_id,)) + logger.info(f"Cleared keywords for note ID '{note_id}'") + except Exception as e: + logger.error(f"Error clearing keywords for note ID '{note_id}': {e}") + raise + +def save_message(conversation_id, role, content): + try: + timestamp = datetime.now().isoformat() + query = "INSERT INTO rag_qa_chats (conversation_id, timestamp, role, content) VALUES (?, ?, ?, ?)" + execute_query(query, (conversation_id, timestamp, role, content)) + + # Update last_updated in conversation_metadata + update_query = "UPDATE conversation_metadata SET last_updated = ? WHERE conversation_id = ?" 
+ execute_query(update_query, (timestamp, conversation_id)) + + logger.info(f"Message saved for conversation '{conversation_id}'") + except Exception as e: + logger.error(f"Error saving message for conversation '{conversation_id}': {e}") + raise + +def start_new_conversation(title="Untitled Conversation"): + try: + conversation_id = str(uuid.uuid4()) + query = "INSERT INTO conversation_metadata (conversation_id, created_at, last_updated, title) VALUES (?, ?, ?, ?)" + now = datetime.now().isoformat() + execute_query(query, (conversation_id, now, now, title)) + logger.info(f"New conversation '{conversation_id}' started with title '{title}'") + return conversation_id + except Exception as e: + logger.error(f"Error starting new conversation: {e}") + raise + +# Pagination helper function +def get_paginated_results(query, params=None, page=1, page_size=20): + try: + offset = (page - 1) * page_size + paginated_query = f"{query} LIMIT ? OFFSET ?" + if params: + paginated_params = params + (page_size, offset) + else: + paginated_params = (page_size, offset) + + result = execute_query(paginated_query, paginated_params) + + count_query = f"SELECT COUNT(*) FROM ({query}) AS total" + count_params = params if params else () + + total_count = execute_query(count_query, count_params)[0][0] + + total_pages = (total_count + page_size - 1) // page_size + + logger.info(f"Retrieved page {page} of {total_pages} (total items: {total_count})") + return result, total_pages, total_count + except Exception as e: + logger.error(f"Error retrieving paginated results: {e}") + raise + +def get_all_collections(page=1, page_size=20): + try: + query = "SELECT name FROM rag_qa_keyword_collections" + results, total_pages, total_count = get_paginated_results(query, page=page, page_size=page_size) + collections = [row[0] for row in results] + logger.info(f"Retrieved {len(collections)} keyword collections (page {page} of {total_pages})") + return collections, total_pages, total_count + except Exception 
as e: + logger.error(f"Error getting collections: {e}") + raise + +def search_conversations_by_keywords(keywords, page=1, page_size=20): + try: + placeholders = ','.join(['?' for _ in keywords]) + query = f''' + SELECT DISTINCT cm.conversation_id, cm.title + FROM conversation_metadata cm + JOIN rag_qa_conversation_keywords ck ON cm.conversation_id = ck.conversation_id + JOIN rag_qa_keywords k ON ck.keyword_id = k.id + WHERE k.keyword IN ({placeholders}) + ''' + results, total_pages, total_count = get_paginated_results(query, tuple(keywords), page, page_size) + logger.info( + f"Found {total_count} conversations matching keywords: {', '.join(keywords)} (page {page} of {total_pages})") + return results, total_pages, total_count + except Exception as e: + logger.error(f"Error searching conversations by keywords {keywords}: {e}") + raise + +def load_chat_history(conversation_id, page=1, page_size=50): + try: + query = "SELECT role, content FROM rag_qa_chats WHERE conversation_id = ? ORDER BY timestamp" + results, total_pages, total_count = get_paginated_results(query, (conversation_id,), page, page_size) + logger.info( + f"Loaded {len(results)} messages for conversation '{conversation_id}' (page {page} of {total_pages})") + return results, total_pages, total_count + except Exception as e: + logger.error(f"Error loading chat history for conversation '{conversation_id}': {e}") + raise + +# +# End of RAG_QA_Chat_DB.py +#################################################################################################### diff --git a/App_Function_Libraries/Gradio_Related.py b/App_Function_Libraries/Gradio_Related.py index 91ec99d2..7f6243f0 100644 --- a/App_Function_Libraries/Gradio_Related.py +++ b/App_Function_Libraries/Gradio_Related.py @@ -19,8 +19,7 @@ from App_Function_Libraries.Gradio_UI.Arxiv_tab import create_arxiv_tab from App_Function_Libraries.Gradio_UI.Audio_ingestion_tab import create_audio_processing_tab from App_Function_Libraries.Gradio_UI.Book_Ingestion_tab 
import create_import_book_tab -from App_Function_Libraries.Gradio_UI.Character_Chat_tab import create_character_card_interaction_tab, \ - create_character_card_interaction_tab, create_character_chat_mgmt_tab, create_custom_character_card_tab, \ +from App_Function_Libraries.Gradio_UI.Character_Chat_tab import create_character_card_interaction_tab, create_character_chat_mgmt_tab, create_custom_character_card_tab, \ create_character_card_validation_tab, create_export_characters_tab from App_Function_Libraries.Gradio_UI.Character_interaction_tab import create_narrator_controlled_conversation_tab, \ create_multiple_character_chat_tab @@ -68,6 +67,7 @@ from App_Function_Libraries.Gradio_UI.Evaluations_Benchmarks_tab import create_geval_tab, create_infinite_bench_tab #from App_Function_Libraries.Local_LLM.Local_LLM_huggingface import create_huggingface_tab from App_Function_Libraries.Local_LLM.Local_LLM_ollama import create_ollama_tab +from App_Function_Libraries.Gradio_UI.RAG_QA_Chat_Notes import create_rag_qa_chat_notes_tab # ####################################################################################################################### @@ -270,7 +270,7 @@ def launch_ui(share_public=None, server_mode=False): gr.Markdown(f"# tl/dw: Your LLM-powered Research Multi-tool") gr.Markdown(f"(Using {db_type.capitalize()} Database)") with gr.Tabs(): - with gr.TabItem("Transcription / Summarization / Ingestion"): + with gr.TabItem("Transcription / Summarization / Ingestion", id="ingestion-grouping"): with gr.Tabs(): create_video_transcription_tab() create_audio_processing_tab() @@ -285,17 +285,18 @@ def launch_ui(share_public=None, server_mode=False): create_live_recording_tab() create_arxiv_tab() - with gr.TabItem("Text Search "): + with gr.TabItem("Text Search", id="text search"): create_search_tab() create_search_summaries_tab() + with gr.TabItem("RAG Chat+Notes", id="RAG Chat Notes group"): + create_rag_qa_chat_notes_tab() - with gr.TabItem("RAG Search"): + with 
gr.TabItem("RAG Search", id="RAG Search grou"): create_rag_tab() create_rag_qa_chat_tab() - - with gr.TabItem("Chat with an LLM"): + with gr.TabItem("Chat with an LLM", id="LLM Chat group"): create_chat_interface() create_chat_interface_stacked() create_chat_interface_multi_api() @@ -305,7 +306,7 @@ def launch_ui(share_public=None, server_mode=False): chat_workflows_tab() - with gr.TabItem("Character Chat"): + with gr.TabItem("Character Chat", id="character chat group"): with gr.Tabs(): create_character_card_interaction_tab() create_character_chat_mgmt_tab() @@ -316,7 +317,7 @@ def launch_ui(share_public=None, server_mode=False): create_export_characters_tab() - with gr.TabItem("View DB Items"): + with gr.TabItem("View DB Items", id="view db items group"): # This one works create_view_all_with_versions_tab() # This one is WIP @@ -324,7 +325,7 @@ def launch_ui(share_public=None, server_mode=False): create_prompt_view_tab() - with gr.TabItem("Prompts"): + with gr.TabItem("Prompts", id='view prompts group'): create_prompt_view_tab() create_prompt_search_tab() create_prompt_edit_tab() @@ -332,7 +333,7 @@ def launch_ui(share_public=None, server_mode=False): create_prompt_suggestion_tab() - with gr.TabItem("Manage / Edit Existing Items"): + with gr.TabItem("Manage / Edit Existing Items", id="manage group"): create_media_edit_tab() create_manage_items_tab() create_media_edit_and_clone_tab() @@ -340,32 +341,31 @@ def launch_ui(share_public=None, server_mode=False): #create_compare_transcripts_tab() - with gr.TabItem("Embeddings Management"): + with gr.TabItem("Embeddings Management", id="embeddings group"): create_embeddings_tab() create_view_embeddings_tab() create_purge_embeddings_tab() - with gr.TabItem("Writing Tools"): - with gr.Tabs(): - from App_Function_Libraries.Gradio_UI.Writing_tab import create_document_feedback_tab - create_document_feedback_tab() - from App_Function_Libraries.Gradio_UI.Writing_tab import create_grammar_style_check_tab - 
create_grammar_style_check_tab() - from App_Function_Libraries.Gradio_UI.Writing_tab import create_tone_adjustment_tab - create_tone_adjustment_tab() - from App_Function_Libraries.Gradio_UI.Writing_tab import create_creative_writing_tab - create_creative_writing_tab() - from App_Function_Libraries.Gradio_UI.Writing_tab import create_mikupad_tab - create_mikupad_tab() - - - with gr.TabItem("Keywords"): + with gr.TabItem("Writing Tools", id="writing_tools group"): + from App_Function_Libraries.Gradio_UI.Writing_tab import create_document_feedback_tab + create_document_feedback_tab() + from App_Function_Libraries.Gradio_UI.Writing_tab import create_grammar_style_check_tab + create_grammar_style_check_tab() + from App_Function_Libraries.Gradio_UI.Writing_tab import create_tone_adjustment_tab + create_tone_adjustment_tab() + from App_Function_Libraries.Gradio_UI.Writing_tab import create_creative_writing_tab + create_creative_writing_tab() + from App_Function_Libraries.Gradio_UI.Writing_tab import create_mikupad_tab + create_mikupad_tab() + + + with gr.TabItem("Keywords", id="keywords group"): create_view_keywords_tab() create_add_keyword_tab() create_delete_keyword_tab() create_export_keywords_tab() - with gr.TabItem("Import"): + with gr.TabItem("Import", id="import group"): create_import_item_tab() create_import_obsidian_vault_tab() create_import_single_prompt_tab() @@ -373,40 +373,40 @@ def launch_ui(share_public=None, server_mode=False): create_mediawiki_import_tab() create_mediawiki_config_tab() - with gr.TabItem("Export"): + with gr.TabItem("Export", id="export group"): create_export_tab() - with gr.TabItem("Backup Management"): + with gr.TabItem("Backup Management", id="backup group"): create_backup_tab() create_view_backups_tab() create_restore_backup_tab() - with gr.TabItem("Utilities"): + with gr.TabItem("Utilities", id="util group"): create_utilities_yt_video_tab() create_utilities_yt_audio_tab() create_utilities_yt_timestamp_tab() - with gr.TabItem("Local 
LLM"): + with gr.TabItem("Local LLM", id="local llm group"): create_chat_with_llamafile_tab() create_ollama_tab() #create_huggingface_tab() - with gr.TabItem("Trashcan"): + with gr.TabItem("Trashcan", id="trashcan group"): create_search_and_mark_trash_tab() create_view_trash_tab() create_delete_trash_tab() create_empty_trash_tab() - with gr.TabItem("Evaluations"): + with gr.TabItem("Evaluations", id="eval"): create_geval_tab() create_infinite_bench_tab() # FIXME #create_mmlu_pro_tab() - with gr.TabItem("Introduction/Help"): + with gr.TabItem("Introduction/Help", id="introduction group"): create_introduction_tab() - with gr.TabItem("Config Editor"): + with gr.TabItem("Config Editor", id="config group"): create_config_editor_tab() # Launch the interface diff --git a/App_Function_Libraries/Gradio_UI/RAG_QA_Chat_Notes.py b/App_Function_Libraries/Gradio_UI/RAG_QA_Chat_Notes.py new file mode 100644 index 00000000..de03932d --- /dev/null +++ b/App_Function_Libraries/Gradio_UI/RAG_QA_Chat_Notes.py @@ -0,0 +1,243 @@ +# RAG_QA_Chat_Notes.py +# Description: This file contains the code for the RAG QA Chat Notes tab in the RAG QA Chat application. 
+# +# Imports +import logging +# External Imports +import gradio as gr +# +# Local Imports +from App_Function_Libraries.DB.RAG_QA_Chat_DB import save_message, add_keywords_to_conversation, \ + search_conversations_by_keywords, load_chat_history, save_notes, get_notes, clear_notes, \ + add_keywords_to_note, execute_query, start_new_conversation +from App_Function_Libraries.RAG.RAG_QA_Chat import rag_qa_chat +# +#################################################################################################### +# +# Functions +def create_rag_qa_chat_notes_tab(): + with gr.TabItem("RAG QA Chat"): + gr.Markdown("# RAG QA Chat") + + state = gr.State({ + "conversation_id": None, + "page": 1, + "context_source": "Entire Media Database", + }) + + with gr.Row(): + with gr.Column(scale=1): + context_source = gr.Radio( + ["Entire Media Database", "Search Database", "Upload File"], + label="Context Source", + value="Entire Media Database" + ) + existing_file = gr.Dropdown(label="Select Existing File", choices=[], interactive=True) + file_page = gr.State(value=1) + with gr.Row(): + page_number = gr.Number(value=1, label="Page", precision=0) + page_size = gr.Number(value=20, label="Items per page", precision=0) + total_pages = gr.Number(label="Total Pages", interactive=False) + with gr.Row(): + prev_page_btn = gr.Button("Previous Page") + next_page_btn = gr.Button("Next Page") + page_info = gr.HTML("Page 1") + + search_query = gr.Textbox(label="Search Query", visible=False) + search_button = gr.Button("Search", visible=False) + search_results = gr.Dropdown(label="Search Results", choices=[], visible=False) + file_upload = gr.File( + label="Upload File", + visible=False, + file_types=["txt", "pdf", "epub", "md", "rtf", "json", "csv"] + ) + convert_to_text = gr.Checkbox(label="Convert to plain text", visible=False) + keywords = gr.Textbox(label="Keywords (comma-separated)", visible=False) + with gr.Column(scale=1): + api_choice = gr.Dropdown( + choices=["Local-LLM", "OpenAI", 
"Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", + "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"], + label="Select API for RAG", + value="OpenAI" + ) + use_query_rewriting = gr.Checkbox(label="Use Query Rewriting", value=True) + + # FIXME - add load conversations button + load_conversation = gr.Dropdown(label="Load Conversation", choices=[]) + new_conversation = gr.Button("New Conversation") + conversation_title = gr.Textbox(label="Conversation Title", + placeholder="Enter a title for the new conversation") + + with gr.Row(): + with gr.Column(scale=2): + chatbot = gr.Chatbot(height=500) + msg = gr.Textbox(label="Enter your message") + submit = gr.Button("Submit") + clear_chat = gr.Button("Clear Chat History") + + with gr.Column(scale=1): + notes = gr.TextArea(label="Notes", placeholder="Enter your notes here...", lines=20) + keywords_for_notes = gr.Textbox(label="Keywords for Notes (comma-separated)", + placeholder="Enter keywords for the note", visible=True) + save_notes_btn = gr.Button("Save Notes") # Renamed to avoid conflict + clear_notes_btn = gr.Button("Clear Notes") # Renamed to avoid conflict + + loading_indicator = gr.HTML(visible=False) + + def rag_qa_chat_wrapper(message, history, state, context_source, existing_file, search_results, file_upload, + convert_to_text, keywords, api_choice, use_query_rewriting): + try: + conversation_id = state.value["conversation_id"] + if not conversation_id: + conversation_id = start_new_conversation("Untitled Conversation") # Provide a title or handle accordingly + state = update_state(state, conversation_id=conversation_id) + + save_message(conversation_id, 'human', message) + + if keywords: + add_keywords_to_conversation(conversation_id, [kw.strip() for kw in keywords.split(',')]) + + # Implement your actual RAG logic here + response = "response"#rag_qa_chat(message, conversation_id, context_source, existing_file, search_results, + #file_upload, convert_to_text, 
api_choice, use_query_rewriting) + + save_message(conversation_id, 'ai', response) + + new_history = history + [(message, response)] + + logging.info(f"Successfully processed message for conversation '{conversation_id}'") + return new_history, "", gr.update(visible=False), state + + except Exception as e: + logging.error(f"Error in rag_qa_chat_wrapper: {e}") + gr.Error("An unexpected error occurred. Please try again later.") + return history, "", gr.update(visible=False), state + + def load_conversation_history(selected_conversation_id, page, page_size, state): + if selected_conversation_id: + history, total_pages_val, _ = load_chat_history(selected_conversation_id, page, page_size) + notes_content = get_notes(selected_conversation_id) # Retrieve notes here + updated_state = update_state(state, conversation_id=selected_conversation_id, page=page) + return history, total_pages_val, updated_state, "\n".join(notes_content) + return [], 1, state, "" + + def start_new_conversation_wrapper(title, state): + new_conversation_id = start_new_conversation(title if title else "Untitled Conversation") + return [], update_state(state, conversation_id=new_conversation_id, page=1) + + def update_state(state, **kwargs): + new_state = state.value.copy() + new_state.update(kwargs) + return new_state + + def update_page(direction, current_page, total_pages_val): + new_page = max(1, min(current_page + direction, total_pages_val)) + return new_page + + def update_context_source(choice): + return { + existing_file: gr.update(visible=choice == "Select Existing File"), + prev_page_btn: gr.update(visible=choice == "Search Database"), + next_page_btn: gr.update(visible=choice == "Search Database"), + page_info: gr.update(visible=choice == "Search Database"), + search_query: gr.update(visible=choice == "Search Database"), + search_button: gr.update(visible=choice == "Search Database"), + search_results: gr.update(visible=choice == "Search Database"), + file_upload: gr.update(visible=choice == 
"Upload File"), + convert_to_text: gr.update(visible=choice == "Upload File"), + keywords: gr.update(visible=choice == "Upload File") + } + + def perform_search(query): + try: + results = search_conversations_by_keywords([kw.strip() for kw in query.split()]) + return gr.update(choices=[f"{title} (ID: {id})" for id, title in results[0]]) + except Exception as e: + logging.error(f"Error performing search: {e}") + gr.Error(f"Error performing search: {str(e)}") + return gr.update(choices=[]) + + def clear_chat_history(): + return [], "" + + def save_notes_function(notes_content, keywords_content): + """Save the notes and associated keywords to the database.""" + conversation_id = state.value["conversation_id"] + if conversation_id and notes_content: + # Save the note + save_notes(conversation_id, notes_content) + + # Get the last inserted note ID + query = "SELECT id FROM rag_qa_notes WHERE conversation_id = ? ORDER BY timestamp DESC LIMIT 1" + note_id = execute_query(query, (conversation_id,))[0][0] + + if keywords_content: + add_keywords_to_note(note_id, [kw.strip() for kw in keywords_content.split(',')]) + + logging.info("Notes and keywords saved successfully!") + return notes_content + else: + logging.warning("No conversation ID or notes to save.") + return "" + + def clear_notes_function(): + """Clear notes for the current conversation.""" + conversation_id = state.value["conversation_id"] + if conversation_id: + clear_notes(conversation_id) + logging.info("Notes cleared successfully!") + return "" + + # Event handlers + submit.click( + rag_qa_chat_wrapper, + inputs=[msg, chatbot, state, context_source, existing_file, search_results, file_upload, + convert_to_text, keywords, api_choice, use_query_rewriting], + outputs=[chatbot, msg, loading_indicator, state] + ) + + load_conversation.change( + load_conversation_history, + inputs=[load_conversation, page_number, page_size, state], + outputs=[chatbot, total_pages, state, notes] + ) + + new_conversation.click( + 
start_new_conversation_wrapper, + inputs=[conversation_title, state], + outputs=[chatbot, state] + ) + + # Pagination Event handlers + prev_page_btn.click( + lambda current_page, total_pages_val: update_page(-1, current_page, total_pages_val), + inputs=[page_number, total_pages], + outputs=[page_number] + ) + + next_page_btn.click( + lambda current_page, total_pages_val: update_page(1, current_page, total_pages_val), + inputs=[page_number, total_pages], + outputs=[page_number] + ) + + context_source.change(update_context_source, inputs=[context_source], + outputs=[existing_file, prev_page_btn, next_page_btn, page_info, + search_query, search_button, search_results, + file_upload, convert_to_text, keywords]) + + search_button.click(perform_search, inputs=[search_query], outputs=[search_results]) + + clear_chat.click(clear_chat_history, outputs=[chatbot, msg]) + + save_notes_btn.click(save_notes_function, inputs=[notes, keywords_for_notes], outputs=[notes]) + clear_notes_btn.click(clear_notes_function, outputs=[notes]) + + return (context_source, existing_file, search_query, search_button, search_results, file_upload, + convert_to_text, keywords, api_choice, use_query_rewriting, chatbot, msg, submit, clear_chat, + notes, save_notes_btn, clear_notes_btn, load_conversation, new_conversation, conversation_title, + prev_page_btn, next_page_btn, page_number, page_size, total_pages) + +# +# End of RAG_QA_Chat_Notes.py +#################################################################################################### diff --git a/App_Function_Libraries/Gradio_UI/Utilities.py b/App_Function_Libraries/Gradio_UI/Utilities.py index 4a4ba3bd..f87a18fb 100644 --- a/App_Function_Libraries/Gradio_UI/Utilities.py +++ b/App_Function_Libraries/Gradio_UI/Utilities.py @@ -10,7 +10,7 @@ def create_utilities_yt_video_tab(): - with gr.Tab("YouTube Video Downloader"): + with gr.Tab("YouTube Video Downloader", id='youtube_dl'): with gr.Row(): with gr.Column(): gr.Markdown( @@ -28,7 +28,7 
@@ def create_utilities_yt_video_tab(): ) def create_utilities_yt_audio_tab(): - with gr.Tab("YouTube Audio Downloader"): + with gr.Tab("YouTube Audio Downloader", id="youtube audio downloader"): with gr.Row(): with gr.Column(): gr.Markdown( @@ -48,7 +48,7 @@ def create_utilities_yt_audio_tab(): ) def create_utilities_yt_timestamp_tab(): - with gr.Tab("YouTube Timestamp URL Generator"): + with gr.Tab("YouTube Timestamp URL Generator", id="timestamp-gen"): gr.Markdown("## Generate YouTube URL with Timestamp") with gr.Row(): with gr.Column(): diff --git a/App_Function_Libraries/Local_LLM/Local_LLM_Inference_Engine_Lib.py b/App_Function_Libraries/Local_LLM/Local_LLM_Inference_Engine_Lib.py index c5533625..0546903b 100644 --- a/App_Function_Libraries/Local_LLM/Local_LLM_Inference_Engine_Lib.py +++ b/App_Function_Libraries/Local_LLM/Local_LLM_Inference_Engine_Lib.py @@ -39,25 +39,25 @@ # LLM models information llm_models = { - "1": { + "Mistral-7B-Instruct-v0.2-Q8.llamafile": { "name": "Mistral-7B-Instruct-v0.2-Q8.llamafile", "url": "https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.2-llamafile/resolve/main/mistral-7b-instruct-v0.2.Q8_0.llamafile?download=true", "filename": "mistral-7b-instruct-v0.2.Q8_0.llamafile", "hash": "1ee6114517d2f770425c880e5abc443da36b193c82abec8e2885dd7ce3b9bfa6" }, - "2": { + "Samantha-Mistral-Instruct-7B-Bulleted-Notes-Q8.gguf": { "name": "Samantha-Mistral-Instruct-7B-Bulleted-Notes-Q8.gguf", "url": "https://huggingface.co/cognitivetech/samantha-mistral-instruct-7b-bulleted-notes-GGUF/resolve/main/samantha-mistral-instruct-7b-bulleted-notes.Q8_0.gguf?download=true", "filename": "samantha-mistral-instruct-7b-bulleted-notes.Q8_0.gguf", "hash": "6334c1ab56c565afd86535271fab52b03e67a5e31376946bce7bf5c144e847e4" }, - "3": { + "Phi-3-mini-128k-instruct-Q8_0.gguf": { "name": "Phi-3-mini-128k-instruct-Q8_0.gguf", "url": "https://huggingface.co/gaianet/Phi-3-mini-128k-instruct-GGUF/resolve/main/Phi-3-mini-128k-instruct-Q8_0.gguf?download=true", 
"filename": "Phi-3-mini-128k-instruct-Q8_0.gguf", "hash": "6817b66d1c3c59ab06822e9732f0e594eea44e64cae2110906eac9d17f75d193" }, - "4": { + "Meta-Llama-3-8B-Instruct.Q8_0.llamafile": { "name": "Meta-Llama-3-8B-Instruct.Q8_0.llamafile", "url": "https://huggingface.co/Mozilla/Meta-Llama-3-8B-Instruct-llamafile/resolve/main/Meta-Llama-3-8B-Instruct.Q8_0.llamafile?download=true", "filename": "Meta-Llama-3-8B-Instruct.Q8_0.llamafile", @@ -286,9 +286,6 @@ def start_llamafile( if numa_checked: command.append('--numa') - if server_timeout_value is not None: - command.extend(['--to', str(server_timeout_value)]) - if host_checked and host_value: command.extend(['--host', host_value]) diff --git a/App_Function_Libraries/RAG/RAG_QA_Chat.py b/App_Function_Libraries/RAG/RAG_QA_Chat.py index 24c60e52..fd3926cd 100644 --- a/App_Function_Libraries/RAG/RAG_QA_Chat.py +++ b/App_Function_Libraries/RAG/RAG_QA_Chat.py @@ -1,5 +1,5 @@ -# Podcast_tab.py -# Description: Gradio UI for ingesting podcasts into the database +# RAG_QA_Chat.py +# Description: Functions supporting the RAG QA Chat functionality # # Imports # diff --git a/Helper_Scripts/Installer_Scripts/Windows_Install_Update.bat b/Helper_Scripts/Installer_Scripts/Windows_Install_Update.bat index 3d545d27..ae02bc0c 100644 --- a/Helper_Scripts/Installer_Scripts/Windows_Install_Update.bat +++ b/Helper_Scripts/Installer_Scripts/Windows_Install_Update.bat @@ -155,6 +155,8 @@ powershell -Command "Expand-Archive -Path 'ffmpeg.zip' -DestinationPath 'ffmpeg' move ffmpeg\ffmpeg-master-latest-win64-gpl\bin\ffmpeg.exe . rmdir /s /q ffmpeg del ffmpeg.zip +mkdir .\Bin +move ffmpeg.exe .\Bin\ goto :eof :cleanup diff --git a/README.md b/README.md index 5223f342..2e79761f 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ - Build up a personal knowledge archive, then turn around and use the LLM to help you learn it at a pace your comfortable with. - Also writing tools! Grammar/Style checker, Tone Analyzer, Writing editor(feedback), and more.
- Full Character Chat Support - Create/Edit & Import/Export Character Cards, and chat with them. -#### The original scripts by `the-crypt-keeper` are available here: [scripts here](https://github.com/the-crypt-keeper/tldw/tree/main/tldw-original-scripts) +#### The original scripts by `the-crypt-keeper` for transcribing and summarizing youtube videos are available here: [scripts here](https://github.com/the-crypt-keeper/tldw/tree/main/tldw-original-scripts) ---------- @@ -52,13 +52,16 @@ - **Windows:** `wget https://raw.githubusercontent.com/rmusser01/tldw/main/Helper_Scripts/Installer_Scripts/Windows_Install_Update.bat && wget https://raw.githubusercontent.com/rmusser01/tldw/main/Helper_Scripts/Installer_Scripts/Windows_Run_tldw.bat` - Then double-click the downloaded batch file `Windows_Install_Update.bat` to install it, and `Windows_Run_tldw.bat` to run it. - You should now have a web browser tab opened to `http://127.0.0.1:7860/` with the GUI for the app. + - If you don't have CUDA installed on your system and available in your system path, go here: https://github.com/Purfview/whisper-standalone-win/releases/download/Faster-Whisper-XXL/Faster-Whisper-XXL_r192.3.4_windows.7z + - Extract the two files named `cudnn_ops_infer64_8.dll` and `cudnn_cnn_infer64_8.dll` from the 7z file to the `tldw` directory, and then run the `Windows_Run_tldw.bat` file. + - This will allow you to use the faster whisper models with the app. Otherwise, you won't be able to perform transcription. - **Docker:** - There's a docker build for GPU use(Needs Nvidia CUDA Controller(?): https://github.com/rmusser01/tldw/blob/main/Helper_Scripts/Dockerfiles/tldw-nvidia_amd64_Dockerfile - and plain CPU use: https://github.com/rmusser01/tldw/blob/main/Helper_Scripts/Dockerfiles/tldw_Debian_cpu-Dockerfile - the `Dockerfile` in the main directory is the Nvidia base-image-based one. So you can use your GPU if you want with it. -### What is tl/dw? +### Overview of what tl/dw currently is
What is this? - Click-Here @@ -142,7 +145,7 @@ All features are designed to run **locally** on your device, ensuring privacy an ---------- -### What is this (tl/dw)? +### More detailed explanation of this project (tl/dw)
**What is this Project? (Extended) - Click-Here** diff --git a/requirements.txt b/requirements.txt index caae64a1..50ef360e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,8 @@ faster_whisper fire FlashRank fugashi -gradio +# Pin gradio to a known-working release; unpinned upgrades have repeatedly broken the UI. +gradio==4.44.1 jieba Jinja2 joblib @@ -28,6 +29,7 @@ pandas Pillow playwright psycopg2 +psutil #psycopg2-binary pyannote.audio PyAudio