Commit af151e1
Merge pull request #376 from rmusser01/dev

EMBEDDINGS MODEL PATH MOVED FROM 'onnx_models' TO 'models/embedding_models' SO BE AWARE!

rmusser01 authored Oct 19, 2024
2 parents 9d0f79c + 5284903 commit af151e1

Showing 4 changed files with 38 additions and 30 deletions.
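
If you already have embedding models downloaded under the old path, here is a one-off migration sketch (both paths are taken from the config diff below; running this against your local checkout is an assumption, not a step the repo performs for you):

# Move previously downloaded embedding models from the old
# onnx_models directory to the new embedding_models directory.
import shutil
from pathlib import Path

old_dir = Path("./App_Function_Libraries/models/onnx_models")
new_dir = Path("./App_Function_Libraries/models/embedding_models")

if old_dir.is_dir():
    new_dir.mkdir(parents=True, exist_ok=True)
    for item in old_dir.iterdir():
        shutil.move(str(item), str(new_dir / item.name))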
.gitignore: binary file modified (contents not shown)
28 changes: 14 additions & 14 deletions App_Function_Libraries/RAG/RAG_Library_2.py

@@ -117,7 +117,7 @@
 
 
 # RAG Search with keyword filtering
-def enhanced_rag_pipeline(query: str, api_choice: str, keywords: str = None) -> Dict[str, Any]:
+def enhanced_rag_pipeline(query: str, api_choice: str, keywords: str = None, apply_re_ranking=True) -> Dict[str, Any]:
     log_counter("enhanced_rag_pipeline_attempt", labels={"api_choice": api_choice})
     start_time = time.time()
     try:
@@ -150,30 +150,30 @@ def enhanced_rag_pipeline(query: str, api_choice: str, keywords: str = None) ->
         # Combine results
         all_results = vector_results + fts_results
 
-        apply_re_ranking = True
         if apply_re_ranking:
             logging.debug(f"\nenhanced_rag_pipeline - Applying Re-Ranking")
             # FIXME - add option to use re-ranking at call time
             # FIXME - specify model + add param to modify at call time
             # FIXME - add option to set a custom top X results
             # You can specify a model if necessary, e.g., model_name="ms-marco-MiniLM-L-12-v2"
-            ranker = Ranker()
+            if all_results:
+                ranker = Ranker()
 
-            # Prepare passages for re-ranking
-            passages = [{"id": i, "text": result['content']} for i, result in enumerate(all_results)]
-            rerank_request = RerankRequest(query=query, passages=passages)
+                # Prepare passages for re-ranking
+                passages = [{"id": i, "text": result['content']} for i, result in enumerate(all_results)]
+                rerank_request = RerankRequest(query=query, passages=passages)
 
-            # Rerank the results
-            reranked_results = ranker.rerank(rerank_request)
+                # Rerank the results
+                reranked_results = ranker.rerank(rerank_request)
 
-            # Sort results based on the re-ranking score
-            reranked_results = sorted(reranked_results, key=lambda x: x['score'], reverse=True)
+                # Sort results based on the re-ranking score
+                reranked_results = sorted(reranked_results, key=lambda x: x['score'], reverse=True)
 
-            # Log reranked results
-            logging.debug(f"\n\nenhanced_rag_pipeline - Reranked results: {reranked_results}")
+                # Log reranked results
+                logging.debug(f"\n\nenhanced_rag_pipeline - Reranked results: {reranked_results}")
 
-            # Update all_results based on reranking
-            all_results = [all_results[result['id']] for result in reranked_results]
+                # Update all_results based on reranking
+                all_results = [all_results[result['id']] for result in reranked_results]
 
         # Extract content from results (top 10)
         context = "\n".join([result['content'] for result in all_results[:10]])  # Limit to top 10 results
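
For context, a minimal standalone sketch of the FlashRank flow that the new apply_re_ranking flag gates (the query and passages are illustrative; the repo uses the default Ranker(), and the optional model_name in the comment above is the only model hint the source gives):

# Requires `pip install flashrank`; passage shape mirrors the diff above.
from flashrank import Ranker, RerankRequest

all_results = [
    {"content": "Paris is the capital of France."},
    {"content": "The Eiffel Tower is in Paris."},
]

ranker = Ranker()  # e.g. Ranker(model_name="ms-marco-MiniLM-L-12-v2")
passages = [{"id": i, "text": r["content"]} for i, r in enumerate(all_results)]
reranked = ranker.rerank(RerankRequest(query="Where is the Eiffel Tower?", passages=passages))
reranked = sorted(reranked, key=lambda x: x["score"], reverse=True)
all_results = [all_results[r["id"]] for r in reranked]  # best match first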
4 changes: 2 additions & 2 deletions App_Function_Libraries/RAG/RAG_QA_Chat.py

@@ -20,15 +20,15 @@
 #
 # Functions:
 
-def rag_qa_chat(query, history, context, api_choice):
+def rag_qa_chat(query, history, context, api_choice, keywords=None, apply_re_ranking=False):
     log_counter("rag_qa_chat_attempt", labels={"api_choice": api_choice})
     start_time = time.time()
 
     try:
         if isinstance(context, str):
             log_counter("rag_qa_chat_string_context")
             # Use the answer and context directly from enhanced_rag_pipeline
-            result = enhanced_rag_pipeline(query, api_choice)
+            result = enhanced_rag_pipeline(query, api_choice, keywords, apply_re_ranking)
             answer = result['answer']
         else:
             log_counter("rag_qa_chat_no_context")
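
A usage sketch of the updated signature (the history and context values are assumptions for illustration; only the parameter names and defaults come from the diff):

# Passing a str context routes the call through enhanced_rag_pipeline,
# forwarding the keyword filter and the re-ranking toggle.
response = rag_qa_chat(
    query="What did the speaker say about pricing?",
    history=[],                     # assumed empty chat history
    context="search the database",  # any str triggers the RAG pipeline path
    api_choice="openai",
    keywords="pricing,revenue",     # assumed comma-separated keyword filter
    apply_re_ranking=True,
)
# The return shape is not shown in this diff; internally the function
# extracts the answer from result['answer'].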
36 changes: 22 additions & 14 deletions Config_Files/config.txt

@@ -8,7 +8,7 @@ groq_model = llama3-70b-8192
 openai_api_key = <openai_api_key>
 openai_model = gpt-4o
 huggingface_api_key = <huggingface_api_token>
-huggingface_model = CohereForAI/c4ai-command-r-plus
+huggingface_model = mistralai/Mistral-Nemo-Instruct-2407
 openrouter_api_key = <openrouter_api_key>
 openrouter_model = mistralai/mistral-7b-instruct:free
 deepseek_api_key = <deepseek-api-key>
@@ -20,20 +20,20 @@ custom_openai_api_ip = <api_ip_here>
 
 [Local-API]
 kobold_api_IP = http://127.0.0.1:5001/api/v1/generate
-kobold_api_key = <kobold api key>
+kobold_api_key =
 llama_api_IP = http://127.0.0.1:8080/completion
-llama_api_key = <llama.cpp api key>
-ooba_api_key = <ooba api key>
+llama_api_key =
+ooba_api_key =
 ooba_api_IP = http://127.0.0.1:5000/v1/chat/completions
 tabby_api_IP = http://127.0.0.1:5000/v1/chat/completions
-tabby_api_key = <tabbyapi key>
+tabby_api_key =
 vllm_api_IP = http://127.0.0.1:8000/v1/chat/completions
-vllm_model = <vllm model>
-ollama_api_IP = http://127.0.0.1:11434/api/generate
-ollama_api_key = <ollama api key>
-ollama_model = <ollama model>
+vllm_model =
+ollama_api_IP = http://127.0.0.1:11434/v1/chat/completions
+ollama_api_key =
+ollama_model = llama3
 aphrodite_api_IP = http://127.0.0.1:8080/completion
-aphrodite_api_key = <aphrodite_api_key>
+aphrodite_api_key =
 
 [Processing]
 processing_choice = cuda
@@ -56,10 +56,13 @@ elasticsearch_port = 9200
 # Additionally you can use elasticsearch as the database type, just replace `sqlite` with `elasticsearch` for `type` and provide the `elasticsearch_host` and `elasticsearch_port` of your configured ES instance.
 chroma_db_path = Databases/chroma_db
 prompts_db_path = Databases/prompts.db
+rag_qa_db_path = Databases/rag_qa.db
 
 [Embeddings]
 embedding_provider = openai
 embedding_model = text-embedding-3-small
-onnx_model_path = ./App_Function_Libraries/models/onnx_models/
+model_dir = ./App_Function_Libraries/models/embedding_models
 embedding_api_url = http://localhost:8080/v1/embeddings
 embedding_api_key = your_api_key_here
 chunk_size = 400
@@ -78,6 +81,14 @@ adaptive = false
 multi_level = false
 language = english
 
+[Metrics]
+log_file_path =
+#os.getenv("tldw_LOG_FILE_PATH", "tldw_app_logs.json")
+max_bytes =
+#int(os.getenv("tldw_LOG_MAX_BYTES", 10 * 1024 * 1024)) # 10 MB
+backup_count = 5
+#int(os.getenv("tldw_LOG_BACKUP_COUNT", 5))
+
 
 #[Comments]
 #OpenAI Models:
@@ -98,7 +109,4 @@ language = english
 # open-mistral-7b
 # open-mixtral-8x7b
 # open-mixtral-8x22b
-# open-codestral-mamba
-
-
-
+# open-codestral-mamba
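
For scripts that read these settings, a small sketch of loading the relocated embeddings directory and the new [Metrics] keys (section and key names come from the diff; this stdlib loader and its env-var fallbacks, which mirror the commented-out os.getenv defaults, are assumptions rather than the repo's actual config code):

# Hypothetical reader for the new/renamed keys using stdlib configparser.
import configparser
import os

cfg = configparser.ConfigParser()
cfg.read("Config_Files/config.txt")

model_dir = cfg.get("Embeddings", "model_dir",
                    fallback="./App_Function_Libraries/models/embedding_models")
log_file_path = (cfg.get("Metrics", "log_file_path", fallback="")
                 or os.getenv("tldw_LOG_FILE_PATH", "tldw_app_logs.json"))
max_bytes = int(cfg.get("Metrics", "max_bytes", fallback="")
                or os.getenv("tldw_LOG_MAX_BYTES", 10 * 1024 * 1024))  # 10 MB
backup_count = cfg.getint("Metrics", "backup_count", fallback=5)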
