Commit af151e1
Merge pull request #376 from rmusser01/dev

EMBEDDINGS MODEL PATH MOVED FROM 'onnx_models' TO 'models/embedding_models' SO BE AWARE!

rmusser01 authored Oct 19, 2024
2 parents 9d0f79c + 5284903 commit af151e1

Showing 4 changed files with 38 additions and 30 deletions.
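
If you already have embedding models downloaded under the old path, here is a one-off migration sketch (both paths are taken from the config diff below; running this against your local checkout is an assumption, not a step the repo performs for you):

# Move previously downloaded embedding models from the old
# onnx_models directory to the new embedding_models directory.
import shutil
from pathlib import Path

old_dir = Path("./App_Function_Libraries/models/onnx_models")
new_dir = Path("./App_Function_Libraries/models/embedding_models")

if old_dir.is_dir():
    new_dir.mkdir(parents=True, exist_ok=True)
    for item in old_dir.iterdir():
        shutil.move(str(item), str(new_dir / item.name))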
.gitignore: binary file modified (contents not shown)
28 changes: 14 additions & 14 deletions App_Function_Libraries/RAG/RAG_Library_2.py

@@ -117,7 +117,7 @@
 
 
 # RAG Search with keyword filtering
-def enhanced_rag_pipeline(query: str, api_choice: str, keywords: str = None) -> Dict[str, Any]:
+def enhanced_rag_pipeline(query: str, api_choice: str, keywords: str = None, apply_re_ranking=True) -> Dict[str, Any]:
     log_counter("enhanced_rag_pipeline_attempt", labels={"api_choice": api_choice})
     start_time = time.time()
     try:
@@ -150,30 +150,30 @@ def enhanced_rag_pipeline(query: str, api_choice: str, keywords: str = None) ->
         # Combine results
         all_results = vector_results + fts_results
 
-        apply_re_ranking = True
         if apply_re_ranking:
             logging.debug(f"\nenhanced_rag_pipeline - Applying Re-Ranking")
             # FIXME - add option to use re-ranking at call time
             # FIXME - specify model + add param to modify at call time
             # FIXME - add option to set a custom top X results
             # You can specify a model if necessary, e.g., model_name="ms-marco-MiniLM-L-12-v2"
-            ranker = Ranker()
+            if all_results:
+                ranker = Ranker()
 
-            # Prepare passages for re-ranking
-            passages = [{"id": i, "text": result['content']} for i, result in enumerate(all_results)]
-            rerank_request = RerankRequest(query=query, passages=passages)
+                # Prepare passages for re-ranking
+                passages = [{"id": i, "text": result['content']} for i, result in enumerate(all_results)]
+                rerank_request = RerankRequest(query=query, passages=passages)
 
-            # Rerank the results
-            reranked_results = ranker.rerank(rerank_request)
+                # Rerank the results
+                reranked_results = ranker.rerank(rerank_request)
 
-            # Sort results based on the re-ranking score
-            reranked_results = sorted(reranked_results, key=lambda x: x['score'], reverse=True)
+                # Sort results based on the re-ranking score
+                reranked_results = sorted(reranked_results, key=lambda x: x['score'], reverse=True)
 
-            # Log reranked results
-            logging.debug(f"\n\nenhanced_rag_pipeline - Reranked results: {reranked_results}")
+                # Log reranked results
+                logging.debug(f"\n\nenhanced_rag_pipeline - Reranked results: {reranked_results}")
 
-            # Update all_results based on reranking
-            all_results = [all_results[result['id']] for result in reranked_results]
+                # Update all_results based on reranking
+                all_results = [all_results[result['id']] for result in reranked_results]
 
         # Extract content from results (top 10)
         context = "\n".join([result['content'] for result in all_results[:10]])  # Limit to top 10 results
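
For context, a minimal standalone sketch of the FlashRank flow that the new apply_re_ranking flag gates (the query and passages are illustrative; the repo uses the default Ranker(), and the optional model_name in the comment above is the only model hint the source gives):

# Requires `pip install flashrank`; passage shape mirrors the diff above.
from flashrank import Ranker, RerankRequest

all_results = [
    {"content": "Paris is the capital of France."},
    {"content": "The Eiffel Tower is in Paris."},
]

ranker = Ranker()  # e.g. Ranker(model_name="ms-marco-MiniLM-L-12-v2")
passages = [{"id": i, "text": r["content"]} for i, r in enumerate(all_results)]
reranked = ranker.rerank(RerankRequest(query="Where is the Eiffel Tower?", passages=passages))
reranked = sorted(reranked, key=lambda x: x["score"], reverse=True)
all_results = [all_results[r["id"]] for r in reranked]  # best match first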
4 changes: 2 additions & 2 deletions App_Function_Libraries/RAG/RAG_QA_Chat.py

@@ -20,15 +20,15 @@
 #
 # Functions:
 
-def rag_qa_chat(query, history, context, api_choice):
+def rag_qa_chat(query, history, context, api_choice, keywords=None, apply_re_ranking=False):
     log_counter("rag_qa_chat_attempt", labels={"api_choice": api_choice})
     start_time = time.time()
 
     try:
         if isinstance(context, str):
             log_counter("rag_qa_chat_string_context")
             # Use the answer and context directly from enhanced_rag_pipeline
-            result = enhanced_rag_pipeline(query, api_choice)
+            result = enhanced_rag_pipeline(query, api_choice, keywords, apply_re_ranking)
             answer = result['answer']
         else:
             log_counter("rag_qa_chat_no_context")
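
A usage sketch of the updated signature (the history and context values are assumptions for illustration; only the parameter names and defaults come from the diff):

# Passing a str context routes the call through enhanced_rag_pipeline,
# forwarding the keyword filter and the re-ranking toggle.
response = rag_qa_chat(
    query="What did the speaker say about pricing?",
    history=[],                     # assumed empty chat history
    context="search the database",  # any str triggers the RAG pipeline path
    api_choice="openai",
    keywords="pricing,revenue",     # assumed comma-separated keyword filter
    apply_re_ranking=True,
)
# The return shape is not shown in this diff; internally the function
# extracts the answer from result['answer'].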
36 changes: 22 additions & 14 deletions Config_Files/config.txt

@@ -8,7 +8,7 @@ groq_model = llama3-70b-8192
 openai_api_key = <openai_api_key>
 openai_model = gpt-4o
 huggingface_api_key = <huggingface_api_token>
-huggingface_model = CohereForAI/c4ai-command-r-plus
+huggingface_model = mistralai/Mistral-Nemo-Instruct-2407
 openrouter_api_key = <openrouter_api_key>
 openrouter_model = mistralai/mistral-7b-instruct:free
 deepseek_api_key = <deepseek-api-key>
@@ -20,20 +20,20 @@ custom_openai_api_ip = <api_ip_here>
 
 [Local-API]
 kobold_api_IP = http://127.0.0.1:5001/api/v1/generate
-kobold_api_key = <kobold api key>
+kobold_api_key =
 llama_api_IP = http://127.0.0.1:8080/completion
-llama_api_key = <llama.cpp api key>
-ooba_api_key = <ooba api key>
+llama_api_key =
+ooba_api_key =
 ooba_api_IP = http://127.0.0.1:5000/v1/chat/completions
 tabby_api_IP = http://127.0.0.1:5000/v1/chat/completions
-tabby_api_key = <tabbyapi key>
+tabby_api_key =
 vllm_api_IP = http://127.0.0.1:8000/v1/chat/completions
-vllm_model = <vllm model>
-ollama_api_IP = http://127.0.0.1:11434/api/generate
-ollama_api_key = <ollama api key>
-ollama_model = <ollama model>
+vllm_model =
+ollama_api_IP = http://127.0.0.1:11434/v1/chat/completions
+ollama_api_key =
+ollama_model = llama3
 aphrodite_api_IP = http://127.0.0.1:8080/completion
-aphrodite_api_key = <aphrodite_api_key>
+aphrodite_api_key =
 
 [Processing]
 processing_choice = cuda
@@ -56,10 +56,13 @@ elasticsearch_port = 9200
 # Additionally you can use elasticsearch as the database type, just replace `sqlite` with `elasticsearch` for `type` and provide the `elasticsearch_host` and `elasticsearch_port` of your configured ES instance.
 chroma_db_path = Databases/chroma_db
 prompts_db_path = Databases/prompts.db
+rag_qa_db_path = Databases/rag_qa.db
 
 [Embeddings]
 embedding_provider = openai
 embedding_model = text-embedding-3-small
-onnx_model_path = ./App_Function_Libraries/models/onnx_models/
+model_dir = ./App_Function_Libraries/models/embedding_models
 embedding_api_url = http://localhost:8080/v1/embeddings
 embedding_api_key = your_api_key_here
 chunk_size = 400
@@ -78,6 +81,14 @@ adaptive = false
 multi_level = false
 language = english
 
+[Metrics]
+log_file_path =
+#os.getenv("tldw_LOG_FILE_PATH", "tldw_app_logs.json")
+max_bytes =
+#int(os.getenv("tldw_LOG_MAX_BYTES", 10 * 1024 * 1024)) # 10 MB
+backup_count = 5
+#int(os.getenv("tldw_LOG_BACKUP_COUNT", 5))
+
 
 #[Comments]
 #OpenAI Models:
@@ -98,7 +109,4 @@ language = english
 # open-mistral-7b
 # open-mixtral-8x7b
 # open-mixtral-8x22b
-# open-codestral-mamba
-
-
-
+# open-codestral-mamba
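
For scripts that read these settings, a small sketch of loading the relocated embeddings directory and the new [Metrics] keys (section and key names come from the diff; this stdlib loader and its env-var fallbacks, which mirror the commented-out os.getenv defaults, are assumptions rather than the repo's actual config code):

# Hypothetical reader for the new/renamed keys using stdlib configparser.
import configparser
import os

cfg = configparser.ConfigParser()
cfg.read("Config_Files/config.txt")

model_dir = cfg.get("Embeddings", "model_dir",
                    fallback="./App_Function_Libraries/models/embedding_models")
log_file_path = (cfg.get("Metrics", "log_file_path", fallback="")
                 or os.getenv("tldw_LOG_FILE_PATH", "tldw_app_logs.json"))
max_bytes = int(cfg.get("Metrics", "max_bytes", fallback="")
                or os.getenv("tldw_LOG_MAX_BYTES", 10 * 1024 * 1024))  # 10 MB
backup_count = cfg.getint("Metrics", "backup_count", fallback=5)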
