diff --git a/ingest.py b/ingest.py index a15ee5c8..834d71c5 100644 --- a/ingest.py +++ b/ingest.py @@ -170,6 +170,7 @@ def load_faiss_index(index_path, metadata_path): def main(device_type): + print(f"Running on device: {device_type}") # Load documents and split in chunks logging.info(f"Loading documents from {SOURCE_DIRECTORY}") documents = load_documents(SOURCE_DIRECTORY) @@ -224,9 +225,13 @@ def main(device_type): client_settings=CHROMA_SETTINGS, ) +import argparse if __name__ == "__main__": logging.basicConfig( format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO ) - main() + parser = argparse.ArgumentParser(description="Ingest script for localGPT") + parser.add_argument("--device_type", type=str, required=True, help="Device type (cpu or gpu)") + args = parser.parse_args() + main(args.device_type) diff --git a/run_localGPT.py b/run_localGPT.py index bf157731..4c2a848a 100644 --- a/run_localGPT.py +++ b/run_localGPT.py @@ -143,19 +143,24 @@ def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"): # embedding_function=embeddings, # client_settings=CHROMA_SETTINGS) + # print(embeddings) + # Initialize the FAISS index - # faiss_index = faiss.IndexFlatL2(embeddings) + faiss_index = faiss.IndexFlatL2(embeddings.embed()) + # # Initialize the docstore - # docstore = InMemoryDocstore() + docstore = InMemoryDocstore() # # Initialize the index_to_docstore_id - # index_to_docstore_id = {} + index_to_docstore_id = {} + # Add the embeddings to the index + faiss_index.add(embeddings) - # db = FAISS( - # embedding_function=embeddings, - # index=faiss_index, - # docstore=docstore, - # index_to_docstore_id=index_to_docstore_id - # ) + db = FAISS( + embedding_function=embeddings, + index=faiss_index, + docstore=docstore, + index_to_docstore_id=index_to_docstore_id + ) # # Add documents and their embeddings to the FAISS index and the docstore # for i, (text, embedding) in enumerate(zip(df['Text'].tolist(), embeddings)):