From 76c820434e4e054c3b630e3447e0eb17150b94f1 Mon Sep 17 00:00:00 2001 From: Derrick Mwiti Date: Tue, 12 Mar 2024 12:16:03 +0300 Subject: [PATCH] Update LangChain examples --- demos/langchain-pdf-chat/app.py | 37 ++++++++------------- demos/langchain-pdf-chat/requirements.txt | 5 +-- demos/langchain-video-chat/app.py | 14 +++----- demos/langchain-video-chat/requirements.txt | 5 +-- demos/medical-chatbot/ingest.py | 6 ++-- demos/medical-chatbot/model.py | 7 ++-- demos/medical-chatbot/requirements.txt | 3 +- 7 files changed, 33 insertions(+), 44 deletions(-) diff --git a/demos/langchain-pdf-chat/app.py b/demos/langchain-pdf-chat/app.py index adc11c9..90a5a3c 100644 --- a/demos/langchain-pdf-chat/app.py +++ b/demos/langchain-pdf-chat/app.py @@ -1,23 +1,20 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.vectorstores import Chroma +from langchain_community.vectorstores import Chroma from langchain.chains import RetrievalQA -from langchain.prompts.chat import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, - HumanMessagePromptTemplate, -) import os import chainlit as cl -from langchain.embeddings import HuggingFaceEmbeddings -from langchain.llms import DeepSparse +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain_community.llms import DeepSparse from langchain.text_splitter import RecursiveCharacterTextSplitter from io import BytesIO import PyPDF2 +from langchain_community.document_loaders import PyPDFLoader MODEL_PATH = "hf:neuralmagic/mpt-7b-chat-pruned50-quant" text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'}) -llm = DeepSparse(model=MODEL_PATH) +llm = DeepSparse(model=MODEL_PATH,streaming=True,generation_config={}) +Loader = PyPDFLoader @cl.on_chat_start async def init(): @@ -37,16 +34,11 @@ async def init(): if file.type != "application/pdf": raise TypeError("Only PDF files are supported") - pdf_stream = BytesIO(file.content) - pdf = PyPDF2.PdfReader(pdf_stream) - pdf_text = "" - for page in pdf.pages: - pdf_text += page.extract_text() - - # texts = text_splitter.create_documents(pdf_text) - texts = text_splitter.create_documents([pdf_text]) + loader = Loader(file.path) + documents = loader.load() + + texts = text_splitter.split_documents(documents) for i, text in enumerate(texts): text.metadata["source"] = f"{i}-pl" - # Create a Chroma vector store docsearch = Chroma.from_documents(texts, embeddings) @@ -69,15 +61,14 @@ async def init(): cl.user_session.set("chain", chain) - @cl.on_message -async def main(message): +async def main(message: cl.Message): chain = cl.user_session.get("chain") # type: RetrievalQAWithSourcesChain cb = cl.AsyncLangchainCallbackHandler( - stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"] + stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"] ) cb.answer_reached = True - res = await chain.acall(message, callbacks=[cb]) + res = await chain.acall(message.content, callbacks=[cb]) answer = res["result"] source_documents = res["source_documents"] @@ -111,4 +102,4 @@ async def main(message): else: await cl.Message(content=answer, elements=source_elements - ).send() + ).send() \ No newline at end of file diff --git a/demos/langchain-pdf-chat/requirements.txt b/demos/langchain-pdf-chat/requirements.txt index 7a9bff7..3a2bd64 100644 --- a/demos/langchain-pdf-chat/requirements.txt +++ b/demos/langchain-pdf-chat/requirements.txt @@ -1,8 +1,9 @@ datasets transformers -deepsparse-nightly==1.6.0.20231007 chainlit -langchain PyPDF2 sentence_transformers chromadb +langchain-community==0.0.27 +langchain==0.1.11 +deepsparse-nightly==1.7.0.20240304 diff --git a/demos/langchain-video-chat/app.py b/demos/langchain-video-chat/app.py index a06b695..48e044d 100644 --- a/demos/langchain-video-chat/app.py +++ b/demos/langchain-video-chat/app.py @@ -1,16 +1,12 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.vectorstores import Chroma from langchain.chains import RetrievalQA -from langchain.prompts.chat import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, - HumanMessagePromptTemplate, -) + import os import chainlit as cl -from langchain.embeddings import HuggingFaceEmbeddings -from langchain.llms import DeepSparse +from langchain_community.vectorstores import Chroma +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain_community.llms import DeepSparse from pytube import YouTube import whisper @@ -19,7 +15,7 @@ MODEL_PATH = "hf:neuralmagic/mpt-7b-chat-pruned50-quant" text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'}) -llm = DeepSparse(model=MODEL_PATH) +llm = DeepSparse(model=MODEL_PATH,generation_config={}) model = whisper.load_model("base") def transcribe(youtube_url, model): diff --git a/demos/langchain-video-chat/requirements.txt b/demos/langchain-video-chat/requirements.txt index 18c0c80..267c3fe 100644 --- a/demos/langchain-video-chat/requirements.txt +++ b/demos/langchain-video-chat/requirements.txt @@ -1,9 +1,10 @@ datasets transformers -deepsparse-nightly==1.6.0.20231007 chainlit -langchain sentence_transformers chromadb git+https://github.com/openai/whisper.git pytube +langchain-community==0.0.27 +langchain==0.1.11 +deepsparse-nightly==1.7.0.20240304 \ No newline at end of file diff --git a/demos/medical-chatbot/ingest.py b/demos/medical-chatbot/ingest.py index 318b007..12ff4c0 100644 --- a/demos/medical-chatbot/ingest.py +++ b/demos/medical-chatbot/ingest.py @@ -1,6 +1,6 @@ -from langchain.embeddings import HuggingFaceEmbeddings -from langchain.vectorstores import FAISS -from langchain.document_loaders import PyPDFLoader, DirectoryLoader +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain_community.vectorstores import FAISS +from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader from langchain.text_splitter import RecursiveCharacterTextSplitter DATA_PATH = 'data/' diff --git a/demos/medical-chatbot/model.py b/demos/medical-chatbot/model.py index 99e030b..a9f53fc 100644 --- a/demos/medical-chatbot/model.py +++ b/demos/medical-chatbot/model.py @@ -1,10 +1,9 @@ -from langchain.document_loaders import PyPDFLoader, DirectoryLoader from langchain.prompts import PromptTemplate -from langchain.embeddings import HuggingFaceEmbeddings -from langchain.vectorstores import FAISS +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain_community.vectorstores import FAISS from langchain.chains import RetrievalQA import chainlit as cl -from langchain.llms import DeepSparse +from langchain_community.llms import DeepSparse import os MODEL_PATH = os.environ.get("MODEL_PATH") or "hf:neuralmagic/mpt-7b-chat-pruned50-quant" diff --git a/demos/medical-chatbot/requirements.txt b/demos/medical-chatbot/requirements.txt index 09a083d..adac8ec 100644 --- a/demos/medical-chatbot/requirements.txt +++ b/demos/medical-chatbot/requirements.txt @@ -1,5 +1,4 @@ pypdf -langchain torch transformers datasets @@ -7,3 +6,5 @@ sentence_transformers faiss_cpu chainlit deepsparse-nightly[llm] +langchain-community==0.0.27 +langchain==0.1.11