Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Update LangChain examples #29

Merged
merged 1 commit into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 14 additions & 23 deletions demos/langchain-pdf-chat/app.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
import os
import chainlit as cl
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import DeepSparse
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import DeepSparse
from langchain.text_splitter import RecursiveCharacterTextSplitter
from io import BytesIO
import PyPDF2
from langchain_community.document_loaders import PyPDFLoader

MODEL_PATH = "hf:neuralmagic/mpt-7b-chat-pruned50-quant"
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
llm = DeepSparse(model=MODEL_PATH)
llm = DeepSparse(model=MODEL_PATH,streaming=True,generation_config={})
Loader = PyPDFLoader

@cl.on_chat_start
async def init():
Expand All @@ -37,16 +34,11 @@ async def init():
if file.type != "application/pdf":
raise TypeError("Only PDF files are supported")

pdf_stream = BytesIO(file.content)
pdf = PyPDF2.PdfReader(pdf_stream)
pdf_text = ""
for page in pdf.pages:
pdf_text += page.extract_text()

# texts = text_splitter.create_documents(pdf_text)
texts = text_splitter.create_documents([pdf_text])
loader = Loader(file.path)
documents = loader.load()

texts = text_splitter.split_documents(documents)
for i, text in enumerate(texts): text.metadata["source"] = f"{i}-pl"


# Create a Chroma vector store
docsearch = Chroma.from_documents(texts, embeddings)
Expand All @@ -69,15 +61,14 @@ async def init():

cl.user_session.set("chain", chain)


@cl.on_message
async def main(message):
async def main(message: cl.Message):
chain = cl.user_session.get("chain") # type: RetrievalQAWithSourcesChain
cb = cl.AsyncLangchainCallbackHandler(
stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
)
cb.answer_reached = True
res = await chain.acall(message, callbacks=[cb])
res = await chain.acall(message.content, callbacks=[cb])

answer = res["result"]
source_documents = res["source_documents"]
Expand Down Expand Up @@ -111,4 +102,4 @@ async def main(message):
else:
await cl.Message(content=answer,
elements=source_elements
).send()
).send()
5 changes: 3 additions & 2 deletions demos/langchain-pdf-chat/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
datasets
transformers
deepsparse-nightly==1.6.0.20231007
chainlit
langchain
PyPDF2
sentence_transformers
chromadb
langchain-community==0.0.27
langchain==0.1.11
deepsparse-nightly==1.7.0.20240304
14 changes: 5 additions & 9 deletions demos/langchain-video-chat/app.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)

import os
import chainlit as cl

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import DeepSparse
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import DeepSparse

from pytube import YouTube
import whisper
Expand All @@ -19,7 +15,7 @@
MODEL_PATH = "hf:neuralmagic/mpt-7b-chat-pruned50-quant"
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
llm = DeepSparse(model=MODEL_PATH)
llm = DeepSparse(model=MODEL_PATH,generation_config={})
model = whisper.load_model("base")

def transcribe(youtube_url, model):
Expand Down
5 changes: 3 additions & 2 deletions demos/langchain-video-chat/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
datasets
transformers
deepsparse-nightly==1.6.0.20231007
chainlit
langchain
sentence_transformers
chromadb
git+https://github.com/openai/whisper.git
pytube
langchain-community==0.0.27
langchain==0.1.11
deepsparse-nightly==1.7.0.20240304
6 changes: 3 additions & 3 deletions demos/medical-chatbot/ingest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

DATA_PATH = 'data/'
Expand Down
7 changes: 3 additions & 4 deletions demos/medical-chatbot/model.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.prompts import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
import chainlit as cl
from langchain.llms import DeepSparse
from langchain_community.llms import DeepSparse
import os

MODEL_PATH = os.environ.get("MODEL_PATH") or "hf:neuralmagic/mpt-7b-chat-pruned50-quant"
Expand Down
3 changes: 2 additions & 1 deletion demos/medical-chatbot/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
pypdf
langchain
torch
transformers
datasets
sentence_transformers
faiss_cpu
chainlit
deepsparse-nightly[llm]
langchain-community==0.0.27
langchain==0.1.11
Loading