-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpandas_df_agent.py
73 lines (54 loc) · 2.59 KB
/
pandas_df_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import json
import streamlit as st
from langchain.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import BaseModel,Field
from langchain.output_parsers import PydanticOutputParser
from langchain_groq import ChatGroq
# Load the per-field description strings used to build the output schema.
# The path is relative, so it is resolved against the process's current
# working directory. The encoding is pinned to UTF-8 (the standard encoding
# for JSON text) instead of being left to the platform's locale default,
# which can mis-decode non-ASCII descriptions on non-UTF-8 locales.
with open("prompt.json", "r", encoding="utf-8") as prompt_file:
    prompt = json.load(prompt_file)['df_info_retrieve_desc']
# Pydantic schema handed to the output parser: it lists the fields the
# model's reply must contain. Every field is required (``...`` default) and
# takes its description from the module-level ``prompt`` mapping.
#
# NOTE(review): pydantic v1 appears to copy a model's docstring into the
# generated schema's "description", so the docstring below probably reaches
# the LLM through the format instructions -- keep its wording unchanged
# unless a prompt change is intended.
class DataframeInfoRetrive(BaseModel):
    """output response class"""

    feature_Explanation: str = Field(
        default=...,
        description=prompt["feature_Explanation"],
    )
    shape: tuple = Field(
        default=...,
        description=prompt["shape"],
    )
    n_duplicates: int = Field(
        default=...,
        description=prompt["n_duplicates"],
    )
    correlation: str = Field(
        default=...,
        description=prompt["correlation"],
    )
    feature_data_types: str = Field(
        default=...,
        description=prompt["feature_data_types"],
    )
    summarize_statistics: str = Field(
        default=...,
        description=prompt["summarize_statistics"],
    )
# NOTE(review): the decorator is applied to the class itself, so the name
# ``DataframeAgent`` is bound to whatever ``st.cache_resource()`` returns for
# it; presumably construction is cached per ``data`` argument -- confirm
# against the Streamlit caching documentation.
@st.cache_resource()
class DataframeAgent:
    """Describe a dataframe with a chat model and a Pydantic-parsed reply.

    Holds the dataframe, the response schema class and a ``ChatGroq`` model,
    and combines them into a prompt -> llm -> output-parser chain on demand.
    """

    def __init__(self, data):
        """Store the dataframe and set up the schema class and chat model.

        Args:
            data: The dataframe to describe. It is kept as-is and later
                passed to the prompt template under the ``dataframe`` key.
        """
        self.df = data
        self.DataframeInfoRetrive = DataframeInfoRetrive
        self.llm = ChatGroq(model="llama3-70b-8192", temperature=0.2)

    def get_prompt_template(self):
        """Build the chat prompt.

        The template has two placeholders, ``dataframe`` and
        ``instructions``, each carried by its own user message.

        Returns:
            The ``ChatPromptTemplate`` built from the three messages.
        """
        system_message = (
            "system",
            "You are a pandas dataframe agent. "
            "Read the given dataframe and give the reponse in the given output format instructions",
        )
        dataframe_message = ("user", "here is the dataframe \n {dataframe}")
        instructions_message = (
            "user",
            "here are the output format instructions: \n {instructions}",
        )
        return ChatPromptTemplate.from_messages(
            [system_message, dataframe_message, instructions_message]
        )

    def initialize_output_parser(self):
        """Create the output parser together with its format instructions.

        The parser is a ``PydanticOutputParser`` built around the schema
        class stored on the instance.

        Returns:
            A ``(parser, format_instructions)`` tuple.
        """
        parser = PydanticOutputParser(pydantic_object=self.DataframeInfoRetrive)
        return parser, parser.get_format_instructions()

    def run_chain(self):
        """Run the LCEL chain: prompt template -> llm -> output parser.

        Returns:
            Whatever the output parser produces from the model's reply.
        """
        template = self.get_prompt_template()
        parser, instructions = self.initialize_output_parser()
        pipeline = template | self.llm | parser
        # Keys must match the placeholders declared in the prompt template.
        payload = {"dataframe": self.df, "instructions": instructions}
        return pipeline.invoke(input=payload)