-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprocess_data.py
32 lines (24 loc) · 983 Bytes
/
process_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Import the standard-library path helper and the pandas library
from pathlib import Path

import pandas as pd
# Input dataset and destination of the tagged copy.
INPUT_CSV = "CIPtrain.csv"
# Dataset reference left by the original author: alespalla/chatbot_instruction_prompts
OUTPUT_CSV = "MyData/chatbot_instruction_prompts_train.csv"

# Speaker tags prepended to every prompt and response.
USER_TAG = "<user>: "
CHATBOT_TAG = "<chatbot>: "


def tag_conversations(text_data: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame whose prompts and responses carry speaker tags.

    Args:
        text_data: Frame with a "prompt" and a "response" column.

    Returns:
        A frame with "prompt" and "response" columns, one row per input row
        in the same order, where each prompt is prefixed with "<user>: " and
        each response with "<chatbot>: ". The result has a fresh default
        index.

    Raises:
        KeyError: If either required column is missing.
    """
    # Iterate the column values positionally rather than looking rows up by
    # label, so frames with a non-default index are handled as well.
    # str() lets non-string cells (numbers, NaN from empty CSV cells) be
    # concatenated; a missing cell therefore becomes the text "nan".
    prompts = [USER_TAG + str(prompt) for prompt in text_data["prompt"]]
    responses = [
        CHATBOT_TAG + str(response) for response in text_data["response"]
    ]
    return pd.DataFrame({"prompt": prompts, "response": responses})


def main() -> None:
    """Read the input CSV, tag every row and write the result as CSV."""
    text_data = pd.read_csv(INPUT_CSV)
    new_data = tag_conversations(text_data)

    # to_csv does not create missing directories, so make sure the output
    # folder exists before writing.
    output_path = Path(OUTPUT_CSV)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    new_data.to_csv(output_path, index=False)


if __name__ == "__main__":
    main()