-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
109 lines (83 loc) · 2.7 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import openai
import os
from dotenv import load_dotenv, find_dotenv
from jinja2 import Template
import json
from deepgram import (
DeepgramClient,
PrerecordedOptions,
FileSource,
)
# Import-time side effect: locate a .env file with find_dotenv() and load its
# entries into the process environment, so the os.environ lookups in this
# module can resolve the API keys. The return value is deliberately discarded.
_ = load_dotenv(find_dotenv())
def transcribe(file):
    """Send a remote audio URL to Deepgram and return the raw response.

    Args:
        file: URL of the audio to transcribe. Despite the name, it is passed
            to Deepgram as the ``url`` of the source, not as a local file.

    Returns:
        Whatever ``transcribe_url`` returns for the prerecorded v1 endpoint.

    Raises:
        KeyError: If ``DEEPGRAM_API_KEY`` is not set in the environment.
    """
    source = {"url": file}
    client = DeepgramClient(os.environ['DEEPGRAM_API_KEY'])
    # Diarization is requested so that each word carries a speaker label.
    settings = PrerecordedOptions(
        model="nova-2",
        smart_format=True,
        diarize=True,
        punctuate=True,
    )
    prerecorded_api = client.listen.prerecorded.v("1")
    return prerecorded_api.transcribe_url(source, settings)
def extract_transcript(file_content):
    """Render a word-level transcription result as speaker-labelled text.

    Reads the word list of the first alternative of the first channel and
    emits every word's ``punctuated_word`` followed by a single space. Each
    time the speaker label changes, a newline followed by
    ``speaker_<label>: `` is emitted before the word.

    Args:
        file_content: Subscriptable transcription result exposing
            ``['results']['channels'][0]['alternatives'][0]['words']``. Each
            word must provide ``'punctuated_word'`` and may provide
            ``'speaker'``.

    Returns:
        The assembled transcript string; ``""`` when the word list is empty.

    Raises:
        KeyError: If the expected keys (other than ``'speaker'``) are missing.
        IndexError: If there is no first channel or first alternative.
    """
    words = file_content['results']['channels'][0]['alternatives'][0]['words']

    # Collect fragments and join once instead of growing a string with `+=`.
    pieces = []
    current_speaker = None
    for word in words:
        content = word['punctuated_word']
        # A word without a speaker label is treated like an explicit None, so
        # the guard below is effective. Plain subscripting is used (not .get)
        # because the words are only known to be subscriptable.
        try:
            speaker_label = word['speaker']
        except KeyError:
            speaker_label = None

        # Start a new line with the speaker label whenever the speaker changes.
        if speaker_label is not None and speaker_label != current_speaker:
            current_speaker = speaker_label
            pieces.append(f"\nspeaker_{current_speaker}: ")
        pieces.append(f"{content} ")
    return "".join(pieces)
def get_sentimental_result(output):
    """Ask the OpenAI chat API for a sentiment analysis of a transcript.

    The prompt is produced by rendering the Jinja2 template stored in
    ``prompt_template.txt`` (resolved relative to the current working
    directory) with ``transcript`` bound to *output*. The rendered prompt is
    sent as the single system message of a ``gpt-4`` chat completion at
    temperature 0.

    Args:
        output: Transcript text substituted into the prompt template.

    Returns:
        The first choice's message content parsed with ``json.loads``.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
        OSError: If ``prompt_template.txt`` cannot be opened.
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    api_key = os.environ['OPENAI_API_KEY']
    # Compare by value: `is not` tests object identity, which is not a
    # reliable way to compare strings.
    if openai.api_key != api_key:
        openai.api_key = api_key
    client = openai.OpenAI()

    # Explicit encoding so the template reads the same on every platform.
    with open('prompt_template.txt', "r", encoding="utf-8") as file:
        template_string = file.read()

    data = {
        'transcript': output,
    }
    prompt = Template(template_string).render(data)

    sentiment_response = client.chat.completions.create(
        model="gpt-4",
        temperature=0,
        messages=[
            {"role": "system", "content": prompt}
        ]
    )
    # The prompt is expected to make the model answer with a JSON document.
    return json.loads(sentiment_response.choices[0].message.content)
def handler(file):
    """Run the transcribe -> extract -> sentiment pipeline for one audio URL.

    Args:
        file: Audio location forwarded unchanged to ``transcribe``.

    Returns:
        ``{'statusCode': 200, 'body': <sentiment result>}`` on success. If any
        step raises, the error is printed and
        ``{'statusCode': 500, 'body': <JSON-encoded error message>}`` is
        returned instead; the exception is not propagated.
    """
    try:
        raw_response = transcribe(file)
        speaker_text = extract_transcript(raw_response)
        result = get_sentimental_result(speaker_text)
    except Exception as e:
        # Boundary catch-all: report the failure as a 500 payload.
        message = f"Error occurred: {e}"
        print(message)
        return {
            'statusCode': 500,
            'body': json.dumps(message)
        }
    else:
        return {
            'statusCode': 200,
            'body': result
        }
def format_results(num_speakers, results):
    """Map zero-based speaker entries of a result body to one-based keys.

    For each ``i`` in ``range(num_speakers)`` the value
    ``results['body']['speaker_<i>'][0]['sentiment_output']`` is stored under
    the key ``speaker_<i + 1>``.

    Args:
        num_speakers: Number of speakers to read from the result body.
        results: Mapping with a ``'body'`` entry as described above. It is
            not accessed at all when ``num_speakers`` is 0.

    Returns:
        A new dict from one-based speaker keys to sentiment outputs.
    """
    # `results['body']` is looked up per item so that it is never touched when
    # there are no speakers.
    return {
        f"speaker_{position}": results['body'][f'speaker_{index}'][0]['sentiment_output']
        for position, index in enumerate(range(num_speakers), start=1)
    }
def upload_file_to_gcs(client, file, filename, content_type):
    """Upload a file object to the bucket named by ``GC_BUCKET``.

    Args:
        client: Storage client providing ``bucket(name)``.
        file: File object handed to the blob's ``upload_from_file``.
        filename: Name of the blob to create inside the bucket.
        content_type: Content type passed along with the upload.

    Returns:
        The blob object the file was uploaded to.

    Raises:
        KeyError: If ``GC_BUCKET`` is not set in the environment.
    """
    destination = client.bucket(os.environ['GC_BUCKET']).blob(filename)
    destination.upload_from_file(file, content_type=content_type)
    return destination