-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathextract_text_from_video_script.py
36 lines (25 loc) · 1.14 KB
/
extract_text_from_video_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
"""Transcribe a Spanish-language WAV file with IBM Watson Speech to Text.

The script sends one audio file to the Watson ``recognize`` endpoint using
the ``es-ES_BroadbandModel`` model, then collects the top transcript of each
returned result into a DataFrame with an empty ``sentimentAnnotation``
column (filled with NaN here) and prints it.
"""
import json
from os.path import join, dirname

import numpy as np
import pandas as pd
from watson_developer_cloud import SpeechToTextV1

# Convert video to audio
# NOTE: the video -> audio conversion is not performed by this script; the
# WAV file below is expected to exist already. Source video for reference:
# vpath = r"C:\Datasets\MOUD\VideoReviews\178_makeup.mp4"

# Raw string: in a normal literal "\U" is an invalid unicode escape and
# "\17" (from "\178_makeup") would be read as an octal escape, corrupting
# the path.
apath = r"C:\Users\Roshan Sridhar\Google Drive\Documents\NYU\GILAB\Python\MOUD\Text_Video\178_makeup.wav"

# TRANSCRIPTION
# Based on the SDK example:
# https://github.com/watson-developer-cloud/python-sdk/blob/f81bea7a44b83bd0d0ee080af66ad90baa3c0a23/examples/speech_to_text_v1.py

# Credentials are intentionally blank here; fill them in before running.
speech_to_text = SpeechToTextV1(
    username='',
    password='',
    x_watson_learning_opt_out=False
)

# Handy for discovering available models while debugging:
# print(json.dumps(speech_to_text.models(), indent=2))
# print(json.dumps(speech_to_text.get_model('es-ES_BroadbandModel'), indent=2))

# Binary mode: the audio bytes are passed to the service as-is.
with open(apath, 'rb') as audio_file:
    tran = speech_to_text.recognize(
        audio_file,
        content_type='audio/wav',
        timestamps=False,
        model='es-ES_BroadbandModel',
        word_confidence=False,
    )

# Keep only the first alternative's transcript of every result.
transcripts = [
    result['alternatives'][0]['transcript'] for result in tran['results']
]

# Build the frame in one go instead of appending row by row; the
# sentimentAnnotation column is a NaN placeholder for every row.
df_t = pd.DataFrame(
    {
        'Speech': transcripts,
        'sentimentAnnotation': [np.nan] * len(transcripts),
    },
    columns=['Speech', 'sentimentAnnotation'],
)

print(df_t)