-
Notifications
You must be signed in to change notification settings - Fork 0
/
transcribe_podcasts.py
49 lines (40 loc) · 1.65 KB
/
transcribe_podcasts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/python
#
# Helper script to transcribe podcast files.
#
# Dave T 2024-10-15
# Takes a list of audio files (for example, a shell glob over a directory)
# and creates one big text file of the transcripts, broken into sections
# for each episode.
# The file names must sort alphabetically into the order that you want them
# to appear in the text file, so I recommend using an ISO date prefix or similar.
#
# The main purpose is to have a rapidly searchable text file of a lot of audio data.
import sys
from faster_whisper import WhisperModel
from faster_whisper.transcribe import BatchedInferencePipeline
from pathlib import Path
import datetime
# Output file that every transcript section is appended to; the path is
# relative, so it is created in the current working directory.
TRANSCRIPT_FILE = "podcast_transcript.txt"
# Usage text printed when the script is run without any file arguments.
USAGE = "transcribe_podcasts.py <file1.mp3> [file2.mp3 ...]"
# Lazily created transcription pipeline, shared by every process_file() call
# so the Whisper model is loaded once per run instead of once per file.
_BATCHED_MODEL = None


def _get_batched_model():
    """Return the shared batched pipeline, building it on first use."""
    global _BATCHED_MODEL
    if _BATCHED_MODEL is None:
        model = WhisperModel("small", device="cpu", compute_type="int8")
        _BATCHED_MODEL = BatchedInferencePipeline(model=model)
    return _BATCHED_MODEL


def process_file(file):
    """Transcribe one audio file and append its section to TRANSCRIPT_FILE.

    The section starts with the file's stem (name without directory or
    extension) followed by a colon, then one line per transcribed segment in
    the form ``[start -> end] text``, and ends with a separator rule.
    Each segment line is also echoed to stdout as it is written.

    Args:
        file: Path of the audio file to transcribe.
    """
    # The second return value (transcription info) is not needed here.
    segments, _info = _get_batched_model().transcribe(file, batch_size=16)

    # Append mode: sections from successive files accumulate in one transcript.
    with open(TRANSCRIPT_FILE, "a", encoding="utf-8") as transcript:
        transcript.write(f"{Path(file).stem}:\n")
        for segment in segments:
            # Truncate to whole seconds so timestamps render as H:MM:SS.
            start = str(datetime.timedelta(seconds=int(segment.start)))
            end = str(datetime.timedelta(seconds=int(segment.end)))
            line = f"[{start} -> {end}] {segment.text}"
            print(line)
            transcript.write(f"{line}\n")
        transcript.write("\n===============================================================\n\n")
# Script entry point: transcribe every audio file named on the command line.
if __name__ == "__main__":
    # At least one audio file argument is required.
    if len(sys.argv) < 2:
        print("Syntax error. Usage:\n")
        print(USAGE)
        # Use sys.exit rather than the bare exit() helper: exit() is injected
        # by the site module and is not available under `python -S` or in
        # frozen/embedded interpreters.
        sys.exit(1)

    # Process in alphabetical order so the transcript sections follow the
    # file-name ordering regardless of how the arguments were given.
    files = sorted(sys.argv[1:])
    for file in files:
        process_file(file)