-
Notifications
You must be signed in to change notification settings - Fork 0
/
renamer.py
37 lines (28 loc) · 1.25 KB
/
renamer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from glob import glob
import subprocess
from pathlib import Path
# input() handles input better if readline is loaded
# e.g. arrow keys work, and backspace works properly with hangeul input
import readline
def prompt(path):
    """Play an audio clip and ask the user to transcribe it.

    The clip at ``path`` is played with ``mplayer``, then the user is asked
    to type what they heard. Submitting an empty line plays the clip again
    and repeats the question.

    Returns the first non-empty line the user enters.
    """
    while True:
        # str() because path may be a pathlib.Path rather than a plain string.
        subprocess.run(["mplayer", str(path)])
        print('Transcribe the audio (leave blank to hear again)')
        answer = input('> ')
        if answer:
            return answer
def pad_filename(filename, prefix='2000-essential-korean-words-'):
    """
    Build a zero-padded sort key from the time range in a segment filename.

    pyAudioAnalysis adds stuff like "131.700-132.850" and "14.200-26.500" to
    the output filenames. Sorted as plain text these come out in the wrong
    order because numbers like 131 and 14 are not padded with zeros, so both
    timestamps are re-formatted to a fixed width here.

    Args:
        filename: Path (or bare name) of a segment file. Only the stem is
            inspected; directories and the extension are ignored.
        prefix: Text removed from the stem before the time range is parsed.
            Defaults to the name of the recording this script was written for.

    Returns:
        The time range with each timestamp zero-padded to 8 characters and
        3 decimals, e.g. "0131.700-0132.850".

    Raises:
        ValueError: If what is left of the stem is not exactly two numbers
            separated by a single "-".
    """
    time_range = Path(filename).stem.replace(prefix, '')
    parts = time_range.split('-')
    if len(parts) != 2:
        raise ValueError(
            f'expected a "<from>-<to>" time range in {str(filename)!r}, '
            f'got {time_range!r}'
        )
    try:
        from_timestamp, to_timestamp = (float(ts) for ts in parts)
    except ValueError as err:
        raise ValueError(
            f'non-numeric time range {time_range!r} in {str(filename)!r}'
        ) from err
    return f'{from_timestamp:08.3f}-{to_timestamp:08.3f}'
def collapse_spaces(text):
    """Return ``text`` with every run of whitespace collapsed to one space.

    Leading and trailing whitespace is dropped too, so a string made only of
    whitespace becomes the empty string.
    """
    return ' '.join(text.split())


if __name__ == '__main__':
    # Files that were already renamed (stem + "-" + transcript) still match
    # the glob, but pad_filename raises ValueError for them because the stem
    # no longer ends in a bare "<from>-<to>" range. Leave them out so the
    # script can be re-run after an interrupted session instead of crashing
    # while sorting.
    pending = []
    skipped = 0
    for filepath in glob('output/*_*-*.wav'):
        try:
            sort_key = pad_filename(filepath)
        except ValueError:
            skipped += 1
        else:
            pending.append((sort_key, filepath))
    if skipped:
        print(f'Skipping {skipped} file(s) without a bare time range in the name')

    # Sort on the padded key only, so files with equal keys keep glob order.
    pending.sort(key=lambda item: item[0])

    for _, filepath in pending:
        path = Path(filepath)
        transcript = ''
        # Clean up any repeated spaces. This is to workaround input issues
        # with iterm2. Ask again if nothing is left after cleaning, so a
        # file is never renamed to "<stem>-.wav".
        while not transcript:
            transcript = collapse_spaces(prompt(path))
        path.rename(path.parent / f'{path.stem}-{transcript}{path.suffix}')