-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathtomkv
executable file
·314 lines (294 loc) · 14.6 KB
/
tomkv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
#!/usr/bin/env python3
import os, sys, json, re, math, time, pathlib as pl, subprocess as sp
class adict(dict):
    """dict subclass where keys can also be read and assigned as attributes."""

    @classmethod
    def rec_make(cls, *args, _rec=id, **kws):
        """Build an adict from dict()-style arguments, converting nested dicts too.

        Positional/keyword arguments are passed to dict() to make the top-level
        mapping. Every dict found inside it (directly or within lists) is
        converted to this class, lists are rebuilt as new lists, and all
        other values are returned unchanged.

        _rec -- internal recursion state tuple (value, ancestors, key),
            not intended to be passed by callers.

        Raises ValueError if a dict/list contains itself, directly or via
        nested containers, with the path of keys leading to it in the message.
        """
        # Builtin "id" is used as a sentinel for "top-level call" / "no key" here
        v, ids, rec_key = (
            _rec if _rec is not id else (dict(*args, **kws), dict(), id) )
        if isinstance(v, (dict, list)):
            # ids maps id(container) -> key it was reached by, for ancestors of v only,
            #  so same object under sibling keys is fine, but one inside itself is not
            if (vid := id(v)) in ids: raise ValueError(
                f'Recursive data at [ {".".join(map(str, ids.values()))} ]' )
            # Copy instead of updating in-place, to keep sibling branches independent
            if rec_key is not id: ids = {**ids, vid: rec_key}
            if isinstance(v, dict): v = cls(
                (k, cls.rec_make(_rec=(vv, ids, k))) for k, vv in v.items() )
            else: v = [
                cls.rec_make(_rec=(vv, ids, f'[{n}]')) for n, vv in enumerate(v) ]
        return v

    def __init__(self, *args, **kws):
        super().__init__(*args, **kws)
        # Instance attribute storage is the dict itself, so d.key is d['key']
        self.__dict__ = self
def sz_repr(size, _units=list(
        reversed(list((u, 2 ** (n * 10)) for n, u in enumerate('BKMGT'))) )):
    """Format a byte count as a short string with a binary-unit suffix.

    Picks the largest of T/G/M/K/B units (powers of 1024) that does not
    exceed size. Values in K and above are printed with one decimal place,
    anything smaller is printed as passed in, with "B" suffix.

    size -- number of bytes, int or float.
    _units -- precomputed (suffix, multiplier) pairs, largest first;
        not intended to be passed by callers.

    Returns strings like '500B', '1.5K' or '1.0M'.
    """
    for u, u1 in _units:
        # ">=" so that exact unit multiples use that unit: 1024 is '1.0K', not '1024B'
        if size >= u1: break
    if u1 > 1: size = f'{size/u1:.1f}'
    return f'{size}{u}'
def td_repr(delta, units_max=2, units_res=None, _units=dict(
        h=3600, m=60, s=1, y=365.2422*86400, mo=30.5*86400, w=7*86400, d=1*86400 )):
    """Format a time span in seconds as a short string like "1h 30m".

    delta -- number of seconds, sign is ignored.
    units_max -- max number of unit components to put into result.
    units_res -- unit name (key of _units) to stop at, rounding value to it.
    _units -- unit name to seconds mapping, not intended to be passed by callers.

    Units with zero value are left out. Last component is rounded
    to nearest integer, all preceding ones are rounded down.
    Returns '<1s' if no component ends up in the result.
    """
    remaining, parts = abs(delta), []
    largest_first = sorted(_units.items(), key=lambda kv: kv[1], reverse=True)
    for name, span in largest_first:
        at_resolution = units_res == name
        count = math.floor(remaining / span)
        if count == 0:
            # Nothing to print for this unit, and no point going below requested resolution
            if at_resolution: break
            continue
        is_final = at_resolution or len(parts) == units_max - 1
        if is_final: count = round(remaining / span)
        parts.append(f'{count:.0f}{name}')
        if is_final: break
        remaining -= count * span
    if not parts: return '<1s'
    return ' '.join(parts)
def parse_rgb10_pixfmts():
    """Return a set of pixel format names picked from "ffmpeg -pix_fmts" output.

    Everything up to and including the "-----" separator line is skipped,
    and each non-empty line after it must consist of five whitespace-separated
    fields: flags, format name, two integers and a bit-depths string.
    Presumably those integers are component count and bits-per-pixel
    columns of ffmpeg table - confirm against its header line.

    Format name is added to the result when first flag character is "I",
    first integer is 3, second one is <= 15 and bit depths are "10-10-10".

    Raises RuntimeError if any table line cannot be parsed or separator line
    is missing, and subprocess.CalledProcessError if ffmpeg exits with an error.
    """
    probe = sp.run(['ffmpeg', '-v', 'fatal', '-pix_fmts'], check=True, stdout=sp.PIPE)
    parse, pxfmt_set = False, set()
    for line in probe.stdout.decode().splitlines():
        if not (line := line.strip()): continue
        if not parse:
            if line == '-----': parse = True
            continue
        # Only ValueError is expected here - from wrong field count or non-integer values,
        #  so that things like KeyboardInterrupt are not converted to a parsing error
        try:
            flags, fmt, nc, nb, cbits = line.split()
            nc, nb = int(nc), int(nb)
        except ValueError as err: raise RuntimeError(
            f'Failed to decode "ffmpeg -pix_fmts" line: {line!r}' ) from err
        if flags[0] != 'I': continue
        if nc == 3 and nb <= 15 and cbits == '10-10-10': pxfmt_set.add(fmt)
    if not parse: raise RuntimeError('Failed to decode "ffmpeg -pix_fmts" output')
    return pxfmt_set
def src_probe(p):
    """Run ffprobe on path p and summarize its container/stream info.

    Returns an adict, which on success has following keys:
      v - first video stream: c (codec name), fps, w, h, pxf (pix_fmt), br (bit_rate or 0)
      a - first audio stream: c (codec name), chans (channels)
      s - indexes of non-bitmap subtitle streams among all subtitle streams, with
        total subtitle stream count appended as last element; empty list if there are none
      ms - True if more video/audio streams were found after first ones, otherwise None
      errs - list of adict(t=..., msg=...) issues, one per bitmap subtitle stream
      td, sz - "duration" as float and "size" as int, from ffprobe format info
    For subtitle-format containers and files without both audio and video
    streams, result is adict(t='format', msg=...) with none of the keys above.

    Raises subprocess.CalledProcessError if ffprobe exits with an error.
    """
    ffprobe_cmd = [ 'ffprobe', '-v', 'fatal', '-show_entries',
        'stream:format', '-print_format', 'json', str(p) ]
    ffprobe_out = sp.run(ffprobe_cmd, check=True, stdout=sp.PIPE).stdout
    info = adict.rec_make(json.loads(ffprobe_out))

    container = info.format.format_name
    if container in ['ass', 'srt', 'microdvd']:
        return adict(t='format', msg=f'Subtitle file [{container}]')

    video, audio, extra_streams = 0, 0, None
    sub_count, sub_idx, issues = 0, [], []
    for st in info.streams:
        kind = st.get('codec_type')
        if not kind: continue

        if kind == 'video':
            if video:
                # Additional mjpeg streams are ignored, as long as it's not the first stream
                if st.codec_name != 'mjpeg': extra_streams = True
                continue
            fps = st.get('avg_frame_rate') or 0
            if fps:
                if '/' in fps:
                    num, den = map(float, fps.split('/'))
                    fps = num and den and num/den # zero in either part gives zero
                else: fps = float(fps)
            video = adict( c=st.codec_name, fps=fps, w=st.width,
                h=st.height, pxf=st.pix_fmt, br=int(st.get('bit_rate') or 0) )

        elif kind == 'audio':
            if audio: extra_streams = True
            else: audio = adict(c=st.codec_name, chans=st.channels)

        elif kind == 'subtitle':
            if st.codec_name in ['dvb_subtitle', 'dvd_subtitle', 'hdmv_pgs_subtitle']:
                issues.append(adict(t='subs', msg='Has bitmap subtitles'))
            else: sub_idx.append(sub_count)
            sub_count += 1

    if not audio or not video: return adict(t='format', msg='Missing A/V streams')
    # Total count goes last, to tell whether indexes cover all subtitle streams
    if sub_count or sub_idx: sub_idx.append(sub_count)
    return adict( v=video, a=audio, s=sub_idx, ms=extra_streams, errs=issues,
        td=float(info.format.duration), sz=int(info.format.size) )
def main(args=None):
    """Command-line entry point: probe sources, print ffmpeg commands, optionally run them.

    args -- list of command-line arguments to parse instead of sys.argv[1:].

    Steps, in order:
     - Resolve all source paths, chdir to -T/--dst-dir, open -r/--rm-list file.
     - Run src_probe() on every source, printing PROBLEM/SKIP/WARN lines
       for detected issues, and dropping files that won't be converted.
     - Assign a unique destination filename to each remaining source.
     - Build ffmpeg command for each of those and print it. With -x/--convert
       also run it, rename temp file to destination and print size/time stats.

    Returns None. Exits via parser.error() on bad source paths or rm-list ratio,
    propagates RuntimeError from parse_rgb10_pixfmts() and
    subprocess.CalledProcessError from a failed ffmpeg run.
    """
    import argparse, textwrap
    # Helper to dedent triple-quoted help text and replace tabs in it
    dd = lambda text: re.sub( r' \t+', ' ',
        textwrap.dedent(text).strip('\n') + '\n' ).replace('\t', ' ')
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        usage='%(prog)s [options] src [src ...]', description=dd('''
            ffprobe-check and convert source file(s)
            to a more compact video format, as needed, into current dir.
            Encodes to av1/opus/mkv, downscaling to ~720p30b10/96k-stereo.
            Initial ffprobe is intended to detect files that might already be
            converted or won't benefit from it as much, and skip those by default,
            as well as any files that can't be handled by this script correctly
            (e.g. ones that have multiple A/V streams or errors of any kind).
            Does not run conversion by default, only prints actions to be done.'''))
    parser.add_argument('src', nargs='+', help='File(s) to convert.')
    parser.add_argument('-x', '--convert', action='store_true', help=dd('''
        Run ffmpeg commands to convert all files not
        listed as PROBLEM or SKIP (unless -f/--force is used).'''))
    parser.add_argument('-f', '--force', action='store_true', help=dd('''
        Also process files marked as SKIP, i.e. ones that don't seem to need it.'''))
    parser.add_argument('-1', '--force-stream1', action='store_true', help=dd('''
        Process files with multiple A/V streams, encoding first stream from those.'''))
    parser.add_argument('-q', '--quiet', action='store_true', help=dd('''
        Don't print any WARN/SKIP info about files that seem to be encoded properly.'''))
    parser.add_argument('-T', '--dst-dir', metavar='path', help=dd('''
        Existing path to store resulting files in. Defaults to current dir.'''))
    parser.add_argument('--name',
        metavar='tpl', default='{name}.mkv', help=dd('''
            Template to rename resulting file(s), instead of default: %(default)s
            Can be used to set non-mkv container format, e.g. mp4.
            ffmpeg auto-detects it from extension, so it must be something conventional.
            Names are deduplicated with number-suffix when multiple sources are used.
            Substituted keys: "name" - source filename without extension.'''))
    parser.add_argument('-r', '--rm-list', metavar='file[:ratio]', help=dd('''
        Generate a list of files to cleanup after conversion, one per line.
        It will have realpath of all source files by default, unless ratio number
        (float in 0-1.0 range) is also specified, colon-separated after filename.
        With ratio number, filename on the list is picked
        from either source or destination after each operation,
        based on resulting filesize difference - source if
        resulting size is larger than source*ratio, otherwise destination.
        Intended use is to make an easy-to-use list of files to
        rm when replacing old ones with converted versions,
        without unnecessary replacement if there's not enough benefit.
        Specified list file is always overwritten.'''))
    parser.add_argument('-R', '--rm-list-regen', action='store_true', help=dd('''
        When using -n/--skip-n or similar options,
        still check file sizes when they exist, and put them on the list.
        Can be used to make -r/--rm-list with new compression ratio target,
        by re-running script at any time with -n/--skip-n covering processed files.'''))
    parser.add_argument('-n', '--skip-n', metavar='n', type=int, help=dd('''
        Skip first N files that'd have been processed otherwise.
        Can be used to resume a long operation, using number from
        "wc -l" on -r/--rm-list or printed n/m count between/after ffmpeg runs.'''))
    parser.add_argument('-F', '--fn-opts', action='store_true', help=dd('''
        Apply and strip ffmpeg params stored in filenames.
        If filename has ".tomkv<opts>." before file extension, it'll be parsed and removed.
        Where "<opts>" part can have any number of following options, concatenated:
        +ss=<time> - translated to "-ss <time>" for ffmpeg command.
        +to=<time> - "-to <time>" for ffmpeg - stop time to cut video short at.
        Filename example: video.tomkv+ss=1:23+to=4:56.mp4'''))
    opts = parser.parse_args(sys.argv[1:] if args is None else args)

    # Source paths are resolved before chdir below, as they can be relative
    src_list = list()
    for src in opts.src:
        try: src_list.append((src := pl.Path(src), srcx := src.resolve(strict=True)))
        except FileNotFoundError: parser.error(f'Source path missing/inaccessible: {src}')
        if '\n' in f'{src} {srcx}': parser.error(f'Source path with newline in it: {src!r}')
    if opts.dst_dir: os.chdir(opts.dst_dir)

    if rm_list := opts.rm_list:
        rm_list, rm_list_ratio = ( (rm_list, math.inf)
            if ':' not in rm_list else rm_list.rsplit(':', 1) )
        # Ratio is checked first, to avoid truncating list file on bogus option value
        try: rm_list_ratio = float(rm_list_ratio)
        except ValueError: parser.error(f'Invalid -r/--rm-list ratio value: {rm_list_ratio!r}')
        rm_list = open(rm_list, 'w')
    nx = max(0, opts.skip_n or 0)
    pxfmt_set = parse_rgb10_pixfmts()

    ## ffprobe checks
    # Entries in src_list are replaced by probe results, or None for files to leave out
    for n, (src, srcx) in enumerate(src_list):
        src_list[n] = None
        try: p = src_probe(srcx)
        except Exception as err: p = adict( t='probe',
            msg=f'Failed to process media info: [{err.__class__.__name__}] {err}' )
        if p.get('ms') and not opts.force_stream1:
            p = adict(t='format', msg='Multiple A/V streams detected')

        # Parse fnopts, format dst filename
        fn = (opts.name or '{name}.mkv').format(name=src.name.rsplit('.', 1)[0])
        p.fn, p.ext = fn.rsplit('.', 1) if '.' in fn else (fn, '')
        if opts.fn_opts and (m := re.search(r'\.tomkv(\+\S+)?$', p.fn)):
            p.fn, fnopts = fn[:m.start()], m[1] or ''
            # ValueError here is either from dict() on option without "=" or raised below
            try:
                p.opts = dict(opt.split('=', 1) for opt in fnopts.split('+') if opt)
                if set(p.opts) - {'ss', 'to'}: raise ValueError
            except ValueError: p = adict(t='file', msg=f'Failed to parse -F/--fn-opts: {fnopts!r}')

        # Last check for any unfixable issues
        # All error results above are adicts without "errs" key
        if (errs := p.get('errs')) is None:
            print(f'\n{src.name} :: PROBLEM {p.get("t") or "-"} :: {p.msg}'); continue

        # Check for warnings and skippable issues
        p.v.scale, p.v.resample = p.v.w > 1400 or p.v.h > 1500, p.v.fps > 35
        p.v.pxconv = p.v.pxf not in pxfmt_set
        if p.v.c in ['hevc', 'av1'] and not p.v.scale and not p.v.resample and p.v.br < 2.5e6:
            errs.append(adict(t='video', msg=f'Already encoded to <1280x <30fps {p.v.c}'))
        if p.a.c == 'opus' and p.a.chans <= 2:
            errs.append(adict( t='audio', warn=1,
                msg='Already encoded in <200k 2ch opus, will copy it as-is' ))
            p.a.clone = True

        # Last check for non-fatal errors
        if errs:
            # First issue without "warn" flag (if any) is the one to skip file for
            skip = next((e for e in errs if not e.get('warn')), None)
            err = skip or errs[0]
            if not opts.quiet:
                err_verdict = lambda e: 'WARN' if e.get('warn') or opts.force else 'SKIP'
                if len(errs) > 1: # header line, followed by a list of all issues
                    print(f'\n{src.name} :: {err_verdict(err)}'.rstrip())
                    for e in errs: print(f' {err_verdict(e)} {e.get("t") or "-"} :: {e.msg}')
                else: print(f'\n{src.name} :: {err_verdict(err)} {err.get("t") or "-"} :: {err.msg}')
            if skip and not opts.force: continue

        src_list[n], p.src = p, srcx

    ## Deduplication of dst filenames
    src_list = list(filter(None, src_list))
    dst_name_aliases, dst_name_map = dict(), dict()
    # Extension is taken from each file's own probe result, as only those
    #  are guaranteed to have it, and it can differ with dots in the name itself
    dst_ext = lambda p: p.ext and f'.{p.ext}'
    dst_name_fmt = lambda p: dst_name_aliases.setdefault(p.src, p.fn + dst_ext(p))
    for p in sorted(src_list, key=lambda p: (p.fn, str(p.src))):
        dst_name_map.setdefault(dst_name_fmt(p), list()).append(p)
    for dst_name, ps in dst_name_map.items():
        if len(ps) == 1: continue
        nf = str(len(str(len(ps)))) # zero-padding width for number suffix
        for n, p in enumerate(ps, 1):
            dst_name_aliases[p.src] = ('{}.{:0'+nf+'d}{}').format(p.fn, n, dst_ext(p))
    for p in src_list: p.dst, p.tmp = (dst := dst_name_fmt(p)), f'_tmp.{dst}'

    ## Main ffmpeg conversion loop
    dry_run, m = not opts.convert, len(src_list)
    nx, ts0 = min(nx, m), time.monotonic()
    sz_src_done = sz_src_proc = sz_dst_done = 0

    def _skipped_stats_catchup(n):
        'Add sizes of existing dst files before number n to totals, and to rm-list with -R.'
        nonlocal sz_src_done, sz_dst_done
        for nc, pc in enumerate(src_list, 1):
            if nc == n: break
            try: sz_dst = os.stat(pc.dst).st_size
            except FileNotFoundError: continue
            sz_src_done += (sz_src := pc.sz); sz_dst_done += sz_dst
            if rm_list and opts.rm_list_regen:
                improved = sz_dst/sz_src < rm_list_ratio
                print(pc.src if improved else pc.dst, file=rm_list)

    if dry_run: print()
    for n, p in enumerate(src_list, 1):
        filters = list()
        if p.v.resample: filters.append('fps=30')
        if p.v.scale: filters.append(
            "scale='if(gte(iw,ih),min(1280,iw),-2)"
            ":if(lt(iw,ih),min(1280,ih),-2)',setsar=1:1" )
        if filters: filters = ['-filter:v', ','.join(filters)]
        if fnopts := p.get('opts'):
            filters = sum(([f'-{k}', v] for k, v in fnopts.items()), []) + filters
        if p.v.pxconv: filters.extend(['-pix_fmt', 'yuv420p10le'])

        if p.a.get('clone'): ac = ['-c:a', 'copy']
        else:
            ac = ['-c:a', 'libopus', '-b:a', '96k']
            if p.a.chans == 6: ac = [ '-filter:a', # -ac2 discards sw channel
                'pan=stereo|c0=0.5*c2+0.707*c0+0.707*c4+0.5*c3'
                '|c1=0.5*c2+0.707*c1+0.707*c5+0.5*c3,volume=2.0' ] + ac
            elif p.a.chans != 2: ac = ['-ac', '2'] + ac

        fmt = ['-movflags', '+faststart'] if p.ext.lower() in ['mp4', 'mov', 'm4v'] else []
        if p.s or p.ms:
            fmt.extend('-map 0:v:0 -map 0:a:0'.split())
            # Last element of p.s is a total subtitle stream count, rest are indexes to copy
            if p.s and (sn := (subs := p.s.copy()).pop()):
                if subs == list(range(sn)): fmt.extend(['-map', '0:s']); subs.clear()
                # Separate loop variable here, as "n" is a file counter used below
                for sub_n in subs: fmt.extend(['-map', f'0:s:{sub_n}'])

        cmd = [ 'ffmpeg', '-hide_banner', '-i', str(p.src), *filters,
            *'-c:v libsvtav1 -preset 5 -crf 38'.split(), *ac, *fmt, '-y', p.tmp ]
        dt, ts1 = time.strftime('%Y-%m-%d %H:%M:%S'), time.monotonic()
        msg = f'\n\n- {dt} --- [ {n} / {m} ] :: {td_repr(p.td)} :: {p.src} -> {p.dst}\n'

        # Stats for all skipped files are added once, on the last one of them
        if n == nx and not dry_run: _skipped_stats_catchup(n+1)
        if n <= nx: continue

        if dry_run: msg = msg.strip()
        print(msg); print(' '.join((repr(a) if any( c in a for c in
            r' \'"|*?!&$`{}[];' ) else a) for a in cmd), end='\n\n', flush=True)
        if dry_run: continue

        sp.run( cmd, check=True,
            env=dict(os.environ, SVT_LOG='2'), stdin=sp.DEVNULL )
        os.rename(p.tmp, p.dst)

        # Stats/rm-list for last processed file
        target = ''
        sz_src_done += (sz_src := p.sz); sz_src_proc += sz_src
        sz_dst_done += (sz_dst := os.stat(p.dst).st_size)
        if rm_list:
            improved = sz_dst/sz_src < rm_list_ratio
            print(p.src if improved else p.dst, file=rm_list, flush=True)
            # Infinite ratio means that no target was specified, or makes no difference
            if not math.isinf(rm_list_ratio):
                target = 'better' if improved else 'WORSE'
                target = f' [ {target} than {round(rm_list_ratio*100)}% target ]'
        dt, td = time.strftime('%Y-%m-%d %H:%M:%S'), time.monotonic() - ts1
        print( f'- {dt} --- [ {n} / {m} ] :: {p.dst}'
            f' :: 100% -> {round(100*sz_dst/sz_src)}%{target}'
            f' :: {sz_repr(sz_src)} -> {sz_repr(sz_dst)}'
            f' :: encoded {td_repr(p.td)} in {td_repr(td)}, at {p.td/td:.2f}x speed' )

        # Total stats and estimates
        sz_src_left = sum(pc.sz for nc, pc in enumerate(src_list, 1) if nc > n)
        sz_dst_left = sz_src_left * (sz_ratio := sz_dst_done/sz_src_done)
        td_left = sz_src_left / (sz_src_proc / (td := time.monotonic() - ts0))
        st = ( f'- --- Processed so far :: {sz_repr(sz_src_done)} ->'
            f' {sz_repr(sz_dst_done)} [ {round(100*sz_ratio)}% ] in {td_repr(td)}' )
        if nx: st += f', with first {nx} file(s) skipped on this run'
        print(st)
        if n == m: print('- --- all done', flush=True)
        else: print(
            f'- --- Left to process :: {m-n} file(s) / {sz_repr(sz_src_left)}'
            f' -> additional ~{sz_repr(sz_dst_left)} in {td_repr(td_left)}'
            f' (est. ~{sz_repr(sz_dst_done+sz_dst_left)} final total)', flush=True )

    if rm_list: rm_list.close()
# Script entry point: main() result is passed to sys.exit() as the exit status.
if __name__ == '__main__': sys.exit(main())