]> jfr.im git - yt-dlp.git/blame - yt_dlp/postprocessor/ffmpeg.py
Native SponsorBlock implementation and related improvements (#360)
[yt-dlp.git] / yt_dlp / postprocessor / ffmpeg.py
CommitLineData
3aa578ca
PH
1from __future__ import unicode_literals
2
e9fade72 3import io
7dde84f3 4import itertools
496c1923
PH
5import os
6import subprocess
496c1923 7import time
fa2a36d9 8import re
06167fbb 9import json
496c1923 10
496c1923
PH
11from .common import AudioConversionError, PostProcessor
12
84601bb7 13from ..compat import compat_str, compat_numeric_types
8c25f81b 14from ..utils import (
7a340e0d 15 dfxp2srt,
f07b74fc 16 encodeArgument,
496c1923 17 encodeFilename,
95807118 18 get_exe_version,
48844745 19 is_outdated_version,
7a340e0d
NA
20 ISO639Utils,
21 orderedSet,
496c1923
PH
22 PostProcessingError,
23 prepend_extension,
f5b1bca9 24 process_communicate_or_kill,
06167fbb 25 replace_extension,
7a340e0d 26 shell_quote,
324ad820 27 traverse_obj,
6606817a 28 variadic,
496c1923
PH
29)
30
31
# File extension -> container/format name, for extensions whose format name
# differs from (or must be spelled out alongside) the extension itself.
# NOTE(review): presumably the value handed to ffmpeg's `-f` option; the
# consumer is not in the visible part of the file - confirm.
EXT_TO_OUT_FORMATS = dict(
    aac='adts',
    flac='flac',
    m4a='ipod',
    mka='matroska',
    mkv='matroska',
    mpg='mpeg',
    ogv='ogg',
    ts='mpegts',
    wma='asf',
    wmv='asf',
)

# Requested audio format -> encoder name passed to ffmpeg via `-acodec`.
# `None` (wav) means that no `-acodec` option is passed at all.
ACODECS = dict(
    mp3='libmp3lame',
    aac='aac',
    flac='flac',
    m4a='aac',
    opus='libopus',
    vorbis='libvorbis',
    wav=None,
)
53
54
496c1923
PH
class FFmpegPostProcessorError(PostProcessingError):
    """Post-processing failure that originates from running ffmpeg/avconv.

    Raised when no usable ffmpeg executable is available and when an ffmpeg
    invocation exits with an unexpected return code.
    """
57
d799b47b 58
class FFmpegPostProcessor(PostProcessor):
    """Base class for post-processors that drive ffmpeg/avconv and ffprobe/avprobe.

    On construction the available executables and their versions are detected
    (see `_determine_executables`); subclasses then use `run_ffmpeg`,
    `run_ffmpeg_multiple_files` or `real_run_ffmpeg` to invoke the converter.
    """

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        self._determine_executables()

    def check_version(self):
        """Raise if no converter was found; warn if the one found is outdated."""
        if not self.available:
            raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')

        required_version = '10-0' if self.basename == 'avconv' else '1.0'
        if is_outdated_version(self._versions[self.basename], required_version):
            warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
                self.basename, self.basename, required_version)
            self.report_warning(warning)

    @staticmethod
    def get_versions(downloader=None):
        """Return the mapping of program name -> detected version (or falsy)."""
        return FFmpegPostProcessor(downloader)._versions

    def _determine_executables(self):
        """Locate the converter/probe programs and record their paths and versions.

        Sets `self._paths`, `self._versions`, `self.basename` (converter to
        use, or None) and `self.probe_basename` (probe to use, or None).
        """
        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
        prefer_ffmpeg = True

        def get_ffmpeg_version(path):
            ver = get_exe_version(path, args=['-version'])
            if ver:
                regexs = [
                    r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$',  # Ubuntu, see [1]
                    r'n([0-9.]+)$',  # Arch Linux
                    # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
                ]
                for regex in regexs:
                    mobj = re.match(regex, ver)
                    if mobj:
                        ver = mobj.group(1)
            return ver

        self.basename = None
        self.probe_basename = None

        self._paths = None
        self._versions = None
        if self._downloader:
            prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
            location = self.get_param('ffmpeg_location')
            if location is not None:
                if not os.path.exists(location):
                    self.report_warning(
                        'ffmpeg-location %s does not exist! '
                        'Continuing without ffmpeg.' % (location))
                    # An empty mapping (rather than None) marks "nothing available"
                    self._versions = {}
                    return
                elif os.path.isdir(location):
                    dirname, basename = location, None
                else:
                    # A path to a single binary: work out which program it is
                    basename = os.path.splitext(os.path.basename(location))[0]
                    basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
                    dirname = os.path.dirname(os.path.abspath(location))
                    if basename in ('ffmpeg', 'ffprobe'):
                        prefer_ffmpeg = True

                self._paths = {p: os.path.join(dirname, p) for p in programs}
                if basename:
                    self._paths[basename] = location
                self._versions = {
                    p: get_ffmpeg_version(self._paths[p]) for p in programs}
        if self._versions is None:
            # No explicit location: rely on the programs being found by name
            self._versions = {p: get_ffmpeg_version(p) for p in programs}
            self._paths = {p: p for p in programs}

        if prefer_ffmpeg is False:
            prefs = ('avconv', 'ffmpeg')
        else:
            prefs = ('ffmpeg', 'avconv')
        for p in prefs:
            if self._versions[p]:
                self.basename = p
                break

        if prefer_ffmpeg is False:
            prefs = ('avprobe', 'ffprobe')
        else:
            prefs = ('ffprobe', 'avprobe')
        for p in prefs:
            if self._versions[p]:
                self.probe_basename = p
                break

    @property
    def available(self):
        """Whether a converter (ffmpeg/avconv) was detected."""
        return self.basename is not None

    @property
    def executable(self):
        """Path of the selected converter executable."""
        return self._paths[self.basename]

    @property
    def probe_available(self):
        """Whether a probe program (ffprobe/avprobe) was detected."""
        return self.probe_basename is not None

    @property
    def probe_executable(self):
        """Path of the selected probe executable."""
        return self._paths[self.probe_basename]

    def get_audio_codec(self, path):
        """Return the name of the audio codec of `path`, or None if undetermined.

        Uses the probe program when available, otherwise parses the stream
        listing that the converter prints to stderr.
        Raises PostProcessingError when neither program is available.
        """
        if not self.probe_available and not self.available:
            raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
        try:
            if self.probe_available:
                cmd = [
                    encodeFilename(self.probe_executable, True),
                    encodeArgument('-show_streams')]
            else:
                cmd = [
                    encodeFilename(self.executable, True),
                    encodeArgument('-i')]
            cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
            self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd)))
            handle = subprocess.Popen(
                cmd, stderr=subprocess.PIPE,
                stdout=subprocess.PIPE, stdin=subprocess.PIPE)
            stdout_data, stderr_data = process_communicate_or_kill(handle)
            # The converter is invoked without an output file, so it exits with 1
            expected_ret = 0 if self.probe_available else 1
            if handle.wait() != expected_ret:
                return None
        except (IOError, OSError):
            return None
        output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
        if self.probe_available:
            audio_codec = None
            for line in output.split('\n'):
                if line.startswith('codec_name='):
                    audio_codec = line.split('=')[1].strip()
                elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                    return audio_codec
        else:
            # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
            mobj = re.search(
                r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
                output)
            if mobj:
                return mobj.group(1)
        return None

    def get_metadata_object(self, path, opts=()):
        """Return the parsed JSON output of ffprobe (format and streams) for `path`.

        `opts` is an optional iterable of extra, already encoded, ffprobe
        arguments. Raises PostProcessingError unless the probe is ffprobe.
        """
        if self.probe_basename != 'ffprobe':
            if self.probe_available:
                self.report_warning('Only ffprobe is supported for metadata extraction')
            raise PostProcessingError('ffprobe not found. Please install or provide the path using --ffmpeg-location')
        self.check_version()

        cmd = [
            encodeFilename(self.probe_executable, True),
            encodeArgument('-hide_banner'),
            encodeArgument('-show_format'),
            encodeArgument('-show_streams'),
            encodeArgument('-print_format'),
            encodeArgument('json'),
        ]

        cmd += opts
        cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
        self.write_debug('ffprobe command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        # Same helper as the other subprocess calls in this class
        stdout, stderr = process_communicate_or_kill(p)
        return json.loads(stdout.decode('utf-8', 'replace'))

    def get_stream_number(self, path, keys, value):
        """Return (index of the first stream whose `keys` entry equals `value`
        or None, total number of streams) for the file at `path`."""
        streams = self.get_metadata_object(path)['streams']
        num = next(
            (i for i, stream in enumerate(streams) if traverse_obj(stream, keys, casesense=False) == value),
            None)
        return num, len(streams)

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
        """Run ffmpeg with several inputs (no per-input options) and one output."""
        return self.real_run_ffmpeg(
            [(path, []) for path in input_paths],
            [(out_path, opts)], **kwargs)

    def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
        """Run ffmpeg with per-file options and return its decoded stderr.

        `input_path_opts` / `output_path_opts` are iterables of
        (path, options) pairs; pairs with a falsy path are ignored.
        Raises FFmpegPostProcessorError when the return code is not one of
        `expected_retcodes`.
        """
        self.check_version()

        oldest_mtime = min(
            os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path)

        cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
        # avconv does not have repeat option
        if self.basename == 'ffmpeg':
            cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]

        def make_args(file, args, name, number):
            keys = ['_%s%d' % (name, number), '_%s' % name]
            if name == 'o' and number == 1:
                keys.append('')
            args += self._configuration_args(self.basename, keys)
            if name == 'i':
                args.append('-i')
            return (
                [encodeArgument(arg) for arg in args]
                + [encodeFilename(self._ffmpeg_filename_argument(file), True)])

        for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
            cmd += itertools.chain.from_iterable(
                make_args(path, list(opts), arg_type, i + 1)
                for i, (path, opts) in enumerate(path_opts) if path)

        self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        stdout, stderr = process_communicate_or_kill(p)
        if p.returncode not in variadic(expected_retcodes):
            stderr = stderr.decode('utf-8', 'replace').strip()
            if self.get_param('verbose', False):
                self.report_error(stderr)
            raise FFmpegPostProcessorError(stderr.split('\n')[-1])
        for out_path, _ in output_path_opts:
            if out_path:
                # Give the outputs the modification time of the oldest input
                self.try_utime(out_path, oldest_mtime, oldest_mtime)
        return stderr.decode('utf-8', 'replace')

    def run_ffmpeg(self, path, out_path, opts, **kwargs):
        """Run ffmpeg with a single input and a single output."""
        return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)

    @staticmethod
    def _ffmpeg_filename_argument(fn):
        # Always use 'file:' because the filename may contain ':' (ffmpeg
        # interprets that as a protocol) or can start with '-' (-- is broken in
        # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
        # Also leave '-' intact in order not to break streaming to stdout.
        if fn.startswith(('http://', 'https://')):
            return fn
        return 'file:' + fn if fn != '-' else fn

    @staticmethod
    def _quote_for_ffmpeg(string):
        """Quote `string` for use inside an ffmpeg script (e.g. a concat spec)."""
        # See https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping
        if not string:
            # An empty string has no boundary characters to inspect below
            return "''"
        # A sequence of '' produces '\'''\'';
        # final replace removes the empty '' between \' \'.
        string = string.replace("'", r"'\''").replace("'''", "'")
        # Handle potential ' at string boundaries.
        string = string[1:] if string[0] == "'" else "'" + string
        return string[:-1] if string[-1] == "'" else string + "'"

    def force_keyframes(self, filename, timestamps):
        """Re-encode `filename` with keyframes at `timestamps`; return the new file's name."""
        timestamps = orderedSet(timestamps)
        # A keyframe at 0 is not requested explicitly; the emptiness check
        # avoids an IndexError when no timestamps were given
        if timestamps and timestamps[0] == 0:
            timestamps = timestamps[1:]
        keyframe_file = prepend_extension(filename, 'keyframes.temp')
        self.to_screen(f'Re-encoding "{filename}" with appropriate keyframes')
        self.run_ffmpeg(filename, keyframe_file, ['-force_key_frames', ','.join(
            f'{t:.6f}' for t in timestamps)])
        return keyframe_file

    def concat_files(self, in_files, out_file, concat_opts=None):
        """
        Use concat demuxer to concatenate multiple files having identical streams.

        Only inpoint, outpoint, and duration concat options are supported.
        See https://ffmpeg.org/ffmpeg-formats.html#concat-1 for details
        """
        concat_file = f'{out_file}.concat'
        self.write_debug(f'Writing concat spec to {concat_file}')
        with open(concat_file, 'wt', encoding='utf-8') as f:
            f.writelines(self._concat_spec(in_files, concat_opts))

        out_flags = ['-c', 'copy']
        if out_file.rpartition('.')[-1] in ('mp4', 'mov'):
            # For some reason, '-c copy' is not enough to copy subtitles
            out_flags.extend(['-c:s', 'mov_text', '-movflags', '+faststart'])

        try:
            self.real_run_ffmpeg(
                [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
                [(out_file, out_flags)])
        finally:
            # The spec file is only needed while ffmpeg runs
            os.remove(concat_file)

    @classmethod
    def _concat_spec(cls, in_files, concat_opts=None):
        """Yield the lines of a concat demuxer script for `in_files`."""
        if concat_opts is None:
            concat_opts = [{}] * len(in_files)
        yield 'ffconcat version 1.0\n'
        for file, opts in zip(in_files, concat_opts):
            yield f'file {cls._quote_for_ffmpeg(cls._ffmpeg_filename_argument(file))}\n'
            # Iterate explicitly to yield the following directives in order, ignoring the rest.
            for directive in 'inpoint', 'outpoint', 'duration':
                if directive in opts:
                    yield f'{directive} {opts[directive]}\n'
349
496c1923
PH
350
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Post-processor that extracts (and if needed converts) the audio track."""

    COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
    SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav')

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        self._preferredcodec = preferredcodec or 'best'
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def _quality_opts(self, allow_vbr=True):
        """Return the ffmpeg options selecting the preferred audio quality.

        Values below 10 select a VBR quality (`-q:a`) unless `allow_vbr` is
        false; anything else is a bitrate in kbit/s (`-b:a`). The quality is
        converted to `str`, so a numeric value works as well as a string.
        """
        if self._preferredquality is None:
            return []
        is_vbr_level = int(self._preferredquality) < 10
        quality = str(self._preferredquality)
        if is_vbr_level and allow_vbr:
            return ['-q:a', quality]
        return ['-b:a', quality + 'k']

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Run ffmpeg without video, using audio codec `codec` (None: ffmpeg's default).

        Raises AudioConversionError when ffmpeg fails.
        """
        acodec_opts = [] if codec is None else ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg) from err

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Extract the audio of `information['filepath']`.

        Returns (files to delete, updated info dict); 'filepath' and 'ext' of
        the info dict are updated to the extracted file.
        """
        path = information['filepath']
        orig_ext = information['ext']

        if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS:
            self.to_screen('Skipping audio extraction since the file is already in a common audio format')
            return [], information

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = ['-bsf:a', 'aac_adtstoasc']
            elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts()
        else:
            # We convert the audio (lossy if codec is lossy)
            acodec = ACODECS[self._preferredcodec]
            extension = self._preferredcodec
            # The opus codec doesn't support the -aq option
            more_opts = self._quality_opts(allow_vbr=extension != 'opus')
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += ['-bsf:a', 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        information['filepath'] = new_path
        information['ext'] = extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        if (new_path == path
                or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
            self.to_screen('Post-process file %s exists, skipping' % new_path)
            return [], information

        try:
            self.to_screen('Destination: ' + new_path)
            self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as e:
            raise PostProcessingError(
                'audio conversion failed: ' + e.msg) from e
        except Exception as e:
            raise PostProcessingError('error running ' + self.basename) from e

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            self.try_utime(
                new_path, time.time(), information['filetime'],
                errnote='Cannot update utime of audio file')

        return [path], information
496c1923
PH
459
460
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
    """Post-processor that converts the media file into another format.

    The preferred format is a '/'-separated list of rules, each either
    'TARGET' or 'SOURCE>TARGET' (see FORMAT_RE); the first applicable rule
    decides the target extension.
    """

    SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
    FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS)))
    _action = 'converting'

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertorPP, self).__init__(downloader)
        # The default None yields a single empty rule, for which run()
        # reports that no mapping was found instead of crashing here
        self._preferedformats = (preferedformat or '').lower().split('/')

    def _target_ext(self, source_ext):
        """Return the target extension for `source_ext`, or None if no rule applies."""
        for pair in self._preferedformats:
            kv = pair.split('>')
            if len(kv) == 1 or kv[0].strip() == source_ext:
                return kv[-1].strip()

    @staticmethod
    def _options(target_ext):
        """Return the ffmpeg output options to use for `target_ext`."""
        if target_ext == 'avi':
            return ['-c:v', 'libxvid', '-vtag', 'XVID']
        return []

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Convert the file; return (files to delete, updated info dict)."""
        path, source_ext = information['filepath'], information['ext'].lower()
        target_ext = self._target_ext(source_ext)
        _skip_msg = (
            'could not find a mapping for %s' if not target_ext
            else 'already is in target format %s' if source_ext == target_ext
            else None)
        if _skip_msg:
            self.to_screen('Not %s media file "%s"; %s' % (self._action, path, _skip_msg % source_ext))
            return [], information

        prefix, sep, oldext = path.rpartition('.')
        outpath = prefix + sep + target_ext
        self.to_screen('%s video from %s to %s; Destination: %s' % (self._action.title(), source_ext, target_ext, outpath))
        self.run_ffmpeg(path, outpath, self._options(target_ext))

        information['filepath'] = outpath
        information['format'] = information['ext'] = target_ext
        return [path], information
502
503
class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
    """Variant of the video convertor that copies all streams instead of re-encoding."""

    _action = 'remuxing'

    @staticmethod
    def _options(target_ext):
        """Return the stream-copy options, plus faststart for mp4/m4a/mov targets."""
        stream_copy = ['-c', 'copy', '-map', '0', '-dn']
        if target_ext not in ['mp4', 'm4a', 'mov']:
            return stream_copy
        return stream_copy + ['-movflags', '+faststart']
496c1923
PH
513
514
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Post-processor that embeds the requested subtitle files into the media file."""

    def __init__(self, downloader=None, already_have_subtitle=False):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        # When true, the subtitle files are kept after embedding
        self._already_have_subtitle = already_have_subtitle

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Embed subtitles; return (subtitle files to delete, info dict)."""
        if information['ext'] not in ('mp4', 'webm', 'mkv'):
            self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files')
            return [], information
        subtitles = information.get('requested_subtitles')
        if not subtitles:
            self.to_screen('There aren\'t any subtitles to embed')
            return [], information

        filename = information['filepath']

        ext = information['ext']
        sub_langs, sub_names, sub_filenames = [], [], []
        webm_vtt_warn = False
        mp4_ass_warn = False

        for lang, sub_info in subtitles.items():
            # Check the subtitle's own file (not the media file), since that
            # is the file which gets passed to ffmpeg as an extra input
            if not os.path.exists(sub_info.get('filepath', '')):
                self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
                continue
            sub_ext = sub_info['ext']
            if sub_ext == 'json':
                self.report_warning('JSON subtitles cannot be embedded')
            elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
                sub_langs.append(lang)
                sub_names.append(sub_info.get('name'))
                sub_filenames.append(sub_info['filepath'])
            else:
                if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
                    webm_vtt_warn = True
                    self.report_warning('Only WebVTT subtitles can be embedded in webm files')
            # Outside the webm-only branch above, where it could never trigger
            if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
                mp4_ass_warn = True
                self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')

        if not sub_langs:
            return [], information

        input_files = [filename] + sub_filenames

        opts = [
            '-c', 'copy', '-map', '0', '-dn',
            # Don't copy the existing subtitles, we may be running the
            # postprocessor a second time
            '-map', '-0:s',
            # Don't copy Apple TV chapters track, bin_data (see #19042, #19024,
            # https://trac.ffmpeg.org/ticket/6016)
            '-map', '-0:d',
        ]
        if information['ext'] == 'mp4':
            opts += ['-c:s', 'mov_text']
        for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
            # Input 0 is the media file, so subtitle i is input i + 1
            opts.extend(['-map', '%d:0' % (i + 1)])
            lang_code = ISO639Utils.short2long(lang) or lang
            opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
            if name:
                opts.extend(['-metadata:s:s:%d' % i, 'handler_name=%s' % name,
                             '-metadata:s:s:%d' % i, 'title=%s' % name])

        temp_filename = prepend_extension(filename, 'temp')
        self.to_screen('Embedding subtitles in "%s"' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        os.replace(temp_filename, filename)

        files_to_delete = [] if self._already_have_subtitle else sub_filenames
        return files_to_delete, information
496c1923
PH
587
588
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Post-processor that writes metadata tags and/or chapters into the media file."""

    def __init__(self, downloader, add_metadata=True, add_chapters=True):
        FFmpegPostProcessor.__init__(self, downloader)
        self._add_metadata = add_metadata
        self._add_chapters = add_chapters

    @staticmethod
    def _options(target_ext):
        """Yield the base stream-copy options for a file with extension `target_ext`."""
        yield from ('-map', '0', '-dn')
        if target_ext == 'm4a':
            yield from ('-vn', '-acodec', 'copy')
        else:
            yield from ('-c', 'copy')

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Add the metadata in place; return (files to delete, info dict)."""
        filename, metadata_filename = info['filepath'], None
        options = []
        if self._add_chapters and info.get('chapters'):
            metadata_filename = replace_extension(filename, 'meta')
            # extend() consumes the generator, which writes the chapter file
            options.extend(self._get_chapter_opts(info['chapters'], metadata_filename))
        if self._add_metadata:
            options.extend(self._get_metadata_opts(info))

        if not options:
            self.to_screen('There isn\'t any metadata to add')
            return [], info

        temp_filename = prepend_extension(filename, 'temp')
        self.to_screen('Adding metadata to "%s"' % filename)
        try:
            self.run_ffmpeg_multiple_files(
                (filename, metadata_filename), temp_filename,
                itertools.chain(self._options(info['ext']), *options))
        finally:
            # Remove the temporary chapter file even when ffmpeg fails
            if metadata_filename:
                os.remove(metadata_filename)
        os.replace(temp_filename, filename)
        return [], info

    @staticmethod
    def _get_chapter_opts(chapters, metadata_filename):
        """Write `chapters` as an FFMETADATA file and yield the options to use it.

        Being a generator, nothing is written until it is iterated.
        """
        def ffmpeg_escape(text):
            return re.sub(r'([\\=;#\n])', r'\\\1', text)

        lines = [';FFMETADATA1\n']
        for chapter in chapters:
            lines.append('[CHAPTER]\nTIMEBASE=1/1000\n')
            # Times are multiplied by 1000 to match the 1/1000 timebase
            lines.append('START=%d\n' % (chapter['start_time'] * 1000))
            lines.append('END=%d\n' % (chapter['end_time'] * 1000))
            chapter_title = chapter.get('title')
            if chapter_title:
                lines.append('title=%s\n' % ffmpeg_escape(chapter_title))

        with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
            f.write(''.join(lines))
        # The chapter file is the second input (index 1) of the ffmpeg call
        yield ('-map_metadata', '1')

    def _get_metadata_opts(self, info):
        """Yield tuples of ffmpeg options setting tags taken from `info`."""
        metadata = {}

        def add(meta_list, info_list=None):
            # Set every tag in `meta_list` to the first field of `info_list`
            # (default: the same names) that holds a string or a number
            if not meta_list:
                return
            for info_f in variadic(info_list or meta_list):
                if isinstance(info.get(info_f), (compat_str, compat_numeric_types)):
                    for meta_f in variadic(meta_list):
                        metadata[meta_f] = info[info_f]
                    break

        # See [1-3] for some info on media metadata/metadata supported
        # by ffmpeg.
        # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
        # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
        # 3. https://kodi.wiki/view/Video_file_tagging

        add('title', ('track', 'title'))
        add('date', 'upload_date')
        add(('description', 'synopsis'), 'description')
        add(('purl', 'comment'), 'webpage_url')
        add('track', 'track_number')
        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
        add('genre')
        add('album')
        add('album_artist')
        add('disc', 'disc_number')
        add('show', 'series')
        add('season_number')
        add('episode_id', ('episode', 'episode_id'))
        add('episode_sort', 'episode_number')

        # Any 'meta_<name>' field of the info dict overrides/sets tag <name>
        prefix = 'meta_'
        for key in filter(lambda k: k.startswith(prefix), info.keys()):
            add(key[len(prefix):], key)

        for name, value in metadata.items():
            yield ('-metadata', f'{name}={value}')

        stream_idx = 0
        for fmt in info.get('requested_formats') or []:
            # A format with both video and audio accounts for two streams
            stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1
            if fmt.get('language'):
                lang = ISO639Utils.short2long(fmt['language']) or fmt['language']
                for i in range(stream_count):
                    yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang)
            stream_idx += stream_count

        if ('no-attach-info-json' not in self.get_param('compat_opts', [])
                and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')):
            old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json')
            if old_stream is not None:
                # Drop the previously attached json; it frees one stream index
                yield ('-map', '-0:%d' % old_stream)
                new_stream -= 1

            yield ('-attach', info['__infojson_filename'],
                   '-metadata:s:%d' % new_stream, 'mimetype=application/json')
496c1923
PH
703
704
class FFmpegMergerPP(FFmpegPostProcessor):
    """Post-processor that merges the separately downloaded formats into one file."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Merge `info['__files_to_merge']` into `info['filepath']`.

        Returns (the merged source files, to be deleted; info dict).
        """
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        args = ['-c', 'copy']
        for (i, fmt) in enumerate(info['requested_formats']):
            # Take the first audio/video stream of every input that has one
            if fmt.get('acodec') != 'none':
                args.extend(['-map', '%u:a:0' % (i)])
            if fmt.get('vcodec') != 'none':
                args.extend(['-map', '%u:v:0' % (i)])
        self.to_screen('Merging formats into "%s"' % filename)
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
        # os.replace, as in the other post-processors of this file: unlike
        # os.rename it also overwrites an existing destination on Windows
        os.replace(encodeFilename(temp_filename), encodeFilename(filename))
        return info['__files_to_merge'], info

    def can_merge(self):
        """Return whether the detected converter is able to merge formats."""
        # TODO: figure out merge-capable ffmpeg version
        if self.basename != 'avconv':
            return True

        required_version = '10-0'
        if is_outdated_version(
                self._versions[self.basename], required_version):
            warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
                       'yt-dlp will download single file media. '
                       'Update %s to version %s or newer to fix this.') % (
                           self.basename, self.basename, required_version)
            self.report_warning(warning)
            return False
        return True
736
0c14e2fb 737
class FFmpegFixupPostProcessor(FFmpegPostProcessor):
    """Base class for fixups that rewrite a file in place through ffmpeg."""

    def _fixup(self, msg, filename, options):
        """Run ffmpeg on `filename` with `options` and replace it with the result.

        `msg` describes the fixup in the message printed to the screen.
        """
        self.to_screen(f'{msg} of "{filename}"')

        # ffmpeg writes to a temporary sibling which then takes the file's place
        fixed_filename = prepend_extension(filename, 'temp')
        self.run_ffmpeg(filename, fixed_filename, options)
        os.replace(fixed_filename, filename)
6271f1ca 746
fd7cfb64 747
class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor):
    """Fixup that sets the aspect ratio from `info['stretched_ratio']`."""

    @PostProcessor._restrict_to(images=False, audio=False)
    def run(self, info):
        """Fix the aspect ratio if a ratio other than 1 is given; nothing to delete."""
        ratio = info.get('stretched_ratio')
        if ratio in (None, 1):
            # Nothing to correct
            return [], info

        options = ['-c', 'copy', '-map', '0', '-dn', '-aspect', '%f' % ratio]
        self._fixup('Fixing aspect ratio', info['filepath'], options)
        return [], info
62cd676c
PH
756
757
class FFmpegFixupM4aPP(FFmpegFixupPostProcessor):
    """Fixup that rewrites files whose container is 'm4a_dash' as mp4."""

    @PostProcessor._restrict_to(images=False, video=False)
    def run(self, info):
        """Correct the container when `info['container']` is 'm4a_dash'; nothing to delete."""
        if info.get('container') != 'm4a_dash':
            return [], info

        options = ['-c', 'copy', '-map', '0', '-dn', '-f', 'mp4']
        self._fixup('Correcting container', info['filepath'], options)
        return [], info
e9fade72
JMF
765
766
class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor):
    """Fixup that rewrites files with AAC audio using the aac_adtstoasc filter."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Fix the AAC bitstream if the file's audio codec is aac; nothing to delete."""
        filepath = info['filepath']
        if self.get_audio_codec(filepath) != 'aac':
            return [], info

        options = ['-c', 'copy', '-map', '0', '-dn', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
        self._fixup('Fixing malformed AAC bitstream', info['filepath'], options)
        return [], info
774
775
class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
    """Fixup that resets the timestamps of the file to start from zero."""

    def __init__(self, downloader=None, trim=0.001):
        # "trim" should be used when the video contains unintended packets
        super(FFmpegFixupTimestampPP, self).__init__(downloader)
        assert isinstance(trim, (int, float))
        # Stored as a string since it is passed as the value of '-ss'
        self.trim = str(trim)

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Fix the timestamps, re-encoding only if ffmpeg is older than 4.4; nothing to delete."""
        required_version = '4.4'
        needs_reencode = is_outdated_version(self._versions[self.basename], required_version)
        if needs_reencode:
            self.report_warning(
                'A re-encode is needed to fix timestamps in older versions of ffmpeg. '
                f'Please install ffmpeg {required_version} or later to fixup without re-encoding')
        timestamp_opts = (
            ['-vf', 'setpts=PTS-STARTPTS'] if needs_reencode
            else ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS'])
        common_opts = ['-map', '0', '-dn', '-ss', self.trim]
        self._fixup('Fixing frame timestamp', info['filepath'], timestamp_opts + common_opts)
        return [], info
796
797
class FFmpegFixupDurationPP(FFmpegFixupPostProcessor):
    """Fixup that remuxes the file with a plain stream copy to correct its duration."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Remux ``info['filepath']`` unconditionally; always returns ``([], info)``."""
        copy_opts = ['-c', 'copy', '-map', '0', '-dn']
        self._fixup('Fixing video duration', info['filepath'], copy_opts)
        return [], info
803
804
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
    """Convert the requested subtitle files to a single target format."""

    SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc')

    def __init__(self, downloader=None, format=None):
        """Remember ``format``, the target subtitle extension."""
        super().__init__(downloader)
        self.format = format

    def run(self, info):
        """Convert every entry of ``info['requested_subtitles']`` to ``self.format``.

        Entries already in the target format, and json subtitles, are skipped.
        dfxp/ttml/tt subtitles are first converted to srt with ``dfxp2srt`` and
        then, unless srt is the target, passed through ffmpeg like the others.
        Each converted entry is replaced by a dict with the new ``ext``, ``data``
        and ``filepath``, and ffmpeg-converted files are registered in
        ``info['__files_to_move']``.

        Returns a tuple ``(superseded_files, info)`` where ``superseded_files``
        lists the source (and intermediate srt) files that were converted.
        """
        target_ext = self.format
        # The extension is "vtt", but the ffmpeg format name is "webvtt"
        target_format = 'webvtt' if target_ext == 'vtt' else target_ext

        subs = info.get('requested_subtitles')
        if subs is None:
            self.to_screen('There aren\'t any subtitles to convert')
            return [], info

        self.to_screen('Converting subtitles')
        superseded_files = []
        for lang, sub in subs.items():
            source_ext = sub['ext']
            if source_ext == target_ext:
                self.to_screen('Subtitle file for %s is already in the requested format' % target_ext)
                continue
            if source_ext == 'json':
                self.to_screen(
                    'You have requested to convert json subtitles into another format, '
                    'which is currently not possible')
                continue

            ffmpeg_input = sub['filepath']
            superseded_files.append(ffmpeg_input)
            converted_file = replace_extension(ffmpeg_input, target_ext)

            if source_ext in ('dfxp', 'ttml', 'tt'):
                self.report_warning(
                    'You have requested to convert dfxp (TTML) subtitles into another format, '
                    'which results in style information loss')

                srt_file = replace_extension(ffmpeg_input, 'srt')
                with open(ffmpeg_input, 'rb') as f:
                    srt_data = dfxp2srt(f.read())
                with io.open(srt_file, 'wt', encoding='utf-8') as f:
                    f.write(srt_data)

                # From here on the srt file is what ffmpeg would read
                ffmpeg_input = srt_file
                subs[lang] = {
                    'ext': 'srt',
                    'data': srt_data,
                    'filepath': srt_file,
                }

                if target_ext == 'srt':
                    # The intermediate srt already is the requested result
                    continue
                superseded_files.append(srt_file)

            self.run_ffmpeg(ffmpeg_input, converted_file, ['-f', target_format])

            with io.open(converted_file, 'rt', encoding='utf-8') as f:
                converted_data = f.read()
            subs[lang] = {
                'ext': target_ext,
                'data': converted_data,
                'filepath': converted_file,
            }

            # "sub" is still the original entry, so this looks up the original path
            final_path = replace_extension(
                info['__files_to_move'][sub['filepath']], target_ext)
            info['__files_to_move'][converted_file] = final_path

        return superseded_files, info
72755351 876
877
class FFmpegSplitChaptersPP(FFmpegPostProcessor):
    """Split a media file into one output file per chapter using stream copy."""

    def __init__(self, downloader, force_keyframes=False):
        """Store whether keyframes should be forced at the chapter start times."""
        FFmpegPostProcessor.__init__(self, downloader)
        self._force_keyframes = force_keyframes

    def _prepare_filename(self, number, chapter, info):
        """Return the 'chapter' output filename for the given chapter.

        Works on a copy of ``info`` extended with the ``section_*`` fields, so
        the caller's dict is not modified.
        """
        info = info.copy()
        info.update({
            'section_number': number,
            'section_title': chapter.get('title'),
            'section_start': chapter.get('start_time'),
            'section_end': chapter.get('end_time'),
        })
        return self._downloader.prepare_filename(info, 'chapter')

    def _ffmpeg_args_for_chapter(self, number, chapter, info):
        """Return ``(destination, input_options)`` for one chapter.

        Returns ``None`` when the destination directory could not be ensured.
        On success the destination is also recorded in ``chapter['filepath']``.
        """
        destination = self._prepare_filename(number, chapter, info)
        if not self._downloader._ensure_dir_exists(encodeFilename(destination)):
            return None

        chapter['filepath'] = destination
        self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
        return (
            destination,
            ['-ss', compat_str(chapter['start_time']),
             '-t', compat_str(chapter['end_time'] - chapter['start_time'])])

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Write every chapter of ``info['chapters']`` to its own file.

        Chapters whose destination directory cannot be prepared are skipped
        with a warning instead of aborting with an unpacking error.
        Always returns ``([], info)``; the original file is kept.
        """
        chapters = info.get('chapters') or []
        if not chapters:
            self.to_screen('Chapter information is unavailable')
            return [], info

        in_file = info['filepath']
        if self._force_keyframes and len(chapters) > 1:
            in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters))
        self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters))
        try:
            for number, chapter in enumerate(chapters, 1):
                chapter_args = self._ffmpeg_args_for_chapter(number, chapter, info)
                if chapter_args is None:
                    # The helper signals an unusable destination with None
                    self.report_warning(
                        'Skipping chapter %03d; unable to prepare the destination' % number)
                    continue
                destination, opts = chapter_args
                self.real_run_ffmpeg([(in_file, opts)], [(destination, ['-c', 'copy'])])
        finally:
            # in_file differs only when force_keyframes produced a temporary
            # file; remove it even if splitting failed part-way
            if in_file != info['filepath']:
                os.remove(in_file)
        return [], info
8fa43c73 922
923
class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
    """Convert downloaded thumbnails to the requested image format."""

    SUPPORTED_EXTS = ('jpg', 'png')

    def __init__(self, downloader=None, format=None):
        """Remember ``format``, the target thumbnail extension."""
        super().__init__(downloader)
        self.format = format

    @staticmethod
    def is_webp(path):
        """Return True if the file at ``path`` starts with a RIFF/WEBP header."""
        with open(encodeFilename(path), 'rb') as f:
            header = f.read(12)
        # Bytes 0-3 must be "RIFF" and bytes 8-11 "WEBP"; bytes 4-7 are not checked
        return header[0:4] == b'RIFF' and header[8:] == b'WEBP'

    def fixup_webp(self, info, idx=-1):
        """Rename thumbnail ``idx`` to ``.webp`` if its content is webp but its extension is not.

        Updates the thumbnail's ``filepath`` and the matching entry of
        ``info['__files_to_move']`` to the renamed file.
        """
        thumbnail_filename = info['thumbnails'][idx]['filepath']
        _, thumbnail_ext = os.path.splitext(thumbnail_filename)
        if not thumbnail_ext:
            return
        thumbnail_ext = thumbnail_ext[1:].lower()
        if thumbnail_ext == 'webp' or not self.is_webp(thumbnail_filename):
            return

        self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
        webp_filename = replace_extension(thumbnail_filename, 'webp')
        os.replace(thumbnail_filename, webp_filename)
        info['thumbnails'][idx]['filepath'] = webp_filename
        info['__files_to_move'][webp_filename] = replace_extension(
            info['__files_to_move'].pop(thumbnail_filename), 'webp')

    @staticmethod
    def _options(target_ext):
        """Return the extra ffmpeg output options needed for ``target_ext``."""
        if target_ext == 'jpg':
            return ['-bsf:v', 'mjpeg2jpeg']
        return []

    def convert_thumbnail(self, thumbnail_filename, target_ext):
        """Convert ``thumbnail_filename`` to ``target_ext`` and return the new filename."""
        thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)

        self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext))
        # The input is read with "-pattern_type none"; in the output name "%"
        # is doubled so that ffmpeg does not treat it as a sequence pattern
        self.real_run_ffmpeg(
            [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])],
            [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))])
        return thumbnail_conv_filename

    def run(self, info):
        """Convert every thumbnail that has a ``filepath`` to ``self.format``.

        A missing or empty ``info['thumbnails']`` is treated as "no thumbnails"
        instead of raising. Returns ``(files_to_delete, info)`` where
        ``files_to_delete`` lists the original files that were converted.
        """
        files_to_delete = []
        has_thumbnail = False

        # "or []" also covers an explicit None value
        for idx, thumbnail_dict in enumerate(info.get('thumbnails') or []):
            if 'filepath' not in thumbnail_dict:
                continue
            has_thumbnail = True
            # May rename the file, so the path is read only afterwards
            self.fixup_webp(info, idx)
            original_thumbnail = thumbnail_dict['filepath']
            _, thumbnail_ext = os.path.splitext(original_thumbnail)
            if thumbnail_ext:
                thumbnail_ext = thumbnail_ext[1:].lower()
                if thumbnail_ext == 'jpeg':
                    thumbnail_ext = 'jpg'
                if thumbnail_ext == self.format:
                    self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
                    continue
            thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format)
            files_to_delete.append(original_thumbnail)
            info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
                info['__files_to_move'][original_thumbnail], self.format)

        if not has_thumbnail:
            self.to_screen('There aren\'t any thumbnails to convert')
        return files_to_delete, info