]> jfr.im git - yt-dlp.git/blame - yt_dlp/postprocessor/ffmpeg.py
[docs,cleanup] Some minor refactoring and improve docs
[yt-dlp.git] / yt_dlp / postprocessor / ffmpeg.py
CommitLineData
3aa578ca
PH
1from __future__ import unicode_literals
2
e9fade72 3import io
7dde84f3 4import itertools
496c1923
PH
5import os
6import subprocess
496c1923 7import time
fa2a36d9 8import re
06167fbb 9import json
496c1923 10
496c1923
PH
11from .common import AudioConversionError, PostProcessor
12
84601bb7 13from ..compat import compat_str, compat_numeric_types
8c25f81b 14from ..utils import (
7a340e0d 15 dfxp2srt,
f07b74fc 16 encodeArgument,
496c1923 17 encodeFilename,
165efb82 18 float_or_none,
95807118 19 get_exe_version,
48844745 20 is_outdated_version,
7a340e0d
NA
21 ISO639Utils,
22 orderedSet,
496c1923
PH
23 PostProcessingError,
24 prepend_extension,
f5b1bca9 25 process_communicate_or_kill,
06167fbb 26 replace_extension,
7a340e0d 27 shell_quote,
324ad820 28 traverse_obj,
6606817a 29 variadic,
496c1923
PH
30)
31
32
# File extension -> output format name, for extensions whose format name
# differs from the extension itself.
# NOTE(review): presumably these are ffmpeg muxer names for `-f`; confirm at the use site.
EXT_TO_OUT_FORMATS = {
    'aac': 'adts',
    'flac': 'flac',
    'm4a': 'ipod',
    'mka': 'matroska',
    'mkv': 'matroska',
    'mpg': 'mpeg',
    'ogv': 'ogg',
    'ts': 'mpegts',
    'wma': 'asf',
    'wmv': 'asf',
}
# Requested audio codec name -> value passed to ffmpeg's `-acodec`.
# A value of None means no `-acodec` option is passed at all.
ACODECS = {
    'mp3': 'libmp3lame',
    'aac': 'aac',
    'flac': 'flac',
    'm4a': 'aac',
    'opus': 'libopus',
    'vorbis': 'libvorbis',
    'wav': None,
}
54
55
496c1923
PH
class FFmpegPostProcessorError(PostProcessingError):
    """Raised when ffmpeg/avconv is missing or exits with an unexpected return code."""
58
d799b47b 59
class FFmpegPostProcessor(PostProcessor):
    """Base post-processor that locates and runs ffmpeg/avconv and ffprobe/avprobe."""

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        self._determine_executables()

    def check_version(self):
        """Raise FFmpegPostProcessorError if no converter was found; warn if it is outdated."""
        if not self.available:
            raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')

        required_version = '10-0' if self.basename == 'avconv' else '1.0'
        if is_outdated_version(
                self._versions[self.basename], required_version):
            warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
                self.basename, self.basename, required_version)
            self.report_warning(warning)

    @staticmethod
    def get_versions(downloader=None):
        """Return the mapping of program name to detected version."""
        return FFmpegPostProcessor(downloader)._versions

    def _determine_executables(self):
        """Detect the available programs and set basename, probe_basename, _paths and _versions."""
        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
        prefer_ffmpeg = True

        def get_ffmpeg_version(path):
            ver = get_exe_version(path, args=['-version'])
            if ver:
                regexs = [
                    r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$',  # Ubuntu, see [1]
                    r'n([0-9.]+)$',  # Arch Linux
                    # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
                ]
                for regex in regexs:
                    mobj = re.match(regex, ver)
                    if mobj:
                        ver = mobj.group(1)
            return ver

        def first_available(candidates):
            # First candidate that reported a (truthy) version, else None
            return next((p for p in candidates if self._versions[p]), None)

        self.basename = None
        self.probe_basename = None

        self._paths = None
        self._versions = None
        if self._downloader:
            prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
            location = self.get_param('ffmpeg_location')
            if location is not None:
                if not os.path.exists(location):
                    self.report_warning(
                        'ffmpeg-location %s does not exist! '
                        'Continuing without ffmpeg.' % (location))
                    self._versions = {}
                    return
                elif os.path.isdir(location):
                    dirname, basename = location, None
                else:
                    basename = os.path.splitext(os.path.basename(location))[0]
                    basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
                    dirname = os.path.dirname(os.path.abspath(location))
                    if basename in ('ffmpeg', 'ffprobe'):
                        prefer_ffmpeg = True

                self._paths = {p: os.path.join(dirname, p) for p in programs}
                if basename:
                    # An explicit file location overrides the derived path for that program
                    self._paths[basename] = location
                self._versions = {p: get_ffmpeg_version(self._paths[p]) for p in programs}
        if self._versions is None:
            # No explicit location: rely on the bare program names
            self._versions = {p: get_ffmpeg_version(p) for p in programs}
            self._paths = {p: p for p in programs}

        if prefer_ffmpeg is False:
            self.basename = first_available(('avconv', 'ffmpeg'))
            self.probe_basename = first_available(('avprobe', 'ffprobe'))
        else:
            self.basename = first_available(('ffmpeg', 'avconv'))
            self.probe_basename = first_available(('ffprobe', 'avprobe'))

    @property
    def available(self):
        return self.basename is not None

    @property
    def executable(self):
        return self._paths[self.basename]

    @property
    def probe_available(self):
        return self.probe_basename is not None

    @property
    def probe_executable(self):
        return self._paths[self.probe_basename]

    def get_audio_codec(self, path):
        """
        Return the audio codec name of the file at path, or None if it cannot be determined.

        Uses the probe executable when available, otherwise parses the converter's stderr.
        Raises PostProcessingError when neither executable is available.
        """
        if not self.probe_available and not self.available:
            raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
        try:
            if self.probe_available:
                cmd = [
                    encodeFilename(self.probe_executable, True),
                    encodeArgument('-show_streams')]
            else:
                cmd = [
                    encodeFilename(self.executable, True),
                    encodeArgument('-i')]
            cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
            self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd)))
            handle = subprocess.Popen(
                cmd, stderr=subprocess.PIPE,
                stdout=subprocess.PIPE, stdin=subprocess.PIPE)
            stdout_data, stderr_data = process_communicate_or_kill(handle)
            # The converter is invoked with only "-i <file>", for which exit code 1 is the expected outcome
            expected_ret = 0 if self.probe_available else 1
            if handle.wait() != expected_ret:
                return None
        except (IOError, OSError):
            return None
        output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
        if self.probe_available:
            audio_codec = None
            for line in output.split('\n'):
                if line.startswith('codec_name='):
                    audio_codec = line.split('=')[1].strip()
                elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                    return audio_codec
        else:
            # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
            mobj = re.search(
                r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
                output)
            if mobj:
                return mobj.group(1)
        return None

    def get_metadata_object(self, path, opts=()):
        """
        Run ffprobe on path and return its JSON output (format and streams) as a dict.

        opts is a sequence of extra, already encoded, arguments placed before the filename.
        Raises PostProcessingError when ffprobe is not the selected probe program.
        """
        if self.probe_basename != 'ffprobe':
            if self.probe_available:
                self.report_warning('Only ffprobe is supported for metadata extraction')
            raise PostProcessingError('ffprobe not found. Please install or provide the path using --ffmpeg-location')
        self.check_version()

        cmd = [
            encodeFilename(self.probe_executable, True),
            encodeArgument('-hide_banner'),
            encodeArgument('-show_format'),
            encodeArgument('-show_streams'),
            encodeArgument('-print_format'),
            encodeArgument('json'),
        ]

        cmd += opts
        cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
        self.write_debug('ffprobe command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        # Same helper as the other subprocess calls in this class
        stdout, stderr = process_communicate_or_kill(p)
        return json.loads(stdout.decode('utf-8', 'replace'))

    def get_stream_number(self, path, keys, value):
        """Return (index of the first stream whose keys-path equals value, or None; total stream count)."""
        streams = self.get_metadata_object(path)['streams']
        num = next(
            (i for i, stream in enumerate(streams) if traverse_obj(stream, keys, casesense=False) == value),
            None)
        return num, len(streams)

    def _get_real_video_duration(self, info, fatal=True):
        """
        Return the duration reported by ffprobe, cached in info['_real_duration'].

        When it cannot be determined: raise PostProcessingError if fatal,
        otherwise return whatever is cached (None if nothing).
        """
        try:
            if '_real_duration' not in info:
                info['_real_duration'] = float_or_none(
                    traverse_obj(self.get_metadata_object(info['filepath']), ('format', 'duration')))
            if not info['_real_duration']:
                raise PostProcessingError('ffprobe returned empty duration')
        except PostProcessingError as e:
            if fatal:
                raise PostProcessingError(f'Unable to determine video duration; {e}')
        return info.setdefault('_real_duration', None)

    def _duration_mismatch(self, d1, d2):
        """Return True if the durations differ by more than 1; None if either is missing."""
        if not d1 or not d2:
            return None
        return abs(d1 - d2) > 1

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
        """Run the converter with several inputs (no per-input options) and one output."""
        return self.real_run_ffmpeg(
            [(path, []) for path in input_paths],
            [(out_path, opts)], **kwargs)

    def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
        """
        Run the converter with the given (path, options) pairs for inputs and outputs.

        Pairs with a falsy path are ignored. Outputs get the oldest input mtime.
        Returns the decoded stderr; raises FFmpegPostProcessorError (with the last
        stderr line) when the return code is not in expected_retcodes.
        """
        self.check_version()

        oldest_mtime = min(
            os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path)

        cmd = [encodeFilename(self.executable, True), encodeArgument('-y'), encodeArgument('-probesize'), encodeArgument('max')]
        # avconv does not have repeat option
        if self.basename == 'ffmpeg':
            cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]

        def make_args(file, args, name, number):
            keys = ['_%s%d' % (name, number), '_%s' % name]
            if name == 'o' and number == 1:
                keys.append('')
            args += self._configuration_args(self.basename, keys)
            if name == 'i':
                args.append('-i')
            return (
                [encodeArgument(arg) for arg in args]
                + [encodeFilename(self._ffmpeg_filename_argument(file), True)])

        for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
            cmd += itertools.chain.from_iterable(
                make_args(path, list(opts), arg_type, i + 1)
                for i, (path, opts) in enumerate(path_opts) if path)

        self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        stdout, stderr = process_communicate_or_kill(p)
        if p.returncode not in variadic(expected_retcodes):
            stderr = stderr.decode('utf-8', 'replace').strip()
            self.write_debug(stderr)
            raise FFmpegPostProcessorError(stderr.split('\n')[-1])
        for out_path, _ in output_path_opts:
            if out_path:
                self.try_utime(out_path, oldest_mtime, oldest_mtime)
        return stderr.decode('utf-8', 'replace')

    def run_ffmpeg(self, path, out_path, opts, **kwargs):
        """Run the converter with a single input and a single output."""
        return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)

    @staticmethod
    def _ffmpeg_filename_argument(fn):
        # Always use 'file:' because the filename may contain ':' (ffmpeg
        # interprets that as a protocol) or can start with '-' (-- is broken in
        # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
        # Also leave '-' intact in order not to break streaming to stdout.
        if fn.startswith(('http://', 'https://')):
            return fn
        return 'file:' + fn if fn != '-' else fn

    @staticmethod
    def _quote_for_ffmpeg(string):
        """Return string single-quoted for ffmpeg; an empty string becomes ''."""
        # See https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping
        if not string:
            # Indexing below would fail on an empty string
            return "''"
        # A sequence of '' produces '\'''\'';
        # final replace removes the empty '' between \' \'.
        string = string.replace("'", r"'\''").replace("'''", "'")
        # Handle potential ' at string boundaries.
        string = string[1:] if string[0] == "'" else "'" + string
        return string[:-1] if string[-1] == "'" else string + "'"

    def force_keyframes(self, filename, timestamps):
        """Re-encode filename with keyframes forced at timestamps; return the new file's path."""
        timestamps = orderedSet(timestamps)
        # Guard against an empty list before looking at the first element
        if timestamps and timestamps[0] == 0:
            timestamps = timestamps[1:]
        keyframe_file = prepend_extension(filename, 'keyframes.temp')
        self.to_screen(f'Re-encoding "{filename}" with appropriate keyframes')
        self.run_ffmpeg(filename, keyframe_file, ['-force_key_frames', ','.join(
            f'{t:.6f}' for t in timestamps)])
        return keyframe_file

    def concat_files(self, in_files, out_file, concat_opts=None):
        """
        Use concat demuxer to concatenate multiple files having identical streams.

        Only inpoint, outpoint, and duration concat options are supported.
        See https://ffmpeg.org/ffmpeg-formats.html#concat-1 for details
        """
        concat_file = f'{out_file}.concat'
        self.write_debug(f'Writing concat spec to {concat_file}')
        with open(concat_file, 'wt', encoding='utf-8') as f:
            f.writelines(self._concat_spec(in_files, concat_opts))

        out_flags = ['-c', 'copy']
        if out_file.rpartition('.')[-1] in ('mp4', 'mov'):
            # For some reason, '-c copy' is not enough to copy subtitles
            out_flags.extend(['-c:s', 'mov_text', '-movflags', '+faststart'])

        try:
            self.real_run_ffmpeg(
                [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
                [(out_file, out_flags)])
        finally:
            # The spec file is temporary; remove it whether or not ffmpeg succeeded
            os.remove(concat_file)

    @classmethod
    def _concat_spec(cls, in_files, concat_opts=None):
        """Yield the lines of an ffconcat spec for in_files and their per-file option dicts."""
        if concat_opts is None:
            concat_opts = [{}] * len(in_files)
        yield 'ffconcat version 1.0\n'
        for file, opts in zip(in_files, concat_opts):
            yield f'file {cls._quote_for_ffmpeg(cls._ffmpeg_filename_argument(file))}\n'
            # Iterate explicitly to yield the following directives in order, ignoring the rest.
            for directive in 'inpoint', 'outpoint', 'duration':
                if directive in opts:
                    yield f'{directive} {opts[directive]}\n'
366
496c1923
PH
367
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Post-processor that extracts or converts the audio track of a downloaded file."""

    COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
    SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav')

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        self._preferredcodec = preferredcodec or 'best'
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def _quality_opts(self, extension):
        """Return the ffmpeg quality/bitrate options for the preferred quality, if one is set."""
        if self._preferredquality is None:
            return []
        # Normalise to str so numeric qualities can be concatenated and passed as arguments
        quality = str(self._preferredquality)
        # The opus codec doesn't support the -aq option
        if int(self._preferredquality) < 10 and extension != 'opus':
            return ['-q:a', quality]
        return ['-b:a', quality + 'k']

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Run ffmpeg without video (-vn); converter errors are re-raised as AudioConversionError."""
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """
        Extract the audio of information['filepath'] into the preferred codec.

        Returns (files to delete, information); 'filepath' and 'ext' are updated
        to the audio file. Raises PostProcessingError on failure.
        """
        path = information['filepath']
        orig_ext = information['ext']

        if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS:
            self.to_screen('Skipping audio extraction since the file is already in a common audio format')
            return [], information

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = ['-bsf:a', 'aac_adtstoasc']
            elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts(extension)
        else:
            # We convert the audio (lossy if codec is lossy)
            acodec = ACODECS[self._preferredcodec]
            extension = self._preferredcodec
            more_opts = self._quality_opts(extension)
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += ['-bsf:a', 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        information['filepath'] = new_path
        information['ext'] = extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        if (new_path == path
                or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
            self.to_screen('Post-process file %s exists, skipping' % new_path)
            return [], information

        try:
            self.to_screen('Destination: ' + new_path)
            self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as e:
            raise PostProcessingError(
                'audio conversion failed: ' + e.msg)
        except Exception as err:
            # Chain the cause so the underlying failure stays visible in tracebacks
            raise PostProcessingError('error running ' + self.basename) from err

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            self.try_utime(
                new_path, time.time(), information['filetime'],
                errnote='Cannot update utime of audio file')

        return [path], information
496c1923
PH
476
477
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
    """Post-processor that converts a media file to a preferred container format."""

    SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
    FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS)))
    _ACTION = 'converting'

    def __init__(self, downloader=None, preferedformat=None):
        super().__init__(downloader)
        # "a>b/c" style spec: a list of "source>target" or plain "target" rules
        self._preferedformats = preferedformat.lower().split('/')

    def _target_ext(self, source_ext):
        """Return the target extension of the first rule applying to source_ext (None if no rule applies)."""
        for rule in self._preferedformats:
            parts = rule.split('>')
            # A rule without '>' applies to every source extension
            if len(parts) == 1 or parts[0].strip() == source_ext:
                return parts[-1].strip()

    @staticmethod
    def _options(target_ext):
        """Return the extra ffmpeg output options needed for target_ext."""
        return ['-c:v', 'libxvid', '-vtag', 'XVID'] if target_ext == 'avi' else []

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Convert info['filepath'] to the target format; return (files to delete, info)."""
        filename = info['filepath']
        source_ext = info['ext'].lower()
        target_ext = self._target_ext(source_ext)

        if not target_ext:
            skip_reason = f'could not find a mapping for {source_ext}'
        elif source_ext == target_ext:
            skip_reason = f'already is in target format {source_ext}'
        else:
            skip_reason = None
        if skip_reason:
            self.to_screen(f'Not {self._ACTION} media file {filename!r}; {skip_reason}')
            return [], info

        outpath = replace_extension(filename, target_ext, source_ext)
        self.to_screen(f'{self._ACTION.title()} video from {source_ext} to {target_ext}; Destination: {outpath}')
        self.run_ffmpeg(filename, outpath, self._options(target_ext))

        info['filepath'] = outpath
        info['format'] = info['ext'] = target_ext
        return [filename], info
efe87a10
FS
518
519
class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
    """Variant of the convertor that copies all streams instead of re-encoding."""

    _ACTION = 'remuxing'

    @staticmethod
    def _options(target_ext):
        """Return stream-copy options, adding +faststart for mp4/m4a/mov targets."""
        copy_opts = ['-c', 'copy', '-map', '0', '-dn']
        if target_ext in ['mp4', 'm4a', 'mov']:
            copy_opts += ['-movflags', '+faststart']
        return copy_opts
496c1923
PH
529
530
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Post-processor that embeds the requested subtitle files into mp4, webm or mkv files."""

    def __init__(self, downloader=None, already_have_subtitle=False):
        super().__init__(downloader)
        # When True, the subtitle files are kept after embedding
        self._already_have_subtitle = already_have_subtitle

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Embed information['requested_subtitles'] in place; return (files to delete, information)."""
        if information['ext'] not in ('mp4', 'webm', 'mkv'):
            self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files')
            return [], information
        subtitles = information.get('requested_subtitles')
        if not subtitles:
            self.to_screen('There aren\'t any subtitles to embed')
            return [], information

        filename = information['filepath']
        if information.get('duration') and self._duration_mismatch(
                self._get_real_video_duration(information, False), information['duration']):
            self.to_screen(f'Skipping {self.pp_key()} since the real and expected durations mismatch')
            return [], information

        ext = information['ext']
        sub_langs, sub_names, sub_filenames = [], [], []
        webm_vtt_warn = False
        mp4_ass_warn = False

        for lang, sub_info in subtitles.items():
            # Check the subtitle file itself, not the video file
            if not os.path.exists(sub_info.get('filepath', '')):
                self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
                continue
            sub_ext = sub_info['ext']
            if sub_ext == 'json':
                self.report_warning('JSON subtitles cannot be embedded')
            elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
                sub_langs.append(lang)
                sub_names.append(sub_info.get('name'))
                sub_filenames.append(sub_info['filepath'])
            else:
                if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
                    webm_vtt_warn = True
                    self.report_warning('Only WebVTT subtitles can be embedded in webm files')
            # Kept at loop level: inside the webm-only branch above it could never trigger
            if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
                mp4_ass_warn = True
                self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')

        if not sub_langs:
            return [], information

        input_files = [filename] + sub_filenames

        opts = [
            '-c', 'copy', '-map', '0', '-dn',
            # Don't copy the existing subtitles, we may be running the
            # postprocessor a second time
            '-map', '-0:s',
            # Don't copy Apple TV chapters track, bin_data (see #19042, #19024,
            # https://trac.ffmpeg.org/ticket/6016)
            '-map', '-0:d',
        ]
        if information['ext'] == 'mp4':
            opts += ['-c:s', 'mov_text']
        for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
            # Input 0 is the media file, so subtitle file i is input i + 1
            opts.extend(['-map', '%d:0' % (i + 1)])
            lang_code = ISO639Utils.short2long(lang) or lang
            opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
            if name:
                opts.extend(['-metadata:s:s:%d' % i, 'handler_name=%s' % name,
                             '-metadata:s:s:%d' % i, 'title=%s' % name])

        temp_filename = prepend_extension(filename, 'temp')
        self.to_screen('Embedding subtitles in "%s"' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        os.replace(temp_filename, filename)

        files_to_delete = [] if self._already_have_subtitle else sub_filenames
        return files_to_delete, information
496c1923
PH
607
608
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Post-processor that writes metadata tags and/or chapters into the media file."""

    def __init__(self, downloader, add_metadata=True, add_chapters=True):
        FFmpegPostProcessor.__init__(self, downloader)
        self._add_metadata = add_metadata
        self._add_chapters = add_chapters

    @staticmethod
    def _options(target_ext):
        """Yield the base stream-mapping/copy options for target_ext."""
        yield from ('-map', '0', '-dn')
        if target_ext == 'm4a':
            yield from ('-vn', '-acodec', 'copy')
        else:
            yield from ('-c', 'copy')

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Add chapters and/or metadata to info['filepath'] in place; return ([], info)."""
        filename, metadata_filename = info['filepath'], None
        options = []
        if self._add_chapters and info.get('chapters'):
            metadata_filename = replace_extension(filename, 'meta')
            options.extend(self._get_chapter_opts(info['chapters'], metadata_filename))
        if self._add_metadata:
            options.extend(self._get_metadata_opts(info))

        if not options:
            self.to_screen('There isn\'t any metadata to add')
            return [], info

        temp_filename = prepend_extension(filename, 'temp')
        self.to_screen('Adding metadata to "%s"' % filename)
        try:
            self.run_ffmpeg_multiple_files(
                (filename, metadata_filename), temp_filename,
                itertools.chain(self._options(info['ext']), *options))
        finally:
            # Remove the temporary chapter file even when ffmpeg fails
            if metadata_filename:
                os.remove(metadata_filename)
        os.replace(temp_filename, filename)
        return [], info

    @staticmethod
    def _get_chapter_opts(chapters, metadata_filename):
        """Write chapters to metadata_filename in FFMETADATA1 format and yield the options to use it."""
        with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
            def ffmpeg_escape(text):
                return re.sub(r'([\\=;#\n])', r'\\\1', text)

            metadata_file_content = ';FFMETADATA1\n'
            for chapter in chapters:
                metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
                metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
                metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
                chapter_title = chapter.get('title')
                if chapter_title:
                    metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
            f.write(metadata_file_content)
            # The metadata file is the second input (index 1)
            yield ('-map_metadata', '1')

    def _get_metadata_opts(self, info):
        """Yield tuples of ffmpeg options that set metadata taken from info."""
        metadata = {}

        def add(meta_list, info_list=None):
            # Copy the first string/numeric field of info_list (default: meta_list)
            # found in info into every metadata field named in meta_list
            if not meta_list:
                return
            for info_f in variadic(info_list or meta_list):
                if isinstance(info.get(info_f), (compat_str, compat_numeric_types)):
                    for meta_f in variadic(meta_list):
                        metadata[meta_f] = info[info_f]
                    break

        # See [1-3] for some info on media metadata/metadata supported
        # by ffmpeg.
        # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
        # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
        # 3. https://kodi.wiki/view/Video_file_tagging

        add('title', ('track', 'title'))
        add('date', 'upload_date')
        add(('description', 'synopsis'), 'description')
        add(('purl', 'comment'), 'webpage_url')
        add('track', 'track_number')
        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
        add('genre')
        add('album')
        add('album_artist')
        add('disc', 'disc_number')
        add('show', 'series')
        add('season_number')
        add('episode_id', ('episode', 'episode_id'))
        add('episode_sort', 'episode_number')

        # Any info field named "meta_<name>" sets the metadata field <name>
        prefix = 'meta_'
        for key in filter(lambda k: k.startswith(prefix), info.keys()):
            add(key[len(prefix):], key)

        for name, value in metadata.items():
            yield ('-metadata', f'{name}={value}')

        stream_idx = 0
        for fmt in info.get('requested_formats') or []:
            # A format with both audio and video counts as two streams
            stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1
            if fmt.get('language'):
                lang = ISO639Utils.short2long(fmt['language']) or fmt['language']
                for i in range(stream_count):
                    yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang)
            stream_idx += stream_count

        if ('no-attach-info-json' not in self.get_param('compat_opts', [])
                and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')):
            old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json')
            if old_stream is not None:
                # Drop the previously attached JSON; the new attachment takes its slot
                yield ('-map', '-0:%d' % old_stream)
                new_stream -= 1

            yield ('-attach', info['__infojson_filename'],
                   '-metadata:s:%d' % new_stream, 'mimetype=application/json')
496c1923
PH
723
724
class FFmpegMergerPP(FFmpegPostProcessor):
    """Post-processor that merges the separately downloaded formats into one file."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Merge info['__files_to_merge'] into info['filepath']; return (merged inputs, info)."""
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        args = ['-c', 'copy']
        audio_streams = 0
        for (i, fmt) in enumerate(info['requested_formats']):
            if fmt.get('acodec') != 'none':
                args.extend(['-map', f'{i}:a:0'])
                if self.get_audio_codec(fmt['filepath']) == 'aac':
                    args.extend([f'-bsf:a:{audio_streams}', 'aac_adtstoasc'])
                audio_streams += 1
            if fmt.get('vcodec') != 'none':
                args.extend(['-map', f'{i}:v:0'])
        self.to_screen('Merging formats into "%s"' % filename)
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows when the target exists
        os.replace(encodeFilename(temp_filename), encodeFilename(filename))
        return info['__files_to_merge'], info

    def can_merge(self):
        """Return False (with a warning) only for an avconv older than the required version."""
        # TODO: figure out merge-capable ffmpeg version
        if self.basename != 'avconv':
            return True

        required_version = '10-0'
        if is_outdated_version(
                self._versions[self.basename], required_version):
            warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
                       'yt-dlp will download single file media. '
                       'Update %s to version %s or newer to fix this.') % (
                           self.basename, self.basename, required_version)
            self.report_warning(warning)
            return False
        return True
760
0c14e2fb 761
class FFmpegFixupPostProcessor(FFmpegPostProcessor):
    """Base class for post-processors that rewrite a file in place through ffmpeg."""

    def _fixup(self, msg, filename, options):
        """Run ffmpeg on filename with options into a temp file, then replace the original with it."""
        self.to_screen(f'{msg} of "{filename}"')

        fixed_filename = prepend_extension(filename, 'temp')
        self.run_ffmpeg(filename, fixed_filename, options)
        os.replace(fixed_filename, filename)
6271f1ca 770
fd7cfb64 771
class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor):
    """Sets the aspect ratio of files that report a 'stretched_ratio'."""

    @PostProcessor._restrict_to(images=False, audio=False)
    def run(self, info):
        ratio = info.get('stretched_ratio')
        # Nothing to fix when the ratio is unknown or already 1
        if ratio not in (None, 1):
            aspect_opts = ['-c', 'copy', '-map', '0', '-dn', '-aspect', '%f' % ratio]
            self._fixup('Fixing aspect ratio', info['filepath'], aspect_opts)
        return [], info
62cd676c
PH
780
781
class FFmpegFixupM4aPP(FFmpegFixupPostProcessor):
    """Rewrites files whose container is 'm4a_dash' as mp4."""

    @PostProcessor._restrict_to(images=False, video=False)
    def run(self, info):
        is_dash_m4a = info.get('container') == 'm4a_dash'
        if is_dash_m4a:
            container_opts = ['-c', 'copy', '-map', '0', '-dn', '-f', 'mp4']
            self._fixup('Correcting container', info['filepath'], container_opts)
        return [], info
e9fade72
JMF
789
790
class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor):
    """Repair the AAC bitstream of files whose audio codec is aac."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Remux with the aac_adtstoasc bitstream filter when the audio codec is aac."""
        audio_codec = self.get_audio_codec(info['filepath'])
        if audio_codec == 'aac':
            self._fixup('Fixing malformed AAC bitstream', info['filepath'], [
                '-c', 'copy', '-map', '0', '-dn', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'])
        return [], info
798
799
class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
    """Rebase frame timestamps so that they start from the stream's first PTS."""

    def __init__(self, downloader=None, trim=0.001):
        """Store `trim` (a number) as the string later passed to ffmpeg's -ss."""
        # "trim" should be used when the video contains unintended packets
        super().__init__(downloader)
        assert isinstance(trim, (int, float))
        self.trim = str(trim)

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Fix timestamps, re-encoding only when ffmpeg is older than 4.4."""
        required_version = '4.4'
        too_old = is_outdated_version(self._versions[self.basename], required_version)
        if not too_old:
            # Stream copy with the setts bitstream filter
            opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS']
        else:
            self.report_warning(
                'A re-encode is needed to fix timestamps in older versions of ffmpeg. '
                f'Please install ffmpeg {required_version} or later to fixup without re-encoding')
            opts = ['-vf', 'setpts=PTS-STARTPTS']
        self._fixup('Fixing frame timestamp', info['filepath'], [*opts, '-map', '0', '-dn', '-ss', self.trim])
        return [], info
820
821
class FFmpegFixupDurationPP(FFmpegFixupPostProcessor):
    """Rewrite the file with a plain stream copy to fix its duration."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Stream-copy info['filepath'] in place; nothing is scheduled for deletion."""
        stream_copy_opts = ['-c', 'copy', '-map', '0', '-dn']
        self._fixup('Fixing video duration', info['filepath'], stream_copy_opts)
        return [], info
827
828
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
    """Convert the requested subtitle files to a single target format."""

    SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc')

    def __init__(self, downloader=None, format=None):
        """Remember the target subtitle extension in `self.format`."""
        super().__init__(downloader)
        self.format = format

    def run(self, info):
        """Convert every entry of info['requested_subtitles'] to self.format.

        Returns a tuple of (source files that were converted, info).  Each
        converted entry of the subtitles dict is replaced with its new
        'ext', 'data' and 'filepath'.
        """
        subs = info.get('requested_subtitles')
        if subs is None:
            self.to_screen('There aren\'t any subtitles to convert')
            return [], info

        new_ext = self.format
        # ffmpeg's name for the vtt format differs from the file extension
        new_format = 'webvtt' if new_ext == 'vtt' else new_ext

        self.to_screen('Converting subtitles')
        sub_filenames = []
        for lang, sub in subs.items():
            ext = sub['ext']
            if ext == new_ext:
                self.to_screen('Subtitle file for %s is already in the requested format' % new_ext)
                continue
            if ext == 'json':
                self.to_screen(
                    'You have requested to convert json subtitles into another format, '
                    'which is currently not possible')
                continue

            source_path = sub['filepath']
            sub_filenames.append(source_path)
            new_file = replace_extension(source_path, new_ext)

            if ext in ('dfxp', 'ttml', 'tt'):
                self.report_warning(
                    'You have requested to convert dfxp (TTML) subtitles into another format, '
                    'which results in style information loss')

                # TTML is first turned into srt by dfxp2srt; ffmpeg is only
                # needed afterwards if the target is something other than srt.
                srt_file = replace_extension(source_path, 'srt')
                with open(source_path, 'rb') as ttml_handle:
                    srt_data = dfxp2srt(ttml_handle.read())
                with io.open(srt_file, 'wt', encoding='utf-8') as srt_handle:
                    srt_handle.write(srt_data)
                source_path = srt_file

                subs[lang] = {
                    'ext': 'srt',
                    'data': srt_data,
                    'filepath': srt_file,
                }

                if new_ext == 'srt':
                    continue
                # The srt file is only an intermediate step here
                sub_filenames.append(srt_file)

            self.run_ffmpeg(source_path, new_file, ['-f', new_format])

            with io.open(new_file, 'rt', encoding='utf-8') as converted_handle:
                converted_data = converted_handle.read()
            subs[lang] = {
                'ext': new_ext,
                'data': converted_data,
                'filepath': new_file,
            }

            # `sub` is still the original entry, so this looks up the
            # destination registered for the original subtitle file.
            final_path = replace_extension(
                info['__files_to_move'][sub['filepath']], new_ext)
            info['__files_to_move'][new_file] = final_path

        return sub_filenames, info
72755351 900
901
class FFmpegSplitChaptersPP(FFmpegPostProcessor):
    """Split the processed file into one output file per chapter."""

    def __init__(self, downloader, force_keyframes=False):
        """Store whether keyframes should be forced at the chapter start times."""
        FFmpegPostProcessor.__init__(self, downloader)
        self._force_keyframes = force_keyframes

    def _prepare_filename(self, number, chapter, info):
        """Build the output filename for a chapter.

        Works on a copy of `info` extended with the section_* fields and asks
        the downloader for the 'chapter' filename.
        """
        info = info.copy()
        info.update({
            'section_number': number,
            'section_title': chapter.get('title'),
            'section_start': chapter.get('start_time'),
            'section_end': chapter.get('end_time'),
        })
        return self._downloader.prepare_filename(info, 'chapter')

    def _ffmpeg_args_for_chapter(self, number, chapter, info):
        """Return (destination, ffmpeg input options) for a chapter.

        Returns None when the downloader reports that the destination
        directory could not be ensured.  On success the destination is also
        recorded in chapter['filepath'].
        """
        destination = self._prepare_filename(number, chapter, info)
        if not self._downloader._ensure_dir_exists(encodeFilename(destination)):
            return None

        chapter['filepath'] = destination
        self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
        return (
            destination,
            ['-ss', compat_str(chapter['start_time']),
             '-t', compat_str(chapter['end_time'] - chapter['start_time'])])

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Write one stream-copied file per chapter listed in info['chapters'].

        Returns ([], info); the input file itself is never scheduled for deletion.
        """
        chapters = info.get('chapters') or []
        if not chapters:
            self.to_screen('Chapter information is unavailable')
            return [], info

        in_file = info['filepath']
        if self._force_keyframes and len(chapters) > 1:
            in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters))
        self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters))
        try:
            for number, chapter in enumerate(chapters, 1):
                chapter_args = self._ffmpeg_args_for_chapter(number, chapter, info)
                if chapter_args is None:
                    # The helper signals an unusable destination with None;
                    # unpacking it would raise TypeError, so skip this chapter.
                    self.report_warning(
                        'Skipping chapter %03d: unable to prepare its destination' % number)
                    continue
                destination, opts = chapter_args
                self.real_run_ffmpeg([(in_file, opts)], [(destination, ['-c', 'copy'])])
        finally:
            # A different in_file means force_keyframes produced a separate
            # intermediate file; remove it even if a chapter failed.
            if in_file != info['filepath']:
                os.remove(in_file)
        return [], info
8fa43c73 946
947
class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
    """Convert downloaded thumbnails to the requested image format."""

    SUPPORTED_EXTS = ('jpg', 'png')

    def __init__(self, downloader=None, format=None):
        """Remember the target thumbnail extension in `self.format`."""
        super().__init__(downloader)
        self.format = format

    @staticmethod
    def is_webp(path):
        """Return True if the file starts with a RIFF header tagged 'WEBP'."""
        with open(encodeFilename(path), 'rb') as f:
            header = f.read(12)
        return header[0:4] == b'RIFF' and header[8:] == b'WEBP'

    def fixup_webp(self, info, idx=-1):
        """Rename a thumbnail to .webp when its content is WebP but its extension is not.

        Updates both the thumbnail's 'filepath' and the matching
        info['__files_to_move'] entry.  Files without an extension are left alone.
        """
        thumbnail_filename = info['thumbnails'][idx]['filepath']
        _, thumbnail_ext = os.path.splitext(thumbnail_filename)
        if not thumbnail_ext:
            return
        thumbnail_ext = thumbnail_ext[1:].lower()
        if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename):
            self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
            webp_filename = replace_extension(thumbnail_filename, 'webp')
            os.replace(thumbnail_filename, webp_filename)
            info['thumbnails'][idx]['filepath'] = webp_filename
            info['__files_to_move'][webp_filename] = replace_extension(
                info['__files_to_move'].pop(thumbnail_filename), 'webp')

    @staticmethod
    def _options(target_ext):
        """Return the extra ffmpeg output options needed for `target_ext`."""
        if target_ext == 'jpg':
            return ['-bsf:v', 'mjpeg2jpeg']
        return []

    def convert_thumbnail(self, thumbnail_filename, target_ext):
        """Convert one thumbnail with ffmpeg and return the new file's path."""
        thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)

        self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext))
        # '%' in the output name is doubled -- presumably so the image2
        # output does not treat it as a sequence pattern (TODO confirm).
        self.real_run_ffmpeg(
            [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])],
            [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))])
        return thumbnail_conv_filename

    def run(self, info):
        """Convert every thumbnail that has a 'filepath' to self.format.

        Returns (original files that were converted, info).  A missing or
        empty 'thumbnails' entry is treated as "nothing to convert" instead
        of raising.
        """
        files_to_delete = []
        has_thumbnail = False

        for idx, thumbnail_dict in enumerate(info.get('thumbnails') or []):
            if 'filepath' not in thumbnail_dict:
                continue
            has_thumbnail = True
            # May rename the file, so the path is re-read afterwards
            self.fixup_webp(info, idx)
            original_thumbnail = thumbnail_dict['filepath']
            _, thumbnail_ext = os.path.splitext(original_thumbnail)
            if thumbnail_ext:
                thumbnail_ext = thumbnail_ext[1:].lower()
            if thumbnail_ext == 'jpeg':
                thumbnail_ext = 'jpg'
            if thumbnail_ext == self.format:
                self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
                continue
            thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format)
            files_to_delete.append(original_thumbnail)
            info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
                info['__files_to_move'][original_thumbnail], self.format)

        if not has_thumbnail:
            self.to_screen('There aren\'t any thumbnails to convert')
        return files_to_delete, info