]> jfr.im git - yt-dlp.git/blame - yt_dlp/postprocessor/ffmpeg.py
[reddit] Workaround for 429 by redirecting to old.reddit.com
[yt-dlp.git] / yt_dlp / postprocessor / ffmpeg.py
CommitLineData
3aa578ca
PH
1from __future__ import unicode_literals
2
e9fade72 3import io
7dde84f3 4import itertools
496c1923
PH
5import os
6import subprocess
496c1923 7import time
fa2a36d9 8import re
06167fbb 9import json
496c1923 10
496c1923
PH
11from .common import AudioConversionError, PostProcessor
12
84601bb7 13from ..compat import compat_str, compat_numeric_types
8c25f81b 14from ..utils import (
7a340e0d 15 dfxp2srt,
f07b74fc 16 encodeArgument,
496c1923 17 encodeFilename,
165efb82 18 float_or_none,
95807118 19 get_exe_version,
48844745 20 is_outdated_version,
7a340e0d
NA
21 ISO639Utils,
22 orderedSet,
496c1923
PH
23 PostProcessingError,
24 prepend_extension,
f5b1bca9 25 process_communicate_or_kill,
06167fbb 26 replace_extension,
7a340e0d 27 shell_quote,
324ad820 28 traverse_obj,
6606817a 29 variadic,
496c1923
PH
30)
31
32
# Maps a file extension to an output format name for the cases where the two
# differ (or, for 'flac', are listed explicitly).
EXT_TO_OUT_FORMATS = {
    'aac': 'adts',
    'flac': 'flac',
    'm4a': 'ipod',
    'mka': 'matroska',
    'mkv': 'matroska',
    'mpg': 'mpeg',
    'ogv': 'ogg',
    'ts': 'mpegts',
    'wma': 'asf',
    'wmv': 'asf',
}

# Maps a requested audio codec name to the encoder name passed via '-acodec'.
# A value of None means no '-acodec' option is passed at all.
ACODECS = {
    'mp3': 'libmp3lame',
    'aac': 'aac',
    'flac': 'flac',
    'm4a': 'aac',
    'opus': 'libopus',
    'vorbis': 'libvorbis',
    'wav': None,
}
54
55
496c1923
PH
class FFmpegPostProcessorError(PostProcessingError):
    """Post-processing error raised by the ffmpeg/avconv based postprocessors."""
58
d799b47b 59
class FFmpegPostProcessor(PostProcessor):
    """Base class for postprocessors that shell out to ffmpeg/avconv and ffprobe/avprobe.

    On construction it locates the executables and records their versions;
    the remaining methods build command lines and run them.
    """

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        self._determine_executables()

    def check_version(self):
        """Raise if no converter executable was found; warn if it is outdated."""
        if not self.available:
            raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')

        required_version = '10-0' if self.basename == 'avconv' else '1.0'
        if is_outdated_version(
                self._versions[self.basename], required_version):
            warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
                self.basename, self.basename, required_version)
            self.report_warning(warning)

    @staticmethod
    def get_versions(downloader=None):
        """Return the program-name -> version mapping detected for `downloader`."""
        return FFmpegPostProcessor(downloader)._versions

    def _determine_executables(self):
        """Populate self._paths, self._versions, self.basename and self.probe_basename.

        Honours the 'ffmpeg_location' and 'prefer_ffmpeg' params when a
        downloader is attached; otherwise the bare program names are probed.
        """
        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
        prefer_ffmpeg = True

        def get_ffmpeg_version(path):
            ver = get_exe_version(path, args=['-version'])
            if ver:
                regexs = [
                    r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$',  # Ubuntu, see [1]
                    r'n([0-9.]+)$',  # Arch Linux
                    # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
                ]
                for regex in regexs:
                    mobj = re.match(regex, ver)
                    if mobj:
                        ver = mobj.group(1)
            return ver

        self.basename = None
        self.probe_basename = None

        self._paths = None
        self._versions = None
        if self._downloader:
            prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
            location = self.get_param('ffmpeg_location')
            if location is not None:
                if not os.path.exists(location):
                    self.report_warning(
                        'ffmpeg-location %s does not exist! '
                        'Continuing without ffmpeg.' % (location))
                    # Leave basename/probe_basename as None so that
                    # `available` and `probe_available` report False.
                    self._versions = {}
                    return
                elif os.path.isdir(location):
                    dirname, basename = location, None
                else:
                    # A path to a single binary: work out which program it is
                    # and look for the others next to it.
                    basename = os.path.splitext(os.path.basename(location))[0]
                    basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
                    dirname = os.path.dirname(os.path.abspath(location))
                    if basename in ('ffmpeg', 'ffprobe'):
                        prefer_ffmpeg = True

                self._paths = dict(
                    (p, os.path.join(dirname, p)) for p in programs)
                if basename:
                    self._paths[basename] = location
                self._versions = dict(
                    (p, get_ffmpeg_version(self._paths[p])) for p in programs)
        if self._versions is None:
            self._versions = dict(
                (p, get_ffmpeg_version(p)) for p in programs)
            self._paths = dict((p, p) for p in programs)

        if prefer_ffmpeg is False:
            prefs = ('avconv', 'ffmpeg')
        else:
            prefs = ('ffmpeg', 'avconv')
        for p in prefs:
            if self._versions[p]:
                self.basename = p
                break

        if prefer_ffmpeg is False:
            prefs = ('avprobe', 'ffprobe')
        else:
            prefs = ('ffprobe', 'avprobe')
        for p in prefs:
            if self._versions[p]:
                self.probe_basename = p
                break

    @property
    def available(self):
        """True when a converter (ffmpeg or avconv) was detected."""
        return self.basename is not None

    @property
    def executable(self):
        """Path of the detected converter executable."""
        return self._paths[self.basename]

    @property
    def probe_available(self):
        """True when a prober (ffprobe or avprobe) was detected."""
        return self.probe_basename is not None

    @property
    def probe_executable(self):
        """Path of the detected prober executable."""
        return self._paths[self.probe_basename]

    def get_audio_codec(self, path):
        """Return the audio codec name of the file at `path`, or None if undetermined.

        Uses the prober when available, otherwise parses the converter's
        stderr output. Raises PostProcessingError when neither exists.
        """
        if not self.probe_available and not self.available:
            raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
        try:
            if self.probe_available:
                cmd = [
                    encodeFilename(self.probe_executable, True),
                    encodeArgument('-show_streams')]
            else:
                cmd = [
                    encodeFilename(self.executable, True),
                    encodeArgument('-i')]
            cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
            # Log the name of the program that is actually run; self.basename
            # is the converter (and may be None) even when the prober is used.
            program = self.probe_basename if self.probe_available else self.basename
            self.write_debug('%s command line: %s' % (program, shell_quote(cmd)))
            handle = subprocess.Popen(
                cmd, stderr=subprocess.PIPE,
                stdout=subprocess.PIPE, stdin=subprocess.PIPE)
            stdout_data, stderr_data = process_communicate_or_kill(handle)
            # The converter is invoked with an input but no output, for which
            # exit status 1 is the expected result.
            expected_ret = 0 if self.probe_available else 1
            if handle.wait() != expected_ret:
                return None
        except (IOError, OSError):
            return None
        output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
        if self.probe_available:
            audio_codec = None
            for line in output.split('\n'):
                if line.startswith('codec_name='):
                    audio_codec = line.split('=')[1].strip()
                elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                    return audio_codec
        else:
            # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
            mobj = re.search(
                r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
                output)
            if mobj:
                return mobj.group(1)
        return None

    def get_metadata_object(self, path, opts=()):
        """Run ffprobe on `path` and return its JSON output as a Python object.

        `opts` are extra, already-encoded arguments appended to the command.
        Raises PostProcessingError unless the detected prober is ffprobe.
        """
        if self.probe_basename != 'ffprobe':
            if self.probe_available:
                self.report_warning('Only ffprobe is supported for metadata extraction')
            raise PostProcessingError('ffprobe not found. Please install or provide the path using --ffmpeg-location')
        self.check_version()

        cmd = [
            encodeFilename(self.probe_executable, True),
            encodeArgument('-hide_banner'),
            encodeArgument('-show_format'),
            encodeArgument('-show_streams'),
            encodeArgument('-print_format'),
            encodeArgument('json'),
        ]

        cmd += opts
        cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
        self.write_debug('ffprobe command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        # Same helper as the other subprocess calls in this class.
        stdout, stderr = process_communicate_or_kill(p)
        return json.loads(stdout.decode('utf-8', 'replace'))

    def get_stream_number(self, path, keys, value):
        """Return (index of the first stream whose `keys` entry equals `value`, stream count).

        The index is None when no stream matches.
        """
        streams = self.get_metadata_object(path)['streams']
        num = next(
            (i for i, stream in enumerate(streams) if traverse_obj(stream, keys, casesense=False) == value),
            None)
        return num, len(streams)

    def _get_real_video_duration(self, info, fatal=True):
        """Return the probed duration of info['filepath'], caching it in info['_real_duration'].

        On failure, raises PostProcessingError if `fatal`, else returns None.
        """
        try:
            if '_real_duration' not in info:
                info['_real_duration'] = float_or_none(
                    traverse_obj(self.get_metadata_object(info['filepath']), ('format', 'duration')))
            if not info['_real_duration']:
                raise PostProcessingError('ffprobe returned empty duration')
        except PostProcessingError as e:
            if fatal:
                raise PostProcessingError(f'Unable to determine video duration; {e}')
        return info.setdefault('_real_duration', None)

    def _duration_mismatch(self, d1, d2):
        """Return True if the durations differ by more than 1; None if either is missing/zero."""
        if not d1 or not d2:
            return None
        return abs(d1 - d2) > 1

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
        """Run the converter with several inputs (no per-input options) and one output."""
        return self.real_run_ffmpeg(
            [(path, []) for path in input_paths],
            [(out_path, opts)], **kwargs)

    def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
        """Build and run the converter command line.

        `input_path_opts` and `output_path_opts` are iterables of
        (path, options) pairs; pairs with a falsy path are ignored.
        Raises FFmpegPostProcessorError (carrying the last stderr line) when
        the exit status is not in `expected_retcodes`; otherwise returns the
        decoded stderr.
        """
        self.check_version()

        oldest_mtime = min(
            os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path)

        cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
        # avconv does not have repeat option
        if self.basename == 'ffmpeg':
            cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]

        def make_args(file, args, name, number):
            keys = ['_%s%d' % (name, number), '_%s' % name]
            if name == 'o' and number == 1:
                keys.append('')
            args += self._configuration_args(self.basename, keys)
            if name == 'i':
                args.append('-i')
            return (
                [encodeArgument(arg) for arg in args]
                + [encodeFilename(self._ffmpeg_filename_argument(file), True)])

        for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
            # list(opts) gives make_args a private copy to extend in place.
            cmd += itertools.chain.from_iterable(
                make_args(path, list(opts), arg_type, i + 1)
                for i, (path, opts) in enumerate(path_opts) if path)

        self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        stdout, stderr = process_communicate_or_kill(p)
        if p.returncode not in variadic(expected_retcodes):
            stderr = stderr.decode('utf-8', 'replace').strip()
            if self.get_param('verbose', False):
                self.report_error(stderr)
            raise FFmpegPostProcessorError(stderr.split('\n')[-1])
        for out_path, _ in output_path_opts:
            if out_path:
                self.try_utime(out_path, oldest_mtime, oldest_mtime)
        return stderr.decode('utf-8', 'replace')

    def run_ffmpeg(self, path, out_path, opts, **kwargs):
        """Run the converter with a single input and a single output."""
        return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)

    @staticmethod
    def _ffmpeg_filename_argument(fn):
        # Always use 'file:' because the filename may contain ':' (ffmpeg
        # interprets that as a protocol) or can start with '-' (-- is broken in
        # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
        # Also leave '-' intact in order not to break streaming to stdout.
        if fn.startswith(('http://', 'https://')):
            return fn
        return 'file:' + fn if fn != '-' else fn

    @staticmethod
    def _quote_for_ffmpeg(string):
        """Return `string` single-quoted for use in an ffmpeg script file."""
        # See https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping
        if not string:
            # The boundary handling below indexes string[0] and string[-1],
            # which would raise IndexError for an empty string.
            return "''"
        # A sequence of '' produces '\'''\'';
        # final replace removes the empty '' between \' \'.
        string = string.replace("'", r"'\''").replace("'''", "'")
        # Handle potential ' at string boundaries.
        string = string[1:] if string[0] == "'" else "'" + string
        return string[:-1] if string[-1] == "'" else string + "'"

    def force_keyframes(self, filename, timestamps):
        """Re-encode `filename` with keyframes at `timestamps`; return the new file's path."""
        timestamps = orderedSet(timestamps)
        # Guard the index so that an empty timestamp list does not raise IndexError.
        if timestamps and timestamps[0] == 0:
            timestamps = timestamps[1:]
        keyframe_file = prepend_extension(filename, 'keyframes.temp')
        self.to_screen(f'Re-encoding "{filename}" with appropriate keyframes')
        self.run_ffmpeg(filename, keyframe_file, ['-force_key_frames', ','.join(
            f'{t:.6f}' for t in timestamps)])
        return keyframe_file

    def concat_files(self, in_files, out_file, concat_opts=None):
        """
        Use concat demuxer to concatenate multiple files having identical streams.

        Only inpoint, outpoint, and duration concat options are supported.
        See https://ffmpeg.org/ffmpeg-formats.html#concat-1 for details
        """
        concat_file = f'{out_file}.concat'
        self.write_debug(f'Writing concat spec to {concat_file}')
        with open(concat_file, 'wt', encoding='utf-8') as f:
            f.writelines(self._concat_spec(in_files, concat_opts))

        out_flags = ['-c', 'copy']
        if out_file.rpartition('.')[-1] in ('mp4', 'mov'):
            # For some reason, '-c copy' is not enough to copy subtitles
            out_flags.extend(['-c:s', 'mov_text', '-movflags', '+faststart'])

        try:
            self.real_run_ffmpeg(
                [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
                [(out_file, out_flags)])
        finally:
            os.remove(concat_file)

    @classmethod
    def _concat_spec(cls, in_files, concat_opts=None):
        """Yield the lines of an ffconcat script for `in_files`.

        `concat_opts`, when given, is paired with `in_files` item by item.
        """
        if concat_opts is None:
            concat_opts = [{}] * len(in_files)
        yield 'ffconcat version 1.0\n'
        for file, opts in zip(in_files, concat_opts):
            yield f'file {cls._quote_for_ffmpeg(cls._ffmpeg_filename_argument(file))}\n'
            # Iterate explicitly to yield the following directives in order, ignoring the rest.
            for directive in 'inpoint', 'outpoint', 'duration':
                if directive in opts:
                    yield f'{directive} {opts[directive]}\n'
367
496c1923
PH
368
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Postprocessor that extracts (and, if needed, converts) the audio track of a file."""

    COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
    SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav')

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        self._preferredcodec = preferredcodec or 'best'
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def _quality_opts(self, allow_vbr=True):
        """Return the audio quality options for the preferred quality.

        Values below 10 are passed as '-q:a'; anything else, or any value
        when `allow_vbr` is false, is passed as a '-b:a' bitrate in 'k'.
        """
        if self._preferredquality is None:
            return []
        # str() so that a quality given as a number does not break the
        # string concatenation below.
        quality = str(self._preferredquality)
        if int(self._preferredquality) < 10 and allow_vbr:
            return ['-q:a', quality]
        return ['-b:a', quality + 'k']

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Extract audio from `path` into `out_path` using `codec` (None: no '-acodec').

        Raises AudioConversionError when the converter fails.
        """
        acodec_opts = [] if codec is None else ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg) from err

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Extract the audio of information['filepath'].

        Updates 'filepath' and 'ext' in `information` and returns
        (files to delete, information).
        """
        path = information['filepath']
        orig_ext = information['ext']

        if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS:
            self.to_screen('Skipping audio extraction since the file is already in a common audio format')
            return [], information

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = ['-bsf:a', 'aac_adtstoasc']
            elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts()
        else:
            # We convert the audio (lossy if codec is lossy)
            acodec = ACODECS[self._preferredcodec]
            extension = self._preferredcodec
            # The opus codec doesn't support the -aq option
            more_opts = self._quality_opts(allow_vbr=extension != 'opus')
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += ['-bsf:a', 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        information['filepath'] = new_path
        information['ext'] = extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        if (new_path == path
                or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
            self.to_screen('Post-process file %s exists, skipping' % new_path)
            return [], information

        try:
            self.to_screen('Destination: ' + new_path)
            self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as e:
            raise PostProcessingError(
                'audio conversion failed: ' + e.msg) from e
        except Exception as e:
            # Keep the original error attached as the cause.
            raise PostProcessingError('error running ' + self.basename) from e

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            self.try_utime(
                new_path, time.time(), information['filetime'],
                errnote='Cannot update utime of audio file')

        return [path], information
496c1923
PH
477
478
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
    """Postprocessor that converts a media file to a preferred container format.

    `preferedformat` is a '/'-separated list of rules, each either 'EXT'
    (applies to any source) or 'SRC>EXT' (applies to source extension SRC).
    """

    SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
    FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS)))
    _action = 'converting'

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertorPP, self).__init__(downloader)
        # The parameter defaults to None; without this guard the default
        # would crash on `.lower()`. With no rules, run() finds no mapping
        # and skips the file.
        self._preferedformats = preferedformat.lower().split('/') if preferedformat else []

    def _target_ext(self, source_ext):
        """Return the target extension of the first rule matching `source_ext`, else None."""
        for pair in self._preferedformats:
            kv = pair.split('>')
            if len(kv) == 1 or kv[0].strip() == source_ext:
                return kv[-1].strip()

    @staticmethod
    def _options(target_ext):
        """Return the extra converter options needed for `target_ext`."""
        if target_ext == 'avi':
            return ['-c:v', 'libxvid', '-vtag', 'XVID']
        return []

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Convert information['filepath']; return (files to delete, information)."""
        path, source_ext = information['filepath'], information['ext'].lower()
        target_ext = self._target_ext(source_ext)
        _skip_msg = (
            'could not find a mapping for %s' if not target_ext
            else 'already is in target format %s' if source_ext == target_ext
            else None)
        if _skip_msg:
            self.to_screen('Not %s media file "%s"; %s' % (self._action, path, _skip_msg % source_ext))
            return [], information

        prefix, sep, oldext = path.rpartition('.')
        outpath = prefix + sep + target_ext
        self.to_screen('%s video from %s to %s; Destination: %s' % (self._action.title(), source_ext, target_ext, outpath))
        self.run_ffmpeg(path, outpath, self._options(target_ext))

        information['filepath'] = outpath
        information['format'] = information['ext'] = target_ext
        return [path], information
520
521
class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
    """Variant of the convertor that copies the streams instead of re-encoding them."""

    _action = 'remuxing'

    @staticmethod
    def _options(target_ext):
        """Return stream-copy options, adding '+faststart' for mp4, m4a and mov targets."""
        faststart = ['-movflags', '+faststart'] if target_ext in ('mp4', 'm4a', 'mov') else []
        return ['-c', 'copy', '-map', '0', '-dn'] + faststart
496c1923
PH
531
532
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Postprocessor that embeds the requested subtitle files into an mp4, webm or mkv file."""

    def __init__(self, downloader=None, already_have_subtitle=False):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        # When true, the subtitle files are not reported for deletion after embedding.
        self._already_have_subtitle = already_have_subtitle

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Embed information['requested_subtitles'] into information['filepath'].

        Returns (files to delete, information).
        """
        if information['ext'] not in ('mp4', 'webm', 'mkv'):
            self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files')
            return [], information
        subtitles = information.get('requested_subtitles')
        if not subtitles:
            self.to_screen('There aren\'t any subtitles to embed')
            return [], information

        filename = information['filepath']
        # .get(): a missing 'duration' is treated by _duration_mismatch like
        # an unknown one instead of raising KeyError.
        if self._duration_mismatch(
                self._get_real_video_duration(information, False), information.get('duration')):
            self.to_screen(f'Skipping {self.pp_key()} since the real and expected durations mismatch')
            return [], information

        ext = information['ext']
        sub_langs, sub_names, sub_filenames = [], [], []
        webm_vtt_warn = False
        mp4_ass_warn = False

        for lang, sub_info in subtitles.items():
            # Check the subtitle's own file, not the video's: the video path
            # is the same for every iteration and says nothing about whether
            # this subtitle exists on disk.
            if not os.path.exists(sub_info.get('filepath', '')):
                self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
                continue
            sub_ext = sub_info['ext']
            if sub_ext == 'json':
                self.report_warning('JSON subtitles cannot be embedded')
            elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
                sub_langs.append(lang)
                sub_names.append(sub_info.get('name'))
                sub_filenames.append(sub_info['filepath'])
            else:
                if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
                    webm_vtt_warn = True
                    self.report_warning('Only WebVTT subtitles can be embedded in webm files')
            # Kept at loop level: the branch above is only reached for webm,
            # so an mp4 check nested inside it could never fire.
            if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
                mp4_ass_warn = True
                self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')

        if not sub_langs:
            return [], information

        input_files = [filename] + sub_filenames

        opts = [
            '-c', 'copy', '-map', '0', '-dn',
            # Don't copy the existing subtitles, we may be running the
            # postprocessor a second time
            '-map', '-0:s',
            # Don't copy Apple TV chapters track, bin_data (see #19042, #19024,
            # https://trac.ffmpeg.org/ticket/6016)
            '-map', '-0:d',
        ]
        if information['ext'] == 'mp4':
            opts += ['-c:s', 'mov_text']
        for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
            # Input 0 is the media file, so subtitle i is input i + 1.
            opts.extend(['-map', '%d:0' % (i + 1)])
            lang_code = ISO639Utils.short2long(lang) or lang
            opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
            if name:
                opts.extend(['-metadata:s:s:%d' % i, 'handler_name=%s' % name,
                             '-metadata:s:s:%d' % i, 'title=%s' % name])

        temp_filename = prepend_extension(filename, 'temp')
        self.to_screen('Embedding subtitles in "%s"' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        os.replace(temp_filename, filename)

        files_to_delete = [] if self._already_have_subtitle else sub_filenames
        return files_to_delete, information
496c1923
PH
609
610
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Postprocessor that writes metadata tags and/or chapters into the media file."""

    def __init__(self, downloader, add_metadata=True, add_chapters=True):
        FFmpegPostProcessor.__init__(self, downloader)
        self._add_metadata = add_metadata
        self._add_chapters = add_chapters

    @staticmethod
    def _options(target_ext):
        """Yield the base stream-mapping/copy options for `target_ext`."""
        yield from ('-map', '0', '-dn')
        if target_ext == 'm4a':
            yield from ('-vn', '-acodec', 'copy')
        else:
            yield from ('-c', 'copy')

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Add the enabled metadata to info['filepath'] in place; return ([], info)."""
        filename, metadata_filename = info['filepath'], None
        options = []
        if self._add_chapters and info.get('chapters'):
            metadata_filename = replace_extension(filename, 'meta')
            options.extend(self._get_chapter_opts(info['chapters'], metadata_filename))
        if self._add_metadata:
            options.extend(self._get_metadata_opts(info))

        if not options:
            self.to_screen('There isn\'t any metadata to add')
            return [], info

        temp_filename = prepend_extension(filename, 'temp')
        self.to_screen('Adding metadata to "%s"' % filename)
        try:
            # metadata_filename may be None here; it is passed as an input
            # path either way.
            self.run_ffmpeg_multiple_files(
                (filename, metadata_filename), temp_filename,
                itertools.chain(self._options(info['ext']), *options))
        finally:
            # Remove the temporary chapter file even when ffmpeg fails.
            if metadata_filename:
                os.remove(metadata_filename)
        os.replace(temp_filename, filename)
        return [], info

    @staticmethod
    def _get_chapter_opts(chapters, metadata_filename):
        """Write `chapters` to `metadata_filename` in FFMETADATA format and yield the option using it."""
        def ffmpeg_escape(text):
            return re.sub(r'([\\=;#\n])', r'\\\1', text)

        lines = [';FFMETADATA1\n']
        for chapter in chapters:
            lines.append('[CHAPTER]\nTIMEBASE=1/1000\n')
            # With TIMEBASE=1/1000 the values are written as the times
            # multiplied by 1000.
            lines.append('START=%d\n' % (chapter['start_time'] * 1000))
            lines.append('END=%d\n' % (chapter['end_time'] * 1000))
            chapter_title = chapter.get('title')
            if chapter_title:
                lines.append('title=%s\n' % ffmpeg_escape(chapter_title))
        with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
            f.write(''.join(lines))
        # The metadata file is passed as the second input (index 1).
        yield ('-map_metadata', '1')

    def _get_metadata_opts(self, info):
        """Yield option tuples for tags, per-stream languages and the info-json attachment."""
        metadata = {}

        def add(meta_list, info_list=None):
            # Copy the first usable field of `info_list` (default: the same
            # names as `meta_list`) into every tag named in `meta_list`.
            if not meta_list:
                return
            for info_f in variadic(info_list or meta_list):
                if isinstance(info.get(info_f), (compat_str, compat_numeric_types)):
                    for meta_f in variadic(meta_list):
                        metadata[meta_f] = info[info_f]
                    break

        # See [1-3] for some info on media metadata/metadata supported
        # by ffmpeg.
        # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
        # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
        # 3. https://kodi.wiki/view/Video_file_tagging

        add('title', ('track', 'title'))
        add('date', 'upload_date')
        add(('description', 'synopsis'), 'description')
        add(('purl', 'comment'), 'webpage_url')
        add('track', 'track_number')
        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
        add('genre')
        add('album')
        add('album_artist')
        add('disc', 'disc_number')
        add('show', 'series')
        add('season_number')
        add('episode_id', ('episode', 'episode_id'))
        add('episode_sort', 'episode_number')

        # Any 'meta_<name>' field in info sets the tag '<name>'.
        prefix = 'meta_'
        for key in filter(lambda k: k.startswith(prefix), info.keys()):
            add(key[len(prefix):], key)

        for name, value in metadata.items():
            yield ('-metadata', f'{name}={value}')

        stream_idx = 0
        for fmt in info.get('requested_formats') or []:
            # A format with both video and audio is counted as two streams.
            stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1
            if fmt.get('language'):
                lang = ISO639Utils.short2long(fmt['language']) or fmt['language']
                for i in range(stream_count):
                    yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang)
            stream_idx += stream_count

        if ('no-attach-info-json' not in self.get_param('compat_opts', [])
                and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')):
            old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json')
            if old_stream is not None:
                # Drop the existing JSON stream; the new one then takes the
                # index one lower.
                yield ('-map', '-0:%d' % old_stream)
                new_stream -= 1

            yield ('-attach', info['__infojson_filename'],
                   '-metadata:s:%d' % new_stream, 'mimetype=application/json')
496c1923
PH
725
726
class FFmpegMergerPP(FFmpegPostProcessor):
    """Postprocessor that merges separately downloaded formats into one file."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Merge info['__files_to_merge'] into info['filepath'].

        Returns (the merged source files, info).
        """
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        args = ['-c', 'copy']
        for (i, fmt) in enumerate(info['requested_formats']):
            # Map the first audio/video stream of input i for every format
            # that has such a stream.
            if fmt.get('acodec') != 'none':
                args.extend(['-map', '%u:a:0' % (i)])
            if fmt.get('vcodec') != 'none':
                args.extend(['-map', '%u:v:0' % (i)])
        self.to_screen('Merging formats into "%s"' % filename)
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
        # os.replace overwrites an existing destination on every platform,
        # and matches the other postprocessors in this file.
        os.replace(encodeFilename(temp_filename), encodeFilename(filename))
        return info['__files_to_merge'], info

    def can_merge(self):
        """Return False (after a warning) when the detected avconv is too old to merge."""
        # TODO: figure out merge-capable ffmpeg version
        if self.basename != 'avconv':
            return True

        required_version = '10-0'
        if is_outdated_version(
                self._versions[self.basename], required_version):
            warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
                       'yt-dlp will download single file media. '
                       'Update %s to version %s or newer to fix this.') % (
                           self.basename, self.basename, required_version)
            self.report_warning(warning)
            return False
        return True
758
0c14e2fb 759
class FFmpegFixupPostProcessor(FFmpegPostProcessor):
    """Base class for postprocessors that rewrite a file in place through the converter."""

    def _fixup(self, msg, filename, options):
        """Run the converter on `filename` with `options`, then replace the file with the result."""
        self.to_screen(f'{msg} of "{filename}"')
        fixed_filename = prepend_extension(filename, 'temp')
        self.run_ffmpeg(filename, fixed_filename, options)
        os.replace(fixed_filename, filename)
6271f1ca 768
fd7cfb64 769
class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor):
    """Applies info['stretched_ratio'] as the aspect ratio of the file."""

    @PostProcessor._restrict_to(images=False, audio=False)
    def run(self, info):
        ratio = info.get('stretched_ratio')
        # Nothing to fix when the ratio is absent or already 1.
        if ratio in (None, 1):
            return [], info
        aspect_opts = ['-c', 'copy', '-map', '0', '-dn', '-aspect', '%f' % ratio]
        self._fixup('Fixing aspect ratio', info['filepath'], aspect_opts)
        return [], info
62cd676c
PH
778
779
class FFmpegFixupM4aPP(FFmpegFixupPostProcessor):
    """Rewrites files whose container is 'm4a_dash' with the mp4 output format."""

    @PostProcessor._restrict_to(images=False, video=False)
    def run(self, info):
        needs_fix = info.get('container') == 'm4a_dash'
        if needs_fix:
            container_opts = ['-c', 'copy', '-map', '0', '-dn', '-f', 'mp4']
            self._fixup('Correcting container', info['filepath'], container_opts)
        return [], info
e9fade72
JMF
787
788
class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor):
    """Applies the 'aac_adtstoasc' bitstream filter to files whose audio codec is AAC."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        filepath = info['filepath']
        if self.get_audio_codec(filepath) != 'aac':
            return [], info
        bitstream_opts = ['-c', 'copy', '-map', '0', '-dn', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
        self._fixup('Fixing malformed AAC bitstream', info['filepath'], bitstream_opts)
        return [], info
796
797
class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
    """Reset timestamps so that the file starts at its first presentation timestamp."""

    def __init__(self, downloader=None, trim=0.001):
        """Store *trim* (a number) as the string later passed to ffmpeg's '-ss'."""
        # "trim" should be used when the video contains unintended packets
        super().__init__(downloader)
        assert isinstance(trim, (int, float))
        self.trim = str(trim)

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Fix the timestamps of info['filepath'].

        With ffmpeg 4.4 or newer the 'setts' bitstream filter is used with stream
        copy; older versions fall back to the 'setpts' video filter (after a warning).

        Returns an empty list of files to delete together with *info*.
        """
        required_version = '4.4'
        if not is_outdated_version(self._versions[self.basename], required_version):
            timestamp_opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS']
        else:
            self.report_warning(
                'A re-encode is needed to fix timestamps in older versions of ffmpeg. '
                f'Please install ffmpeg {required_version} or later to fixup without re-encoding')
            timestamp_opts = ['-vf', 'setpts=PTS-STARTPTS']

        self._fixup(
            'Fixing frame timestamp', info['filepath'],
            [*timestamp_opts, '-map', '0', '-dn', '-ss', self.trim])
        return [], info
818
819
class FFmpegFixupDurationPP(FFmpegFixupPostProcessor):
    """Remux a file with stream copy under the 'Fixing video duration' fixup."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Stream-copy info['filepath'] onto itself.

        Returns an empty list of files to delete together with *info*.
        """
        remux_opts = ['-c', 'copy', '-map', '0', '-dn']
        self._fixup('Fixing video duration', info['filepath'], remux_opts)
        return [], info
825
826
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
    """Convert the requested subtitle files to one target format with ffmpeg."""

    SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc')

    def __init__(self, downloader=None, format=None):
        """Remember *format*, the extension subtitles should be converted to."""
        super().__init__(downloader)
        self.format = format

    def _dfxp_to_srt(self, dfxp_path):
        """Write an srt rendering of *dfxp_path* beside it.

        Returns a (srt_path, srt_text) tuple.
        """
        srt_path = replace_extension(dfxp_path, 'srt')

        with open(dfxp_path, 'rb') as src:
            srt_text = dfxp2srt(src.read())

        with io.open(srt_path, 'wt', encoding='utf-8') as dst:
            dst.write(srt_text)

        return srt_path, srt_text

    def run(self, info):
        """Convert every entry of info['requested_subtitles'] to self.format.

        Entries already in the target format, and json subtitles, are left alone.
        dfxp/ttml/tt subtitles are first turned into srt and, unless srt is the
        target, that intermediate file is then converted by ffmpeg.

        Each converted entry in info['requested_subtitles'] is replaced with a dict
        holding the new 'ext', 'data' and 'filepath'; files converted by ffmpeg are
        also registered in info['__files_to_move'].

        Returns the list of now-superseded subtitle files together with *info*.
        """
        target_ext = self.format
        # The name given to ffmpeg's '-f' differs from the extension for vtt
        target_format = 'webvtt' if target_ext == 'vtt' else target_ext

        subs = info.get('requested_subtitles')
        if subs is None:
            self.to_screen('There aren\'t any subtitles to convert')
            return [], info

        self.to_screen('Converting subtitles')
        superseded = []
        for lang, sub in subs.items():
            source_ext = sub['ext']
            if source_ext == target_ext:
                self.to_screen('Subtitle file for %s is already in the requested format' % target_ext)
                continue
            if source_ext == 'json':
                self.to_screen(
                    'You have requested to convert json subtitles into another format, '
                    'which is currently not possible')
                continue

            source_file = sub['filepath']
            superseded.append(source_file)
            converted_file = replace_extension(source_file, target_ext)

            if source_ext in ('dfxp', 'ttml', 'tt'):
                self.report_warning(
                    'You have requested to convert dfxp (TTML) subtitles into another format, '
                    'which results in style information loss')

                # From here on the srt file is the input for any further conversion
                source_file, srt_text = self._dfxp_to_srt(source_file)
                subs[lang] = {
                    'ext': 'srt',
                    'data': srt_text,
                    'filepath': source_file,
                }

                if target_ext == 'srt':
                    continue
                # The srt file is only an intermediate step in this case
                superseded.append(source_file)

            self.run_ffmpeg(source_file, converted_file, ['-f', target_format])

            with io.open(converted_file, 'rt', encoding='utf-8') as converted:
                subs[lang] = {
                    'ext': target_ext,
                    'data': converted.read(),
                    'filepath': converted_file,
                }

            # "sub" is still the entry as it was before conversion
            info['__files_to_move'][converted_file] = replace_extension(
                info['__files_to_move'][sub['filepath']], target_ext)

        return superseded, info
72755351 898
899
class FFmpegSplitChaptersPP(FFmpegPostProcessor):
    """Split a media file into one file per chapter using ffmpeg stream copy."""

    def __init__(self, downloader, force_keyframes=False):
        """Remember whether keyframes should be forced at chapter starts."""
        FFmpegPostProcessor.__init__(self, downloader)
        self._force_keyframes = force_keyframes

    def _prepare_filename(self, number, chapter, info):
        """Return the output filename for *chapter* from the 'chapter' template.

        A copy of *info* is extended with the section_* fields so that the
        caller's dict is not modified.
        """
        info = info.copy()
        info.update({
            'section_number': number,
            'section_title': chapter.get('title'),
            'section_start': chapter.get('start_time'),
            'section_end': chapter.get('end_time'),
        })
        return self._downloader.prepare_filename(info, 'chapter')

    def _ffmpeg_args_for_chapter(self, number, chapter, info):
        """Build the destination and ffmpeg input options for one chapter.

        Returns a (destination, options) tuple, or None when
        _ensure_dir_exists() reports failure for the destination.
        On success chapter['filepath'] is set to the destination.
        """
        destination = self._prepare_filename(number, chapter, info)
        if not self._downloader._ensure_dir_exists(encodeFilename(destination)):
            return

        chapter['filepath'] = destination
        self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
        return (
            destination,
            ['-ss', compat_str(chapter['start_time']),
             '-t', compat_str(chapter['end_time'] - chapter['start_time'])])

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Write one file per entry of info['chapters'].

        Chapters whose destination cannot be prepared are skipped with a
        warning instead of aborting the whole run.

        Returns an empty list of files to delete together with *info*.
        """
        chapters = info.get('chapters') or []
        if not chapters:
            self.to_screen('Chapter information is unavailable')
            return [], info

        in_file = info['filepath']
        if self._force_keyframes and len(chapters) > 1:
            in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters))
        self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters))
        for idx, chapter in enumerate(chapters):
            chapter_args = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
            if chapter_args is None:
                # Unpacking None would raise TypeError; skip this chapter instead
                self.report_warning(
                    'Unable to prepare the destination for chapter %03d; skipping it' % (idx + 1))
                continue
            destination, opts = chapter_args
            self.real_run_ffmpeg([(in_file, opts)], [(destination, ['-c', 'copy'])])
        # force_keyframes() handed back a different file; remove it once done
        if in_file != info['filepath']:
            os.remove(in_file)
        return [], info
8fa43c73 944
945
class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
    """Convert downloaded thumbnails to a single target image format."""

    SUPPORTED_EXTS = ('jpg', 'png')

    def __init__(self, downloader=None, format=None):
        """Remember *format*, the extension thumbnails should be converted to."""
        super(FFmpegThumbnailsConvertorPP, self).__init__(downloader)
        self.format = format

    @staticmethod
    def is_webp(path):
        """Return True if the first 12 bytes of *path* carry the RIFF/WEBP signature."""
        with open(encodeFilename(path), 'rb') as f:
            b = f.read(12)
        return b[0:4] == b'RIFF' and b[8:] == b'WEBP'

    def fixup_webp(self, info, idx=-1):
        """Rename thumbnail *idx* to '.webp' if its content is webp but its name is not.

        Both info['thumbnails'][idx]['filepath'] and info['__files_to_move']
        are updated to the new name.
        """
        thumbnail_filename = info['thumbnails'][idx]['filepath']
        _, thumbnail_ext = os.path.splitext(thumbnail_filename)
        if thumbnail_ext:
            thumbnail_ext = thumbnail_ext[1:].lower()
            if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename):
                self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
                webp_filename = replace_extension(thumbnail_filename, 'webp')
                os.replace(thumbnail_filename, webp_filename)
                info['thumbnails'][idx]['filepath'] = webp_filename
                info['__files_to_move'][webp_filename] = replace_extension(
                    info['__files_to_move'].pop(thumbnail_filename), 'webp')

    @staticmethod
    def _options(target_ext):
        """Return the extra ffmpeg output options needed for *target_ext*."""
        if target_ext == 'jpg':
            return ['-bsf:v', 'mjpeg2jpeg']
        return []

    def convert_thumbnail(self, thumbnail_filename, target_ext):
        """Convert *thumbnail_filename* to *target_ext*; return the new filename."""
        thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)

        self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext))
        # '%' is doubled in the output name only
        # NOTE(review): presumably so ffmpeg does not read it as a sequence pattern - confirm
        self.real_run_ffmpeg(
            [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])],
            [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))])
        return thumbnail_conv_filename

    def run(self, info):
        """Convert every thumbnail that has a 'filepath' to self.format.

        A missing or empty info['thumbnails'] is treated as "nothing to convert".

        Returns the list of original thumbnail files that were converted (and
        can be deleted) together with *info*.
        """
        files_to_delete = []
        has_thumbnail = False

        # info may carry no 'thumbnails' key (or None); fall back to an empty list
        for idx, thumbnail_dict in enumerate(info.get('thumbnails') or []):
            if 'filepath' not in thumbnail_dict:
                continue
            has_thumbnail = True
            self.fixup_webp(info, idx)
            original_thumbnail = thumbnail_dict['filepath']
            _, thumbnail_ext = os.path.splitext(original_thumbnail)
            if thumbnail_ext:
                thumbnail_ext = thumbnail_ext[1:].lower()
            # Treat 'jpeg' and 'jpg' as the same format
            if thumbnail_ext == 'jpeg':
                thumbnail_ext = 'jpg'
            if thumbnail_ext == self.format:
                self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
                continue
            thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format)
            files_to_delete.append(original_thumbnail)
            info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
                info['__files_to_move'][original_thumbnail], self.format)

        if not has_thumbnail:
            self.to_screen('There aren\'t any thumbnails to convert')
        return files_to_delete, info