]> jfr.im git - yt-dlp.git/blame - yt_dlp/postprocessor/ffmpeg.py
[ExtractAudio] Rescale --audio-quality correctly
[yt-dlp.git] / yt_dlp / postprocessor / ffmpeg.py
CommitLineData
3aa578ca
PH
1from __future__ import unicode_literals
2
e9fade72 3import io
7dde84f3 4import itertools
496c1923
PH
5import os
6import subprocess
496c1923 7import time
fa2a36d9 8import re
06167fbb 9import json
496c1923 10
496c1923
PH
11from .common import AudioConversionError, PostProcessor
12
b11d2101 13from ..compat import compat_str
8c25f81b 14from ..utils import (
7a340e0d 15 dfxp2srt,
f07b74fc 16 encodeArgument,
496c1923 17 encodeFilename,
165efb82 18 float_or_none,
95807118 19 get_exe_version,
48844745 20 is_outdated_version,
7a340e0d
NA
21 ISO639Utils,
22 orderedSet,
d3c93ec2 23 Popen,
496c1923
PH
24 PostProcessingError,
25 prepend_extension,
06167fbb 26 replace_extension,
7a340e0d 27 shell_quote,
324ad820 28 traverse_obj,
6606817a 29 variadic,
496c1923
PH
30)
31
32
# File extension -> format name. Presumably the value handed to ffmpeg's
# ``-f`` for extensions whose muxer name differs from the extension itself;
# the lookup site is not part of this section -- TODO confirm.
EXT_TO_OUT_FORMATS = {
    'aac': 'adts',
    'flac': 'flac',
    'm4a': 'ipod',
    'mka': 'matroska',
    'mkv': 'matroska',
    'mpg': 'mpeg',
    'ogv': 'ogg',
    'ts': 'mpegts',
    'wma': 'asf',
    'wmv': 'asf',
    'vtt': 'webvtt',
}

# Requested audio format -> encoder name passed to ffmpeg via ``-acodec``.
# ``None`` means no ``-acodec`` option is added at all.
ACODECS = {
    'mp3': 'libmp3lame',
    'aac': 'aac',
    'flac': 'flac',
    'm4a': 'aac',
    'opus': 'libopus',
    'vorbis': 'libvorbis',
    'wav': None,
}
55
56
496c1923
PH
class FFmpegPostProcessorError(PostProcessingError):
    """Raised when no converter is available or it exits with an unexpected status."""
59
d799b47b 60
class FFmpegPostProcessor(PostProcessor):
    """Base post-processor that locates ffmpeg/avconv (and their probes) and runs them.

    State set up by ``_determine_executables``:
      * ``basename`` / ``probe_basename``: name of the selected converter and
        probe program, or ``None`` when none was detected.
      * ``_paths``: program name -> path used to invoke it (``None`` when the
        configured ``ffmpeg_location`` does not exist).
      * ``_versions``: program name -> detected version (empty dict when the
        configured ``ffmpeg_location`` does not exist).
    """

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        self._determine_executables()

    def check_version(self):
        """Make sure a converter is usable.

        Raises:
            FFmpegPostProcessorError: neither ffmpeg nor avconv was detected.

        A warning (not an error) is reported when the selected program is
        older than the minimum version checked here.
        """
        if not self.available:
            raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')

        required_version = '10-0' if self.basename == 'avconv' else '1.0'
        if is_outdated_version(self._versions[self.basename], required_version):
            self.report_warning(
                'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
                    self.basename, self.basename, required_version))

    @staticmethod
    def get_versions(downloader=None):
        """Return the program-name -> version mapping detected for ``downloader``."""
        return FFmpegPostProcessor(downloader)._versions

    def _determine_executables(self):
        """Detect avprobe/avconv/ffmpeg/ffprobe and pick the ones to use.

        When a downloader is attached, the ``prefer_ffmpeg`` and
        ``ffmpeg_location`` params are honoured. ``ffmpeg_location`` may name
        a directory or a single executable.
        """
        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
        prefer_ffmpeg = True

        def get_ffmpeg_version(path):
            ver = get_exe_version(path, args=['-version'])
            if ver:
                # Reduce distribution-specific version strings to the part
                # captured by each pattern.
                regexs = [
                    r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$',  # Ubuntu, see [1]
                    r'n([0-9.]+)$',  # Arch Linux
                    # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
                ]
                for regex in regexs:
                    mobj = re.match(regex, ver)
                    if mobj:
                        ver = mobj.group(1)
            return ver

        self.basename = None
        self.probe_basename = None

        self._paths = None
        self._versions = None
        if self._downloader:
            prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
            location = self.get_param('ffmpeg_location')
            if location is not None:
                if not os.path.exists(location):
                    self.report_warning(
                        'ffmpeg-location %s does not exist! '
                        'Continuing without ffmpeg.' % (location))
                    self._versions = {}
                    return
                elif os.path.isdir(location):
                    dirname, basename = location, None
                else:
                    # A single executable was given: work out which program
                    # it is from its file name, defaulting to ffmpeg.
                    basename = os.path.splitext(os.path.basename(location))[0]
                    basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
                    dirname = os.path.dirname(os.path.abspath(location))
                    if basename in ('ffmpeg', 'ffprobe'):
                        prefer_ffmpeg = True

                self._paths = {p: os.path.join(dirname, p) for p in programs}
                if basename:
                    self._paths[basename] = location
                self._versions = {p: get_ffmpeg_version(self._paths[p]) for p in programs}
        if self._versions is None:
            # No explicit location: invoke the programs by bare name.
            self._versions = {p: get_ffmpeg_version(p) for p in programs}
            self._paths = {p: p for p in programs}

        # Only an explicit ``False`` flips the preference towards the av* tools.
        if prefer_ffmpeg is False:
            converters, probes = ('avconv', 'ffmpeg'), ('avprobe', 'ffprobe')
        else:
            converters, probes = ('ffmpeg', 'avconv'), ('ffprobe', 'avprobe')
        self.basename = next((p for p in converters if self._versions[p]), None)
        self.probe_basename = next((p for p in probes if self._versions[p]), None)

    @property
    def available(self):
        """True when a converter (ffmpeg or avconv) was detected."""
        return self.basename is not None

    @property
    def executable(self):
        """Path used to invoke the selected converter."""
        return self._paths[self.basename]

    @property
    def probe_available(self):
        """True when a probe program (ffprobe or avprobe) was detected."""
        return self.probe_basename is not None

    @property
    def probe_executable(self):
        """Path used to invoke the selected probe program."""
        return self._paths[self.probe_basename]

    def get_audio_codec(self, path):
        """Return the audio codec name reported for ``path``, or ``None``.

        The probe program is used when available; otherwise the converter is
        run with ``-i`` and its stderr is parsed. ``None`` is returned when the
        program cannot be run, exits with an unexpected status, or reports no
        audio stream.

        Raises:
            PostProcessingError: neither a probe nor a converter was detected.
        """
        if not self.probe_available and not self.available:
            raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
        use_probe = self.probe_available
        try:
            if use_probe:
                program = self.probe_basename
                cmd = [
                    encodeFilename(self.probe_executable, True),
                    encodeArgument('-show_streams')]
            else:
                program = self.basename
                cmd = [
                    encodeFilename(self.executable, True),
                    encodeArgument('-i')]
            cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
            # Label the debug line with the program that is actually executed.
            self.write_debug('%s command line: %s' % (program, shell_quote(cmd)))
            handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout_data, stderr_data = handle.communicate_or_kill()
            # The probe must exit with 0; for the converter invoked with only
            # ``-i`` the status treated as success here is 1.
            expected_ret = 0 if use_probe else 1
            if handle.wait() != expected_ret:
                return None
        except (IOError, OSError):
            return None
        output = (stdout_data if use_probe else stderr_data).decode('ascii', 'ignore')
        if use_probe:
            # A codec_name line is remembered until a following
            # ``codec_type=audio`` line confirms it belongs to an audio stream.
            audio_codec = None
            for line in output.split('\n'):
                if line.startswith('codec_name='):
                    audio_codec = line.split('=')[1].strip()
                elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                    return audio_codec
        else:
            # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
            mobj = re.search(
                r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
                output)
            if mobj:
                return mobj.group(1)
        return None

    def get_metadata_object(self, path, opts=()):
        """Return the parsed JSON that ffprobe prints for ``path``.

        Args:
            path: file to inspect.
            opts: extra arguments inserted before the file name; the sequence
                is only read, never modified.

        Raises:
            PostProcessingError: the selected probe is not ffprobe.
            FFmpegPostProcessorError: propagated from ``check_version``.
        """
        if self.probe_basename != 'ffprobe':
            if self.probe_available:
                self.report_warning('Only ffprobe is supported for metadata extraction')
            raise PostProcessingError('ffprobe not found. Please install or provide the path using --ffmpeg-location')
        self.check_version()

        cmd = [
            encodeFilename(self.probe_executable, True),
            encodeArgument('-hide_banner'),
            encodeArgument('-show_format'),
            encodeArgument('-show_streams'),
            encodeArgument('-print_format'),
            encodeArgument('json'),
        ]

        cmd.extend(opts)
        cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
        self.write_debug('ffprobe command line: %s' % shell_quote(cmd))
        p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        # Same call as the other subprocess invocations in this class.
        stdout, _ = p.communicate_or_kill()
        return json.loads(stdout.decode('utf-8', 'replace'))

    def get_stream_number(self, path, keys, value):
        """Find the first stream of ``path`` whose ``keys`` entry equals ``value``.

        Returns:
            ``(index or None, total number of streams)``.
        """
        streams = self.get_metadata_object(path)['streams']
        num = next(
            (i for i, stream in enumerate(streams) if traverse_obj(stream, keys, casesense=False) == value),
            None)
        return num, len(streams)

    def _get_real_video_duration(self, info, fatal=True):
        """Return the duration ffprobe reports for ``info['filepath']``.

        The value is cached in ``info['_real_duration']``. When it cannot be
        determined, a PostProcessingError is raised if ``fatal`` is true;
        otherwise whatever is cached (``None`` if nothing) is returned.
        """
        try:
            if '_real_duration' not in info:
                info['_real_duration'] = float_or_none(
                    traverse_obj(self.get_metadata_object(info['filepath']), ('format', 'duration')))
            if not info['_real_duration']:
                raise PostProcessingError('ffprobe returned empty duration')
        except PostProcessingError as e:
            if fatal:
                raise PostProcessingError(f'Unable to determine video duration; {e}')
        return info.setdefault('_real_duration', None)

    def _duration_mismatch(self, d1, d2):
        """Return True when both durations are set and differ by more than 1.

        Returns ``None`` when either duration is missing or zero.
        """
        if not d1 or not d2:
            return None
        return abs(d1 - d2) > 1

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
        """Run the converter on several inputs producing one output.

        Inputs get no per-file options; ``opts`` apply to the output. Extra
        keyword arguments are forwarded to ``real_run_ffmpeg``.
        """
        return self.real_run_ffmpeg(
            [(path, []) for path in input_paths],
            [(out_path, opts)], **kwargs)

    def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
        """Build and execute one converter command line.

        Args:
            input_path_opts: iterable of ``(path, options)`` for the inputs.
            output_path_opts: iterable of ``(path, options)`` for the outputs.
            expected_retcodes: exit status(es) that count as success.

        Returns:
            The converter's stderr decoded as UTF-8.

        Raises:
            FFmpegPostProcessorError: unexpected exit status (the message is
                the last stderr line), or propagated from ``check_version``.

        Entries whose path is falsy are skipped. Every output file gets the
        modification time of the oldest input.
        """
        self.check_version()

        oldest_mtime = min(
            os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path)

        cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
        # avconv does not have repeat option
        if self.basename == 'ffmpeg':
            cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]

        def make_args(file, args, name, number):
            # User-configured arguments are looked up under the most specific
            # key first ('_i1'), then the generic one ('_i'); the first output
            # additionally accepts the unnamed key ''.
            keys = ['_%s%d' % (name, number), '_%s' % name]
            if name == 'o' and number == 1:
                keys.append('')
            args += self._configuration_args(self.basename, keys)
            if name == 'i':
                args.append('-i')
            return (
                [encodeArgument(arg) for arg in args]
                + [encodeFilename(self._ffmpeg_filename_argument(file), True)])

        for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
            # ``list(opts)`` copies, so make_args never mutates the caller's
            # list. Numbering counts skipped (falsy-path) entries as well.
            cmd += itertools.chain.from_iterable(
                make_args(path, list(opts), arg_type, i + 1)
                for i, (path, opts) in enumerate(path_opts) if path)

        self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
        p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        stdout, stderr = p.communicate_or_kill()
        if p.returncode not in variadic(expected_retcodes):
            stderr = stderr.decode('utf-8', 'replace').strip()
            self.write_debug(stderr)
            raise FFmpegPostProcessorError(stderr.split('\n')[-1])
        for out_path, _ in output_path_opts:
            if out_path:
                self.try_utime(out_path, oldest_mtime, oldest_mtime)
        return stderr.decode('utf-8', 'replace')

    def run_ffmpeg(self, path, out_path, opts, **kwargs):
        """Run the converter on a single input file."""
        return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)

    @staticmethod
    def _ffmpeg_filename_argument(fn):
        """Return ``fn`` in the form that is safe to pass to ffmpeg."""
        # Always use 'file:' because the filename may contain ':' (ffmpeg
        # interprets that as a protocol) or can start with '-' (-- is broken in
        # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
        # Also leave '-' intact in order not to break streaming to stdout.
        if fn.startswith(('http://', 'https://')):
            return fn
        return 'file:' + fn if fn != '-' else fn

    @staticmethod
    def _quote_for_ffmpeg(string):
        """Single-quote ``string`` following ffmpeg's quoting rules.

        An empty string is returned as ``''`` (previously this raised
        IndexError).
        """
        if not string:
            return "''"
        # See https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping
        # A sequence of '' produces '\'''\'';
        # final replace removes the empty '' between \' \'.
        string = string.replace("'", r"'\''").replace("'''", "'")
        # Handle potential ' at string boundaries.
        string = string[1:] if string[0] == "'" else "'" + string
        return string[:-1] if string[-1] == "'" else string + "'"

    def force_keyframes(self, filename, timestamps):
        """Re-encode ``filename`` with keyframes forced at ``timestamps``.

        Duplicate timestamps are dropped and a leading 0 is ignored.

        Returns:
            Path of the re-encoded temporary file.
        """
        timestamps = orderedSet(timestamps)
        if timestamps[0] == 0:
            timestamps = timestamps[1:]
        keyframe_file = prepend_extension(filename, 'keyframes.temp')
        self.to_screen(f'Re-encoding "{filename}" with appropriate keyframes')
        self.run_ffmpeg(filename, keyframe_file, ['-force_key_frames', ','.join(
            f'{t:.6f}' for t in timestamps)])
        return keyframe_file

    def concat_files(self, in_files, out_file, concat_opts=None):
        """
        Use concat demuxer to concatenate multiple files having identical streams.

        Only inpoint, outpoint, and duration concat options are supported.
        See https://ffmpeg.org/ffmpeg-formats.html#concat-1 for details
        """
        concat_file = f'{out_file}.concat'
        self.write_debug(f'Writing concat spec to {concat_file}')
        with open(concat_file, 'wt', encoding='utf-8') as f:
            f.writelines(self._concat_spec(in_files, concat_opts))

        out_flags = ['-c', 'copy']
        if out_file.rpartition('.')[-1] in ('mp4', 'mov'):
            # For some reason, '-c copy' is not enough to copy subtitles
            out_flags.extend(['-c:s', 'mov_text', '-movflags', '+faststart'])

        try:
            self.real_run_ffmpeg(
                [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
                [(out_file, out_flags)])
        finally:
            # The spec file is removed whether or not the run succeeded.
            os.remove(concat_file)

    @classmethod
    def _concat_spec(cls, in_files, concat_opts=None):
        """Yield the lines of an ffconcat spec for ``in_files``.

        ``concat_opts`` is a sequence of dicts parallel to ``in_files``; when
        omitted, no per-file directives are emitted.
        """
        if concat_opts is None:
            concat_opts = [{}] * len(in_files)
        yield 'ffconcat version 1.0\n'
        for file, opts in zip(in_files, concat_opts):
            yield f'file {cls._quote_for_ffmpeg(cls._ffmpeg_filename_argument(file))}\n'
            # Iterate explicitly to yield the following directives in order, ignoring the rest.
            for directive in 'inpoint', 'outpoint', 'duration':
                if directive in opts:
                    yield f'{directive} {opts[directive]}\n'
365
496c1923
PH
366
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Post-processor that extracts (and, when needed, re-encodes) the audio track."""

    COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
    SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav')

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        self._preferredcodec = preferredcodec or 'best'
        self._preferredquality = float_or_none(preferredquality)
        self._nopostoverwrites = nopostoverwrites

    def _quality_args(self, codec):
        """Return the ffmpeg options expressing the preferred quality for ``codec``.

        Args:
            codec: ffmpeg encoder name as found in ``ACODECS`` (may be None).

        Returns:
            ``[]`` when no quality was requested or the encoder has no
            ``-q:a`` scale here; ``['-b:a', '<n>k']`` for values above 10;
            otherwise ``['-q:a', '<q>']`` with the 0-10 preference rescaled
            to the encoder's own range.
        """
        if self._preferredquality is None:
            return []
        elif self._preferredquality > 10:
            return ['-b:a', f'{self._preferredquality}k']

        # (value for preference 10, value for preference 0) per encoder.
        # Keys are ffmpeg encoder names because that is what callers pass;
        # 'vorbis' is kept as an alias for callers using the short name.
        # Encoders not listed (libopus, flac, None for wav, ...) get no
        # ``-q:a`` option instead of raising KeyError.
        limits = {
            'libmp3lame': (10, 0),
            'aac': (0.1, 11),
            'libvorbis': (0, 10),
            'vorbis': (0, 10),
        }.get(codec)
        if not limits:
            return []

        q = limits[1] + (limits[0] - limits[1]) * (self._preferredquality / 10)
        return ['-q:a', f'{q}']

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Extract audio from ``path`` into ``out_path``.

        ``-vn`` is always passed; ``-acodec codec`` is added unless ``codec``
        is None.

        Raises:
            AudioConversionError: the converter failed.
        """
        acodec_opts = [] if codec is None else ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Extract the audio of ``information['filepath']``.

        Returns:
            ``(files_to_delete, information)``; ``information`` is updated in
            place with the new ``filepath`` and ``ext``.
        """
        path = information['filepath']
        orig_ext = information['ext']

        if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS:
            self.to_screen('Skipping audio extraction since the file is already in a common audio format')
            return [], information

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')

        more_opts = []
        keep_codec = (
            self._preferredcodec in ('best', filecodec)
            or (self._preferredcodec == 'm4a' and filecodec == 'aac'))
        if keep_codec:
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = ['-bsf:a', 'aac_adtstoasc']
            elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_args(acodec)
        else:
            # We convert the audio (lossy if codec is lossy)
            acodec = ACODECS[self._preferredcodec]
            extension = self._preferredcodec
            more_opts = self._quality_args(acodec)
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += ['-bsf:a', 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        information['filepath'] = new_path
        information['ext'] = extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        if (new_path == path
                or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
            self.to_screen('Post-process file %s exists, skipping' % new_path)
            return [], information

        try:
            self.to_screen('Destination: ' + new_path)
            self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as e:
            raise PostProcessingError(
                'audio conversion failed: ' + e.msg)
        except Exception:
            raise PostProcessingError('error running ' + self.basename)

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            self.try_utime(
                new_path, time.time(), information['filetime'],
                errnote='Cannot update utime of audio file')

        return [path], information
496c1923
PH
484
485
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
    """Convert media files to another container according to a format spec.

    The spec is a '/'-separated list of rules, each either ``target`` or
    ``source>target``.
    """

    SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
    FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS)))
    _ACTION = 'converting'

    def __init__(self, downloader=None, preferedformat=None):
        super().__init__(downloader)
        self._preferedformats = preferedformat.lower().split('/')

    def _target_ext(self, source_ext):
        """Return the target extension of the first rule matching ``source_ext``.

        A rule without '>' matches every source. Returns None when no rule
        matches.
        """
        for rule in self._preferedformats:
            parts = rule.split('>')
            if len(parts) == 1 or parts[0].strip() == source_ext:
                return parts[-1].strip()
        return None

    @staticmethod
    def _options(target_ext):
        """Return the extra ffmpeg output options needed for ``target_ext``."""
        return ['-c:v', 'libxvid', '-vtag', 'XVID'] if target_ext == 'avi' else []

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        filename, source_ext = info['filepath'], info['ext'].lower()
        target_ext = self._target_ext(source_ext)

        # Work out whether there is nothing to do, and why.
        if not target_ext:
            skip_reason = f'could not find a mapping for {source_ext}'
        elif source_ext == target_ext:
            skip_reason = f'already is in target format {source_ext}'
        else:
            skip_reason = None
        if skip_reason:
            self.to_screen(f'Not {self._ACTION} media file {filename!r}; {skip_reason}')
            return [], info

        outpath = replace_extension(filename, target_ext, source_ext)
        self.to_screen(f'{self._ACTION.title()} video from {source_ext} to {target_ext}; Destination: {outpath}')
        self.run_ffmpeg(filename, outpath, self._options(target_ext))

        info['filepath'] = outpath
        info['format'] = info['ext'] = target_ext
        # The original file is handed back for deletion.
        return [filename], info
efe87a10
FS
526
527
class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
    """Same as the convertor, but streams are copied instead of re-encoded."""

    _ACTION = 'remuxing'

    @staticmethod
    def _options(target_ext):
        """Return stream-copy options, plus faststart for mp4/m4a/mov targets."""
        faststart = ['-movflags', '+faststart'] if target_ext in ['mp4', 'm4a', 'mov'] else []
        return ['-c', 'copy', '-map', '0', '-dn'] + faststart
496c1923
PH
537
538
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Embed the requested subtitle files into an mp4, webm or mkv file."""

    def __init__(self, downloader=None, already_have_subtitle=False):
        super().__init__(downloader)
        self._already_have_subtitle = already_have_subtitle

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Mux the subtitle files into the media file in place.

        Returns:
            ``(files_to_delete, information)``. The subtitle files are listed
            for deletion unless ``already_have_subtitle`` was set.
        """
        if information['ext'] not in ('mp4', 'webm', 'mkv'):
            self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files')
            return [], information
        subtitles = information.get('requested_subtitles')
        if not subtitles:
            self.to_screen("There aren't any subtitles to embed")
            return [], information

        filename = information['filepath']
        if information.get('duration') and self._duration_mismatch(
                self._get_real_video_duration(information, False), information['duration']):
            self.to_screen(f'Skipping {self.pp_key()} since the real and expected durations mismatch')
            return [], information

        ext = information['ext']
        sub_langs, sub_names, sub_filenames = [], [], []
        # Each of these warnings is reported at most once per run.
        webm_vtt_warn = False
        mp4_ass_warn = False

        for lang, sub_info in subtitles.items():
            if not os.path.exists(sub_info.get('filepath', '')):
                self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
                continue
            sub_ext = sub_info['ext']
            if sub_ext == 'json':
                self.report_warning('JSON subtitles cannot be embedded')
            elif ext != 'webm' or sub_ext == 'vtt':
                sub_langs.append(lang)
                sub_names.append(sub_info.get('name'))
                sub_filenames.append(sub_info['filepath'])
            elif not webm_vtt_warn:
                # Only reached for a non-vtt subtitle going into a webm file.
                webm_vtt_warn = True
                self.report_warning('Only WebVTT subtitles can be embedded in webm files')
            if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
                mp4_ass_warn = True
                self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')

        if not sub_langs:
            return [], information

        input_files = [filename] + sub_filenames

        opts = [
            '-c', 'copy', '-map', '0', '-dn',
            # Don't copy the existing subtitles, we may be running the
            # postprocessor a second time
            '-map', '-0:s',
            # Don't copy Apple TV chapters track, bin_data (see #19042, #19024,
            # https://trac.ffmpeg.org/ticket/6016)
            '-map', '-0:d',
        ]
        if ext == 'mp4':
            opts += ['-c:s', 'mov_text']
        for idx, (lang, name) in enumerate(zip(sub_langs, sub_names)):
            # Input 0 is the media file, so subtitle ``idx`` is input ``idx + 1``.
            opts.extend(['-map', '%d:0' % (idx + 1)])
            lang_code = ISO639Utils.short2long(lang) or lang
            opts.extend(['-metadata:s:s:%d' % idx, 'language=%s' % lang_code])
            if name:
                opts.extend(['-metadata:s:s:%d' % idx, 'handler_name=%s' % name,
                             '-metadata:s:s:%d' % idx, 'title=%s' % name])

        temp_filename = prepend_extension(filename, 'temp')
        self.to_screen('Embedding subtitles in "%s"' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        os.replace(temp_filename, filename)

        if self._already_have_subtitle:
            return [], information
        return sub_filenames, information
496c1923
PH
615
616
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Write metadata tags and/or chapter markers into the media file."""

    def __init__(self, downloader, add_metadata=True, add_chapters=True):
        FFmpegPostProcessor.__init__(self, downloader)
        self._add_metadata = add_metadata
        self._add_chapters = add_chapters

    @staticmethod
    def _options(target_ext):
        """Yield the stream mapping/copy options used for ``target_ext``."""
        yield from ('-map', '0', '-dn')
        if target_ext == 'm4a':
            yield from ('-vn', '-acodec', 'copy')
        else:
            yield from ('-c', 'copy')

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        filename = info['filepath']
        metadata_filename = None
        # List of option tuples; each tuple is flattened into the command line.
        options = []
        if self._add_chapters and info.get('chapters'):
            metadata_filename = replace_extension(filename, 'meta')
            options.extend(self._get_chapter_opts(info['chapters'], metadata_filename))
        if self._add_metadata:
            options.extend(self._get_metadata_opts(info))

        if not options:
            self.to_screen("There isn't any metadata to add")
            return [], info

        temp_filename = prepend_extension(filename, 'temp')
        self.to_screen('Adding metadata to "%s"' % filename)
        # A None metadata file is skipped by real_run_ffmpeg.
        self.run_ffmpeg_multiple_files(
            (filename, metadata_filename), temp_filename,
            itertools.chain(self._options(info['ext']), *options))
        if metadata_filename:
            os.remove(metadata_filename)
        os.replace(temp_filename, filename)
        return [], info

    @staticmethod
    def _get_chapter_opts(chapters, metadata_filename):
        """Write an FFMETADATA file describing ``chapters`` and yield the option to use it.

        The file is only written once the generator is iterated.
        """
        def ffmpeg_escape(text):
            return re.sub(r'([\\=;#\n])', r'\\\1', text)

        with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
            pieces = [';FFMETADATA1\n']
            for chapter in chapters:
                # Times are multiplied by 1000 to match the 1/1000 timebase.
                pieces.append('[CHAPTER]\nTIMEBASE=1/1000\n')
                pieces.append('START=%d\n' % (chapter['start_time'] * 1000))
                pieces.append('END=%d\n' % (chapter['end_time'] * 1000))
                chapter_title = chapter.get('title')
                if chapter_title:
                    pieces.append('title=%s\n' % ffmpeg_escape(chapter_title))
            f.write(''.join(pieces))
        # The metadata file is the second input (index 1) of the command.
        yield ('-map_metadata', '1')

    def _get_metadata_opts(self, info):
        """Yield option tuples that tag the file with values taken from ``info``."""
        metadata = {}
        meta_prefix = 'meta_'

        def add(meta_list, info_list=None):
            # First non-None value among the candidate ``info`` keys. The bare
            # prefix key is always tried first.
            candidates = [meta_prefix] + list(variadic(info_list or meta_list))
            value = next(
                (str(info[key]) for key in candidates if info.get(key) is not None),
                None)
            if value not in ('', None):
                for meta_f in variadic(meta_list):
                    metadata[meta_f] = value

        # See [1-3] for some info on media metadata/metadata supported
        # by ffmpeg.
        # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
        # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
        # 3. https://kodi.wiki/view/Video_file_tagging

        add('title', ('track', 'title'))
        add('date', 'upload_date')
        add(('description', 'synopsis'), 'description')
        add(('purl', 'comment'), 'webpage_url')
        add('track', 'track_number')
        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
        add('genre')
        add('album')
        add('album_artist')
        add('disc', 'disc_number')
        add('show', 'series')
        add('season_number')
        add('episode_id', ('episode', 'episode_id'))
        add('episode_sort', 'episode_number')

        # Any ``meta_<name>`` field in info overrides/extends the tags above.
        for key, value in info.items():
            if value is not None and key != meta_prefix and key.startswith(meta_prefix):
                metadata[key[len(meta_prefix):]] = value

        for name, value in metadata.items():
            yield ('-metadata', f'{name}={value}')

        stream_idx = 0
        for fmt in info.get('requested_formats') or []:
            # A format counts as two streams unless its vcodec or acodec is 'none'.
            has_both = 'none' not in (fmt.get('vcodec'), fmt.get('acodec'))
            stream_count = 2 if has_both else 1
            if fmt.get('language'):
                lang = ISO639Utils.short2long(fmt['language']) or fmt['language']
                for offset in range(stream_count):
                    yield ('-metadata:s:%d' % (stream_idx + offset), 'language=%s' % lang)
            stream_idx += stream_count

        if ('no-attach-info-json' not in self.get_param('compat_opts', [])
                and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')):
            old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json')
            if old_stream is not None:
                # Drop the previously attached JSON so it is replaced, not duplicated.
                yield ('-map', '-0:%d' % old_stream)
                new_stream -= 1

            yield ('-attach', info['__infojson_filename'],
                   '-metadata:s:%d' % new_stream, 'mimetype=application/json')
496c1923
PH
730
731
class FFmpegMergerPP(FFmpegPostProcessor):
    """Merge separately downloaded formats into a single file."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Mux ``info['__files_to_merge']`` into ``info['filepath']``.

        Returns:
            ``(merged input files to delete, info)``.
        """
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        args = ['-c', 'copy']
        audio_streams = 0
        for index, fmt in enumerate(info['requested_formats']):
            if fmt.get('acodec') != 'none':
                args.extend(['-map', f'{index}:a:0'])
                # The codec probe only runs for m3u8 downloads.
                if fmt['protocol'].startswith('m3u8') and self.get_audio_codec(fmt['filepath']) == 'aac':
                    args.extend([f'-bsf:a:{audio_streams}', 'aac_adtstoasc'])
                audio_streams += 1
            if fmt.get('vcodec') != 'none':
                args.extend(['-map', '%u:v:0' % index])
        self.to_screen('Merging formats into "%s"' % filename)
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return info['__files_to_merge'], info

    def can_merge(self):
        """Return False (after warning) only for an avconv older than '10-0'."""
        # TODO: figure out merge-capable ffmpeg version
        if self.basename != 'avconv':
            return True

        required_version = '10-0'
        if not is_outdated_version(self._versions[self.basename], required_version):
            return True

        warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
                   'yt-dlp will download single file media. '
                   'Update %s to version %s or newer to fix this.') % (
                       self.basename, self.basename, required_version)
        self.report_warning(warning)
        return False
768
0c14e2fb 769
class FFmpegFixupPostProcessor(FFmpegPostProcessor):
    """Shared helper for post-processors that rewrite a file in place."""

    def _fixup(self, msg, filename, options):
        """Run ffmpeg on ``filename`` with ``options`` and replace it with the result."""
        scratch = prepend_extension(filename, 'temp')

        self.to_screen(f'{msg} of "{filename}"')
        self.run_ffmpeg(filename, scratch, options)

        os.replace(scratch, filename)
6271f1ca 778
fd7cfb64 779
780class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor):
781 @PostProcessor._restrict_to(images=False, audio=False)
782 def run(self, info):
783 stretched_ratio = info.get('stretched_ratio')
784 if stretched_ratio not in (None, 1):
785 self._fixup('Fixing aspect ratio', info['filepath'], [
786 '-c', 'copy', '-map', '0', '-dn', '-aspect', '%f' % stretched_ratio])
592e97e8 787 return [], info
62cd676c
PH
788
789
class FFmpegFixupM4aPP(FFmpegFixupPostProcessor):
    """Rewrite files whose container is 'm4a_dash' using the mp4 muxer."""

    @PostProcessor._restrict_to(images=False, video=False)
    def run(self, info):
        if info.get('container') != 'm4a_dash':
            return [], info
        self._fixup('Correcting container', info['filepath'], [
            '-c', 'copy', '-map', '0', '-dn', '-f', 'mp4'])
        return [], info
e9fade72
JMF
797
798
class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor):
    """Repair the AAC bitstream of files whose audio codec is detected as aac."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Stream-copy the file to mp4 through the aac_adtstoasc filter if its audio is AAC."""
        media_path = info['filepath']
        if self.get_audio_codec(media_path) != 'aac':
            return [], info

        bitstream_opts = ['-c', 'copy', '-map', '0', '-dn', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
        self._fixup('Fixing malformed AAC bitstream', info['filepath'], bitstream_opts)
        return [], info
806
807
class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
    """Rebase frame timestamps so that they start from the first packet."""

    def __init__(self, downloader=None, trim=0.001):
        """Store `trim` (a number) as the string later passed to ffmpeg's -ss option."""
        # "trim" should be used when the video contains unintended packets
        super().__init__(downloader)
        assert isinstance(trim, (int, float))
        self.trim = str(trim)

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Fix the timestamps of info['filepath'].

        ffmpeg older than 4.4 goes through the setpts video filter (a
        re-encode, announced by a warning); newer versions stream-copy with
        the setts bitstream filter.
        """
        minimum_version = '4.4'
        installed_version = self._versions[self.basename]
        if not is_outdated_version(installed_version, minimum_version):
            timestamp_opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS']
        else:
            self.report_warning(
                'A re-encode is needed to fix timestamps in older versions of ffmpeg. '
                f'Please install ffmpeg {minimum_version} or later to fixup without re-encoding')
            timestamp_opts = ['-vf', 'setpts=PTS-STARTPTS']

        common_opts = ['-map', '0', '-dn', '-ss', self.trim]
        self._fixup('Fixing frame timestamp', info['filepath'], timestamp_opts + common_opts)
        return [], info
828
829
class FFmpegFixupDurationPP(FFmpegFixupPostProcessor):
    """Rewrite the file with a plain stream copy to fix its duration."""

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Stream-copy info['filepath'] onto itself; no files are marked for deletion."""
        copy_opts = ['-c', 'copy', '-map', '0', '-dn']
        self._fixup('Fixing video duration', info['filepath'], copy_opts)
        return [], info
835
836
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
    """Convert the requested subtitle files to a single target format."""

    SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc')

    def __init__(self, downloader=None, format=None):
        super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
        # Target extension, e.g. 'srt' or 'vtt'
        self.format = format

    def run(self, info):
        """Convert every entry of info['requested_subtitles'] to self.format.

        Each converted entry of 'requested_subtitles' is replaced with a
        dict holding the new 'ext', 'data' and 'filepath', and the new file
        is registered in info['__files_to_move'].

        Returns a tuple (files_to_delete, info) where files_to_delete lists
        the superseded source and intermediate subtitle files.
        """
        subs = info.get('requested_subtitles')
        new_ext = self.format
        # ffmpeg's muxer for .vtt files is called 'webvtt'
        new_format = 'webvtt' if new_ext == 'vtt' else new_ext
        if not subs:
            # Covers both a missing key and an empty mapping
            self.to_screen('There aren\'t any subtitles to convert')
            return [], info
        self.to_screen('Converting subtitles')
        sub_filenames = []
        for lang, sub in subs.items():
            if not os.path.exists(sub.get('filepath', '')):
                self.report_warning(f'Skipping conversion of {lang} subtitle because the file is missing')
                continue
            ext = sub['ext']
            if ext == new_ext:
                self.to_screen('Subtitle file for %s is already in the requested format' % new_ext)
                continue
            elif ext == 'json':
                self.to_screen(
                    'You have requested to convert json subtitles into another format, '
                    'which is currently not possible')
                continue
            old_file = sub['filepath']
            sub_filenames.append(old_file)
            new_file = replace_extension(old_file, new_ext)

            if ext in ('dfxp', 'ttml', 'tt'):
                self.report_warning(
                    'You have requested to convert dfxp (TTML) subtitles into another format, '
                    'which results in style information loss')

                # TTML is converted to srt in-process via dfxp2srt; the srt
                # then serves as the ffmpeg input for any other target format
                dfxp_file = old_file
                srt_file = replace_extension(old_file, 'srt')

                with open(dfxp_file, 'rb') as f:
                    srt_data = dfxp2srt(f.read())

                with io.open(srt_file, 'wt', encoding='utf-8') as f:
                    f.write(srt_data)
                old_file = srt_file

                subs[lang] = {
                    'ext': 'srt',
                    'data': srt_data,
                    'filepath': srt_file,
                }

                if new_ext == 'srt':
                    # No ffmpeg pass follows, so the srt must be registered
                    # here or it would never be moved to its final location
                    info['__files_to_move'][srt_file] = replace_extension(
                        info['__files_to_move'][sub['filepath']], 'srt')
                    continue
                # The srt is only an intermediate file; delete it afterwards
                sub_filenames.append(srt_file)

            self.run_ffmpeg(old_file, new_file, ['-f', new_format])

            with io.open(new_file, 'rt', encoding='utf-8') as f:
                subs[lang] = {
                    'ext': new_ext,
                    'data': f.read(),
                    'filepath': new_file,
                }

            # `sub` still refers to the original entry, so this looks up the
            # destination that was recorded for the source subtitle file
            info['__files_to_move'][new_file] = replace_extension(
                info['__files_to_move'][sub['filepath']], new_ext)

        return sub_filenames, info
72755351 911
912
class FFmpegSplitChaptersPP(FFmpegPostProcessor):
    """Split a downloaded file into one output file per chapter."""

    def __init__(self, downloader, force_keyframes=False):
        FFmpegPostProcessor.__init__(self, downloader)
        # When set, the input is first passed through self.force_keyframes
        # with the chapter start times before splitting
        self._force_keyframes = force_keyframes

    def _prepare_filename(self, number, chapter, info):
        """Build the output filename for a chapter from the 'chapter' template."""
        info = info.copy()
        info.update({
            'section_number': number,
            'section_title': chapter.get('title'),
            'section_start': chapter.get('start_time'),
            'section_end': chapter.get('end_time'),
        })
        return self._downloader.prepare_filename(info, 'chapter')

    def _ffmpeg_args_for_chapter(self, number, chapter, info):
        """Return (destination, input_options) for a chapter.

        Returns None when the destination directory could not be ensured.
        On success the destination is also stored in chapter['filepath'].
        """
        destination = self._prepare_filename(number, chapter, info)
        if not self._downloader._ensure_dir_exists(encodeFilename(destination)):
            return

        chapter['filepath'] = destination
        self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
        return (
            destination,
            ['-ss', compat_str(chapter['start_time']),
             '-t', compat_str(chapter['end_time'] - chapter['start_time'])])

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Write one stream-copied file per entry of info['chapters'].

        Chapters whose destination directory cannot be ensured are skipped.
        Returns ([], info); the input file is never marked for deletion.
        """
        chapters = info.get('chapters') or []
        if not chapters:
            self.to_screen('Chapter information is unavailable')
            return [], info

        in_file = info['filepath']
        if self._force_keyframes and len(chapters) > 1:
            in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters))
        self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters))
        try:
            for number, chapter in enumerate(chapters, 1):
                chapter_args = self._ffmpeg_args_for_chapter(number, chapter, info)
                if chapter_args is None:
                    # Unpacking None would raise TypeError; skip this chapter
                    continue
                destination, opts = chapter_args
                self.real_run_ffmpeg([(in_file, opts)], [(destination, ['-c', 'copy'])])
        finally:
            # Remove the temporary keyframe-forced copy even if ffmpeg failed
            if in_file != info['filepath']:
                os.remove(in_file)
        return [], info
8fa43c73 957
958
class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
    """Convert downloaded thumbnail files to a single target image format."""

    SUPPORTED_EXTS = ('jpg', 'png')

    def __init__(self, downloader=None, format=None):
        super(FFmpegThumbnailsConvertorPP, self).__init__(downloader)
        # Target extension, e.g. 'jpg' or 'png'
        self.format = format

    @staticmethod
    def is_webp(path):
        """Return True if the first 12 bytes of the file are a RIFF/WEBP header."""
        with open(encodeFilename(path), 'rb') as f:
            b = f.read(12)
        return b[0:4] == b'RIFF' and b[8:] == b'WEBP'

    def fixup_webp(self, info, idx=-1):
        """Rename a thumbnail to .webp when its content is WebP but its extension is not.

        Updates both the thumbnail's 'filepath' and info['__files_to_move'].
        """
        thumbnail_filename = info['thumbnails'][idx]['filepath']
        _, thumbnail_ext = os.path.splitext(thumbnail_filename)
        if thumbnail_ext:
            thumbnail_ext = thumbnail_ext[1:].lower()
            if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename):
                self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
                webp_filename = replace_extension(thumbnail_filename, 'webp')
                os.replace(thumbnail_filename, webp_filename)
                info['thumbnails'][idx]['filepath'] = webp_filename
                info['__files_to_move'][webp_filename] = replace_extension(
                    info['__files_to_move'].pop(thumbnail_filename), 'webp')

    @staticmethod
    def _options(target_ext):
        """Return the extra ffmpeg output options for the given target extension."""
        if target_ext == 'jpg':
            return ['-bsf:v', 'mjpeg2jpeg']
        return []

    def convert_thumbnail(self, thumbnail_filename, target_ext):
        """Convert one thumbnail with ffmpeg and return the new file's path."""
        thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)

        self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext))
        # '%' is doubled in the output name so that the image2 muxer does not
        # read it as the start of a sequence pattern
        self.real_run_ffmpeg(
            [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])],
            [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))])
        return thumbnail_conv_filename

    def run(self, info):
        """Convert every thumbnail that has a 'filepath' to self.format.

        Returns (files_to_delete, info), where files_to_delete lists the
        original files that were superseded by a converted copy.
        """
        files_to_delete = []
        has_thumbnail = False

        # 'thumbnails' may be absent or None; treat both as an empty list
        for idx, thumbnail_dict in enumerate(info.get('thumbnails') or []):
            if 'filepath' not in thumbnail_dict:
                continue
            has_thumbnail = True
            self.fixup_webp(info, idx)
            original_thumbnail = thumbnail_dict['filepath']
            _, thumbnail_ext = os.path.splitext(original_thumbnail)
            if thumbnail_ext:
                thumbnail_ext = thumbnail_ext[1:].lower()
            if thumbnail_ext == 'jpeg':
                thumbnail_ext = 'jpg'
            if thumbnail_ext == self.format:
                self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
                continue
            thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format)
            files_to_delete.append(original_thumbnail)
            info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
                info['__files_to_move'][original_thumbnail], self.format)

        if not has_thumbnail:
            self.to_screen('There aren\'t any thumbnails to convert')
        return files_to_delete, info