jfr.im git - yt-dlp.git/blame - youtube_dl/PostProcessor.py
Simplify tests
[yt-dlp.git] / youtube_dl / PostProcessor.py
CommitLineData
d77c3dfd
FV
1import os
2import subprocess
3import sys
4import time
bc4f2917 5import datetime
d77c3dfd 6
9e8056d5 7from .utils import *
d77c3dfd
FV
8
9
class PostProcessor(object):
    """Base class for post processors.

    A downloader collects PostProcessor objects through its
    add_post_processor() method.  After a successful download it walks
    that chain and calls run() on each entry: the first one gets the
    initial information, every later one gets what its predecessor
    returned.

    According to the original documentation the chain ends when a
    post processor returns None or when the last entry has run.

    PostProcessor objects and downloaders register with each other
    ("mutual registration"), in the same way InfoExtractor objects do.
    """

    # Downloader this post processor is attached to (None until set).
    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Sets the downloader for this PP."""
        self._downloader = downloader

    def run(self, information):
        """Run the PostProcessor.

        "information" is a dictionary like the ones built by
        InfoExtractors, with one additional "filepath" entry pointing
        to the downloaded file.

        Returns a 2-tuple: first whether the original file should be
        kept (i.e. not deleted; None means no preference), then the
        updated information dictionary.

        Implementations may raise PostProcessingError when post
        processing fails.
        """
        # Default behaviour: express no preference and pass the data on.
        keep_original = None
        return keep_original, information
d77c3dfd 52
7851b379
PH
class FFmpegPostProcessorError(PostProcessingError):
    """Raised when ffmpeg/avconv cannot be found or exits with an error."""
67d0c25e 55
7851b379
PH
class AudioConversionError(PostProcessingError):
    """Raised when extracting or converting the audio track fails."""
d77c3dfd 58
67d0c25e
JMF
class FFmpegPostProcessor(PostProcessor):
    """Shared base for post processors that invoke ffmpeg or avconv."""

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        # Maps program name -> program name (usable) or False (not usable).
        self._exes = self.detect_executables()

    @staticmethod
    def detect_executables():
        """Check which helper programs can be launched.

        Returns a dict keyed by 'avprobe', 'avconv', 'ffmpeg' and
        'ffprobe'.  A value is the program name itself when starting it
        with '-version' worked, or False when that raised OSError.
        """
        def probe(name):
            try:
                proc = subprocess.Popen([name, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                proc.communicate()
            except OSError:
                return False
            return name

        available = {}
        for candidate in ('avprobe', 'avconv', 'ffmpeg', 'ffprobe'):
            available[candidate] = probe(candidate)
        return available

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        """Run avconv (preferred) or ffmpeg on several input files.

        input_paths -- paths, each passed with its own '-i'
        out_path    -- output file; '-y' is passed, so it is overwritten
        opts        -- list of extra options placed between the inputs
                       and the output path

        Raises FFmpegPostProcessorError if neither program is available
        or the program exits with a non-zero status; in the latter case
        the message is the last line the program wrote to stderr.
        """
        converter = self._exes['avconv'] or self._exes['ffmpeg']
        if not converter:
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

        input_args = []
        for in_path in input_paths:
            input_args += ['-i', encodeFilename(in_path)]
        target = encodeFilename(self._ffmpeg_filename_argument(out_path))
        cmd = [converter, '-y'] + input_args + opts + [target]

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, err_output = proc.communicate()
        if proc.returncode == 0:
            return
        # Only the final stderr line is reported to the caller.
        last_line = err_output.decode('utf-8', 'replace').strip().split('\n')[-1]
        raise FFmpegPostProcessorError(last_line)

    def run_ffmpeg(self, path, out_path, opts):
        """Single-input convenience wrapper around run_ffmpeg_multiple_files()."""
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        """Return fn in a form that cannot be mistaken for an option."""
        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
        return u'./' + fn if fn.startswith(u'-') else fn
103
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Extract the audio track of a downloaded file using ffmpeg/avconv.

    preferredcodec   -- target codec name; None is treated as 'best'
    preferredquality -- quality as a string of digits; values below 10
                        are passed as a quality scale, others as a
                        bitrate in kbit/s
    nopostoverwrites -- when true, an already existing output file is
                        left alone instead of being overwritten
    """

    # Encoder handed to '-acodec' for every codec that can be requested
    # explicitly.  None (wav) means no '-acodec' option is passed.
    _ENCODERS = {
        'mp3': 'libmp3lame',
        'aac': 'aac',
        'm4a': 'aac',
        'opus': 'opus',
        'vorbis': 'libvorbis',
        'wav': None,
    }

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def get_audio_codec(self, path):
        """Return the audio codec name reported by avprobe/ffprobe.

        Returns None when the probe cannot be started, exits with a
        non-zero status, or reports no audio stream.  Raises
        PostProcessingError when neither probe program is available.
        """
        if not self._exes['ffprobe'] and not self._exes['avprobe']:
            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
        try:
            cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        audio_codec = None
        # The most recent codec_name= line is remembered and returned as
        # soon as a codec_type=audio line follows it.
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Convert path to out_path, dropping video ('-vn').

        codec is passed as '-acodec' unless it is None.  Failures are
        reported as AudioConversionError.
        """
        if not self._exes['ffmpeg'] and not self._exes['avconv']:
            raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    def _quality_opts(self, allow_quality_scale=True):
        """Build the quality/bitrate options for the preferred quality."""
        if self._preferredquality is None:
            return []
        use_avconv = self._exes['avconv']
        if int(self._preferredquality) < 10 and allow_quality_scale:
            return ['-q:a' if use_avconv else '-aq', self._preferredquality]
        return ['-b:a' if use_avconv else '-ab', self._preferredquality + 'k']

    @staticmethod
    def _replace_extension(path, extension):
        """Return path with its extension replaced by extension.

        If the last path component has no extension, the new one is
        appended instead of replacing everything before an unrelated
        dot (or the whole path).
        """
        # not os.path.splitext, since the latter does not work on unicode in all setups
        prefix, sep, ext = path.rpartition(u'.')
        if not sep or u'/' in ext or os.sep in ext:
            return path + u'.' + extension
        return prefix + sep + extension

    def run(self, information):
        """Extract the audio of information['filepath'].

        Returns (keep_original, information) with information['filepath']
        updated to the audio file.  keep_original is True when the
        audio file is the downloaded file itself, otherwise it is the
        nopostoverwrites setting.

        Raises PostProcessingError if the codec cannot be determined or
        the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')

        preferred = self._preferredcodec
        use_avconv = self._exes['avconv']
        bitstream_filter_flag = '-bsf:a' if use_avconv else '-absf'

        more_opts = []
        if preferred == 'best' or preferred == filecodec or (preferred == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and preferred in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = [bitstream_filter_flag, 'aac_adtstoasc']
            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts()
        else:
            # We convert the audio (lossy)
            acodec = self._ENCODERS[preferred]
            extension = preferred
            # The opus codec doesn't support the -aq option
            more_opts = self._quality_opts(allow_quality_scale=(extension != 'opus'))
            if preferred == 'aac':
                more_opts += ['-f', 'adts']
            if preferred == 'm4a':
                more_opts += [bitstream_filter_flag, 'aac_adtstoasc']
            if preferred == 'vorbis':
                extension = 'ogg'
            if preferred == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        new_path = self._replace_extension(path, extension)

        # If we download foo.mp3 and convert it to... foo.mp3, then don't
        # delete foo.mp3.  This is decided per call: storing it on self
        # would silently change the behaviour for every later file.
        keep_original = True if new_path == path else self._nopostoverwrites

        program = 'avconv' if use_avconv else 'ffmpeg'
        try:
            if keep_original and os.path.exists(encodeFilename(new_path)):
                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
            else:
                self._downloader.to_screen(u'[' + program + '] Destination: ' + new_path)
                self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as err:
            raise PostProcessingError(u'audio conversion failed: ' + err.msg)
        except Exception:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not turned into
            # post-processing errors.
            raise PostProcessingError(u'error running ' + program)

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            try:
                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
            except Exception:
                self._downloader.report_warning(u'Cannot update utime of audio file')

        information['filepath'] = new_path
        return keep_original, information
712e86b9 228
67d0c25e
JMF
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Convert a downloaded video to another container format.

    preferedformat -- extension/format the video should be converted to
    """

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        Returns (True, information) untouched when information['ext']
        already equals the target format.  Otherwise runs the
        conversion, updates 'filepath', 'format' and 'ext' in
        information and returns (False, information).
        """
        path = information['filepath']
        target_format = self._preferedformat

        prefix, sep, ext = path.rpartition(u'.')
        if not sep or u'/' in ext or os.sep in ext:
            # The last path component has no extension: append the new
            # one instead of replacing everything before an unrelated
            # dot (or the whole path).
            outpath = path + u'.' + target_format
        else:
            outpath = prefix + sep + target_format

        if information['ext'] == target_format:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, target_format))
            return True, information

        self._downloader.to_screen(
            u'[ffmpeg] Converting video from %s to %s, Destination: %s'
            % (information['ext'], target_format, outpath))
        self.run_ffmpeg(path, outpath, [])
        information['filepath'] = outpath
        information['format'] = target_format
        information['ext'] = target_format
        return False, information
d4051a8e
JMF
247
248
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Embed downloaded subtitle files into an mp4 video."""

    # Two-letter (ISO 639-1) to three-letter (ISO 639-2/T) language codes.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    def __init__(self, downloader=None, subtitlesformat='srt'):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        self._subformat = subtitlesformat

    @classmethod
    def _conver_lang_code(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are looked up; unknown -> None.
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    def run(self, information):
        """Mux the subtitle files into the mp4 file, replacing it in place.

        Nothing is done (apart from a screen message) when the file is
        not mp4 or information has no subtitles.  Always returns
        (True, information).
        """
        if information['ext'] != u'mp4':
            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
            return True, information
        if not information.get('subtitles'):
            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
            return True, information

        sub_langs = list(information['subtitles'])
        filename = information['filepath']

        # Input 0 is the video, inputs 1..n are the subtitle files.
        input_files = [filename]
        for lang in sub_langs:
            input_files.append(subtitles_filename(filename, lang, self._subformat))

        # Copy the first two streams of the video file unchanged.
        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
        for index, lang in enumerate(sub_langs):
            opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
            iso_code = self._conver_lang_code(lang)
            if iso_code is None:
                continue
            opts += ['-metadata:s:s:%d' % index, 'language=%s' % iso_code]
        opts += ['-f', 'mp4']

        # Write to a temporary file first, then swap it in for the original.
        temp_filename = filename + u'.temp'
        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, information
bc4f2917
JMF
474
475
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Write title, date and artist metadata into the downloaded file."""

    def run(self, info):
        """Add metadata taken from info to info['filepath'] in place.

        'title' becomes the title, 'upload_date' the date, and
        'uploader' (or, failing that, 'uploader_id') the artist.  When
        none of them is available the file is left untouched.  Always
        returns (True, info).
        """
        metadata = {}
        if info.get('title') is not None:
            metadata['title'] = info['title']
        if info.get('upload_date') is not None:
            metadata['date'] = info['upload_date']
        if info.get('uploader') is not None:
            metadata['artist'] = info['uploader']
        elif info.get('uploader_id') is not None:
            metadata['artist'] = info['uploader_id']

        if not metadata:
            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
            return True, info

        filename = info['filepath']
        # The output format is named after the file extension.
        # NOTE(review): not every extension is necessarily a valid ffmpeg
        # format name - confirm for the formats in use.
        ext = os.path.splitext(filename)[1][1:]
        temp_filename = filename + u'.temp'

        options = ['-c', 'copy']
        for (name, value) in metadata.items():
            # No quotes around the value: the arguments are passed as a
            # list without a shell, so quotes would end up inside the
            # stored metadata value.
            options.extend(['-metadata', '%s=%s' % (name, value)])
        options.extend(['-f', ext])

        # Write to a temporary file first, then swap it in for the original.
        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
        self.run_ffmpeg(filename, temp_filename, options)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return True, info