]> jfr.im git - yt-dlp.git/blame - youtube_dl/PostProcessor.py
release 2013.11.13
[yt-dlp.git] / youtube_dl / PostProcessor.py
CommitLineData
d77c3dfd
FV
1import os
2import subprocess
3import sys
4import time
5
a4fd0415
PH
6
7from .utils import (
8 compat_subprocess_get_DEVNULL,
9 encodeFilename,
10 PostProcessingError,
11 shell_quote,
12 subtitles_filename,
13)
d77c3dfd
FV
14
15
class PostProcessor(object):
    """Base class for post processors.

    A downloader collects PostProcessor objects through its
    add_post_processor() method. After a successful download it walks that
    chain, calling run() on every entry: the first call receives an initial
    argument, each following call receives what the previous PostProcessor
    returned.

    The chain stops as soon as one of them returns None, or when the last
    PostProcessor has run.

    Like InfoExtractor objects, PostProcessor objects take part in a
    "mutual registration" process with the downloader.
    """

    # Downloader this post processor is attached to (None until one is set).
    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach this PP to the given downloader."""
        self._downloader = downloader

    def run(self, information):
        """Execute the post processing step.

        "information" is a dictionary like the ones built by InfoExtractors,
        with one additional field, "filepath", which points to the
        downloaded file.

        The result is a 2-tuple. Its first element says whether the original
        file should be kept (i.e. not deleted; None means no preference) and
        its second element is the updated information dictionary.

        A PostProcessingError may be raised when post processing fails.
        """
        # The base implementation expresses no preference and changes nothing.
        keep_original = None
        return keep_original, information
d77c3dfd 58
7851b379
PH
class FFmpegPostProcessorError(PostProcessingError):
    """Raised when ffmpeg/avconv is unavailable or exits with an error."""
67d0c25e 61
7851b379
PH
class AudioConversionError(PostProcessingError):
    """Raised when extracting or converting the audio track fails."""
d77c3dfd 64
67d0c25e
JMF
class FFmpegPostProcessor(PostProcessor):
    """Common base for post processors that shell out to ffmpeg/avconv.

    On construction the instance stores, in ``self._exes``, the result of
    detect_executables().
    """

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        self._exes = self.detect_executables()

    @staticmethod
    def detect_executables():
        """Probe for the helper programs this module knows about.

        Returns a dict keyed by program name ('avprobe', 'avconv', 'ffmpeg',
        'ffprobe'). The value is the program name itself when
        ``<program> -version`` could be launched, and False when launching
        it raised OSError.
        """
        def probe(name):
            try:
                proc = subprocess.Popen(
                    [name, '-version'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                proc.communicate()
            except OSError:
                return False
            return name

        candidates = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
        return dict((name, probe(name)) for name in candidates)

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        """Run avconv/ffmpeg over several inputs, writing to out_path.

        avconv is used when it was detected, ffmpeg otherwise. Existing
        output is overwritten (-y). Raises FFmpegPostProcessorError when
        neither program is available or when the program exits with a
        non-zero status; in the latter case the message is the last line of
        the program's stderr.
        """
        exes = self._exes
        if not (exes['ffmpeg'] or exes['avconv']):
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

        inputs = []
        for in_path in input_paths:
            inputs += ['-i', encodeFilename(in_path)]
        program = [exes['avconv'] or exes['ffmpeg'], '-y']
        target = [encodeFilename(self._ffmpeg_filename_argument(out_path))]
        cmd = program + inputs + opts + target

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, err_output = proc.communicate()
        if proc.returncode == 0:
            return
        # Only the final stderr line is reported to the caller.
        err_text = err_output.decode('utf-8', 'replace')
        raise FFmpegPostProcessorError(err_text.strip().split('\n')[-1])

    def run_ffmpeg(self, path, out_path, opts):
        """Single-input convenience wrapper around run_ffmpeg_multiple_files()."""
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        """Return fn in a form that is not mistaken for a command line option."""
        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
        return (u'./' + fn) if fn.startswith(u'-') else fn
109
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Post processor that extracts the audio track of a downloaded file.

    preferredcodec selects the target codec ('best' when None),
    preferredquality is handed to ffmpeg/avconv as a quality or bitrate
    setting, and nopostoverwrites requests that an already existing output
    file is left untouched.
    """

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def get_audio_codec(self, path):
        """Return the audio codec name reported by avprobe/ffprobe for path.

        Returns None when the probe program cannot be run, exits with a
        non-zero status, or reports no audio stream. Raises
        PostProcessingError when neither ffprobe nor avprobe was detected.
        """
        if not self._exes['ffprobe'] and not self._exes['avprobe']:
            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
        try:
            cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        # Remember the most recent codec_name= value and return it as soon as
        # a codec_type=audio line follows.
        audio_codec = None
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Convert path to out_path with video disabled (-vn).

        codec is passed as -acodec unless it is None; more_opts is appended
        to the option list. Failures are reported as AudioConversionError.
        """
        if not self._exes['ffmpeg'] and not self._exes['avconv']:
            raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    def run(self, information):
        """Extract the audio of information['filepath'].

        Returns a tuple (keep_original, information) where
        information['filepath'] has been updated to the audio file.
        keep_original is true when nopostoverwrites was requested or when
        the output path equals the input path.

        Raises PostProcessingError when the source codec cannot be
        determined or the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')

        # avconv and ffmpeg spell several options differently.
        avconv = self._exes['avconv']
        quality_flag = '-q:a' if avconv else '-aq'
        bitrate_flag = '-b:a' if avconv else '-ab'
        bsf_flag = '-bsf:a' if avconv else '-absf'

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = [bsf_flag, 'aac_adtstoasc']
            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = []
                if self._preferredquality is not None:
                    if int(self._preferredquality) < 10:
                        more_opts += [quality_flag, self._preferredquality]
                    else:
                        more_opts += [bitrate_flag, self._preferredquality + 'k']
        else:
            # We convert the audio (lossy)
            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
            extension = self._preferredcodec
            more_opts = []
            if self._preferredquality is not None:
                # The opus codec doesn't support the -aq option
                if int(self._preferredquality) < 10 and extension != 'opus':
                    more_opts += [quality_flag, self._preferredquality]
                else:
                    more_opts += [bitrate_flag, self._preferredquality + 'k']
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += [bsf_flag, 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition(u'.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't
        # delete foo.mp3, silly. The decision is kept in a local variable:
        # storing it on self would silently enable "no overwrites" for every
        # later file handled by this instance.
        keep_original = self._nopostoverwrites or new_path == path

        try:
            if keep_original and os.path.exists(encodeFilename(new_path)):
                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
            else:
                self._downloader.to_screen(u'[' + (avconv and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
                self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as e:
            raise PostProcessingError(u'audio conversion failed: ' + e.msg)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and SystemExit
            # must propagate instead of being reported as a tool failure.
            raise PostProcessingError(u'error running ' + (avconv and 'avconv' or 'ffmpeg'))

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            try:
                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
            except Exception:
                # Best effort only; a failure here must not abort the run.
                self._downloader.report_warning(u'Cannot update utime of audio file')

        information['filepath'] = new_path
        return keep_original, information
712e86b9 234
67d0c25e
JMF
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Post processor that converts a video file to another format."""

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        When information['ext'] already equals the preferred format nothing
        is converted and (True, information) is returned. Otherwise the file
        is converted, the 'filepath', 'format' and 'ext' entries are updated
        and (False, information) is returned.
        """
        target_format = self._preferedformat
        path = information['filepath']
        # The output keeps the input's name with the extension swapped.
        prefix, sep, ext = path.rpartition(u'.')
        outpath = prefix + sep + target_format

        source_ext = information['ext']
        if source_ext == target_format:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, target_format))
            return True, information

        self._downloader.to_screen(
            u'[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], target_format) + outpath)
        self.run_ffmpeg(path, outpath, [])

        information['filepath'] = outpath
        information['format'] = target_format
        information['ext'] = target_format
        return False, information
d4051a8e
JMF
253
254
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Post processor that embeds subtitle files into an mp4 video."""

    # Two-letter language codes mapped to three-letter ones.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    def __init__(self, downloader=None, subtitlesformat='srt'):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        self._subformat = subtitlesformat

    @classmethod
    def _conver_lang_code(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T.

        Only the first two characters of code are looked up; None is
        returned for codes missing from the table.
        """
        short_code = code[:2]
        return cls._lang_map.get(short_code)

    def run(self, information):
        """Embed every subtitle listed in information['subtitles'].

        Nothing is done (apart from a screen message) when the file is not
        an mp4 or when there are no subtitles. Otherwise the result is
        written to a temporary file which then replaces the original.
        Always returns (True, information).
        """
        if information['ext'] != u'mp4':
            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
            return True, information
        if not information.get('subtitles'):
            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
            return True, information

        sub_langs = list(information['subtitles'])
        filename = information['filepath']

        # Input 0 is the video itself; inputs 1..n are the subtitle files.
        input_files = [filename]
        for lang in sub_langs:
            input_files.append(subtitles_filename(filename, lang, self._subformat))

        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
        for index, lang in enumerate(sub_langs):
            opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
            iso_code = self._conver_lang_code(lang)
            if iso_code is not None:
                opts += ['-metadata:s:s:%d' % index, 'language=%s' % iso_code]
        opts += ['-f', 'mp4']

        temp_filename = filename + u'.temp'
        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)

        # Swap the muxed temporary file in for the original.
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, information
bc4f2917
JMF
480
481
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Post processor that writes title/date/artist metadata into the file."""

    def run(self, info):
        """Add the metadata found in info to info['filepath'].

        'title' becomes the title tag, 'upload_date' the date tag, and
        'uploader' (or, failing that, 'uploader_id') the artist tag. When
        none of them is available only a screen message is printed.
        Otherwise the streams are copied into a temporary file carrying the
        tags, which then replaces the original. Always returns (True, info).
        """
        metadata = {}

        title = info.get('title')
        if title is not None:
            metadata['title'] = title

        upload_date = info.get('upload_date')
        if upload_date is not None:
            metadata['date'] = upload_date

        # Prefer the uploader's name and fall back to the uploader id.
        artist = info.get('uploader')
        if artist is None:
            artist = info.get('uploader_id')
        if artist is not None:
            metadata['artist'] = artist

        if not metadata:
            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
            return True, info

        filename = info['filepath']
        # Extension without the leading dot, used as the output format name.
        ext = os.path.splitext(filename)[1][1:]
        temp_filename = filename + u'.temp'

        options = ['-c', 'copy']
        for name, value in metadata.items():
            options += ['-metadata', '%s=%s' % (name, value)]
        options += ['-f', ext]

        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
        self.run_ffmpeg(filename, temp_filename, options)

        # Swap the tagged temporary file in for the original.
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return True, info
511 return True, info