]> jfr.im git - yt-dlp.git/blob - youtube_dl/PostProcessor.py
[ted] Added support for subtitle download
[yt-dlp.git] / youtube_dl / PostProcessor.py
1 import os
2 import subprocess
3 import sys
4 import time
5 import datetime
6
7 from .utils import *
8
9
class PostProcessor(object):
    """Base class for post processors.

    A downloader keeps a chain of PostProcessor objects, registered through
    its add_post_processor() method.  After a successful download it calls
    run() on each processor in turn: the first one receives the initial
    information and every following one receives what the previous
    processor returned.

    The chain ends when a processor returns None or when there are no
    processors left.

    As with InfoExtractor objects, registration is mutual: the processor
    also keeps a reference to the downloader it belongs to.
    """

    # Downloader this processor is attached to (None while unattached).
    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach this post processor to the given downloader."""
        self._downloader = downloader

    def run(self, information):
        """Process a downloaded file.

        "information" is a dictionary like the ones built by
        InfoExtractors, with one additional "filepath" entry pointing to
        the downloaded file.

        Returns a 2-tuple: the first item tells whether the original file
        should be kept (None meaning "no preference"), the second item is
        the (possibly updated) information dictionary.

        Implementations may raise PostProcessingError when processing
        fails.
        """
        # The base implementation changes nothing and expresses no
        # preference about keeping the original file.
        return (None, information)
52
class FFmpegPostProcessorError(PostProcessingError):
    """Raised when ffmpeg/avconv is missing or exits with an error."""
55
class AudioConversionError(PostProcessingError):
    """Raised when extracting or converting the audio track fails."""
58
class FFmpegPostProcessor(PostProcessor):
    """Common base for post processors that shell out to ffmpeg/avconv."""

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        # Maps program name -> program name if runnable, False otherwise.
        self._exes = self.detect_executables()

    @staticmethod
    def detect_executables():
        """Probe for the avprobe, avconv, ffmpeg and ffprobe programs.

        Each program is started once with '-version'.  The returned dict
        maps the program name to itself when it could be launched and to
        False when launching raised OSError.
        """
        def probe(name):
            try:
                proc = subprocess.Popen(
                    [name, '-version'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                proc.communicate()
            except OSError:
                return False
            return name

        found = {}
        for name in ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']:
            found[name] = probe(name)
        return found

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        """Run avconv (preferred) or ffmpeg on several input files.

        input_paths -- paths passed as '-i' inputs, in order
        out_path    -- output file; an existing file is overwritten ('-y')
        opts        -- list of extra command line options placed between
                       the inputs and the output file

        Raises FFmpegPostProcessorError when neither program is available
        or when the program exits with a non-zero status; in the latter
        case the message is the last line of its stderr output.
        """
        converter = self._exes['avconv'] or self._exes['ffmpeg']
        if not converter:
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

        input_args = []
        for in_path in input_paths:
            input_args += ['-i', encodeFilename(in_path)]
        cmd = [converter, '-y'] + input_args + opts
        cmd.append(encodeFilename(self._ffmpeg_filename_argument(out_path)))

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, err_output = proc.communicate()
        if proc.returncode == 0:
            return

        # Only the final stderr line is reported as the error message.
        last_line = err_output.decode('utf-8', 'replace').strip().split('\n')[-1]
        raise FFmpegPostProcessorError(last_line)

    def run_ffmpeg(self, path, out_path, opts):
        """Single-input convenience wrapper around run_ffmpeg_multiple_files()."""
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        """Return fn in a form that cannot be mistaken for an option."""
        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
        return u'./' + fn if fn.startswith(u'-') else fn
103
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Post processor that extracts the audio track of a downloaded file.

    preferredcodec   -- target codec name, or 'best' (the default when None
                        is given) to keep the source codec where possible
    preferredquality -- quality as a string; values below 10 are passed as
                        a VBR quality, anything else as a bitrate in kbit/s
    nopostoverwrites -- when true, an already existing output file is not
                        overwritten
    """

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def get_audio_codec(self, path):
        """Return the audio codec name reported by avprobe/ffprobe.

        Returns None when the probe program fails or when no audio stream
        is found in its output.  Raises PostProcessingError when neither
        probe program is available.
        """
        if not self._exes['ffprobe'] and not self._exes['avprobe']:
            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
        try:
            cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        # Remember the most recent codec_name and report it as soon as a
        # 'codec_type=audio' line follows it.
        audio_codec = None
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Convert path to out_path, dropping video ('-vn').

        codec     -- value for '-acodec', or None to let the program choose
        more_opts -- list of additional command line options

        Raises AudioConversionError when no converter is available or the
        conversion fails.
        """
        if not self._exes['ffmpeg'] and not self._exes['avconv']:
            raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    def _quality_opts(self, vbr_supported=True):
        """Return the command line options for the preferred quality.

        Qualities below 10 select a VBR quality level unless vbr_supported
        is false; everything else is passed as a bitrate in kbit/s.
        Returns an empty list when no quality was requested.
        """
        quality = self._preferredquality
        if quality is None:
            return []
        # avconv and ffmpeg spell these options differently.
        if int(quality) < 10 and vbr_supported:
            return ['-q:a' if self._exes['avconv'] else '-aq', quality]
        return ['-b:a' if self._exes['avconv'] else '-ab', quality + 'k']

    def run(self, information):
        """Extract the audio of information['filepath'].

        Returns a tuple (keep_original, information) where "filepath" in
        information has been updated to the extracted audio file.
        Raises PostProcessingError when the source codec cannot be
        determined or the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')

        bsf_flag = '-bsf:a' if self._exes['avconv'] else '-absf'
        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = [bsf_flag, 'aac_adtstoasc']
            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts()
        else:
            # We convert the audio (lossy)
            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
            extension = self._preferredcodec
            # The opus codec doesn't support the -aq option
            more_opts = self._quality_opts(vbr_supported=(extension != 'opus'))
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += [bsf_flag, 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition(u'.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        # This is decided per file in a local variable: writing it back to
        # self._nopostoverwrites would silently change the behaviour for
        # every later file handled by this instance.
        keep_original = self._nopostoverwrites
        if new_path == path:
            keep_original = True

        converter_name = 'avconv' if self._exes['avconv'] else 'ffmpeg'
        try:
            if keep_original and os.path.exists(encodeFilename(new_path)):
                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
            else:
                self._downloader.to_screen(u'[' + converter_name + '] Destination: ' + new_path)
                self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as err:
            raise PostProcessingError(u'audio conversion failed: ' + err.msg)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and
            # SystemExit must propagate instead of being reported as a
            # conversion failure.
            raise PostProcessingError(u'error running ' + converter_name)

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            try:
                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
            except Exception:
                # Best effort only; a failure here must not abort processing.
                self._downloader.report_warning(u'Cannot update utime of audio file')

        information['filepath'] = new_path
        return keep_original, information
228
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Post processor that converts the downloaded video to another format."""

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertor, self).__init__(downloader)
        # File extension / format name the video should be converted to.
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        Returns (True, information) unchanged when the file already has
        the target extension.  Otherwise the file is converted and
        (False, information) is returned with "filepath", "format" and
        "ext" updated to describe the converted file.
        """
        source_path = information['filepath']
        stem, dot, _ = source_path.rpartition(u'.')
        target_path = stem + dot + self._preferedformat

        if information['ext'] == self._preferedformat:
            self._downloader.to_screen(
                u'[ffmpeg] Not converting video file %s - already is in target format %s'
                % (source_path, self._preferedformat))
            return True, information

        banner = '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat)
        self._downloader.to_screen(u'[' + 'ffmpeg' + banner + target_path)
        self.run_ffmpeg(source_path, target_path, [])

        information['filepath'] = target_path
        information['format'] = self._preferedformat
        information['ext'] = self._preferedformat
        return False, information
247
248
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Post processor that muxes downloaded subtitle files into an mp4."""

    # Two-letter (ISO 639-1) to three-letter (ISO 639-2/T) language codes.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    def __init__(self, downloader=None, subtitlesformat='srt'):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        # Format (file extension) of the subtitle files to embed.
        self._subformat = subtitlesformat

    @classmethod
    def _conver_lang_code(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up; None is
        returned for codes missing from the table.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    def run(self, information):
        """Embed every subtitle listed in information['subtitles'].

        Nothing is done unless the file is an mp4 and there is at least
        one subtitle.  The result is written to a temporary file which
        then replaces the original.  Always returns (True, information).
        """
        if information['ext'] != u'mp4':
            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
            return True, information
        if not information.get('subtitles'):
            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
            return True, information

        sub_langs = list(information['subtitles'])
        filename = information['filepath']

        # Input 0 is the video, inputs 1..n are the subtitle files.
        input_files = [filename]
        for lang in sub_langs:
            input_files.append(subtitles_filename(filename, lang, self._subformat))

        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
        for index, lang in enumerate(sub_langs):
            opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
            iso_code = self._conver_lang_code(lang)
            if iso_code is not None:
                opts += ['-metadata:s:s:%d' % index, 'language=%s' % iso_code]
        opts += ['-f', 'mp4']

        temp_filename = filename + u'.temp'
        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)

        # Swap the muxed file into place of the original.
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, information
474
475
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Post processor that writes title/date/artist tags into the file."""

    def run(self, info):
        """Add metadata taken from info to info['filepath'].

        The tags written are "title" (from 'title'), "date" (from
        'upload_date') and "artist" (from 'uploader', falling back to
        'uploader_id').  Streams are copied without re-encoding into a
        temporary file which then replaces the original.

        Always returns (True, info); nothing is done when none of the
        source fields is available.
        """
        metadata = {}
        if info.get('title') is not None:
            metadata['title'] = info['title']
        if info.get('upload_date') is not None:
            metadata['date'] = info['upload_date']
        if info.get('uploader') is not None:
            metadata['artist'] = info['uploader']
        elif info.get('uploader_id') is not None:
            metadata['artist'] = info['uploader_id']

        if not metadata:
            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
            return True, info

        filename = info['filepath']
        ext = os.path.splitext(filename)[1][1:]
        temp_filename = filename + u'.temp'

        options = ['-c', 'copy']
        for (name, value) in metadata.items():
            # The command is executed as an argument list without a shell,
            # so the value must not be wrapped in quotes: they would be
            # stored literally as part of the tag.
            options.extend(['-metadata', '%s=%s' % (name, value)])
        options.extend(['-f', ext])

        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
        self.run_ffmpeg(filename, temp_filename, options)
        # Swap the tagged file into place of the original.
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return True, info