]> jfr.im git - yt-dlp.git/blame - youtube_dl/PostProcessor.py
Add a PostProcessor for adding metadata to the file (closes #1570)
[yt-dlp.git] / youtube_dl / PostProcessor.py
CommitLineData
d77c3dfd
FV
1import os
2import subprocess
3import sys
4import time
bc4f2917 5import datetime
d77c3dfd 6
9e8056d5 7from .utils import *
d77c3dfd
FV
8
9
class PostProcessor(object):
    """Base class for a single post-processing step.

    Instances are registered on a downloader through its
    add_post_processor() method. After a successful download the
    downloader walks its chain of PostProcessors and calls run() on each
    of them, handing every step the information produced by the step
    before it.

    Like InfoExtractor objects, PostProcessors take part in a "mutual
    registration": the downloader knows its post processors and each post
    processor holds a reference to its downloader.
    """

    # Class-level default so the attribute exists even if __init__ is skipped.
    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach the downloader this post processor belongs to."""
        self._downloader = downloader

    def run(self, information):
        """Process one finished download.

        "information" is a dictionary like the ones built by
        InfoExtractors, with one additional "filepath" entry that points
        to the downloaded file.

        Returns a 2-tuple: first whether the original file should be kept
        (None meaning "no preference"), then the (possibly updated)
        information dictionary.

        Implementations may raise PostProcessingError when the processing
        fails. This base implementation changes nothing.
        """
        keep_original = None  # no preference
        return keep_original, information
d77c3dfd 52
7851b379
PH
class FFmpegPostProcessorError(PostProcessingError):
    """Raised when ffmpeg/avconv cannot be found or exits with an error."""
67d0c25e 55
7851b379
PH
class AudioConversionError(PostProcessingError):
    """Raised when extracting or converting the audio track fails."""
d77c3dfd 58
67d0c25e
JMF
class FFmpegPostProcessor(PostProcessor):
    """Shared plumbing for post processors that run ffmpeg or avconv."""

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        # Maps program name -> program name (launchable) or False (missing).
        self._exes = self.detect_executables()

    @staticmethod
    def detect_executables():
        """Probe which of avprobe/avconv/ffmpeg/ffprobe can be launched.

        Returns a dict keyed by program name. The value is the program
        name itself when "<program> -version" could be started, and False
        when starting it raised OSError.
        """
        def probe(name):
            try:
                subprocess.Popen(
                    [name, '-version'],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE).communicate()
            except OSError:
                return False
            return name

        found = {}
        for name in ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']:
            found[name] = probe(name)
        return found

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        """Run the converter on several inputs, writing to out_path.

        avconv is used when available, ffmpeg otherwise. "opts" is the
        list of extra command line options placed between the inputs and
        the output file.

        Raises FFmpegPostProcessorError if neither program is available
        or if the process exits with a non-zero status; in the latter
        case the message is the last line of the program's stderr.
        """
        converter = self._exes['avconv'] or self._exes['ffmpeg']
        if not converter:
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

        input_args = []
        for in_path in input_paths:
            input_args += ['-i', encodeFilename(in_path)]
        output_arg = [encodeFilename(self._ffmpeg_filename_argument(out_path))]
        # '-y' makes the converter overwrite an existing output file.
        cmd = [converter, '-y'] + input_args + opts + output_arg

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, err_output = proc.communicate()
        if proc.returncode == 0:
            return
        err_text = err_output.decode('utf-8', 'replace')
        raise FFmpegPostProcessorError(err_text.strip().split('\n')[-1])

    def run_ffmpeg(self, path, out_path, opts):
        """Single-input convenience wrapper around run_ffmpeg_multiple_files()."""
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        """Return fn in a form that is not mistaken for an option."""
        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
        return u'./' + fn if fn.startswith(u'-') else fn
101
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Extract the audio track of a downloaded file with ffmpeg/avconv.

    preferredcodec -- target codec/container name, or 'best' (the default
                      when None is given) to copy the existing audio when
                      possible.
    preferredquality -- quality as a string; values below 10 are passed as
                      a quality setting, anything else as a bitrate in
                      kbit/s.
    nopostoverwrites -- when true, an already existing output file is not
                      regenerated.
    """

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def get_audio_codec(self, path):
        """Return the audio codec name reported by avprobe/ffprobe.

        The "-show_streams" output is scanned line by line: the most
        recent "codec_name=" value is remembered and returned as soon as a
        "codec_type=audio" line is seen. Returns None if no such pair is
        found, if the prober exits with a non-zero status, or if it cannot
        be started.

        Raises PostProcessingError when neither prober is available.
        """
        if not self._exes['ffprobe'] and not self._exes['avprobe']:
            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
        try:
            cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        audio_codec = None
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Run the converter with video disabled and the given audio codec.

        "codec" of None lets the converter pick the codec. Failures of the
        underlying call are re-raised as AudioConversionError.
        """
        if not self._exes['ffmpeg'] and not self._exes['avconv']:
            raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    def _quality_opts(self):
        """Return the converter options that express the preferred quality."""
        if self._preferredquality is None:
            return []
        if int(self._preferredquality) < 10:
            return [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
        return [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']

    def run(self, information):
        """Extract the audio of information['filepath'].

        Returns (keep_original, information) with information['filepath']
        pointing at the audio file. keep_original is true when
        nopostoverwrites was requested or when the audio file is the
        downloaded file itself.

        Raises PostProcessingError if the source codec cannot be
        determined or the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts()
        else:
            # We convert the audio (lossy)
            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
            extension = self._preferredcodec
            more_opts = self._quality_opts()
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition(u'.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        # This is decided per file: storing it on self would silently turn
        # on "no overwrite / keep original" for every later download that
        # is handled by this same instance.
        keep_original = self._nopostoverwrites or new_path == path

        try:
            if keep_original and os.path.exists(encodeFilename(new_path)):
                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
            else:
                self._downloader.to_screen(u'[' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
                self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as err:
            raise PostProcessingError(u'audio conversion failed: ' + err.msg)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and
            # SystemExit must reach the caller unchanged.
            raise PostProcessingError(u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg'))

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            try:
                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
            except Exception:
                # Best effort only; a wrong timestamp must not fail the download.
                self._downloader.report_warning(u'Cannot update utime of audio file')

        information['filepath'] = new_path
        return keep_original, information
712e86b9 225
67d0c25e
JMF
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Convert a downloaded video to the preferred format via ffmpeg/avconv."""

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] unless it already has the target format.

        Returns (True, information) untouched when information['ext']
        already equals the preferred format. Otherwise the converter is
        run with no extra options, the 'filepath', 'format' and 'ext'
        entries are updated to describe the new file, and
        (False, information) is returned.
        """
        target_format = self._preferedformat
        source_path = information['filepath']
        stem, dot, _ = source_path.rpartition(u'.')
        outpath = stem + dot + target_format

        if information['ext'] == target_format:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (source_path, target_format))
            return True, information

        conversion_note = '] Converting video from %s to %s, Destination: ' % (information['ext'], target_format)
        self._downloader.to_screen(u'[' + 'ffmpeg' + conversion_note + outpath)
        self.run_ffmpeg(source_path, outpath, [])

        information['filepath'] = outpath
        for key in ('format', 'ext'):
            information[key] = target_format
        return False, information
d4051a8e
JMF
244
245
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Mux downloaded subtitle files into an mp4 video as mov_text tracks."""

    # Two-letter (ISO 639-1) to three-letter (ISO 639-2/T) language codes.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    def __init__(self, downloader=None, subtitlesformat='srt'):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        self._subformat = subtitlesformat

    @classmethod
    def _conver_lang_code(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of "code" are looked up; None is
        returned for codes that are not in the table.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    def run(self, information):
        """Embed every entry of information['subtitles'] into the mp4 file.

        Nothing is done (and the file is left as is) when the download is
        not an mp4 or has no subtitles. Otherwise the converter writes a
        temporary file next to the video, which then replaces the
        original. Always returns (True, information).
        """
        if information['ext'] != u'mp4':
            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
            return True, information
        if not information.get('subtitles'):
            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
            return True, information

        sub_langs = list(information['subtitles'])
        filename = information['filepath']

        # Input 0 is the video; inputs 1..n are the subtitle files.
        input_files = [filename]
        for lang in sub_langs:
            input_files.append(subtitles_filename(filename, lang, self._subformat))

        # Copy the first two streams of the video untouched.
        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
        for index, lang in enumerate(sub_langs):
            opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
            iso_code = self._conver_lang_code(lang)
            if iso_code is not None:
                opts += ['-metadata:s:s:%d' % index, 'language=%s' % iso_code]
        # The temporary name has no usable extension, so name the format.
        opts += ['-f', 'mp4']

        temp_filename = filename + u'.temp'
        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, information
bc4f2917
JMF
471
472
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Write title, date and artist tags into the downloaded file."""

    def run(self, info):
        """Add the available metadata of "info" to info['filepath'].

        'title' becomes the title tag, 'upload_date' the date tag and
        'uploader' (or, failing that, 'uploader_id') the artist tag. The
        streams are copied into a temporary file that then replaces the
        original. Always returns (True, info).
        """
        metadata = {}
        if info.get('title') is not None:
            metadata['title'] = info['title']
        if info.get('upload_date') is not None:
            metadata['date'] = info['upload_date']
        if info.get('uploader') is not None:
            metadata['artist'] = info['uploader']
        elif info.get('uploader_id') is not None:
            metadata['artist'] = info['uploader_id']

        if not metadata:
            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
            return True, info

        filename = info['filepath']
        # Keep the real extension at the end of the temporary name so the
        # converter picks the container from it. Passing the extension to
        # "-f" breaks for extensions that are not muxer names (m4a, mkv).
        root, ext = os.path.splitext(filename)
        temp_filename = root + u'.temp' + ext

        options = ['-c', 'copy']
        for (name, value) in metadata.items():
            # No shell is involved, so quoting the value would store the
            # quote characters as part of the tag.
            options.extend(['-metadata', '%s=%s' % (name, value)])

        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
        self.run_ffmpeg(filename, temp_filename, options)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return True, info