]> jfr.im git - yt-dlp.git/blob - youtube_dl/PostProcessor.py
Added '--xattrs' option which writes metadata to the file's extended attributes using...
[yt-dlp.git] / youtube_dl / PostProcessor.py
1 import os
2 import subprocess
3 import sys
4 import time
5
6
7 from .utils import (
8 compat_subprocess_get_DEVNULL,
9 encodeFilename,
10 PostProcessingError,
11 shell_quote,
12 subtitles_filename,
13 )
14
15
class PostProcessor(object):
    """Base class for post processing steps.

    A downloader keeps an internal chain of PostProcessor objects, filled
    through its add_post_processor() method.  After a successful download
    it calls run() on every element of that chain in order: the first one
    receives an initial argument, each following one receives whatever the
    previous PostProcessor returned.

    The chain stops as soon as one of the steps returns None, or when its
    end is reached.

    Like InfoExtractor objects, PostProcessor objects take part in a
    "mutual registration" process with the downloader.
    """

    # Class-level fallback; every instance gets its own value in __init__.
    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach *downloader* to this PP, replacing any previous one."""
        self._downloader = downloader

    def run(self, information):
        """Execute this post processing step.

        "information" is a dictionary like the ones composed by
        InfoExtractors, with one extra field, "filepath", which points to
        the downloaded file.

        The result is a 2-tuple:
          * whether the original file should be kept (i.e. not deleted);
            None means "no preference";
          * the (possibly updated) information dictionary.

        Implementations may raise PostProcessingError when post processing
        fails.
        """
        # Default behaviour: express no preference and change nothing.
        return (None, information)
58
class FFmpegPostProcessorError(PostProcessingError):
    """Raised when an ffmpeg/avconv invocation is impossible or fails."""
61
class AudioConversionError(PostProcessingError):
    """Raised when extracting or converting the audio track fails."""
64
65
class FFmpegPostProcessor(PostProcessor):
    """Shared base for post processors that call ffmpeg/avconv.

    The available executables are probed once at construction time and
    remembered in ``self._exes``.
    """

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        self._exes = self.detect_executables()

    @staticmethod
    def detect_executables():
        """Probe for avprobe, avconv, ffmpeg and ffprobe.

        Returns a dict mapping each program name to itself when
        ``<program> -version`` could be launched, or to False when
        launching it raised OSError.
        """
        def probe(name):
            try:
                proc = subprocess.Popen(
                    [name, '-version'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                proc.communicate()
            except OSError:
                return False
            return name

        found = {}
        for name in ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']:
            found[name] = probe(name)
        return found

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        """Run avconv (preferred) or ffmpeg on several inputs.

        input_paths -- iterable of input file names, each passed with -i
        out_path    -- output file name
        opts        -- list of extra command line options placed between
                       the inputs and the output

        Raises FFmpegPostProcessorError when neither program is available
        or when the program exits with a non-zero status; in the latter
        case the message is the last line of its stderr output.
        """
        converter = self._exes['avconv'] or self._exes['ffmpeg']
        if not converter:
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

        input_args = []
        for in_path in input_paths:
            input_args += ['-i', encodeFilename(in_path)]
        output_arg = encodeFilename(self._ffmpeg_filename_argument(out_path))
        cmd = [converter, '-y'] + input_args + opts + [output_arg]

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, err_output = proc.communicate()
        if proc.returncode == 0:
            return
        # Only the last stderr line is reported to the caller.
        last_line = err_output.decode('utf-8', 'replace').strip().split('\n')[-1]
        raise FFmpegPostProcessorError(last_line)

    def run_ffmpeg(self, path, out_path, opts):
        """Single-input convenience wrapper around run_ffmpeg_multiple_files()."""
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        """Return *fn* in a form that is not mistaken for an option."""
        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
        return u'./' + fn if fn.startswith(u'-') else fn
110
111
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Extract the audio track of a downloaded file, transcoding if needed.

    preferredcodec    -- target codec name, or 'best' (the default) to keep
                         the source codec when it is one of aac, mp3,
                         vorbis or opus and fall back to MP3 otherwise
    preferredquality  -- quality string; values below 10 are passed as a
                         quality setting, anything else as a bitrate in
                         kbit/s
    nopostoverwrites  -- when true, an already existing target file is not
                         overwritten
    """

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def get_audio_codec(self, path):
        """Return the audio codec name of *path*, or None if undetermined.

        Runs avprobe (preferred) or ffprobe with -show_streams and scans
        its output.  Raises PostProcessingError when neither program is
        available; returns None when the program cannot be run, exits with
        a non-zero status, or reports no audio stream.
        """
        if not self._exes['ffprobe'] and not self._exes['avprobe']:
            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
        try:
            cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        audio_codec = None
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                # Remember the most recent codec name; it is only returned
                # once a following line marks the stream as audio.
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Convert *path* to an audio-only file at *out_path*.

        codec     -- value for -acodec, or None to pass no -acodec option
        more_opts -- list of additional command line options

        Raises AudioConversionError when no converter is available or when
        the conversion itself fails.
        """
        if not self._exes['ffmpeg'] and not self._exes['avconv']:
            raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    def run(self, information):
        """Extract the audio of information['filepath'].

        Updates information['filepath'] to the audio file and returns
        (keep_original, information).  keep_original is true when
        nopostoverwrites was requested or when the audio file has the very
        same path as the input file.

        Raises PostProcessingError when the source codec cannot be
        determined or the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')

        # avconv and ffmpeg spell several options differently.
        avconv = self._exes['avconv']
        tool = 'avconv' if avconv else 'ffmpeg'
        quality_flag = '-q:a' if avconv else '-aq'
        bitrate_flag = '-b:a' if avconv else '-ab'
        bsf_flag = '-bsf:a' if avconv else '-absf'

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = [bsf_flag, 'aac_adtstoasc']
            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = []
                if self._preferredquality is not None:
                    if int(self._preferredquality) < 10:
                        more_opts += [quality_flag, self._preferredquality]
                    else:
                        more_opts += [bitrate_flag, self._preferredquality + 'k']
        else:
            # We convert the audio (lossy)
            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
            extension = self._preferredcodec
            more_opts = []
            if self._preferredquality is not None:
                # The opus codec doesn't support the -aq option
                if int(self._preferredquality) < 10 and extension != 'opus':
                    more_opts += [quality_flag, self._preferredquality]
                else:
                    more_opts += [bitrate_flag, self._preferredquality + 'k']
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += [bsf_flag, 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition(u'.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        # This is decided per call in a local: storing it on self would
        # silently switch on "no overwrite" for every later file handled by
        # this instance.
        keep_original = self._nopostoverwrites or new_path == path

        try:
            if keep_original and os.path.exists(encodeFilename(new_path)):
                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
            else:
                self._downloader.to_screen(u'[' + tool + '] Destination: ' + new_path)
                self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as err:
            raise PostProcessingError(u'audio conversion failed: ' + err.msg)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and
            # SystemExit must propagate instead of being reported as a
            # conversion failure.
            raise PostProcessingError(u'error running ' + tool)

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            try:
                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
            except Exception:
                # Best effort only; a failure here must not abort the run.
                self._downloader.report_warning(u'Cannot update utime of audio file')

        information['filepath'] = new_path
        return keep_original, information
236
237
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Convert the downloaded video file into another container format."""

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        Returns (True, information) without touching anything when
        information['ext'] already equals the preferred format.  Otherwise
        the file is converted, the 'filepath', 'format' and 'ext' fields
        are updated, and (False, information) is returned.
        """
        source_path = information['filepath']
        target_format = self._preferedformat
        stem, dot, _ = source_path.rpartition(u'.')
        target_path = stem + dot + target_format

        if information['ext'] == target_format:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (source_path, target_format))
            return True, information

        announcement = (
            u'[' + 'ffmpeg'
            + '] Converting video from %s to %s, Destination: ' % (information['ext'], target_format)
            + target_path)
        self._downloader.to_screen(announcement)
        self.run_ffmpeg(source_path, target_path, [])

        information['filepath'] = target_path
        information['format'] = target_format
        information['ext'] = target_format
        return False, information
256
257
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Embed previously written subtitle files into an mp4 video."""

    # Two-letter to three-letter language codes, one row group per initial.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar', 'ab': 'abk', 'ae': 'ave', 'af': 'afr', 'ak': 'aka', 'am': 'amh',
        'an': 'arg', 'ar': 'ara', 'as': 'asm', 'av': 'ava', 'ay': 'aym', 'az': 'aze',
        'ba': 'bak', 'be': 'bel', 'bg': 'bul', 'bh': 'bih', 'bi': 'bis', 'bm': 'bam',
        'bn': 'ben', 'bo': 'bod', 'br': 'bre', 'bs': 'bos',
        'ca': 'cat', 'ce': 'che', 'ch': 'cha', 'co': 'cos', 'cr': 'cre', 'cs': 'ces',
        'cu': 'chu', 'cv': 'chv', 'cy': 'cym',
        'da': 'dan', 'de': 'deu', 'dv': 'div', 'dz': 'dzo',
        'ee': 'ewe', 'el': 'ell', 'en': 'eng', 'eo': 'epo', 'es': 'spa', 'et': 'est',
        'eu': 'eus',
        'fa': 'fas', 'ff': 'ful', 'fi': 'fin', 'fj': 'fij', 'fo': 'fao', 'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle', 'gd': 'gla', 'gl': 'glg', 'gn': 'grn', 'gu': 'guj', 'gv': 'glv',
        'ha': 'hau', 'he': 'heb', 'hi': 'hin', 'ho': 'hmo', 'hr': 'hrv', 'ht': 'hat',
        'hu': 'hun', 'hy': 'hye', 'hz': 'her',
        'ia': 'ina', 'id': 'ind', 'ie': 'ile', 'ig': 'ibo', 'ii': 'iii', 'ik': 'ipk',
        'io': 'ido', 'is': 'isl', 'it': 'ita', 'iu': 'iku',
        'ja': 'jpn', 'jv': 'jav',
        'ka': 'kat', 'kg': 'kon', 'ki': 'kik', 'kj': 'kua', 'kk': 'kaz', 'kl': 'kal',
        'km': 'khm', 'kn': 'kan', 'ko': 'kor', 'kr': 'kau', 'ks': 'kas', 'ku': 'kur',
        'kv': 'kom', 'kw': 'cor', 'ky': 'kir',
        'la': 'lat', 'lb': 'ltz', 'lg': 'lug', 'li': 'lim', 'ln': 'lin', 'lo': 'lao',
        'lt': 'lit', 'lu': 'lub', 'lv': 'lav',
        'mg': 'mlg', 'mh': 'mah', 'mi': 'mri', 'mk': 'mkd', 'ml': 'mal', 'mn': 'mon',
        'mr': 'mar', 'ms': 'msa', 'mt': 'mlt', 'my': 'mya',
        'na': 'nau', 'nb': 'nob', 'nd': 'nde', 'ne': 'nep', 'ng': 'ndo', 'nl': 'nld',
        'nn': 'nno', 'no': 'nor', 'nr': 'nbl', 'nv': 'nav', 'ny': 'nya',
        'oc': 'oci', 'oj': 'oji', 'om': 'orm', 'or': 'ori', 'os': 'oss',
        'pa': 'pan', 'pi': 'pli', 'pl': 'pol', 'ps': 'pus', 'pt': 'por',
        'qu': 'que',
        'rm': 'roh', 'rn': 'run', 'ro': 'ron', 'ru': 'rus', 'rw': 'kin',
        'sa': 'san', 'sc': 'srd', 'sd': 'snd', 'se': 'sme', 'sg': 'sag', 'si': 'sin',
        'sk': 'slk', 'sl': 'slv', 'sm': 'smo', 'sn': 'sna', 'so': 'som', 'sq': 'sqi',
        'sr': 'srp', 'ss': 'ssw', 'st': 'sot', 'su': 'sun', 'sv': 'swe', 'sw': 'swa',
        'ta': 'tam', 'te': 'tel', 'tg': 'tgk', 'th': 'tha', 'ti': 'tir', 'tk': 'tuk',
        'tl': 'tgl', 'tn': 'tsn', 'to': 'ton', 'tr': 'tur', 'ts': 'tso', 'tt': 'tat',
        'tw': 'twi', 'ty': 'tah',
        'ug': 'uig', 'uk': 'ukr', 'ur': 'urd', 'uz': 'uzb',
        've': 'ven', 'vi': 'vie', 'vo': 'vol',
        'wa': 'wln', 'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid', 'yo': 'yor',
        'za': 'zha', 'zh': 'zho', 'zu': 'zul',
    }

    def __init__(self, downloader=None, subtitlesformat='srt'):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        self._subformat = subtitlesformat

    @classmethod
    def _conver_lang_code(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are looked up; unknown codes give None.
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    def run(self, information):
        """Mux the subtitle files of every language into the mp4 file.

        Nothing is done (apart from a screen message) when the file is not
        an mp4 or when information carries no subtitles.  Otherwise the
        result is written to "<file>.temp", which then replaces the
        original file.  Always returns (True, information).
        """
        to_screen = self._downloader.to_screen
        if information['ext'] != u'mp4':
            to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
            return True, information
        if not information.get('subtitles'):
            to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
            return True, information

        languages = list(information['subtitles'])
        video_path = information['filepath']
        inputs = [video_path]
        for lang in languages:
            inputs.append(subtitles_filename(video_path, lang, self._subformat))

        # Input 0 is the video itself: copy its first two streams untouched.
        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
        for index, lang in enumerate(languages):
            # Subtitle file number `index` is ffmpeg input number index + 1.
            opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
            iso_code = self._conver_lang_code(lang)
            if iso_code is not None:
                opts += ['-metadata:s:s:%d' % index, 'language=%s' % iso_code]
        opts += ['-f', 'mp4']

        scratch_path = video_path + u'.temp'
        to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % video_path)
        self.run_ffmpeg_multiple_files(inputs, scratch_path, opts)
        os.remove(encodeFilename(video_path))
        os.rename(encodeFilename(scratch_path), encodeFilename(video_path))

        return True, information
483
484
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Write title, date and artist tags into the downloaded file."""

    def run(self, info):
        """Add the available metadata to info['filepath'] in place.

        The tagged copy is written to "<file>.temp" and then replaces the
        original.  Always returns (True, info).
        """
        tags = {}
        title = info.get('title')
        if title is not None:
            tags['title'] = title
        upload_date = info.get('upload_date')
        if upload_date is not None:
            tags['date'] = upload_date
        # The uploader name wins over the uploader id as artist.
        uploader = info.get('uploader')
        if uploader is not None:
            tags['artist'] = uploader
        else:
            uploader_id = info.get('uploader_id')
            if uploader_id is not None:
                tags['artist'] = uploader_id

        if not tags:
            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
            return True, info

        target = info['filepath']
        # Extension without the leading dot, used as the output format name.
        container = os.path.splitext(target)[1][1:]
        scratch = target + u'.temp'

        options = ['-c', 'copy']
        for tag_name, tag_value in tags.items():
            options += ['-metadata', '%s=%s' % (tag_name, tag_value)]
        options += ['-f', container]

        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % target)
        self.run_ffmpeg(target, scratch, options)
        os.remove(encodeFilename(target))
        os.rename(encodeFilename(scratch), encodeFilename(target))
        return True, info
515
516
class XAttrMetadataPP(PostProcessor):
    """Store download metadata in the file's extended attributes.

    More info about extended attributes for media:
      http://freedesktop.org/wiki/CommonExtendedAttributes/
      http://www.freedesktop.org/wiki/PhreedomDraft/
      http://dublincore.org/documents/usageguide/elements.shtml

    TODO:
      * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
      * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
    """

    def run(self, info):
        """Set extended attributes on downloaded file (if xattr support is found).

        The writer is chosen in this order: the python ``xattr`` module,
        then (on POSIX) the ``setfattr`` or ``xattr`` command line tools,
        and on non-POSIX systems NTFS alternate data streams.

        Always returns (True, info): this step never produces a new file,
        so it never asks for the downloaded file to be removed.  Missing
        tools and OSError while writing are reported through the
        downloader's report_error().
        """

        from .utils import hyphenate_date

        # This mess below finds the best xattr tool for the job and creates a
        # "write_xattr" function.
        try:
            # try the pyxattr module...
            import xattr

            def write_xattr(path, key, value):
                return xattr.setxattr(path, key, value)

        except ImportError:

            if os.name == 'posix':
                def which(program):
                    """Return the path of an executable *program* on PATH, or None."""
                    for directory in os.environ.get("PATH", "").split(os.pathsep):
                        candidate = os.path.join(directory, program)
                        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                            return candidate
                    return None

                user_has_setfattr = which("setfattr")
                user_has_xattr = which("xattr")

                if user_has_setfattr or user_has_xattr:

                    def write_xattr(path, key, value):
                        import errno
                        # Map known tool messages to errno values so that
                        # callers can handle them as ordinary OSErrors.
                        potential_errors = {
                            # setfattr: /tmp/blah: Operation not supported
                            "Operation not supported": errno.EOPNOTSUPP,
                            # setfattr: ~/blah: No such file or directory
                            # xattr: No such file: ~/blah
                            "No such file": errno.ENOENT,
                        }

                        if user_has_setfattr:
                            cmd = ['setfattr', '-n', key, '-v', value, path]
                        else:
                            cmd = ['xattr', '-w', key, value, path]

                        try:
                            subprocess.check_output(cmd, stderr=subprocess.STDOUT)
                        except subprocess.CalledProcessError as called_error:
                            errorstr = called_error.output.strip().decode()
                            for potential_errorstr, potential_errno in potential_errors.items():
                                if potential_errorstr in errorstr:
                                    os_error = OSError(potential_errno, potential_errorstr)
                                    os_error.__cause__ = None
                                    raise os_error
                            raise  # Reraise unhandled error

                else:
                    # On Unix, and can't find pyxattr, setfattr, or xattr.
                    if sys.platform.startswith('linux'):
                        self._downloader.report_error("Couldn't find a tool to set the xattrs. Install either the python 'pyxattr' or 'xattr' modules, or the GNU 'attr' package (which contains the 'setfattr' tool).")
                    elif sys.platform == 'darwin':
                        self._downloader.report_error("Couldn't find a tool to set the xattrs. Install either the python 'xattr' module, or the 'xattr' binary.")
                    else:
                        self._downloader.report_error("Couldn't find a tool to set the xattrs. Install either the python 'xattr' module, or the 'setfattr' or 'xattr' binary.")
                    # No writer could be created, so stop here instead of
                    # calling an unbound write_xattr below.
                    return True, info
            else:
                # Write xattrs to NTFS Alternate Data Streams: http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
                def write_xattr(path, key, value):
                    assert ":" not in key
                    assert ":" not in path
                    assert os.path.exists(path)

                    with open(path + ":" + key, "w") as stream:
                        stream.write(value)

        # Write the metadata to the file's xattrs
        self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs...')

        filename = info['filepath']

        try:
            xattr_mapping = {
                'user.xdg.referrer.url': 'webpage_url',
                # 'user.xdg.comment': 'description',
                'user.dublincore.title': 'title',
                'user.dublincore.date': 'upload_date',
                'user.dublincore.description': 'description',
                'user.dublincore.contributor': 'uploader',
                'user.dublincore.format': 'format',
            }

            for xattrname, infoname in xattr_mapping.items():

                value = info.get(infoname)

                # Missing or empty fields are simply not written.
                if value:
                    if infoname == "upload_date":
                        value = hyphenate_date(value)

                    write_xattr(filename, xattrname, value)

            return True, info

        except OSError:
            self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
            # The first element tells the downloader whether the original
            # file should be kept (see PostProcessor.run); failing to write
            # xattrs must not request removal of the download.
            return True, info
631