8 compat_subprocess_get_DEVNULL
,
class PostProcessor(object):
    """Base class for post processors.

    A downloader keeps a chain of PostProcessor objects, registered through
    its add_post_processor() method.  Once a download has finished
    successfully, the downloader calls run() on each processor of the chain
    in turn: the first one receives an initial argument, every later one
    receives the value returned by the one before it.

    The chain stops as soon as one run() call returns None, or when the end
    of the chain is reached.

    Like InfoExtractor objects, PostProcessor objects take part in a
    "mutual registration" process.
    """

    def __init__(self, downloader=None):
        # The downloader is optional at construction time; it can also be
        # attached afterwards through set_downloader().
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach this PP to the given downloader."""
        self._downloader = downloader

    def run(self, information):
        """Run the PostProcessor.

        "information" is a dictionary like the ones composed by
        InfoExtractors, with one extra field, "filepath".

        Returns a 2-tuple:
          * whether the original file should be kept, i.e. not deleted
            (None means "no preference");
          * the updated information dictionary.

        May raise PostProcessingError if post processing fails.
        """
        # Default behaviour: keep the file and do nothing.
        keep_original = None
        return keep_original, information
59 class FFmpegPostProcessorError(PostProcessingError
):
62 class AudioConversionError(PostProcessingError
):
66 class FFmpegPostProcessor(PostProcessor
):
def __init__(self, downloader=None):
    """Initialise the post processor and probe for the external tools.

    downloader -- the downloader this post processor belongs to (optional).

    The result of detect_executables() is stored in self._exes; the other
    methods of this class look programs up in it by name (for example
    'ffmpeg' and 'avconv').
    """
    # Use super() like the other FFmpeg* post processors in this file,
    # instead of naming the base class explicitly, so the initialiser
    # chain follows the MRO.
    super(FFmpegPostProcessor, self).__init__(downloader)
    self._exes = self.detect_executables()
72 def detect_executables():
75 subprocess
.Popen([exe
, '-version'], stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate()
79 programs
= ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
80 return dict((program
, executable(program
)) for program
in programs
)
82 def run_ffmpeg_multiple_files(self
, input_paths
, out_path
, opts
):
83 if not self
._exes
['ffmpeg'] and not self
._exes
['avconv']:
84 raise FFmpegPostProcessorError(u
'ffmpeg or avconv not found. Please install one.')
87 for path
in input_paths
:
88 files_cmd
.extend(['-i', encodeFilename(path
)])
89 cmd
= ([self
._exes
['avconv'] or self
._exes
['ffmpeg'], '-y'] + files_cmd
91 [encodeFilename(self
._ffmpeg
_filename
_argument
(out_path
))])
93 if self
._downloader
.params
.get('verbose', False):
94 self
._downloader
.to_screen(u
'[debug] ffmpeg command line: %s' % shell_quote(cmd
))
95 p
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
96 stdout
,stderr
= p
.communicate()
98 stderr
= stderr
.decode('utf-8', 'replace')
99 msg
= stderr
.strip().split('\n')[-1]
100 raise FFmpegPostProcessorError(msg
)
def run_ffmpeg(self, path, out_path, opts):
    """Single-input convenience wrapper for run_ffmpeg_multiple_files().

    path     -- the one input file; it is wrapped in a one-element list
    out_path -- output file, passed through unchanged
    opts     -- extra command line options, passed through unchanged
    """
    input_paths = [path]
    self.run_ffmpeg_multiple_files(input_paths, out_path, opts)
105 def _ffmpeg_filename_argument(self
, fn
):
106 # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
107 if fn
.startswith(u
'-'):
112 class FFmpegExtractAudioPP(FFmpegPostProcessor
):
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
    """Configure the audio extraction.

    downloader       -- the downloader this post processor belongs to
    preferredcodec   -- wanted audio codec; None is treated as 'best'
    preferredquality -- None to add no quality options; otherwise a
                        string holding a number (run() applies int() to
                        it and may append 'k' to it)
    nopostoverwrites -- stored as-is; run() consults it together with the
                        existence of the output file
    """
    # Use super() like the other FFmpeg* post processors in this file,
    # instead of naming the base class explicitly, so the initialiser
    # chain follows the MRO.
    super(FFmpegExtractAudioPP, self).__init__(downloader)
    self._preferredcodec = 'best' if preferredcodec is None else preferredcodec
    self._preferredquality = preferredquality
    self._nopostoverwrites = nopostoverwrites
121 def get_audio_codec(self
, path
):
122 if not self
._exes
['ffprobe'] and not self
._exes
['avprobe']:
123 raise PostProcessingError(u
'ffprobe or avprobe not found. Please install one.')
125 cmd
= [self
._exes
['avprobe'] or self
._exes
['ffprobe'], '-show_streams', encodeFilename(self
._ffmpeg
_filename
_argument
(path
))]
126 handle
= subprocess
.Popen(cmd
, stderr
=compat_subprocess_get_DEVNULL(), stdout
=subprocess
.PIPE
)
127 output
= handle
.communicate()[0]
128 if handle
.wait() != 0:
130 except (IOError, OSError):
133 for line
in output
.decode('ascii', 'ignore').split('\n'):
134 if line
.startswith('codec_name='):
135 audio_codec
= line
.split('=')[1].strip()
136 elif line
.strip() == 'codec_type=audio' and audio_codec
is not None:
140 def run_ffmpeg(self
, path
, out_path
, codec
, more_opts
):
141 if not self
._exes
['ffmpeg'] and not self
._exes
['avconv']:
142 raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
146 acodec_opts
= ['-acodec', codec
]
147 opts
= ['-vn'] + acodec_opts
+ more_opts
149 FFmpegPostProcessor
.run_ffmpeg(self
, path
, out_path
, opts
)
150 except FFmpegPostProcessorError
as err
:
151 raise AudioConversionError(err
.msg
)
153 def run(self
, information
):
154 path
= information
['filepath']
156 filecodec
= self
.get_audio_codec(path
)
157 if filecodec
is None:
158 raise PostProcessingError(u
'WARNING: unable to obtain file audio codec with ffprobe')
161 if self
._preferredcodec
== 'best' or self
._preferredcodec
== filecodec
or (self
._preferredcodec
== 'm4a' and filecodec
== 'aac'):
162 if filecodec
== 'aac' and self
._preferredcodec
in ['m4a', 'best']:
163 # Lossless, but in another container
166 more_opts
= [self
._exes
['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
167 elif filecodec
in ['aac', 'mp3', 'vorbis', 'opus']:
168 # Lossless if possible
170 extension
= filecodec
171 if filecodec
== 'aac':
172 more_opts
= ['-f', 'adts']
173 if filecodec
== 'vorbis':
177 acodec
= 'libmp3lame'
180 if self
._preferredquality
is not None:
181 if int(self
._preferredquality
) < 10:
182 more_opts
+= [self
._exes
['avconv'] and '-q:a' or '-aq', self
._preferredquality
]
184 more_opts
+= [self
._exes
['avconv'] and '-b:a' or '-ab', self
._preferredquality
+ 'k']
186 # We convert the audio (lossy)
187 acodec
= {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}
[self
._preferredcodec
]
188 extension
= self
._preferredcodec
190 if self
._preferredquality
is not None:
191 # The opus codec doesn't support the -aq option
192 if int(self
._preferredquality
) < 10 and extension
!= 'opus':
193 more_opts
+= [self
._exes
['avconv'] and '-q:a' or '-aq', self
._preferredquality
]
195 more_opts
+= [self
._exes
['avconv'] and '-b:a' or '-ab', self
._preferredquality
+ 'k']
196 if self
._preferredcodec
== 'aac':
197 more_opts
+= ['-f', 'adts']
198 if self
._preferredcodec
== 'm4a':
199 more_opts
+= [self
._exes
['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
200 if self
._preferredcodec
== 'vorbis':
202 if self
._preferredcodec
== 'wav':
204 more_opts
+= ['-f', 'wav']
206 prefix
, sep
, ext
= path
.rpartition(u
'.') # not os.path.splitext, since the latter does not work on unicode in all setups
207 new_path
= prefix
+ sep
+ extension
209 # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
211 self
._nopostoverwrites
= True
214 if self
._nopostoverwrites
and os
.path
.exists(encodeFilename(new_path
)):
215 self
._downloader
.to_screen(u
'[youtube] Post-process file %s exists, skipping' % new_path
)
217 self
._downloader
.to_screen(u
'[' + (self
._exes
['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path
)
218 self
.run_ffmpeg(path
, new_path
, acodec
, more_opts
)
220 etype
,e
,tb
= sys
.exc_info()
221 if isinstance(e
, AudioConversionError
):
222 msg
= u
'audio conversion failed: ' + e
.msg
224 msg
= u
'error running ' + (self
._exes
['avconv'] and 'avconv' or 'ffmpeg')
225 raise PostProcessingError(msg
)
227 # Try to update the date time for extracted audio file.
228 if information
.get('filetime') is not None:
230 os
.utime(encodeFilename(new_path
), (time
.time(), information
['filetime']))
232 self
._downloader
.report_warning(u
'Cannot update utime of audio file')
234 information
['filepath'] = new_path
235 return self
._nopostoverwrites
,information
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Post processor that converts the downloaded video to another format."""

    def __init__(self, downloader=None, preferedformat=None):
        """Remember the target format.

        downloader     -- the downloader this post processor belongs to
        preferedformat -- target format; it is used both as the new file
                          extension and as the new 'format'/'ext' values
                          of the information dictionary
        """
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    @staticmethod
    def _replace_extension(path, new_ext):
        """Return path with its extension replaced by new_ext.

        If the file name has no extension, new_ext is appended instead of
        replacing everything in front of it.
        """
        # rpartition rather than os.path.splitext, which this file avoids
        # because it does not work on unicode in all setups.
        prefix, dot, last = path.rpartition(u'.')
        separators = [s for s in (os.sep, os.altsep) if s]
        # No dot at all, or the last dot belongs to a directory name:
        # the file itself has no extension to strip.
        if not dot or any(s in last for s in separators):
            return path + u'.' + new_ext
        return prefix + dot + new_ext

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        Returns (True, information) without doing any work when
        information['ext'] already equals the preferred format.  Otherwise
        runs the conversion, updates the 'filepath', 'format' and 'ext'
        entries of information and returns (False, information).
        """
        path = information['filepath']
        target = self._preferedformat

        if information['ext'] == target:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, target))
            return True, information

        outpath = self._replace_extension(path, target)
        self._downloader.to_screen(u'[ffmpeg] Converting video from %s to %s, Destination: %s' % (information['ext'], target, outpath))
        self.run_ffmpeg(path, outpath, [])

        information['filepath'] = outpath
        information['format'] = target
        information['ext'] = target
        return False, information
258 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor
):
259 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
def __init__(self, downloader=None, subtitlesformat='srt'):
    """Remember which subtitle format to embed.

    downloader      -- the downloader this post processor belongs to
    subtitlesformat -- subtitle format, 'srt' by default; run() passes it
                       to subtitles_filename() to locate the subtitle files
    """
    super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
    self._subformat = subtitlesformat
452 def _conver_lang_code(cls
, code
):
453 """Convert language code from ISO 639-1 to ISO 639-2/T"""
454 return cls
._lang
_map
.get(code
[:2])
def run(self, information):
    """Embed the subtitle files into the video file.

    Nothing is done (apart from printing a notice) when the file is not
    an mp4 or when information has no subtitles.  Otherwise the video and
    one subtitle file per language are muxed into '<filepath>.temp',
    which then replaces the original file.

    Always returns (True, information).
    """
    if information['ext'] != u'mp4':
        self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
        return True, information
    if not information.get('subtitles'):
        self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
        return True, information

    languages = list(information['subtitles'])
    video_path = information['filepath']
    subtitle_paths = [
        subtitles_filename(video_path, lang, self._subformat)
        for lang in languages
    ]
    input_files = [video_path] + subtitle_paths

    # Streams 0 and 1 of the first input are copied as they are.
    opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
    for index, lang in enumerate(languages):
        # Input 0 is the video, so subtitle number `index` is input index + 1.
        opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
        iso_code = self._conver_lang_code(lang)
        if iso_code is not None:
            opts += ['-metadata:s:s:%d' % index, 'language=%s' % iso_code]
    opts += ['-f', 'mp4']

    temp_path = video_path + u'.temp'
    self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % video_path)
    self.run_ffmpeg_multiple_files(input_files, temp_path, opts)

    # Replace the original file with the muxed one.
    os.remove(encodeFilename(video_path))
    os.rename(encodeFilename(temp_path), encodeFilename(video_path))

    return True, information
485 class FFmpegMetadataPP(FFmpegPostProcessor
):
488 if info
.get('title') is not None:
489 metadata
['title'] = info
['title']
490 if info
.get('upload_date') is not None:
491 metadata
['date'] = info
['upload_date']
492 if info
.get('uploader') is not None:
493 metadata
['artist'] = info
['uploader']
494 elif info
.get('uploader_id') is not None:
495 metadata
['artist'] = info
['uploader_id']
498 self
._downloader
.to_screen(u
'[ffmpeg] There isn\'t any metadata to add')
501 filename
= info
['filepath']
502 ext
= os
.path
.splitext(filename
)[1][1:]
503 temp_filename
= filename
+ u
'.temp'
505 options
= ['-c', 'copy']
506 for (name
, value
) in metadata
.items():
507 options
.extend(['-metadata', '%s=%s' % (name
, value
)])
508 options
.extend(['-f', ext
])
510 self
._downloader
.to_screen(u
'[ffmpeg] Adding metadata to \'%s\'' % filename
)
511 self
.run_ffmpeg(filename
, temp_filename
, options
)
512 os
.remove(encodeFilename(filename
))
513 os
.rename(encodeFilename(temp_filename
), encodeFilename(filename
))
517 class XAttrMetadataPP(PostProcessor
):
520 # More info about extended attributes for media:
521 # http://freedesktop.org/wiki/CommonExtendedAttributes/
522 # http://www.freedesktop.org/wiki/PhreedomDraft/
523 # http://dublincore.org/documents/usageguide/elements.shtml
526 # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
527 # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
531 """ Set extended attributes on downloaded file (if xattr support is found). """
533 from .utils
import hyphenate_date
535 # This mess below finds the best xattr tool for the job and creates a
536 # "write_xattr" function.
538 # try the pyxattr module...
540 def write_xattr(path
, key
, value
):
541 return xattr
.setxattr(path
, key
, value
)
545 if os
.name
== 'posix':
547 for dir in os
.environ
["PATH"].split(":"):
548 path
= os
.path
.join(dir, bin
)
549 if os
.path
.exists(path
):
552 user_has_setfattr
= which("setfattr")
553 user_has_xattr
= which("xattr")
555 if user_has_setfattr
or user_has_xattr
:
557 def write_xattr(path
, key
, value
):
560 # setfattr: /tmp/blah: Operation not supported
561 "Operation not supported": errno
.EOPNOTSUPP
,
562 # setfattr: ~/blah: No such file or directory
563 # xattr: No such file: ~/blah
564 "No such file": errno
.ENOENT
,
567 if user_has_setfattr
:
568 cmd
= ['setfattr', '-n', key
, '-v', value
, path
]
570 cmd
= ['xattr', '-w', key
, value
, path
]
573 output
= subprocess
.check_output(cmd
, stderr
=subprocess
.STDOUT
)
574 except subprocess
.CalledProcessError
as e
:
575 errorstr
= e
.output
.strip().decode()
576 for potential_errorstr
, potential_errno
in potential_errors
.items():
577 if errorstr
.find(potential_errorstr
) > -1:
578 e
= OSError(potential_errno
, potential_errorstr
)
581 raise # Reraise unhandled error
584 # On Unix, and can't find pyxattr, setfattr, or xattr.
585 if sys
.platform
.startswith('linux'):
586 self
._downloader
.report_error("Couldn't find a tool to set the xattrs. Install either the python 'pyxattr' or 'xattr' modules, or the GNU 'attr' package (which contains the 'setfattr' tool).")
587 elif sys
.platform
== 'darwin':
588 self
._downloader
.report_error("Couldn't find a tool to set the xattrs. Install either the python 'xattr' module, or the 'xattr' binary.")
590 # Write xattrs to NTFS Alternate Data Streams: http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
591 def write_xattr(path
, key
, value
):
592 assert(key
.find(":") < 0)
593 assert(path
.find(":") < 0)
594 assert(os
.path
.exists(path
))
596 f
= open(path
+":"+key
, "w")
600 # Write the metadata to the file's xattrs
601 self
._downloader
.to_screen('[metadata] Writing metadata to file\'s xattrs...')
603 filename
= info
['filepath']
607 'user.xdg.referrer.url': 'webpage_url',
608 # 'user.xdg.comment': 'description',
609 'user.dublincore.title': 'title',
610 'user.dublincore.date': 'upload_date',
611 'user.dublincore.description': 'description',
612 'user.dublincore.contributor': 'uploader',
613 'user.dublincore.format': 'format',
616 for xattrname
, infoname
in xattr_mapping
.items():
618 value
= info
.get(infoname
)
621 if infoname
== "upload_date":
622 value
= hyphenate_date(value
)
624 write_xattr(filename
, xattrname
, value
)
629 self
._downloader
.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")