[yt-dlp.git] / youtube_dl / PostProcessor.py

import os
import subprocess
import sys
import time

from .utils import *


class PostProcessor(object):
    """Base class for steps that run after a successful download.

    Instances are attached to a downloader through the downloader's
    add_post_processor() method. Once a download has succeeded, the
    downloader walks its internal chain of PostProcessors and invokes
    run() on each in turn: the first one receives an initial argument,
    every later one receives what its predecessor returned.

    The chain will be stopped if one of them ever returns None or the end
    of the chain is reached.
    NOTE(review): run() below is documented as returning a tuple, so the
    "returns None" stop rule may be outdated - confirm against the
    downloader.

    PostProcessor objects follow a "mutual registration" process similar
    to InfoExtractor objects.
    """

    # Class-level fallback so the attribute exists even on instances
    # created without going through __init__.
    _downloader = None

    def __init__(self, downloader=None):
        """Create the post processor, optionally bound to a downloader."""
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Bind (or re-bind) this post processor to the given downloader."""
        self._downloader = downloader

    def run(self, information):
        """Execute this post-processing step.

        "information" is a dictionary like the ones composed by
        InfoExtractors, with one extra field, "filepath", that points to
        the downloaded file.

        Returns a 2-tuple:
          * whether the original file should be kept (i.e. not deleted);
            None means no preference,
          * the (possibly updated) information dictionary.

        May raise a PostProcessingError exception if post processing
        fails.

        The base implementation does nothing and passes the dictionary
        through unchanged.
        """
        keep_original = None  # no preference about keeping the file
        return keep_original, information

class FFmpegPostProcessorError(PostProcessingError):
    """Raised when ffmpeg/avconv cannot be found or exits with a non-zero status."""

class AudioConversionError(PostProcessingError):
    """Raised when extracting or converting the audio track fails."""

class FFmpegPostProcessor(PostProcessor):
    """Shared base for post processors that invoke ffmpeg or avconv.

    On construction the helper programs are probed once and the outcome
    is stored in self._exes, a dict mapping each program name to either
    that name (the program could be started) or False (it could not).
    avconv is preferred over ffmpeg when both are present.
    """

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        self._exes = self.detect_executables()

    @staticmethod
    def detect_executables():
        """Probe for avprobe, avconv, ffmpeg and ffprobe.

        A program counts as available when "<program> -version" can be
        spawned at all; its exit status and output are not inspected.

        Returns a dict of program name -> program name, or False when
        spawning raised OSError.
        """
        def probe(name):
            try:
                proc = subprocess.Popen([name, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                proc.communicate()
            except OSError:
                return False
            return name

        availability = {}
        for name in ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']:
            availability[name] = probe(name)
        return availability

    def run_ffmpeg(self, path, out_path, opts):
        """Convert "path" into "out_path" using avconv (preferred) or ffmpeg.

        "opts" is a list of extra command line arguments placed between
        the input and the output file. An existing output file is
        overwritten ("-y").

        Raises FFmpegPostProcessorError when neither program is available
        or when the program exits with a non-zero status; in the latter
        case the message is the last line the program wrote to stderr.
        """
        ffmpeg_exe = self._exes['ffmpeg']
        avconv_exe = self._exes['avconv']
        program = avconv_exe or ffmpeg_exe
        if not program:
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

        source_arg = encodeFilename(path)
        target_arg = encodeFilename(self._ffmpeg_filename_argument(out_path))
        cmd = [program, '-y', '-i', source_arg] + opts + [target_arg]

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, raw_stderr = proc.communicate()
        if proc.returncode == 0:
            return

        # Only the final stderr line is reported to the caller
        error_text = raw_stderr.decode('utf-8', 'replace')
        last_line = error_text.strip().split('\n')[-1]
        raise FFmpegPostProcessorError(last_line)

    def _ffmpeg_filename_argument(self, fn):
        """Return "fn" in a form that is not mistaken for an option."""
        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
        return (u'./' + fn) if fn.startswith(u'-') else fn

class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Post processor that extracts the audio track of a downloaded file.

    Constructor arguments:
    downloader        -- downloader used for screen/stderr output
    preferredcodec    -- target codec: 'best' (default when None), 'mp3',
                         'aac', 'm4a', 'opus', 'vorbis' or 'wav'
    preferredquality  -- quality as a string; values below 10 are passed
                         as a quality level, anything else as a bitrate
                         in kbit/s. None leaves the encoder default.
    nopostoverwrites  -- when true, an already existing output file is
                         not overwritten; the value is also returned by
                         run() as the "keep original file" flag.
    """

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def get_audio_codec(self, path):
        """Return the audio codec name of "path", or None if unknown.

        Runs avprobe (preferred) or ffprobe with -show_streams and scans
        its output. None is returned when neither program is available,
        when the program cannot be run or exits with a non-zero status,
        or when no audio stream is reported.
        """
        if not self._exes['ffprobe'] and not self._exes['avprobe']:
            return None
        try:
            cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        # Remember the most recent codec_name and report it as soon as a
        # following codec_type line says the stream is audio.
        audio_codec = None
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Extract the audio of "path" into "out_path".

        "codec" is the value for -acodec (None omits the option) and
        "more_opts" is a list of further command line arguments. Video
        is always dropped ("-vn").

        Raises AudioConversionError when no converter is available or
        when the conversion itself fails.
        """
        if not self._exes['ffmpeg'] and not self._exes['avconv']:
            raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            # NOTE(review): relies on the exception exposing ".message";
            # presumably PostProcessingError defines it - confirm in utils.
            raise AudioConversionError(err.message)

    def _converter_name(self):
        """Return the name of the converter that will be used, for messages."""
        return 'avconv' if self._exes['avconv'] else 'ffmpeg'

    def _pick_flag(self, avconv_flag, ffmpeg_flag):
        """Return the option spelling matching the converter in use."""
        return avconv_flag if self._exes['avconv'] else ffmpeg_flag

    def _quality_opts(self):
        """Return the command line options for the preferred quality."""
        quality = self._preferredquality
        if quality is None:
            return []
        if int(quality) < 10:
            # Small numbers are quality levels, not bitrates
            return [self._pick_flag('-q:a', '-aq'), quality]
        return [self._pick_flag('-b:a', '-ab'), quality + 'k']

    def _select_conversion(self, filecodec):
        """Decide how to produce the output for a file with "filecodec".

        Returns a tuple (acodec, extension, more_opts): the -acodec value
        (None to omit it), the output file extension and the list of
        additional converter options.
        """
        preferred = self._preferredcodec
        more_opts = []
        if preferred == 'best' or preferred == filecodec or (preferred == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and preferred in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = [self._pick_flag('-bsf:a', '-absf'), 'aac_adtstoasc']
            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts()
        else:
            # We convert the audio (lossy)
            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[preferred]
            extension = preferred
            more_opts = self._quality_opts()
            if preferred == 'aac':
                more_opts += ['-f', 'adts']
            if preferred == 'm4a':
                more_opts += [self._pick_flag('-bsf:a', '-absf'), 'aac_adtstoasc']
            if preferred == 'vorbis':
                extension = 'ogg'
            if preferred == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']
        return acodec, extension, more_opts

    def run(self, information):
        """Extract the audio of information['filepath'].

        On success information['filepath'] is updated to the audio file
        and, when information['filetime'] is set, the audio file's
        modification time is set to it (best effort).

        Returns a tuple (keep original file, information).

        Raises PostProcessingError when the source codec cannot be
        determined or the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')

        acodec, extension, more_opts = self._select_conversion(filecodec)

        prefix, sep, _ = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        # This is decided per call in a local variable: writing it back to
        # self._nopostoverwrites would silently switch every later file
        # handled by this instance to no-overwrite/keep-original mode.
        no_overwrite = True if new_path == path else self._nopostoverwrites

        converter = self._converter_name()
        try:
            if no_overwrite and os.path.exists(encodeFilename(new_path)):
                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
            else:
                self._downloader.to_screen(u'[' + converter + '] Destination: ' + new_path)
                self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as err:
            # NOTE(review): relies on the exception exposing ".message";
            # presumably PostProcessingError defines it - confirm in utils.
            raise PostProcessingError(u'audio conversion failed: ' + err.message)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and
            # SystemExit must not be reported as a converter failure.
            raise PostProcessingError(u'error running ' + converter)

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            try:
                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
            except Exception:
                self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')

        information['filepath'] = new_path
        return no_overwrite,information

class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Post processor that converts the downloaded video to another format.

    "preferedformat" is the target format; it is used both as the new
    file extension and for comparison against information['ext'].
    """

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        When the file already has the preferred extension nothing is
        converted and (True, information) is returned so the original
        file is kept. Otherwise the file is converted, the 'filepath',
        'format' and 'ext' entries are updated and (False, information)
        is returned.
        """
        source = information['filepath']
        target_format = self._preferedformat
        stem, dot, _ = source.rpartition(u'.')
        destination = stem + dot + target_format

        current_ext = information['ext']
        if current_ext == target_format:
            notice = u'[ffmpeg] Not converting video file %s - already is in target format %s' % (source, target_format)
            self._downloader.to_screen(notice)
            return True,information

        details = '] Converting video from %s to %s, Destination: ' % (current_ext, target_format)
        self._downloader.to_screen(u'[' + 'ffmpeg' + details + destination)
        self.run_ffmpeg(source, destination, [])

        information['filepath'] = destination
        for key in ('format', 'ext'):
            information[key] = target_format
        return False,information
Commit	Line	Data
d77c3dfd FV	1	import os
	2	import subprocess
	3	import sys
	4	import time
	5
9e8056d5	6	from .utils import *
d77c3dfd FV	7
	8
	9	class PostProcessor(object):
59ae15a5	10	"""Post Processor class.
d77c3dfd	11
59ae15a5 PH	12	PostProcessor objects can be added to downloaders with their
	13	add_post_processor() method. When the downloader has finished a
	14	successful download, it will take its internal chain of PostProcessors
	15	and start calling the run() method on each one of them, first with
	16	an initial argument and then with the returned value of the previous
	17	PostProcessor.
d77c3dfd	18
59ae15a5 PH	19	The chain will be stopped if one of them ever returns None or the end
59ae15a5 PH	20	of the chain is reached.
d77c3dfd	21
59ae15a5 PH	22	PostProcessor objects follow a "mutual registration" process similar
	23	to InfoExtractor objects.
	24	"""
d77c3dfd	25
59ae15a5	26	_downloader = None
d77c3dfd	27
59ae15a5 PH	28	def __init__(self, downloader=None):
59ae15a5 PH	29	self._downloader = downloader
d77c3dfd	30
59ae15a5 PH	31	def set_downloader(self, downloader):
	32	"""Sets the downloader for this PP."""
	33	self._downloader = downloader
d77c3dfd	34
59ae15a5 PH	35	def run(self, information):
59ae15a5 PH	36	"""Run the PostProcessor.
d77c3dfd	37
59ae15a5 PH	38	The "information" argument is a dictionary like the ones
	39	composed by InfoExtractors. The only difference is that this
	40	one has an extra field called "filepath" that points to the
	41	downloaded file.
d77c3dfd	42
7851b379 PH	43	This method returns a tuple, the first element of which describes
	44	whether the original file should be kept (i.e. not deleted - None for
	45	no preference), and the second of which is the updated information.
d77c3dfd	46
59ae15a5	47	In addition, this method may raise a PostProcessingError
7851b379	48	exception if post processing fails.
59ae15a5	49	"""
7851b379	50	return None, information # by default, keep file and do nothing
d77c3dfd	51
7851b379 PH	52	class FFmpegPostProcessorError(PostProcessingError):
7851b379 PH	53	pass
67d0c25e	54
7851b379 PH	55	class AudioConversionError(PostProcessingError):
7851b379 PH	56	pass
d77c3dfd	57
67d0c25e JMF	58	class FFmpegPostProcessor(PostProcessor):
67d0c25e JMF	59	def __init__(self,downloader=None):
59ae15a5	60	PostProcessor.__init__(self, downloader)
59ae15a5 PH	61	self._exes = self.detect_executables()
	62
	63	@staticmethod
	64	def detect_executables():
	65	def executable(exe):
	66	try:
	67	subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
	68	except OSError:
	69	return False
	70	return exe
	71	programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
	72	return dict((program, executable(program)) for program in programs)
	73
67d0c25e JMF	74	def run_ffmpeg(self, path, out_path, opts):
67d0c25e JMF	75	if not self._exes['ffmpeg'] and not self._exes['avconv']:
7851b379	76	raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
67d0c25e JMF	77	cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
	78	+ opts +
	79	[encodeFilename(self._ffmpeg_filename_argument(out_path))])
	80	p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	81	stdout,stderr = p.communicate()
	82	if p.returncode != 0:
fb2f8336	83	stderr = stderr.decode('utf-8', 'replace')
67d0c25e	84	msg = stderr.strip().split('\n')[-1]
fb2f8336	85	raise FFmpegPostProcessorError(msg)
67d0c25e JMF	86
	87	def _ffmpeg_filename_argument(self, fn):
	88	# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
	89	if fn.startswith(u'-'):
	90	return u'./' + fn
	91	return fn
	92
	93	class FFmpegExtractAudioPP(FFmpegPostProcessor):
7851b379	94	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
67d0c25e JMF	95	FFmpegPostProcessor.__init__(self, downloader)
	96	if preferredcodec is None:
	97	preferredcodec = 'best'
	98	self._preferredcodec = preferredcodec
	99	self._preferredquality = preferredquality
67d0c25e JMF	100	self._nopostoverwrites = nopostoverwrites
67d0c25e JMF	101
59ae15a5 PH	102	def get_audio_codec(self, path):
	103	if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
	104	try:
712e86b9	105	cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
5910e210	106	handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
59ae15a5 PH	107	output = handle.communicate()[0]
	108	if handle.wait() != 0:
	109	return None
	110	except (IOError, OSError):
	111	return None
	112	audio_codec = None
5910e210	113	for line in output.decode('ascii', 'ignore').split('\n'):
59ae15a5 PH	114	if line.startswith('codec_name='):
	115	audio_codec = line.split('=')[1].strip()
	116	elif line.strip() == 'codec_type=audio' and audio_codec is not None:
	117	return audio_codec
	118	return None
	119
	120	def run_ffmpeg(self, path, out_path, codec, more_opts):
	121	if not self._exes['ffmpeg'] and not self._exes['avconv']:
0c007432	122	raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
59ae15a5 PH	123	if codec is None:
	124	acodec_opts = []
	125	else:
	126	acodec_opts = ['-acodec', codec]
67d0c25e JMF	127	opts = ['-vn'] + acodec_opts + more_opts
	128	try:
	129	FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
	130	except FFmpegPostProcessorError as err:
	131	raise AudioConversionError(err.message)
59ae15a5 PH	132
	133	def run(self, information):
	134	path = information['filepath']
	135
	136	filecodec = self.get_audio_codec(path)
	137	if filecodec is None:
7851b379	138	raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
59ae15a5 PH	139
	140	more_opts = []
	141	if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
0e336841	142	if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
59ae15a5 PH	143	# Lossless, but in another container
59ae15a5 PH	144	acodec = 'copy'
0e336841	145	extension = 'm4a'
59ae15a5	146	more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
510e6f6d	147	elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
59ae15a5 PH	148	# Lossless if possible
	149	acodec = 'copy'
	150	extension = filecodec
	151	if filecodec == 'aac':
	152	more_opts = ['-f', 'adts']
	153	if filecodec == 'vorbis':
	154	extension = 'ogg'
	155	else:
	156	# MP3 otherwise.
	157	acodec = 'libmp3lame'
	158	extension = 'mp3'
	159	more_opts = []
	160	if self._preferredquality is not None:
	161	if int(self._preferredquality) < 10:
	162	more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
	163	else:
	164	more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
	165	else:
	166	# We convert the audio (lossy)
510e6f6d	167	acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
59ae15a5 PH	168	extension = self._preferredcodec
	169	more_opts = []
	170	if self._preferredquality is not None:
	171	if int(self._preferredquality) < 10:
	172	more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
	173	else:
	174	more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
	175	if self._preferredcodec == 'aac':
	176	more_opts += ['-f', 'adts']
	177	if self._preferredcodec == 'm4a':
	178	more_opts += [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
	179	if self._preferredcodec == 'vorbis':
	180	extension = 'ogg'
	181	if self._preferredcodec == 'wav':
	182	extension = 'wav'
	183	more_opts += ['-f', 'wav']
	184
	185	prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
	186	new_path = prefix + sep + extension
e74c504f JF	187
	188	# If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
	189	if new_path == path:
	190	self._nopostoverwrites = True
	191
59ae15a5	192	try:
b7298b6e BPG	193	if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
	194	self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
	195	else:
	196	self._downloader.to_screen(u'[' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
	197	self.run_ffmpeg(path, new_path, acodec, more_opts)
59ae15a5 PH	198	except:
	199	etype,e,tb = sys.exc_info()
	200	if isinstance(e, AudioConversionError):
7851b379	201	msg = u'audio conversion failed: ' + e.message
59ae15a5	202	else:
7851b379 PH	203	msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
7851b379 PH	204	raise PostProcessingError(msg)
59ae15a5 PH	205
	206	# Try to update the date time for extracted audio file.
	207	if information.get('filetime') is not None:
	208	try:
	209	os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
	210	except:
	211	self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
	212
59ae15a5	213	information['filepath'] = new_path
e74c504f	214	return self._nopostoverwrites,information
712e86b9	215
67d0c25e JMF	216	class FFmpegVideoConvertor(FFmpegPostProcessor):
67d0c25e JMF	217	def __init__(self, downloader=None,preferedformat=None):
7851b379	218	super(FFmpegVideoConvertor, self).__init__(downloader)
67d0c25e	219	self._preferedformat=preferedformat
712e86b9	220
67d0c25e JMF	221	def run(self, information):
	222	path = information['filepath']
	223	prefix, sep, ext = path.rpartition(u'.')
	224	outpath = prefix + sep + self._preferedformat
7851b379 PH	225	if information['ext'] == self._preferedformat:
	226	self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
	227	return True,information
	228	self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
67d0c25e JMF	229	self.run_ffmpeg(path, outpath, [])
	230	information['filepath'] = outpath
	231	information['format'] = self._preferedformat
7851b379 PH	232	information['ext'] = self._preferedformat
7851b379 PH	233	return False,information