yt_dlp/postprocessor/common.py

   1 import functools
   2 import itertools
   3 import json
   4 import os
   5 import time
   6 import urllib.error
   7
   8 from ..utils import (
   9     PostProcessingError,
  10     _configuration_args,
  11     encodeFilename,
  12     network_exceptions,
  13     sanitized_Request,
  14     write_string,
  15 )
  16
  17
  18 class PostProcessorMetaClass(type):
  19     @staticmethod
  20     def run_wrapper(func):
  21         @functools.wraps(func)
  22         def run(self, info, *args, **kwargs):
  23             info_copy = self._copy_infodict(info)
  24             self._hook_progress({'status': 'started'}, info_copy)
  25             ret = func(self, info, *args, **kwargs)
  26             if ret is not None:
  27                 _, info = ret
  28             self._hook_progress({'status': 'finished'}, info_copy)
  29             return ret
  30         return run
  31
  32     def __new__(cls, name, bases, attrs):
  33         if 'run' in attrs:
  34             attrs['run'] = cls.run_wrapper(attrs['run'])
  35         return type.__new__(cls, name, bases, attrs)
  36
  37
  38 class PostProcessor(metaclass=PostProcessorMetaClass):
  39     """Post Processor class.
  40
  41     PostProcessor objects can be added to downloaders with their
  42     add_post_processor() method. When the downloader has finished a
  43     successful download, it will take its internal chain of PostProcessors
  44     and start calling the run() method on each one of them, first with
  45     an initial argument and then with the returned value of the previous
  46     PostProcessor.
  47
  48     The chain will be stopped if one of them ever returns None or the end
  49     of the chain is reached.
  50
  51     PostProcessor objects follow a "mutual registration" process similar
  52     to InfoExtractor objects.
  53
  54     Optionally PostProcessor can use a list of additional command-line arguments
  55     with self._configuration_args.
  56     """
  57
  58     _downloader = None
  59
  60     def __init__(self, downloader=None):
  61         self._progress_hooks = []
  62         self.add_progress_hook(self.report_progress)
  63         self.set_downloader(downloader)
  64         self.PP_NAME = self.pp_key()
  65
  66     @classmethod
  67     def pp_key(cls):
  68         name = cls.__name__[:-2]
  69         return name[6:] if name[:6].lower() == 'ffmpeg' else name
  70
  71     def to_screen(self, text, prefix=True, *args, **kwargs):
  72         if self._downloader:
  73             tag = '[%s] ' % self.PP_NAME if prefix else ''
  74             return self._downloader.to_screen(f'{tag}{text}', *args, **kwargs)
  75
  76     def report_warning(self, text, *args, **kwargs):
  77         if self._downloader:
  78             return self._downloader.report_warning(text, *args, **kwargs)
  79
  80     def deprecation_warning(self, text):
  81         if self._downloader:
  82             return self._downloader.deprecation_warning(text)
  83         write_string(f'DeprecationWarning: {text}')
  84
  85     def report_error(self, text, *args, **kwargs):
  86         self.deprecation_warning('"yt_dlp.postprocessor.PostProcessor.report_error" is deprecated. '
  87                                  'raise "yt_dlp.utils.PostProcessingError" instead')
  88         if self._downloader:
  89             return self._downloader.report_error(text, *args, **kwargs)
  90
  91     def write_debug(self, text, *args, **kwargs):
  92         if self._downloader:
  93             return self._downloader.write_debug(text, *args, **kwargs)
  94
  95     def _delete_downloaded_files(self, *files_to_delete, **kwargs):
  96         if not self._downloader:
  97             for filename in set(filter(None, files_to_delete)):
  98                 os.remove(filename)
  99         return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs)
 100
 101     def get_param(self, name, default=None, *args, **kwargs):
 102         if self._downloader:
 103             return self._downloader.params.get(name, default, *args, **kwargs)
 104         return default
 105
 106     def set_downloader(self, downloader):
 107         """Sets the downloader for this PP."""
 108         self._downloader = downloader
 109         for ph in getattr(downloader, '_postprocessor_hooks', []):
 110             self.add_progress_hook(ph)
 111
 112     def _copy_infodict(self, info_dict):
 113         return getattr(self._downloader, '_copy_infodict', dict)(info_dict)
 114
 115     @staticmethod
 116     def _restrict_to(*, video=True, audio=True, images=True, simulated=True):
 117         allowed = {'video': video, 'audio': audio, 'images': images}
 118
 119         def decorator(func):
 120             @functools.wraps(func)
 121             def wrapper(self, info):
 122                 if not simulated and (self.get_param('simulate') or self.get_param('skip_download')):
 123                     return [], info
 124                 format_type = (
 125                     'video' if info.get('vcodec') != 'none'
 126                     else 'audio' if info.get('acodec') != 'none'
 127                     else 'images')
 128                 if allowed[format_type]:
 129                     return func(self, info)
 130                 else:
 131                     self.to_screen('Skipping %s' % format_type)
 132                     return [], info
 133             return wrapper
 134         return decorator
 135
 136     def run(self, information):
 137         """Run the PostProcessor.
 138
 139         The "information" argument is a dictionary like the ones
 140         composed by InfoExtractors. The only difference is that this
 141         one has an extra field called "filepath" that points to the
 142         downloaded file.
 143
 144         This method returns a tuple, the first element is a list of the files
 145         that can be deleted, and the second of which is the updated
 146         information.
 147
 148         In addition, this method may raise a PostProcessingError
 149         exception if post processing fails.
 150         """
 151         return [], information  # by default, keep file and do nothing
 152
 153     def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
 154         try:
 155             os.utime(encodeFilename(path), (atime, mtime))
 156         except Exception:
 157             self.report_warning(errnote)
 158
 159     def _configuration_args(self, exe, *args, **kwargs):
 160         return _configuration_args(
 161             self.pp_key(), self.get_param('postprocessor_args'), exe, *args, **kwargs)
 162
 163     def _hook_progress(self, status, info_dict):
 164         if not self._progress_hooks:
 165             return
 166         status.update({
 167             'info_dict': info_dict,
 168             'postprocessor': self.pp_key(),
 169         })
 170         for ph in self._progress_hooks:
 171             ph(status)
 172
 173     def add_progress_hook(self, ph):
 174         # See YoutubeDl.py (search for postprocessor_hooks) for a description of this interface
 175         self._progress_hooks.append(ph)
 176
 177     def report_progress(self, s):
 178         s['_default_template'] = '%(postprocessor)s %(status)s' % s
 179
 180         progress_dict = s.copy()
 181         progress_dict.pop('info_dict')
 182         progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
 183
 184         progress_template = self.get_param('progress_template', {})
 185         tmpl = progress_template.get('postprocess')
 186         if tmpl:
 187             self._downloader.to_stdout(self._downloader.evaluate_outtmpl(tmpl, progress_dict))
 188
 189         self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
 190             progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',
 191             progress_dict))
 192
 193     def _download_json(self, url, *, expected_http_errors=(404,)):
 194         # While this is not an extractor, it behaves similar to one and
 195         # so obey extractor_retries and sleep_interval_requests
 196         max_retries = self.get_param('extractor_retries', 3)
 197         sleep_interval = self.get_param('sleep_interval_requests') or 0
 198
 199         self.write_debug(f'{self.PP_NAME} query: {url}')
 200         for retries in itertools.count():
 201             try:
 202                 rsp = self._downloader.urlopen(sanitized_Request(url))
 203                 return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
 204             except network_exceptions as e:
 205                 if isinstance(e, urllib.error.HTTPError) and e.code in expected_http_errors:
 206                     return None
 207                 if retries < max_retries:
 208                     self.report_warning(f'{e}. Retrying...')
 209                     if sleep_interval > 0:
 210                         self.to_screen(f'Sleeping {sleep_interval} seconds ...')
 211                         time.sleep(sleep_interval)
 212                     continue
 213                 raise PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}')
 214
 215
 216 class AudioConversionError(PostProcessingError):
 217     pass