Improved progress reporting (See desc) (#1125)

[yt-dlp.git] / yt_dlp / YoutubeDL.py
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 2e150cd97983737e0d878d16b78b3281bf7c5297..1d865161af31685c00a957a6a4bb880c0ad7d518 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -42,6 +42,7 @@
      compat_urllib_error,
      compat_urllib_request,
      compat_urllib_request_DataHandler,
+    windows_enable_vt_mode,
  )
  from .cookies import load_cookies
  from .utils import (
@@ -67,8 +68,6 @@
      float_or_none,
      format_bytes,
      format_field,
-    STR_FORMAT_RE_TMPL,
-    STR_FORMAT_TYPES,
      formatSeconds,
      GeoRestrictedError,
      HEADRequest,
@@ -101,9 +100,13 @@
      sanitize_url,
      sanitized_Request,
      std_headers,
+    STR_FORMAT_RE_TMPL,
+    STR_FORMAT_TYPES,
      str_or_none,
      strftime_or_none,
      subtitles_filename,
+    supports_terminal_sequences,
+    TERMINAL_SEQUENCES,
      ThrottledDownload,
      to_high_limit_path,
      traverse_obj,
@@ -123,7 +126,7 @@
      gen_extractor_classes,
      get_info_extractor,
      _LAZY_LOADER,
-    _PLUGIN_CLASSES
+    _PLUGIN_CLASSES as plugin_extractors
  )
  from .extractor.openload import PhantomJSwrapper
  from .downloader import (
@@ -142,6 +145,7 @@
      FFmpegMergerPP,
      FFmpegPostProcessor,
      MoveFilesAfterDownloadPP,
+    _PLUGIN_CLASSES as plugin_postprocessors
  )
  from .update import detect_variant
  from .version import __version__
@@ -247,6 +251,7 @@ class YoutubeDL(object):
      rejecttitle:       Reject downloads for matching titles.
      logger:            Log messages to a logging.Logger instance.
      logtostderr:       Log messages to stderr instead of stdout.
+    consoletitle:      Display progress in console window's titlebar.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video description to a .info.json file
      clean_infojson:    Remove private fields from the infojson
@@ -352,6 +357,15 @@ class YoutubeDL(object):
  
                         Progress hooks are guaranteed to be called at least once
                         (with status "finished") if the download is successful.
+    postprocessor_hooks:  A list of functions that get called on postprocessing
+                       progress, with a dictionary with the entries
+                       * status: One of "started", "processing", or "finished".
+                                 Check this first and ignore unknown values.
+                       * postprocessor: Name of the postprocessor
+                       * info_dict: The extracted info_dict
+
+                       Postprocessor hooks are guaranteed to be called at least twice
+                       (with status "started" and "finished") if the processing is successful.
      merge_output_format: Extension to use when merging formats.
      final_ext:         Expected final extension; used to detect when the file was
                         already downloaded and converted. "merge_output_format" is
@@ -411,11 +425,15 @@ class YoutubeDL(object):
                         filename, abort-on-error, multistreams, no-live-chat,
                         no-clean-infojson, no-playlist-metafiles, no-keep-subs.
                         Refer __init__.py for their implementation
+    progress_template: Dictionary of templates for progress outputs.
+                       Allowed keys are 'download', 'postprocess',
+                       'download-title' (console title) and 'postprocess-title'.
+                       The template is mapped onto a dictionary with keys 'progress' and 'info'
  
      The following parameters are not used by YoutubeDL itself, they are used by
      the downloader (see yt_dlp/downloader/common.py):
      nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
-    max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
+    max_filesize, test, noresizebuffer, retries, continuedl, noprogress,
      xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
  
      The following options are used by the post processors:
@@ -453,13 +471,12 @@ class YoutubeDL(object):
  
      _NUMERIC_FIELDS = set((
          'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
-        'timestamp', 'upload_year', 'upload_month', 'upload_day',
+        'timestamp', 'release_timestamp',
          'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
          'average_rating', 'comment_count', 'age_limit',
          'start_time', 'end_time',
          'chapter_number', 'season_number', 'episode_number',
          'track_number', 'disc_number', 'release_year',
-        'playlist_index',
      ))
  
      params = None
@@ -484,26 +501,27 @@ def __init__(self, params=None, auto_init=True):
          self._first_webpage_request = True
          self._post_hooks = []
          self._progress_hooks = []
+        self._postprocessor_hooks = []
          self._download_retcode = 0
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
          self._err_file = sys.stderr
-        self.params = {
-            # Default parameters
-            'nocheckcertificate': False,
-        }
-        self.params.update(params)
+        self.params = params
          self.cache = Cache(self)
  
+        windows_enable_vt_mode()
+        self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)
+
          if sys.version_info < (3, 6):
              self.report_warning(
                  'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
  
          if self.params.get('allow_unplayable_formats'):
              self.report_warning(
-                'You have asked for unplayable formats to be listed/downloaded. '
-                'This is a developer option intended for debugging. '
-                'If you experience any issues while using this option, DO NOT open a bug report')
+                f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
+                'This is a developer option intended for debugging. \n'
+                '         If you experience any issues while using this option, '
+                f'{self._color_text("DO NOT", "red")} open a bug report')
  
          def check_deprecated(param, option, suggestion):
              if self.params.get(param) is not None:
@@ -578,8 +596,8 @@ def check_deprecated(param, option, suggestion):
  
          self._setup_opener()
  
-        """Preload the archive, if any is specified"""
          def preload_download_archive(fn):
+            """Preload the archive, if any is specified"""
              if fn is None:
                  return False
              self.write_debug('Loading archive file %r\n' % fn)
@@ -675,9 +693,13 @@ def add_post_hook(self, ph):
          self._post_hooks.append(ph)
  
      def add_progress_hook(self, ph):
-        """Add the progress hook (currently only for the file downloader)"""
+        """Add the download progress hook"""
          self._progress_hooks.append(ph)
  
+    def add_postprocessor_hook(self, ph):
+        """Add the postprocessing progress hook"""
+        self._postprocessor_hooks.append(ph)
+
      def _bidi_workaround(self, message):
          if not hasattr(self, '_output_channel'):
              return message
@@ -790,6 +812,11 @@ def to_screen(self, message, skip_eol=False):
          self.to_stdout(
              message, skip_eol, quiet=self.params.get('quiet', False))
  
+    def _color_text(self, text, color):
+        if self.params.get('no_color'):
+            return text
+        return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
+
      def report_warning(self, message, only_once=False):
          '''
          Print the message to stderr, it will be prefixed with 'WARNING:'
@@ -800,24 +827,14 @@ def report_warning(self, message, only_once=False):
          else:
              if self.params.get('no_warnings'):
                  return
-            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
-                _msg_header = '\033[0;33mWARNING:\033[0m'
-            else:
-                _msg_header = 'WARNING:'
-            warning_message = '%s %s' % (_msg_header, message)
-            self.to_stderr(warning_message, only_once)
+            self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)
  
      def report_error(self, message, tb=None):
          '''
          Do the same as trouble, but prefixes the message with 'ERROR:', colored
          in red if stderr is a tty file.
          '''
-        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
-            _msg_header = '\033[0;31mERROR:\033[0m'
-        else:
-            _msg_header = 'ERROR:'
-        error_message = '%s %s' % (_msg_header, message)
-        self.trouble(error_message, tb)
+        self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)
  
      def write_debug(self, message, only_once=False):
          '''Log debug message or Print message to stderr'''
@@ -919,7 +936,7 @@ def validate_outtmpl(cls, outtmpl):
              return err
  
      def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
-        """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
+        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
          info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
  
          info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
@@ -933,10 +950,11 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
          if info_dict.get('resolution') is None:
              info_dict['resolution'] = self.format_resolution(info_dict, default=None)
  
-        # For fields playlist_index and autonumber convert all occurrences
+        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
          # of %(field)s to %(field)0Nd for backward compatibility
          field_size_compat_map = {
              'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
+            'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
              'autonumber': self.params.get('autonumber_size') or 5,
          }
  
@@ -1072,6 +1090,10 @@ def create_key(outer_mobj):
  
          return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
  
+    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
+        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
+        return self.escape_outtmpl(outtmpl) % info_dict
+
      def _prepare_filename(self, info_dict, tmpl_type='default'):
          try:
              sanitize = lambda k, v: sanitize_filename(
@@ -1943,9 +1965,14 @@ def selector_function(ctx):
                          filter_f = lambda f: _filter_f(f) and (
                              f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                      else:
-                        filter_f = ((lambda f: f.get('ext') == format_spec)
-                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
-                                    else (lambda f: f.get('format_id') == format_spec))  # id
+                        if format_spec in ('m4a', 'mp3', 'ogg', 'aac'):  # audio extension
+                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
+                        elif format_spec in ('mp4', 'flv', 'webm', '3gp'):  # video extension
+                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+                        elif format_spec in ('mhtml', ):  # storyboards extension
+                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
+                        else:
+                            filter_f = (lambda f: f.get('format_id') == format_spec)  # id
  
                      def selector_function(ctx):
                          formats = list(ctx['formats'])
@@ -2425,10 +2452,8 @@ def print_optional(field):
          if self.params.get('forceprint') or self.params.get('forcejson'):
              self.post_extract(info_dict)
          for tmpl in self.params.get('forceprint', []):
-            if re.match(r'\w+$', tmpl):
-                tmpl = '%({})s'.format(tmpl)
-            tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
-            self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
+            self.to_stdout(self.evaluate_outtmpl(
+                f'%({tmpl})s' if re.match(r'\w+$', tmpl) else tmpl, info_dict))
  
          print_mandatory('title')
          print_mandatory('id')
@@ -2814,7 +2839,8 @@ def ffmpeg_fixup(cndn, msg, cls):
  
                      downloader = (get_suitable_downloader(info_dict, self.params).__name__
                                    if 'protocol' in info_dict else None)
-                    ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
+                    ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
+                                 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                      ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                      ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
  
@@ -3028,9 +3054,7 @@ def record_download_archive(self, info_dict):
  
      @staticmethod
      def format_resolution(format, default='unknown'):
-        if format.get('vcodec') == 'none':
-            if format.get('acodec') == 'none':
-                return 'images'
+        if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
              return 'audio only'
          if format.get('resolution') is not None:
              return format['resolution']
@@ -3042,6 +3066,8 @@ def format_resolution(format, default='unknown'):
              res = '%dx?' % format['width']
          else:
              res = default
+        if format.get('vcodec') == 'none' and format.get('acodec') == 'none':
+            res += ' (images)'
          return res
  
      def _format_note(self, fdict):
@@ -3201,9 +3227,10 @@ def print_debug_header(self):
          self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
          if _LAZY_LOADER:
              self._write_string('[debug] Lazy loading extractors enabled\n')
-        if _PLUGIN_CLASSES:
-            self._write_string(
-                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
+        if plugin_extractors or plugin_postprocessors:
+            self._write_string('[debug] Plugins: %s\n' % [
+                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
+                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
          if self.params.get('compat_opts'):
              self._write_string(
                  '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
@@ -3427,7 +3454,7 @@ def _write_subtitles(self, info_dict, filename):
              except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                  self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
                  continue
-            return ret
+        return ret
  
      def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
          ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''