[ffmpeg] Cache version data

[yt-dlp.git] / yt_dlp / YoutubeDL.py
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index d85748fc98de2060c09c0d480a92b89d3bffc0a4..6a8e45b1adc6947f785d1b92e53fd1c6f2a1cf7b 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -32,6 +32,7 @@
  
  from .compat import (
      compat_basestring,
+    compat_brotli,
      compat_get_terminal_size,
      compat_kwargs,
      compat_numeric_types,
@@ -64,6 +65,7 @@
      ExistingVideoReached,
      expand_path,
      ExtractorError,
+    filter_dict,
      float_or_none,
      format_bytes,
      format_field,
@@ -71,6 +73,7 @@
      formatSeconds,
      GeoRestrictedError,
      get_domain,
+    has_certifi,
      HEADRequest,
      InAdvancePagedList,
      int_or_none,
@@ -83,7 +86,9 @@
      make_dir,
      make_HTTPS_handler,
      MaxDownloadsReached,
+    merge_headers,
      network_exceptions,
+    NO_DEFAULT,
      number_of_digits,
      orderedSet,
      OUTTMPL_TYPES,
@@ -233,6 +238,8 @@ class YoutubeDL(object):
                         See "Sorting Formats" for more details.
      format_sort_force: Force the given format_sort. see "Sorting Formats"
                         for more details.
+    prefer_free_formats: Whether to prefer video formats with free containers
+                       over non-free ones of the same quality.
      allow_multiple_video_streams:   Allow multiple video streams to be merged
                         into a single file
      allow_multiple_audio_streams:   Allow multiple audio streams to be merged
@@ -332,6 +339,7 @@ class YoutubeDL(object):
      nocheckcertificate:  Do not verify SSL certificates
      prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                         At the moment, this is only supported by YouTube.
+    http_headers:      A dictionary of custom headers to be used for all requests
      proxy:             URL of the proxy server to use
      geo_verification_proxy:  URL of the proxy to use for IP address verification
                         on geo-restricted sites.
@@ -507,23 +515,22 @@ class YoutubeDL(object):
          'track_number', 'disc_number', 'release_year',
      ))
  
+    _format_fields = {
+        # NB: Keep in sync with the docstring of extractor/common.py
+        'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
+        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
+        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
+        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
+        'preference', 'language', 'language_preference', 'quality', 'source_preference',
+        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
+        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
+    }
      _format_selection_exts = {
          'audio': {'m4a', 'mp3', 'ogg', 'aac'},
          'video': {'mp4', 'flv', 'webm', '3gp'},
          'storyboards': {'mhtml'},
      }
  
-    params = None
-    _ies = {}
-    _pps = {k: [] for k in POSTPROCESS_WHEN}
-    _printed_messages = set()
-    _first_webpage_request = True
-    _download_retcode = None
-    _num_downloads = None
-    _playlist_level = 0
-    _playlist_urls = set()
-    _screen_file = None
-
      def __init__(self, params=None, auto_init=True):
          """Create a FileDownloader object with the given options.
          @param auto_init    Whether to load the default extractors and print header (if verbose).
@@ -531,6 +538,7 @@ def __init__(self, params=None, auto_init=True):
          """
          if params is None:
              params = {}
+        self.params = params
          self._ies = {}
          self._ies_instances = {}
          self._pps = {k: [] for k in POSTPROCESS_WHEN}
@@ -542,15 +550,21 @@ def __init__(self, params=None, auto_init=True):
          self._download_retcode = 0
          self._num_downloads = 0
          self._num_videos = 0
-        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
-        self._err_file = sys.stderr
-        self.params = params
+        self._playlist_level = 0
+        self._playlist_urls = set()
          self.cache = Cache(self)
  
          windows_enable_vt_mode()
+        self._out_files = {
+            'error': sys.stderr,
+            'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
+            'console': None if compat_os_name == 'nt' else next(
+                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
+        }
+        self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
          self._allow_colors = {
-            'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
-            'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
+            type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
+            for type_ in ('screen', 'error')
          }
  
          if sys.version_info < (3, 6):
@@ -615,7 +629,7 @@ def check_deprecated(param, option, suggestion):
                  sp_kwargs = dict(
                      stdin=subprocess.PIPE,
                      stdout=slave,
-                    stderr=self._err_file)
+                    stderr=self._out_files['error'])
                  try:
                      self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                  except OSError:
@@ -647,6 +661,9 @@ def check_deprecated(param, option, suggestion):
              else self.params['format'] if callable(self.params['format'])
              else self.build_format_selector(self.params['format']))
  
+        # Set http_headers defaults according to std_headers
+        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
+
          self._setup_opener()
  
          if auto_init:
@@ -780,14 +797,24 @@ def _write_string(self, message, out=None, only_once=False):
              self._printed_messages.add(message)
          write_string(message, out=out, encoding=self.params.get('encoding'))
  
-    def to_stdout(self, message, skip_eol=False, quiet=False):
+    def to_stdout(self, message, skip_eol=False, quiet=None):
          """Print message to stdout"""
+        if quiet is not None:
+            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
+        self._write_string(
+            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+            self._out_files['print'])
+
+    def to_screen(self, message, skip_eol=False, quiet=None):
+        """Print message to screen if not in quiet mode"""
          if self.params.get('logger'):
              self.params['logger'].debug(message)
-        elif not quiet or self.params.get('verbose'):
-            self._write_string(
-                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
-                self._err_file if quiet else self._screen_file)
+            return
+        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
+            return
+        self._write_string(
+            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+            self._out_files['screen'])
  
      def to_stderr(self, message, only_once=False):
          """Print message to stderr"""
@@ -795,7 +822,12 @@ def to_stderr(self, message, only_once=False):
          if self.params.get('logger'):
              self.params['logger'].error(message)
          else:
-            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
+            self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
+
+    def _send_console_code(self, code):
+        if compat_os_name == 'nt' or not self._out_files['console']:
+            return
+        self._write_string(code, self._out_files['console'])
  
      def to_console_title(self, message):
          if not self.params.get('consoletitle', False):
@@ -806,26 +838,18 @@ def to_console_title(self, message):
                  # c_wchar_p() might not be necessary if `message` is
                  # already of type unicode()
                  ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
-        elif 'TERM' in os.environ:
-            self._write_string('\033]0;%s\007' % message, self._screen_file)
+        else:
+            self._send_console_code(f'\033]0;{message}\007')
  
      def save_console_title(self):
-        if not self.params.get('consoletitle', False):
+        if not self.params.get('consoletitle') or self.params.get('simulate'):
              return
-        if self.params.get('simulate'):
-            return
-        if compat_os_name != 'nt' and 'TERM' in os.environ:
-            # Save the title on stack
-            self._write_string('\033[22;0t', self._screen_file)
+        self._send_console_code('\033[22;0t')  # Save the title on stack
  
      def restore_console_title(self):
-        if not self.params.get('consoletitle', False):
+        if not self.params.get('consoletitle') or self.params.get('simulate'):
              return
-        if self.params.get('simulate'):
-            return
-        if compat_os_name != 'nt' and 'TERM' in os.environ:
-            # Restore the title from stack
-            self._write_string('\033[23;0t', self._screen_file)
+        self._send_console_code('\033[23;0t')  # Restore the title from stack
  
      def __enter__(self):
          self.save_console_title()
@@ -871,11 +895,6 @@ def trouble(self, message=None, tb=None, is_error=True):
              raise DownloadError(message, exc_info)
          self._download_retcode = 1
  
-    def to_screen(self, message, skip_eol=False):
-        """Print message to stdout if not in quiet mode"""
-        self.to_stdout(
-            message, skip_eol, quiet=self.params.get('quiet', False))
-
      class Styles(Enum):
          HEADERS = 'yellow'
          EMPHASIS = 'light blue'
@@ -899,11 +918,11 @@ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_enc
  
      def _format_screen(self, *args, **kwargs):
          return self._format_text(
-            self._screen_file, self._allow_colors['screen'], *args, **kwargs)
+            self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
  
      def _format_err(self, *args, **kwargs):
          return self._format_text(
-            self._err_file, self._allow_colors['err'], *args, **kwargs)
+            self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
  
      def report_warning(self, message, only_once=False):
          '''
@@ -954,13 +973,13 @@ def report_file_delete(self, file_name):
          except UnicodeEncodeError:
              self.to_screen('Deleting existing file')
  
-    def raise_no_formats(self, info, forced=False):
+    def raise_no_formats(self, info, forced=False, *, msg=None):
          has_drm = info.get('__has_drm')
-        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
-        expected = self.params.get('ignore_no_formats_error')
-        if forced or not expected:
+        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
+        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
+        if forced or not ignored:
              raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
-                                 expected=has_drm or expected)
+                                 expected=has_drm or ignored or expected)
          else:
              self.report_warning(msg)
  
@@ -1037,8 +1056,7 @@ def validate_outtmpl(cls, outtmpl):
      @staticmethod
      def _copy_infodict(info_dict):
          info_dict = dict(info_dict)
-        for key in ('__original_infodict', '__postprocessors'):
-            info_dict.pop(key, None)
+        info_dict.pop('__postprocessors', None)
          return info_dict
  
      def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
@@ -1083,10 +1101,11 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
              (?P<fields>{field})
              (?P<maths>(?:{math_op}{math_field})*)
              (?:>(?P<strf_format>.+?))?
-            (?P<alternate>(?<!\\),[^|&)]+)?
-            (?:&(?P<replacement>.*?))?
-            (?:\|(?P<default>.*?))?
-            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+            (?P<remaining>
+                (?P<alternate>(?<!\\),[^|&)]+)?
+                (?:&(?P<replacement>.*?))?
+                (?:\|(?P<default>.*?))?
+            )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
  
          def _traverse_infodict(k):
              k = k.split('.')
@@ -1133,8 +1152,10 @@ def get_value(mdict):
          na = self.params.get('outtmpl_na_placeholder', 'NA')
  
          def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
-            return sanitize_filename(str(value), restricted=restricted,
-                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))
+            return sanitize_filename(str(value), restricted=restricted, is_id=(
+                bool(re.search(r'(^|[_.])id(\.|$)', key))
+                if 'filename-sanitization' in self.params.get('compat_opts', [])
+                else NO_DEFAULT))
  
          sanitizer = sanitize if callable(sanitize) else filename_sanitizer
          sanitize = bool(sanitize)
@@ -1157,7 +1178,7 @@ def create_key(outer_mobj):
                  value = get_value(mobj)
                  replacement = mobj['replacement']
                  if value is None and mobj['alternate']:
-                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
+                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                  else:
                      break
  
@@ -1423,7 +1444,7 @@ def progress(msg):
          min_wait, max_wait = self.params.get('wait_for_video')
          diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
          if diff is None and ie_result.get('live_status') == 'is_upcoming':
-            diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
+            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
              self.report_warning('Release time of video is not known')
          elif (diff or 0) <= 0:
              self.report_warning('Video should already be available according to extracted info')
@@ -1554,13 +1575,9 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
              if not info:
                  return info
  
-            force_properties = dict(
-                (k, v) for k, v in ie_result.items() if v is not None)
-            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
-                if f in force_properties:
-                    del force_properties[f]
              new_result = info.copy()
-            new_result.update(force_properties)
+            new_result.update(filter_dict(ie_result, lambda k, v: (
+                v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
  
              # Extracted info may not be a video result (i.e.
              # info.get('_type', 'video') != video) but rather an url or
@@ -1585,6 +1602,7 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
  
              self._playlist_level += 1
              self._playlist_urls.add(webpage_url)
+            self._fill_common_fields(ie_result, False)
              self._sanitize_thumbnails(ie_result)
              try:
                  return self.__process_playlist(ie_result, download)
@@ -1797,7 +1815,7 @@ def get_entry(i):
          ie_result['entries'] = playlist_results
  
          # Write the updated info to json
-        if _infojson_written and self._write_info_json(
+        if _infojson_written is True and self._write_info_json(
                  'updated playlist', ie_result,
                  self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
              return
@@ -2250,8 +2268,7 @@ def restore_last_token(self):
          return _build_selector_function(parsed_selector)
  
      def _calc_headers(self, info_dict):
-        res = std_headers.copy()
-        res.update(info_dict.get('http_headers') or {})
+        res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
  
          cookies = self._calc_cookies(info_dict)
          if cookies:
@@ -2309,6 +2326,58 @@ def check_thumbnails(thumbnails):
          else:
              info_dict['thumbnails'] = thumbnails
  
+    def _fill_common_fields(self, info_dict, is_video=True):
+        # TODO: move sanitization here
+        if is_video:
+            # playlists are allowed to lack "title"
+            info_dict['fulltitle'] = info_dict.get('title')
+            if 'title' not in info_dict:
+                raise ExtractorError('Missing "title" field in extractor result',
+                                     video_id=info_dict['id'], ie=info_dict['extractor'])
+            elif not info_dict.get('title'):
+                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
+                info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
+
+        if info_dict.get('duration') is not None:
+            info_dict['duration_string'] = formatSeconds(info_dict['duration'])
+
+        for ts_key, date_key in (
+                ('timestamp', 'upload_date'),
+                ('release_timestamp', 'release_date'),
+                ('modified_timestamp', 'modified_date'),
+        ):
+            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+                # see http://bugs.python.org/issue1646728)
+                try:
+                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
+                except (ValueError, OverflowError, OSError):
+                    pass
+
+        live_keys = ('is_live', 'was_live')
+        live_status = info_dict.get('live_status')
+        if live_status is None:
+            for key in live_keys:
+                if info_dict.get(key) is False:
+                    continue
+                if info_dict.get(key):
+                    live_status = key
+                break
+            if all(info_dict.get(key) is False for key in live_keys):
+                live_status = 'not_live'
+        if live_status:
+            info_dict['live_status'] = live_status
+            for key in live_keys:
+                if info_dict.get(key) is None:
+                    info_dict[key] = (live_status == key)
+
+        # Auto generate title fields corresponding to the *_number fields when missing
+        # in order to always have clean titles. This is very common for TV series.
+        for field in ('chapter', 'season', 'episode'):
+            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
      def process_video_result(self, info_dict, download=True):
          assert info_dict.get('_type', 'video') == 'video'
          self._num_videos += 1
@@ -2318,14 +2387,6 @@ def process_video_result(self, info_dict, download=True):
          elif not info_dict.get('id'):
              raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
  
-        info_dict['fulltitle'] = info_dict.get('title')
-        if 'title' not in info_dict:
-            raise ExtractorError('Missing "title" field in extractor result',
-                                 video_id=info_dict['id'], ie=info_dict['extractor'])
-        elif not info_dict.get('title'):
-            self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
-            info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
-
          def report_force_conversion(field, field_not, conversion):
              self.report_warning(
                  '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
@@ -2348,6 +2409,8 @@ def sanitize_numeric_fields(info):
  
          sanitize_string_field(info_dict, 'id')
          sanitize_numeric_fields(info_dict)
+        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
+            self.report_warning('"duration" field is negative, there is an error in extractor')
  
          if 'playlist' not in info_dict:
              # It isn't part of a playlist
@@ -2366,45 +2429,7 @@ def sanitize_numeric_fields(info):
          if info_dict.get('display_id') is None and 'id' in info_dict:
              info_dict['display_id'] = info_dict['id']
  
-        if info_dict.get('duration') is not None:
-            info_dict['duration_string'] = formatSeconds(info_dict['duration'])
-
-        for ts_key, date_key in (
-                ('timestamp', 'upload_date'),
-                ('release_timestamp', 'release_date'),
-                ('modified_timestamp', 'modified_date'),
-        ):
-            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
-                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
-                # see http://bugs.python.org/issue1646728)
-                try:
-                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
-                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
-                except (ValueError, OverflowError, OSError):
-                    pass
-
-        live_keys = ('is_live', 'was_live')
-        live_status = info_dict.get('live_status')
-        if live_status is None:
-            for key in live_keys:
-                if info_dict.get(key) is False:
-                    continue
-                if info_dict.get(key):
-                    live_status = key
-                break
-            if all(info_dict.get(key) is False for key in live_keys):
-                live_status = 'not_live'
-        if live_status:
-            info_dict['live_status'] = live_status
-            for key in live_keys:
-                if info_dict.get(key) is None:
-                    info_dict[key] = (live_status == key)
-
-        # Auto generate title fields corresponding to the *_number fields when missing
-        # in order to always have clean titles. This is very common for TV series.
-        for field in ('chapter', 'season', 'episode'):
-            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
-                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+        self._fill_common_fields(info_dict)
  
          for cc_kind in ('subtitles', 'automatic_captions'):
              cc = info_dict.get(cc_kind)
@@ -2431,12 +2456,20 @@ def sanitize_numeric_fields(info):
          info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
          if not self.params.get('allow_unplayable_formats'):
              formats = [f for f in formats if not f.get('has_drm')]
+            if info_dict['__has_drm'] and all(
+                    f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
+                self.report_warning(
+                    'This video is DRM protected and only images are available for download. '
+                    'Use --list-formats to see them')
  
-        if info_dict.get('is_live'):
-            get_from_start = bool(self.params.get('live_from_start'))
+        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
+        if not get_from_start:
+            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+        if info_dict.get('is_live') and formats:
              formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
-            if not get_from_start:
-                info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+            if get_from_start and not formats:
+                self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
+                                                     'If you want to download from the current time, pass --no-live-from-start')
  
          if not formats:
              self.raise_no_formats(info_dict)
@@ -2512,8 +2545,6 @@ def is_wellformed(f):
          if '__x_forwarded_for_ip' in info_dict:
              del info_dict['__x_forwarded_for_ip']
  
-        # TODO Central sorting goes here
-
          if self.params.get('check_formats') is True:
              formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
  
@@ -2526,6 +2557,12 @@ def is_wellformed(f):
  
          info_dict, _ = self.pre_process(info_dict)
  
+        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
+            return info_dict
+
+        self.post_extract(info_dict)
+        info_dict, _ = self.pre_process(info_dict, 'after_filter')
+
          # The pre-processors may have modified the formats
          formats = info_dict.get('formats', [info_dict])
  
@@ -2596,8 +2633,9 @@ def is_wellformed(f):
  
          if not formats_to_download:
              if not self.params.get('ignore_no_formats_error'):
-                raise ExtractorError('Requested format is not available', expected=True,
-                                     video_id=info_dict['id'], ie=info_dict['extractor'])
+                raise ExtractorError(
+                    'Requested format is not available. Use --list-formats for a list of available formats',
+                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
              self.report_warning('Requested format is not available')
              # Process what we can, even without any available formats.
              formats_to_download = [{}]
@@ -2610,15 +2648,12 @@ def is_wellformed(f):
                      + ', '.join([f['format_id'] for f in formats_to_download]))
              max_downloads_reached = False
              for i, fmt in enumerate(formats_to_download):
-                formats_to_download[i] = new_info = dict(info_dict)
-                # Save a reference to the original info_dict so that it can be modified in process_info if needed
+                formats_to_download[i] = new_info = self._copy_infodict(info_dict)
                  new_info.update(fmt)
-                new_info['__original_infodict'] = info_dict
                  try:
                      self.process_info(new_info)
                  except MaxDownloadsReached:
                      max_downloads_reached = True
-                new_info.pop('__original_infodict')
                  # Remove copied info
                  for key, val in tuple(new_info.items()):
                      if info_dict.get(key) == val:
@@ -2735,8 +2770,9 @@ def format_tmpl(tmpl):
              filename = self.evaluate_outtmpl(file_tmpl, info_dict)
              tmpl = format_tmpl(tmpl)
              self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
-            with io.open(filename, 'a', encoding='utf-8') as f:
-                f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
+            if self._ensure_dir_exists(filename):
+                with io.open(filename, 'a', encoding='utf-8') as f:
+                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
  
      def __forced_printings(self, info_dict, filename, incomplete):
          def print_mandatory(field, actual_field=None):
@@ -2757,7 +2793,7 @@ def print_optional(field):
          if info_dict.get('requested_formats') is not None:
              # For RTMP URLs, also include the playpath
              info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
-        elif 'url' in info_dict:
+        elif info_dict.get('url'):
              info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
  
          if (self.params.get('forcejson')
@@ -2825,7 +2861,7 @@ def existing_file(self, filepaths, *, default_overwrite=True):
          return None
  
      def process_info(self, info_dict):
-        """Process a single resolved IE result. (Modified it in-place)"""
+        """Process a single resolved IE result. (Modifies it in-place)"""
  
          assert info_dict.get('_type', 'video') == 'video'
          original_infodict = info_dict
@@ -2833,10 +2869,13 @@ def process_info(self, info_dict):
          if 'format' not in info_dict and 'ext' in info_dict:
              info_dict['format'] = info_dict['ext']
  
+        # This is mostly just for backward compatibility of process_info.
+        # As a side-effect, this allows for format-specific filters.
          if self._match_entry(info_dict) is not None:
              info_dict['__write_download_archive'] = 'ignore'
              return
  
+        # Does nothing under normal operation - for backward compatibility of process_info
          self.post_extract(info_dict)
          self._num_downloads += 1
  
@@ -2907,9 +2946,11 @@ def process_info(self, info_dict):
  
          # Write internet shortcut files
          def _write_link_file(link_type):
-            if 'webpage_url' not in info_dict:
-                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
-                return False
+            url = try_get(info_dict['webpage_url'], iri_to_uri)
+            if not url:
+                self.report_warning(
+                    f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
+                return True
              linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
              if not self._ensure_dir_exists(encodeFilename(linkfn)):
                  return False
@@ -2920,7 +2961,7 @@ def _write_link_file(link_type):
                  self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
                  with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                               newline='\r\n' if link_type == 'url' else '\n') as linkfile:
-                    template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
+                    template_vars = {'url': url}
                      if link_type == 'desktop':
                          template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                      linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
@@ -3256,17 +3297,14 @@ def sanitize_info(info_dict, remove_private_keys=False):
              return info_dict
          info_dict.setdefault('epoch', int(time.time()))
          info_dict.setdefault('_type', 'video')
-        remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
-        keep_keys = ['_type']  # Always keep this to facilitate load-info-json
+
          if remove_private_keys:
-            remove_keys |= {
+            reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
                  'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
                  'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
              }
-            reject = lambda k, v: k not in keep_keys and (
-                k.startswith('_') or k in remove_keys or v is None)
          else:
-            reject = lambda k, v: k in remove_keys
+            reject = lambda k, v: False
  
          def filter_fn(obj):
              if isinstance(obj, dict):
@@ -3293,14 +3331,8 @@ def actual_post_extract(info_dict):
                      actual_post_extract(video_dict or {})
                  return
  
-            post_extractor = info_dict.get('__post_extractor') or (lambda: {})
-            extra = post_extractor().items()
-            info_dict.update(extra)
-            info_dict.pop('__post_extractor', None)
-
-            original_infodict = info_dict.get('__original_infodict') or {}
-            original_infodict.update(extra)
-            original_infodict.pop('__post_extractor', None)
+            post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
+            info_dict.update(post_extractor())
  
          actual_post_extract(info_dict or {})
  
@@ -3578,7 +3610,7 @@ def print_debug_header(self):
              return
  
          def get_encoding(stream):
-            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
+            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
              if not supports_terminal_sequences(stream):
                  from .compat import WINDOWS_VT_MODE
                  ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
@@ -3587,7 +3619,7 @@ def get_encoding(stream):
          encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
              locale.getpreferredencoding(),
              sys.getfilesystemencoding(),
-            get_encoding(self._screen_file), get_encoding(self._err_file),
+            get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
              self.get_encoding())
  
          logger = self.params.get('logger')
@@ -3661,6 +3693,8 @@ def python_implementation():
          from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
  
          lib_str = join_nonempty(
+            compat_brotli and compat_brotli.__name__,
+            has_certifi and 'certifi',
              compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
              SECRETSTORAGE_AVAILABLE and 'secretstorage',
              has_mutagen and 'mutagen',
@@ -3752,7 +3786,7 @@ def get_encoding(self):
          return encoding
  
      def _write_info_json(self, label, ie_result, infofn, overwrite=None):
-        ''' Write infojson and returns True = written, False = skip, None = error '''
+        ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
          if overwrite is None:
              overwrite = self.params.get('overwrites', True)
          if not self.params.get('writeinfojson'):
@@ -3764,14 +3798,15 @@ def _write_info_json(self, label, ie_result, infofn, overwrite=None):
              return None
          elif not overwrite and os.path.exists(infofn):
              self.to_screen(f'[info] {label.title()} metadata is already present')
-        else:
-            self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
-            try:
-                write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
-            except (OSError, IOError):
-                self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
-                return None
-        return True
+            return 'exists'
+
+        self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
+        try:
+            write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+            return True
+        except (OSError, IOError):
+            self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
+            return None
  
      def _write_description(self, label, ie_result, descfn):
          ''' Write description and returns True = written, False = skip, None = error '''
@@ -3842,9 +3877,12 @@ def _write_subtitles(self, info_dict, filename):
                  sub_info['filepath'] = sub_filename
                  ret.append((sub_filename, sub_filename_final))
              except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
+                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                  if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
-                    raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
-                self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
+                    if not self.params.get('ignoreerrors'):
+                        self.report_error(msg)
+                    raise DownloadError(msg)
+                self.report_warning(msg)
          return ret
  
      def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):