[cleanup] Misc

[yt-dlp.git] / yt_dlp / YoutubeDL.py
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index e1bbb01fa21c7014ebf0e2bc903196b455c367e1..3cfd0a69973427b4bde90ba7af963a7f971d8d84 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -48,7 +48,7 @@
      get_postprocessor,
  )
  from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
-from .update import detect_variant
+from .update import REPOSITORY, current_git_head, detect_variant
  from .utils import (
      DEFAULT_OUTTMPL,
      IDENTITY,
@@ -90,6 +90,7 @@
      args_to_str,
      bug_reports_message,
      date_from_str,
+    deprecation_warning,
      determine_ext,
      determine_protocol,
      encode_compat_str,
@@ -107,6 +108,7 @@
      get_domain,
      int_or_none,
      iri_to_uri,
+    is_path_like,
      join_nonempty,
      locked_file,
      make_archive_id,
@@ -250,8 +252,8 @@ class YoutubeDL:
      matchtitle:        Download only matching titles.
      rejecttitle:       Reject downloads for matching titles.
      logger:            Log messages to a logging.Logger instance.
-    logtostderr:       Log messages to stderr instead of stdout.
-    consoletitle:       Display progress in console window's titlebar.
+    logtostderr:       Print everything to stderr instead of stdout.
+    consoletitle:      Display progress in console window's titlebar.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video description to a .info.json file
      clean_infojson:    Remove private fields from the infojson
@@ -292,9 +294,8 @@ class YoutubeDL:
                         downloaded.
                         Videos without view count information are always
                         downloaded. None for no limit.
-    download_archive:  File name of a file where all downloads are recorded.
-                       Videos already present in the file are not downloaded
-                       again.
+    download_archive:  A set, or the name of a file where all downloads are recorded.
+                       Videos already present in the file are not downloaded again.
      break_on_existing: Stop the download process after attempting to download a
                         file that is in the archive.
      break_on_reject:   Stop the download process when encountering a video that
@@ -303,8 +304,9 @@ class YoutubeDL:
                         should act on each input URL as opposed to for the entire queue
      cookiefile:        File name or text stream from where cookies should be read and dumped to
      cookiesfrombrowser:  A tuple containing the name of the browser, the profile
-                       name/path from where cookies are loaded, and the name of the
-                       keyring, e.g. ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
+                       name/path from where cookies are loaded, the name of the keyring,
+                       and the container name, e.g. ('chrome', ) or
+                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
      legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                         support RFC 5746 secure renegotiation
      nocheckcertificate:  Do not verify SSL certificates
@@ -631,7 +633,7 @@ def check_deprecated(param, option, suggestion):
          for msg in self.params.get('_warnings', []):
              self.report_warning(msg)
          for msg in self.params.get('_deprecation_warnings', []):
-            self.deprecation_warning(msg)
+            self.deprecated_feature(msg)
  
          self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
          if 'list-formats' in self.params['compat_opts']:
@@ -721,21 +723,23 @@ def check_deprecated(param, option, suggestion):
  
          def preload_download_archive(fn):
              """Preload the archive, if any is specified"""
+            archive = set()
              if fn is None:
-                return False
+                return archive
+            elif not is_path_like(fn):
+                return fn
+
              self.write_debug(f'Loading archive file {fn!r}')
              try:
                  with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                      for line in archive_file:
-                        self.archive.add(line.strip())
+                        archive.add(line.strip())
              except OSError as ioe:
                  if ioe.errno != errno.ENOENT:
                      raise
-                return False
-            return True
+            return archive
  
-        self.archive = set()
-        preload_download_archive(self.params.get('download_archive'))
+        self.archive = preload_download_archive(self.params.get('download_archive'))
  
      def warn_if_short_id(self, argv):
          # short YouTube ID starting with dash?
@@ -835,9 +839,11 @@ def _write_string(self, message, out=None, only_once=False):
      def to_stdout(self, message, skip_eol=False, quiet=None):
          """Print message to stdout"""
          if quiet is not None:
-            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
+            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
+                                     'Use "YoutubeDL.to_screen" instead')
          if skip_eol is not False:
-            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
+            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
+                                     'Use "YoutubeDL.to_screen" instead')
          self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
  
      def to_screen(self, message, skip_eol=False, quiet=None):
@@ -973,11 +979,14 @@ def report_warning(self, message, only_once=False):
                  return
              self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
  
-    def deprecation_warning(self, message):
+    def deprecation_warning(self, message, *, stacklevel=0):
+        deprecation_warning(
+            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
+
+    def deprecated_feature(self, message):
          if self.params.get('logger') is not None:
-            self.params['logger'].warning(f'DeprecationWarning: {message}')
-        else:
-            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
+            self.params['logger'].warning(f'Deprecated Feature: {message}')
+        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
  
      def report_error(self, message, *args, **kwargs):
          '''
@@ -1037,7 +1046,7 @@ def _parse_outtmpl(self):
  
      def get_output_path(self, dir_type='', filename=None):
          paths = self.params.get('paths', {})
-        assert isinstance(paths, dict)
+        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
          path = os.path.join(
              expand_path(paths.get('home', '').strip()),
              expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
@@ -1120,8 +1129,12 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
              '-': float.__sub__,
          }
          # Field is of the form key1.key2...
-        # where keys (except first) can be string, int or slice
-        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
+        # where keys (except first) can be string, int, slice or "{field, ...}"
+        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
+        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
+            'inner': FIELD_INNER_RE,
+            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
+        }
          MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
          MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
          INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
@@ -1135,11 +1148,20 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
                  (?:\|(?P<default>.*?))?
              )$''')
  
-        def _traverse_infodict(k):
-            k = k.split('.')
-            if k[0] == '':
-                k.pop(0)
-            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
+        def _traverse_infodict(fields):
+            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
+                      for f in ([x] if x.startswith('{') else x.split('.'))]
+            for i in (0, -1):
+                if fields and not fields[i]:
+                    fields.pop(i)
+
+            for i, f in enumerate(fields):
+                if not f.startswith('{'):
+                    continue
+                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
+                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
+
+            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
  
          def get_value(mdict):
              # Object traversal
@@ -1225,9 +1247,11 @@ def create_key(outer_mobj):
                  delim = '\n' if '#' in flags else ', '
                  value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
              elif fmt[-1] == 'j':  # json
-                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
+                value, fmt = json.dumps(
+                    value, default=_dumpjson_default,
+                    indent=4 if '#' in flags else None, ensure_ascii=False), str_fmt
              elif fmt[-1] == 'h':  # html
-                value, fmt = escapeHTML(value), str_fmt
+                value, fmt = escapeHTML(str(value)), str_fmt
              elif fmt[-1] == 'q':  # quoted
                  value = map(str, variadic(value) if '#' in flags else [value])
                  value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
@@ -1399,18 +1423,19 @@ def add_extra_info(info_dict, extra_info):
      def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                       process=True, force_generic_extractor=False):
          """
-        Return a list with a dictionary for each video extracted.
+        Extract and return the information dictionary of the URL
  
          Arguments:
-        url -- URL to extract
+        @param url          URL to extract
  
          Keyword arguments:
-        download -- whether to download videos during extraction
-        ie_key -- extractor key hint
-        extra_info -- dictionary containing the extra values to add to each result
-        process -- whether to resolve all unresolved references (URLs, playlist items),
-            must be True for download to work.
-        force_generic_extractor -- force using the generic extractor
+        @param download     Whether to download videos
+        @param process      Whether to resolve all unresolved references (URLs, playlist items).
+                            Must be True for download to work
+        @param ie_key       Use only the extractor with this key
+
+        @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
+        @param force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
          """
  
          if extra_info is None:
@@ -2505,11 +2530,11 @@ def sanitize_numeric_fields(info):
          info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
          if not self.params.get('allow_unplayable_formats'):
              formats = [f for f in formats if not f.get('has_drm')]
-            if info_dict['_has_drm'] and formats and all(
-                    f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
-                self.report_warning(
-                    'This video is DRM protected and only images are available for download. '
-                    'Use --list-formats to see them')
+
+        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
+            self.report_warning(  # sentence start is chosen explicitly; str.capitalize() would lowercase "DRM" and "Use"
+                f'{"This video is DRM protected and only" if info_dict["_has_drm"] else "Only"} '
+                'images are available for download. Use --list-formats to see them')
  
          get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
          if not get_from_start:
@@ -2521,9 +2546,6 @@ def sanitize_numeric_fields(info):
                      '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                      'If you want to download from the current time, use --no-live-from-start'))
  
-        if not formats:
-            self.raise_no_formats(info_dict)
-
          def is_wellformed(f):
              url = f.get('url')
              if not url:
@@ -2536,7 +2558,10 @@ def is_wellformed(f):
              return True
  
          # Filter out malformed formats for better extraction robustness
-        formats = list(filter(is_wellformed, formats))
+        formats = list(filter(is_wellformed, formats or []))
+
+        if not formats:
+            self.raise_no_formats(info_dict)
  
          formats_dict = {}
  
@@ -2738,9 +2763,9 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
                  if lang not in available_subs:
                      available_subs[lang] = cap_info
  
-        if (not self.params.get('writesubtitles') and not
-                self.params.get('writeautomaticsub') or not
-                available_subs):
+        if not available_subs or (
+                not self.params.get('writesubtitles')
+                and not self.params.get('writeautomaticsub')):
              return None
  
          all_sub_langs = tuple(available_subs.keys())
@@ -2757,7 +2782,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
          else:
              requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
          if requested_langs:
-            self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
+            self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
  
          formats_query = self.params.get('subtitlesformat', 'best')
          formats_preference = formats_query.split('/') if formats_query else []
@@ -2793,12 +2818,16 @@ def _forceprint(self, key, info_dict):
          info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
  
          def format_tmpl(tmpl):
-            mobj = re.match(r'\w+(=?)$', tmpl)
-            if mobj and mobj.group(1):
-                return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
-            elif mobj:
-                return f'%({tmpl})s'
-            return tmpl
+            mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
+            if not mobj:
+                return tmpl
+
+            fmt = '%({})s'
+            if tmpl.startswith('{'):
+                tmpl = f'.{tmpl}'
+            if tmpl.endswith('='):
+                tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
+            return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
  
          for tmpl in self.params['forceprint'].get(key, []):
              self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
@@ -3314,6 +3343,12 @@ def sanitize_info(info_dict, remove_private_keys=False):
              return info_dict
          info_dict.setdefault('epoch', int(time.time()))
          info_dict.setdefault('_type', 'video')
+        info_dict.setdefault('_version', {
+            'version': __version__,
+            'current_git_head': current_git_head(),
+            'release_git_head': RELEASE_GIT_HEAD,
+            'repository': REPOSITORY,
+        })
  
          if remove_private_keys:
              reject = lambda k, v: v is None or k.startswith('__') or k in {
@@ -3434,8 +3469,7 @@ def _make_archive_id(self, info_dict):
          return make_archive_id(extractor, video_id)
  
      def in_download_archive(self, info_dict):
-        fn = self.params.get('download_archive')
-        if fn is None:
+        if not self.archive:
              return False
  
          vid_ids = [self._make_archive_id(info_dict)]
@@ -3448,9 +3482,11 @@ def record_download_archive(self, info_dict):
              return
          vid_id = self._make_archive_id(info_dict)
          assert vid_id
+
          self.write_debug(f'Adding to archive: {vid_id}')
-        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
-            archive_file.write(vid_id + '\n')
+        if is_path_like(fn):  # same check as preload_download_archive; a plain str filename is not an os.PathLike
+            with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+                archive_file.write(vid_id + '\n')
          self.archive.add(vid_id)
  
      @staticmethod
@@ -3678,7 +3714,8 @@ def get_encoding(stream):
          if VARIANT not in (None, 'pip'):
              source += '*'
          write_debug(join_nonempty(
-            'yt-dlp version', __version__,
+            f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
+            __version__,
              f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
              '' if source == 'unknown' else f'({source})',
              delim=' '))
@@ -3694,18 +3731,8 @@ def get_encoding(stream):
          if self.params['compat_opts']:
              write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
  
-        if source == 'source':
-            try:
-                stdout, _, _ = Popen.run(
-                    ['git', 'rev-parse', '--short', 'HEAD'],
-                    text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
-                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                if re.fullmatch('[0-9a-f]+', stdout.strip()):
-                    write_debug(f'Git HEAD: {stdout.strip()}')
-            except Exception:
-                with contextlib.suppress(Exception):
-                    sys.exc_clear()
-
+        if current_git_head():
+            write_debug(f'Git HEAD: {current_git_head()}')
          write_debug(system_identifier())
  
          exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)