Add new field `aspect_ratio`

[yt-dlp.git] / yt_dlp / extractor / common.py
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index 18a52a855922f47a70a24546961c4e65c2c9f55d..94128bd84125ddd7708c6173608453efaa352826 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -66,6 +66,7 @@
      sanitize_filename,
      sanitize_url,
      sanitized_Request,
+    smuggle_url,
      str_or_none,
      str_to_int,
      strip_or_none,
@@ -149,7 +150,10 @@ class InfoExtractor:
                                   ("3D" or "DASH video")
                      * width      Width of the video, if known
                      * height     Height of the video, if known
+                    * aspect_ratio  Aspect ratio of the video, if known
+                                 Automatically calculated from width and height
                      * resolution Textual description of width and height
+                                 Automatically calculated from width and height
                      * dynamic_range The dynamic range of the video. One of:
                                   "SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV"
                      * tbr        Average bitrate of audio and video in KBit/s
@@ -1107,7 +1111,9 @@ def get_param(self, name, default=None, *args, **kwargs):
              return self._downloader.params.get(name, default, *args, **kwargs)
          return default
  
-    def report_drm(self, video_id, partial=False):
+    def report_drm(self, video_id, partial=NO_DEFAULT):
+        if partial is not NO_DEFAULT:
+            self._downloader.deprecation_warning('InfoExtractor.report_drm no longer accepts the argument partial')
          self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
  
      def report_extraction(self, id_or_name):
@@ -1573,7 +1579,7 @@ def traverse_json_ld(json_ld, at_top_level=True):
                      continue
                  if at_top_level and set(e.keys()) == {'@context', '@graph'}:
                      traverse_json_ld(e['@graph'], at_top_level=False)
-                    break
+                    continue
                  if expected_type is not None and not is_type(e, expected_type):
                      continue
                  rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
@@ -1644,7 +1650,10 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
          FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
          js, arg_keys, arg_vals = self._search_regex(
              (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
-            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), fatal=fatal)
+            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
+            default=NO_DEFAULT if fatal else (None, None, None))
+        if js is None:
+            return {}
  
          args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
  
@@ -3670,12 +3679,13 @@ def _apply_first_set_cookie_header(self, url_handle, cookie):
  
      @classmethod
      def get_testcases(cls, include_onlymatching=False):
-        t = getattr(cls, '_TEST', None)
+        # Use vars() so that only tests defined directly on this class are seen, not inherited ones
+        t = vars(cls).get('_TEST')
          if t:
              assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS'
              tests = [t]
          else:
-            tests = getattr(cls, '_TESTS', [])
+            tests = vars(cls).get('_TESTS', [])
          for t in tests:
              if not include_onlymatching and t.get('only_matching', False):
                  continue
@@ -3684,18 +3694,36 @@ def get_testcases(cls, include_onlymatching=False):
  
      @classmethod
      def get_webpage_testcases(cls):
-        tests = getattr(cls, '_WEBPAGE_TESTS', [])
+        tests = vars(cls).get('_WEBPAGE_TESTS', [])
          for t in tests:
              t['name'] = cls.ie_key()
          return tests
  
-    @classproperty
+    @classproperty(cache=True)
      def age_limit(cls):
          """Get age limit from the testcases"""
          return max(traverse_obj(
              (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
              (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])
  
+    @classproperty(cache=True)
+    def _RETURN_TYPE(cls):
+        """What the extractor returns: "video", "playlist", "any", or None (Unknown)"""
+        tests = tuple(cls.get_testcases(include_onlymatching=False))
+        if not tests:
+            return None
+        elif not any(k.startswith('playlist') for test in tests for k in test):
+            return 'video'
+        elif all(any(k.startswith('playlist') for k in test) for test in tests):
+            return 'playlist'
+        return 'any'
+
+    @classmethod
+    def is_single_video(cls, url):
+        """Return True if the URL is of a single video, False if of a playlist, None if unknown"""
+        assert cls.suitable(url), 'The URL must be suitable for the extractor'
+        return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
+
      @classmethod
      def is_suitable(cls, age_limit):
          """Test whether the extractor is generally suitable for the given age limit"""
@@ -3722,7 +3750,8 @@ def description(cls, *, markdown=True, search_examples=None):
          if not cls.working():
              desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
  
-        name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME
+        # Break up ":name:" with a zero-width space so it is not rendered as an emoji. Ref: https://github.com/github/markup/issues/1153
+        name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME
          return f'{name}:{desc}' if desc else name
  
      def extract_subtitles(self, *args, **kwargs):
@@ -3734,6 +3763,9 @@ def extract_subtitles(self, *args, **kwargs):
      def _get_subtitles(self, *args, **kwargs):
          raise NotImplementedError('This method must be implemented by subclasses')
  
+    class CommentsDisabled(Exception):
+        """Raise in _get_comments if comments are disabled for the video"""
+
      def extract_comments(self, *args, **kwargs):
          if not self.get_param('getcomments'):
              return None
@@ -3749,6 +3781,8 @@ def extractor():
                  interrupted = False
              except KeyboardInterrupt:
                  self.to_screen('Interrupted by user')
+            except self.CommentsDisabled:
+                return {'comments': None, 'comment_count': None}
              except Exception as e:
                  if self.get_param('ignoreerrors') is not True:
                      raise
@@ -3817,9 +3851,11 @@ def geo_verification_headers(self):
      def _generic_id(url):
          return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
  
-    @staticmethod
-    def _generic_title(url):
-        return urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
+    def _generic_title(self, url='', webpage='', *, default=None):
+        return (self._og_search_title(webpage, default=None)
+                or self._html_extract_title(webpage, default=None)
+                or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
+                or default)
  
      @staticmethod
      def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
@@ -3842,8 +3878,8 @@ def _configuration_arg(self, key, default=NO_DEFAULT, *, ie_key=None, casesense=
          @param default      The default value to return when the key is not present (default: [])
          @param casesense    When false, the values are converted to lower case
          '''
-        val = traverse_obj(
-            self._downloader.params, ('extractor_args', (ie_key or self.ie_key()).lower(), key))
+        ie_key = ie_key if isinstance(ie_key, str) else (ie_key or self).ie_key()
+        val = traverse_obj(self._downloader.params, ('extractor_args', ie_key.lower(), key))
          if val is None:
              return [] if default is NO_DEFAULT else default
          return list(val) if casesense else [x.lower() for x in val]
@@ -3873,6 +3909,12 @@ def _error_or_warning(self, err, _count=None, _retries=0, *, fatal=True):
      def RetryManager(self, **kwargs):
          return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs)
  
+    def _extract_generic_embeds(self, url, *args, info_dict={}, note='Extracting generic embeds', **kwargs):
+        display_id = traverse_obj(info_dict, 'display_id', 'id')
+        self.to_screen(f'{format_field(display_id, None, "%s: ")}{note}')
+        return self._downloader.get_info_extractor('Generic')._extract_embeds(
+            smuggle_url(url, {'block_ies': [self.ie_key()]}), *args, **kwargs)
+
      @classmethod
      def extract_from_webpage(cls, ydl, url, webpage):
          ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
@@ -3933,6 +3975,7 @@ class SearchInfoExtractor(InfoExtractor):
      """
  
      _MAX_RESULTS = float('inf')
+    _RETURN_TYPE = 'playlist'
  
      @classproperty
      def _VALID_URL(cls):