Add option `--use-extractors`

[yt-dlp.git] / yt_dlp / extractor / common.py
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index 0ae0f4301288bea06017b7da5b26f6894b131e7e..6337a13a4427ee9ed46dbd50ecc05c36e9342524 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -154,6 +154,7 @@ class InfoExtractor:
                      * abr        Average audio bitrate in KBit/s
                      * acodec     Name of the audio codec in use
                      * asr        Audio sampling rate in Hertz
+                    * audio_channels  Number of audio channels
                      * vbr        Average video bitrate in KBit/s
                      * fps        Frame rate
                      * vcodec     Name of the video codec in use
@@ -316,7 +317,7 @@ class InfoExtractor:
                      live stream that goes on instead of a fixed-length video.
      was_live:       True, False, or None (=unknown). Whether this video was
                      originally a live stream.
-    live_status:    None (=unknown), 'is_live', 'is_upcoming', 'was_live', 'not_live'
+    live_status:    None (=unknown), 'is_live', 'is_upcoming', 'was_live', 'not_live',
                      or 'post_live' (was live, but VOD is not yet processed)
                      If absent, automatically set from is_live, was_live
      start_time:     Time in seconds where the reproduction should start, as
@@ -330,7 +331,7 @@ class InfoExtractor:
      playable_in_embed: Whether this video is allowed to play in embedded
                      players on other sites. Can be True (=always allowed),
                      False (=never allowed), None (=unknown), or a string
-                    specifying the criteria for embedability (Eg: 'whitelist')
+                    specifying the criteria for embeddability; e.g. 'whitelist'
      availability:   Under what condition the video is available. One of
                      'private', 'premium_only', 'subscriber_only', 'needs_auth',
                      'unlisted' or 'public'. Use 'InfoExtractor._availability'
@@ -451,8 +452,8 @@ class InfoExtractor:
  
      _extract_from_webpage may raise self.StopExtraction() to stop further
      processing of the webpage and obtain exclusive rights to it. This is useful
-    when the extractor cannot reliably be matched using just the URL.
-    Eg: invidious/peertube instances
+    when the extractor cannot reliably be matched using just the URL,
+    e.g. invidious/peertube instances
  
      Embed-only extractors can be defined by setting _VALID_URL = False.
  
@@ -479,6 +480,9 @@ class InfoExtractor:
      will be used by geo restriction bypass mechanism similarly
      to _GEO_COUNTRIES.
  
+    The _ENABLED attribute should be set to False for IEs that
+    are disabled by default and must be explicitly enabled.
+
      The _WORKING attribute should be set to False for broken IEs
      in order to warn the users and skip the tests.
      """
@@ -490,6 +494,7 @@ class InfoExtractor:
      _GEO_COUNTRIES = None
      _GEO_IP_BLOCKS = None
      _WORKING = True
+    _ENABLED = True
      _NETRC_MACHINE = None
      IE_DESC = None
      SEARCH_KEY = None
@@ -1668,8 +1673,8 @@ class FormatSort:
          regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
  
          default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
-                   'res', 'fps', 'hdr:12', 'codec:vp9.2', 'size', 'br', 'asr',
-                   'proto', 'ext', 'hasaud', 'source', 'id')  # These must not be aliases
+                   'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
+                   'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id')  # These must not be aliases
          ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
                          'height', 'width', 'proto', 'vext', 'abr', 'aext',
                          'fps', 'fs_approx', 'source', 'id')
@@ -1704,6 +1709,7 @@ class FormatSort:
              'height': {'convert': 'float_none'},
              'width': {'convert': 'float_none'},
              'fps': {'convert': 'float_none'},
+            'channels': {'convert': 'float_none', 'field': 'audio_channels'},
              'tbr': {'convert': 'float_none'},
              'vbr': {'convert': 'float_none'},
              'abr': {'convert': 'float_none'},
@@ -1717,13 +1723,14 @@ class FormatSort:
              'res': {'type': 'multiple', 'field': ('height', 'width'),
                      'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
  
-            # For compatibility with youtube-dl
+            # Actual field names
              'format_id': {'type': 'alias', 'field': 'id'},
              'preference': {'type': 'alias', 'field': 'ie_pref'},
              'language_preference': {'type': 'alias', 'field': 'lang'},
              'source_preference': {'type': 'alias', 'field': 'source'},
              'protocol': {'type': 'alias', 'field': 'proto'},
              'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
+            'audio_channels': {'type': 'alias', 'field': 'channels'},
  
              # Deprecated
              'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
@@ -2364,7 +2371,7 @@ def build_stream_name():
                      audio_group_id = last_stream_inf.get('AUDIO')
                      # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
                      # references a rendition group MUST have a CODECS attribute.
-                    # However, this is not always respected, for example, [2]
+                    # However, this is not always respected. E.g. [2]
                      # contains EXT-X-STREAM-INF tag which references AUDIO
                      # rendition group but does not have CODECS and despite
                      # referencing an audio group it represents a complete
@@ -3000,8 +3007,8 @@ def add_segment_url():
                                      segment_number += 1
                                  segment_time += segment_d
                      elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
-                        # No media template
-                        # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
+                        # No media template,
+                        # e.g. https://www.youtube.com/watch?v=iXZV5uAYMJI
                          # or any YouTube dashsegments video
                          fragments = []
                          segment_index = 0
@@ -3018,7 +3025,7 @@ def add_segment_url():
                          representation_ms_info['fragments'] = fragments
                      elif 'segment_urls' in representation_ms_info:
                          # Segment URLs with no SegmentTimeline
-                        # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+                        # E.g. https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
                          # https://github.com/ytdl-org/youtube-dl/pull/14844
                          fragments = []
                          segment_duration = float_or_none(
@@ -3246,8 +3253,8 @@ def _media_formats(src, cur_media_type, type_info=None):
          media_tags.extend(re.findall(
              # We only allow video|audio followed by a whitespace or '>'.
              # Allowing more characters may end up in significant slow down (see
-            # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
-            # http://www.porntrex.com/maps/videositemap.xml).
+            # https://github.com/ytdl-org/youtube-dl/issues/11979,
+            # e.g. http://www.porntrex.com/maps/videositemap.xml).
              r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
          for media_tag, _, media_type, media_content in media_tags:
              media_info = {
@@ -3703,7 +3710,7 @@ def description(cls, *, markdown=True, search_examples=None):
              desc += f'; "{cls.SEARCH_KEY}:" prefix'
              if search_examples:
                  _COUNTS = ('', '5', '10', 'all')
-                desc += f' (Example: "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
+                desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
          if not cls.working():
              desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
  
@@ -3938,3 +3945,12 @@ def _search_results(self, query):
      @classproperty
      def SEARCH_KEY(cls):
          return cls._SEARCH_KEY
+
+
+class UnsupportedURLIE(InfoExtractor):  # Catch-all IE: its only action is to reject the URL it is given
+    _VALID_URL = '.*'  # Regex with no constraints, so any URL matches
+    _ENABLED = False  # Disabled by default; must be explicitly enabled
+    IE_DESC = False
+
+    def _real_extract(self, url):
+        raise UnsupportedError(url)  # Always raises; no extraction is attempted