Add option `--use-extractors`

author pukkandan <redacted>

Wed, 24 Aug 2022 00:12:16 +0000 (05:42 +0530)

committer pukkandan <redacted>

Wed, 24 Aug 2022 02:17:51 +0000 (07:47 +0530)
author pukkandan <redacted>
Wed, 24 Aug 2022 00:12:16 +0000 (05:42 +0530)
committer pukkandan <redacted>
Wed, 24 Aug 2022 02:17:51 +0000 (07:47 +0530)
diff --git a/README.md b/README.md

index 7cfeec4f124b30cd68b01a22b36f56d6700fdeb9..aab20c079f65d2d379c1d6c235615a99a36e15f8 100644 (file)
--- a/README.md
+++ b/README.md
@@ -375,7 +375,13 @@ ## General Options:
      --list-extractors               List all supported extractors and exit
      --extractor-descriptions        Output descriptions of all supported
                                      extractors and exit
-    --force-generic-extractor       Force extraction to use the generic extractor
+    --use-extractors, --ies NAMES   Extractor names to use separated by commas.
+                                    You can also use regexes, "all", "default"
+                                    and "end" (end URL matching); e.g. --ies
+                                    "holodex.*,end,youtube". Prefix the name
+                                    with a "-" to exclude it, e.g. --ies
+                                    default,-generic. Use --list-extractors for
+                                    a list of available extractor names
      --default-search PREFIX         Use this prefix for unqualified URLs. E.g.
                                      "gvsearch2:python" downloads two videos from
                                      google videos for the search term "python".
@@ -2058,6 +2064,7 @@ #### Redundant options
  #### Not recommended
  While these options still work, their use is not recommended since there are other alternatives to achieve the same
  
+    --force-generic-extractor        --ies generic,default
      --exec-before-download CMD       --exec "before_dl:CMD"
      --no-exec-before-download        --no-exec
      --all-formats                    -f all
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 872e0bdc3c8b9d9a5f3493bc3b74c7b662716d85..a3d5620425ee62df6430f483b2d669f0f8695142 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -29,6 +29,7 @@
  from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  from .downloader.rtmp import rtmpdump_version
  from .extractor import gen_extractor_classes, get_info_extractor
+from .extractor.common import UnsupportedURLIE
  from .extractor.openload import PhantomJSwrapper
  from .minicurses import format_text
  from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
@@ -237,7 +238,7 @@ class YoutubeDL:
                         Default is 'only_download' for CLI, but False for API
      skip_playlist_after_errors: Number of allowed failures until the rest of
                         the playlist is skipped
-    force_generic_extractor: Force downloader to use the generic extractor
+    allowed_extractors:  List of regexes to match against extractor names that are allowed
      overwrites:        Overwrite all video and metadata files if True,
                         overwrite only non-video files if None
                         and don't overwrite any file if False
@@ -477,6 +478,8 @@ class YoutubeDL:
  
      The following options are deprecated and may be removed in the future:
  
+    force_generic_extractor: Force downloader to use the generic extractor
+                       - Use allowed_extractors = ['generic', 'default']
      playliststart:     - Use playlist_items
                         Playlist item to start at.
      playlistend:       - Use playlist_items
@@ -758,13 +761,6 @@ def add_info_extractor(self, ie):
              self._ies_instances[ie_key] = ie
              ie.set_downloader(self)
  
-    def _get_info_extractor_class(self, ie_key):
-        ie = self._ies.get(ie_key)
-        if ie is None:
-            ie = get_info_extractor(ie_key)
-            self.add_info_extractor(ie)
-        return ie
-
      def get_info_extractor(self, ie_key):
          """
          Get an instance of an IE with name ie_key, it will try to get one from
@@ -781,8 +777,19 @@ def add_default_info_extractors(self):
          """
          Add the InfoExtractors returned by gen_extractors to the end of the list
          """
-        for ie in gen_extractor_classes():
-            self.add_info_extractor(ie)
+        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
+        all_ies['end'] = UnsupportedURLIE()
+        try:
+            ie_names = orderedSet_from_options(
+                self.params.get('allowed_extractors', ['default']), {
+                    'all': list(all_ies),
+                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
+                }, use_regex=True)
+        except re.error as e:
+            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
+        for name in ie_names:
+            self.add_info_extractor(all_ies[name])
+        self.write_debug(f'Loaded {len(ie_names)} extractors')
  
      def add_post_processor(self, pp, when='post_process'):
          """Add a PostProcessor object to the end of the chain."""
@@ -1413,11 +1420,11 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
              ie_key = 'Generic'
  
          if ie_key:
-            ies = {ie_key: self._get_info_extractor_class(ie_key)}
+            ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
          else:
              ies = self._ies
  
-        for ie_key, ie in ies.items():
+        for key, ie in ies.items():
              if not ie.suitable(url):
                  continue
  
@@ -1426,14 +1433,16 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                                      'and will probably not work.')
  
              temp_id = ie.get_temp_id(url)
-            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
-                self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
+            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
+                self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
                  if self.params.get('break_on_existing', False):
                      raise ExistingVideoReached()
                  break
-            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
+            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
          else:
-            self.report_error('no suitable InfoExtractor for URL %s' % url)
+            extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
+            self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
+                              tb=False if extractors_restricted else None)
  
      def _handle_extraction_exceptions(func):
          @functools.wraps(func)
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py

index 317dd26231cea426a8ffa8636e701d70ca5922fc..e9234e6f49186257c6e195aa5b742b9b7a6751b3 100644 (file)
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -766,6 +766,7 @@ def parse_options(argv=None):
          'windowsfilenames': opts.windowsfilenames,
          'ignoreerrors': opts.ignoreerrors,
          'force_generic_extractor': opts.force_generic_extractor,
+        'allowed_extractors': opts.allowed_extractors or ['default'],
          'ratelimit': opts.ratelimit,
          'throttledratelimit': opts.throttledratelimit,
          'overwrites': opts.overwrites,
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index a534703e53e71eb19e757e9415b2c390aea31412..6337a13a4427ee9ed46dbd50ecc05c36e9342524 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -480,6 +480,9 @@ class InfoExtractor:
      will be used by geo restriction bypass mechanism similarly
      to _GEO_COUNTRIES.
  
+    The _ENABLED attribute should be set to False for IEs that
+    are disabled by default and must be explicitly enabled.
+
      The _WORKING attribute should be set to False for broken IEs
      in order to warn the users and skip the tests.
      """
@@ -491,6 +494,7 @@ class InfoExtractor:
      _GEO_COUNTRIES = None
      _GEO_IP_BLOCKS = None
      _WORKING = True
+    _ENABLED = True
      _NETRC_MACHINE = None
      IE_DESC = None
      SEARCH_KEY = None
@@ -3941,3 +3945,12 @@ def _search_results(self, query):
      @classproperty
      def SEARCH_KEY(cls):
          return cls._SEARCH_KEY
+
+
+class UnsupportedURLIE(InfoExtractor):
+    _VALID_URL = '.*'
+    _ENABLED = False
+    IE_DESC = False
+
+    def _real_extract(self, url):
+        raise UnsupportedError(url)
diff --git a/yt_dlp/options.py b/yt_dlp/options.py

index 0cddb7fd5268f59912cf6416bc83bc2711305149..bee531d1b518cd19e117862e0a0d39b704a3269c 100644 (file)
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -353,10 +353,20 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
          '--extractor-descriptions',
          action='store_true', dest='list_extractor_descriptions', default=False,
          help='Output descriptions of all supported extractors and exit')
+    general.add_option(
+        '--use-extractors', '--ies',
+        action='callback', dest='allowed_extractors', metavar='NAMES', type='str',
+        default=[], callback=_list_from_options_callback,
+        help=(
+            'Extractor names to use separated by commas. '
+            'You can also use regexes, "all", "default" and "end" (end URL matching); '
+            'e.g. --ies "holodex.*,end,youtube". '
+            'Prefix the name with a "-" to exclude it, e.g. --ies default,-generic. '
+            'Use --list-extractors for a list of available extractor names'))
      general.add_option(
          '--force-generic-extractor',
          action='store_true', dest='force_generic_extractor', default=False,
-        help='Force extraction to use the generic extractor')
+        help=optparse.SUPPRESS_HELP)
      general.add_option(
          '--default-search',
          dest='default_search', metavar='PREFIX',
author	pukkandan <redacted>
	Wed, 24 Aug 2022 00:12:16 +0000 (05:42 +0530)
committer	pukkandan <redacted>
	Wed, 24 Aug 2022 02:17:51 +0000 (07:47 +0530)
README.md		patch | blob | blame | history
yt_dlp/YoutubeDL.py		patch | blob | blame | history
yt_dlp/__init__.py		patch | blob | blame | history
yt_dlp/extractor/common.py		patch | blob | blame | history
yt_dlp/options.py		patch | blob | blame | history