Improve handling for overriding extractors with plugins (#5916)

author Matthew <redacted>

Mon, 2 Jan 2023 04:55:11 +0000 (04:55 +0000)

committer GitHub <redacted>

Mon, 2 Jan 2023 04:55:11 +0000 (04:55 +0000)
author Matthew <redacted>
Mon, 2 Jan 2023 04:55:11 +0000 (04:55 +0000)
committer GitHub <redacted>
Mon, 2 Jan 2023 04:55:11 +0000 (04:55 +0000)
diff --git a/README.md b/README.md

index 83e69a236bcee9b09f858167e91519891d97fd1f..c4bd6ef0c78b0f282006ce5c2e430291a763f663 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1841,7 +1841,7 @@ ## Installing Plugins
      * Source: where `<root-dir>/yt_dlp/__main__.py`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
  
  3. **pip and other locations in `PYTHONPATH`**
-    * Plugin packages can be installed and managed using `pip`. See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
+    * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
        * Note: plugin files between plugin packages installed with pip must have unique filenames
      * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder.
        * Note: This does not apply for Pyinstaller/py2exe builds.
@@ -1854,9 +1854,12 @@ ## Installing Plugins
  
  ## Developing Plugins
  
-See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. 
+See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. 
  
-All public classes with a name ending in `IE` are imported from each file. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`)
+All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`)
+
+To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). 
+Due to the mechanics behind this, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above.
  
  If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability
  
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 8ce71a2dc637a15d558f3538c30e6291b29c24a3..e7b4690590b6de3dbbbe874f245f328e12e035db 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -33,7 +33,7 @@
  from .extractor.openload import PhantomJSwrapper
  from .minicurses import format_text
  from .plugins import directories as plugin_directories
-from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
+from .postprocessor import _PLUGIN_CLASSES as plugin_pps
  from .postprocessor import (
      EmbedThumbnailPP,
      FFmpegFixupDuplicateMoovPP,
@@ -3730,7 +3730,10 @@ def print_debug_header(self):
  
          # These imports can be slow. So import them only as needed
          from .extractor.extractors import _LAZY_LOADER
-        from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
+        from .extractor.extractors import (
+            _PLUGIN_CLASSES as plugin_ies,
+            _PLUGIN_OVERRIDES as plugin_ie_overrides
+        )
  
          def get_encoding(stream):
              ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
@@ -3808,12 +3811,17 @@ def get_encoding(stream):
                  proxy_map.update(handler.proxies)
          write_debug(f'Proxy map: {proxy_map}')
  
-        for plugin_type, plugins in {'Extractor': plugin_extractors, 'Post-Processor': plugin_postprocessors}.items():
-            if not plugins:
-                continue
-            write_debug(f'{plugin_type} Plugins: %s' % (', '.join(sorted(('%s%s' % (
+        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
+            display_list = ['%s%s' % (
                  klass.__name__, '' if klass.__name__ == name else f' as {name}')
-                for name, klass in plugins.items())))))
+                for name, klass in plugins.items()]
+            if plugin_type == 'Extractor':
+                display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
+                                    for parent, plugins in plugin_ie_overrides.items())
+            if not display_list:
+                continue
+            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
+
          plugin_dirs = plugin_directories()
          if plugin_dirs:
              write_debug(f'Plugin directories: {plugin_dirs}')
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index 9031f3c1163565d797d672a53a0d304e7d632794..f48b97a6b6d75ecdee3672398d2ce86bd0cce3f6 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3442,13 +3442,17 @@ def get_testcases(cls, include_onlymatching=False):
                  continue
              t['name'] = cls.ie_key()
              yield t
+        if getattr(cls, '__wrapped__', None):
+            yield from cls.__wrapped__.get_testcases(include_onlymatching)
  
      @classmethod
      def get_webpage_testcases(cls):
          tests = vars(cls).get('_WEBPAGE_TESTS', [])
          for t in tests:
              t['name'] = cls.ie_key()
-        return tests
+            yield t
+        if getattr(cls, '__wrapped__', None):
+            yield from cls.__wrapped__.get_webpage_testcases()
  
      @classproperty(cache=True)
      def age_limit(cls):
@@ -3710,10 +3714,12 @@ def __init_subclass__(cls, *, plugin_name=None, **kwargs):
          if plugin_name:
              mro = inspect.getmro(cls)
              super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
-            cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key
+            cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key
+            cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}'
              while getattr(super_class, '__wrapped__', None):
                  super_class = super_class.__wrapped__
              setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
+            _PLUGIN_OVERRIDES[super_class].append(cls)
  
          return super().__init_subclass__(**kwargs)
  
@@ -3770,3 +3776,6 @@ class UnsupportedURLIE(InfoExtractor):
  
      def _real_extract(self, url):
          raise UnsupportedError(url)
+
+
+_PLUGIN_OVERRIDES = collections.defaultdict(list)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py

index beda02917e3f7c050496d1e617e27b4b9b54e317..baa69d2421cd4a10dc815b206e24713583c8660c 100644 (file)
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -24,3 +24,5 @@
  
  globals().update(_PLUGIN_CLASSES)
  _ALL_CLASSES[:0] = _PLUGIN_CLASSES.values()
+
+from .common import _PLUGIN_OVERRIDES  # noqa: F401
diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py

index dccca100467315302e8e1a4cf4d0696696419fdf..0da01aa53e2b337b25637781f71ac9136acdabfe 100644 (file)
--- a/yt_dlp/extractor/testurl.py
+++ b/yt_dlp/extractor/testurl.py
@@ -23,11 +23,12 @@ def _real_extract(self, url):
          if len(matching_extractors) == 0:
              raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
          elif len(matching_extractors) > 1:
-            try:  # Check for exact match
-                extractor = next(
-                    ie for ie in matching_extractors
-                    if ie.IE_NAME.lower() == extractor_id.lower())
-            except StopIteration:
+            extractor = next((  # Check for exact match
+                ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower()
+            ), None) or next((  # Check for exact match without plugin suffix
+                ie for ie in matching_extractors if ie.IE_NAME.split('+')[0].lower() == extractor_id.lower()
+            ), None)
+            if not extractor:
                  raise ExtractorError(
                      'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors),
                      expected=True)
author	Matthew <redacted>
	Mon, 2 Jan 2023 04:55:11 +0000 (04:55 +0000)
committer	GitHub <redacted>
	Mon, 2 Jan 2023 04:55:11 +0000 (04:55 +0000)
README.md		patch \| blob \| blame \| history
yt_dlp/YoutubeDL.py		patch \| blob \| blame \| history
yt_dlp/extractor/common.py		patch \| blob \| blame \| history
yt_dlp/extractor/extractors.py		patch \| blob \| blame \| history
yt_dlp/extractor/testurl.py		patch \| blob \| blame \| history