[extractor, test] Basic framework for embed tests (#4307)

author pukkandan <redacted>
Fri, 8 Jul 2022 11:23:05 +0000 (16:53 +0530)

committer pukkandan <redacted>
Mon, 1 Aug 2022 19:38:16 +0000 (01:08 +0530)
diff --git a/test/helper.py b/test/helper.py

index f19e1a34fce4f5679265eada0409d30cadba0a3a..e918d8c4693e24625ab9b7eb144101fea5f460f2 100644 (file)
--- a/test/helper.py
+++ b/test/helper.py
@@ -92,6 +92,13 @@ def gettestcases(include_onlymatching=False):
          yield from ie.get_testcases(include_onlymatching)
  
  
+def getwebpagetestcases():
+    for ie in yt_dlp.extractor.gen_extractors():
+        for tc in ie.get_webpage_testcases():
+            tc.setdefault('add_ie', []).append('Generic')
+            yield tc
+
+
  md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
  
  
diff --git a/test/test_download.py b/test/test_download.py

index c9f5e735c217c365a6ea06e33a8e02a0487ed171..787013c3422102420896d1e9f1fa59b128125c77 100755 (executable)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -8,6 +8,7 @@
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  
+import collections
  import hashlib
  import http.client
  import json
@@ -20,6 +21,7 @@
      expect_warnings,
      get_params,
      gettestcases,
+    getwebpagetestcases,
      is_download_test,
      report_warning,
      try_rm,
@@ -32,6 +34,7 @@
      ExtractorError,
      UnavailableVideoError,
      format_bytes,
+    join_nonempty,
  )
  
  RETRIES = 3
@@ -57,7 +60,9 @@ def _file_md5(fn):
          return hashlib.md5(f.read()).hexdigest()
  
  
-defs = gettestcases()
+normal_test_cases = gettestcases()
+webpage_test_cases = getwebpagetestcases()
+tests_counter = collections.defaultdict(collections.Counter)
  
  
  @is_download_test
@@ -72,24 +77,13 @@ class TestDownload(unittest.TestCase):
  
      def __str__(self):
          """Identify each test with the `add_ie` attribute, if available."""
+        cls, add_ie = type(self), getattr(self, self._testMethodName).add_ie
+        return f'{self._testMethodName} ({cls.__module__}.{cls.__name__}){f" [{add_ie}]" if add_ie else ""}:'
  
-        def strclass(cls):
-            """From 2.7's unittest; 2.6 had _strclass so we can't import it."""
-            return f'{cls.__module__}.{cls.__name__}'
-
-        add_ie = getattr(self, self._testMethodName).add_ie
-        return '%s (%s)%s:' % (self._testMethodName,
-                               strclass(self.__class__),
-                               ' [%s]' % add_ie if add_ie else '')
-
-    def setUp(self):
-        self.defs = defs
  
  # Dynamically generate tests
  
-
  def generator(test_case, tname):
-
      def test_template(self):
          if self.COMPLETED_TESTS.get(tname):
              return
@@ -255,39 +249,43 @@ def try_rm_tcs_files(tcs=None):
  
  
  # And add them to TestDownload
-tests_counter = {}
-for test_case in defs:
-    name = test_case['name']
-    i = tests_counter.get(name, 0)
-    tests_counter[name] = i + 1
-    tname = f'test_{name}_{i}' if i else f'test_{name}'
-    test_method = generator(test_case, tname)
-    test_method.__name__ = str(tname)
-    ie_list = test_case.get('add_ie')
-    test_method.add_ie = ie_list and ','.join(ie_list)
-    setattr(TestDownload, test_method.__name__, test_method)
-    del test_method
+def inject_tests(test_cases, label=''):
+    for test_case in test_cases:
+        name = test_case['name']
+        tname = join_nonempty('test', name, label, tests_counter[name][label], delim='_')
+        tests_counter[name][label] += 1
  
+        test_method = generator(test_case, tname)
+        test_method.__name__ = tname
+        test_method.add_ie = ','.join(test_case.get('add_ie', []))
+        setattr(TestDownload, test_method.__name__, test_method)
  
-def batch_generator(name, num_tests):
  
+inject_tests(normal_test_cases)
+
+# TODO: disable redirection to the IE to ensure we are actually testing the webpage extraction
+inject_tests(webpage_test_cases, 'webpage')
+
+
+def batch_generator(name):
      def test_template(self):
-        for i in range(num_tests):
-            test_name = f'test_{name}_{i}' if i else f'test_{name}'
-            try:
-                getattr(self, test_name)()
-            except unittest.SkipTest:
-                print(f'Skipped {test_name}')
+        for label, num_tests in tests_counter[name].items():
+            for i in range(num_tests):
+                test_name = join_nonempty('test', name, label, i, delim='_')
+                try:
+                    getattr(self, test_name)()
+                except unittest.SkipTest:
+                    print(f'Skipped {test_name}')
  
      return test_template
  
  
-for name, num_tests in tests_counter.items():
-    test_method = batch_generator(name, num_tests)
+for name in tests_counter:
+    test_method = batch_generator(name)
      test_method.__name__ = f'test_{name}_all'
      test_method.add_ie = ''
      setattr(TestDownload, test_method.__name__, test_method)
-    del test_method
+del test_method
  
  
  if __name__ == '__main__':
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index b8347fe4cf1767d7d7338afea8e242c02d499356..317aa270e70f0600fa268e33bdfbdfb9248636c2 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3665,11 +3665,18 @@ def get_testcases(cls, include_onlymatching=False):
              t['name'] = cls.ie_key()
              yield t
  
+    @classmethod
+    def get_webpage_testcases(cls):
+        tests = getattr(cls, '_WEBPAGE_TESTS', [])
+        for t in tests:
+            t['name'] = cls.ie_key()
+        return tests
+
      @classproperty
      def age_limit(cls):
          """Get age limit from the testcases"""
          return max(traverse_obj(
-            tuple(cls.get_testcases(include_onlymatching=False)),
+            (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
              (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])
  
      @classmethod
@@ -3844,7 +3851,10 @@ def _yes_playlist(self, playlist_id, video_id, smuggled_data=None, *, playlist_l
      def extract_from_webpage(cls, ydl, url, webpage):
          ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
                else ydl.get_info_extractor(cls.ie_key()))
-        yield from ie._extract_from_webpage(url, webpage) or []
+        for info in ie._extract_from_webpage(url, webpage) or []:
+            # url = None since we do not want to set (webpage/original)_url
+            ydl.add_default_extra_info(info, ie, None)
+            yield info
  
      @classmethod
      def _extract_from_webpage(cls, url, webpage):
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py

index d6a6166a0a717be40195a5996a1849c587e10013..0dc9ae0da6c956c7cbe5c4bda6e64f545d707e88 100644 (file)
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -933,21 +933,6 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              }
          },
-        # YouTube <object> embed
-        {
-            'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
-            'md5': '516718101ec834f74318df76259fb3cc',
-            'info_dict': {
-                'id': 'msN87y-iEx0',
-                'ext': 'webm',
-                'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
-                'upload_date': '20080526',
-                'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
-                'uploader': 'Christopher Sykes',
-                'uploader_id': 'ChristopherJSykes',
-            },
-            'add_ie': ['Youtube'],
-        },
          # Camtasia studio
          {
              'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index fb23afbad69ac5d09f403fcc8bbe03afaca40abc..4dc8e79ac1ba22576cb46293052ba88dad835778 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2266,6 +2266,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          }
      ]
  
+    _WEBPAGE_TESTS = [
+        # YouTube <object> embed
+        {
+            'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
+            'md5': '873c81d308b979f0e23ee7e620b312a3',
+            'info_dict': {
+                'id': 'msN87y-iEx0',
+                'ext': 'mp4',
+                'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
+                'upload_date': '20080526',
+                'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
+                'uploader': 'Christopher Sykes',
+                'uploader_id': 'ChristopherJSykes',
+                'age_limit': 0,
+                'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
+                'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
+                'playable_in_embed': True,
+                'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
+                'like_count': int,
+                'comment_count': int,
+                'channel': 'Christopher Sykes',
+                'live_status': 'not_live',
+                'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
+                'availability': 'public',
+                'duration': 195,
+                'view_count': int,
+                'categories': ['Science & Technology'],
+                'channel_follower_count': int,
+                'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
+            },
+            'params': {
+                'skip_download': True,
+            }
+        },
+    ]
+
      @classmethod
      def suitable(cls, url):
          from ..utils import parse_qs
author	pukkandan <redacted>	Fri, 8 Jul 2022 11:23:05 +0000 (16:53 +0530)
committer	pukkandan <redacted>	Mon, 1 Aug 2022 19:38:16 +0000 (01:08 +0530)
test/helper.py		patch | blob | blame | history
test/test_download.py		patch | blob | blame | history
yt_dlp/extractor/common.py		patch | blob | blame | history
yt_dlp/extractor/generic.py		patch | blob | blame | history
yt_dlp/extractor/youtube.py		patch | blob | blame | history