]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/teachable.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / teachable.py
index e480d7610145f912f48cb1cb260825257fd2837b..0d39be6d5f7304f69c6d93dacf89f432a525fd96 100644 (file)
@@ -3,10 +3,10 @@
 from .common import InfoExtractor
 from .wistia import WistiaIE
 from ..utils import (
-    clean_html,
     ExtractorError,
-    int_or_none,
+    clean_html,
     get_element_by_class,
+    int_or_none,
     strip_or_none,
     urlencode_postdata,
     urljoin,
@@ -29,7 +29,7 @@ class TeachableBaseIE(InfoExtractor):
         'courses.workitdaily.com': 'workitdaily',
     }
 
-    _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
+    _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES))
 
     def _real_initialize(self):
         self._logged_in = False
@@ -43,8 +43,8 @@ def _login(self, site):
             return
 
         login_page, urlh = self._download_webpage_handle(
-            'https://%s/sign_in' % site, None,
-            'Downloading %s login page' % site)
+            f'https://{site}/sign_in', None,
+            f'Downloading {site} login page')
 
         def is_logged(webpage):
             return any(re.search(p, webpage) for p in (
@@ -56,7 +56,7 @@ def is_logged(webpage):
             self._logged_in = True
             return
 
-        login_url = urlh.geturl()
+        login_url = urlh.url
 
         login_form = self._hidden_inputs(login_page)
 
@@ -73,7 +73,7 @@ def is_logged(webpage):
             post_url = urljoin(login_url, post_url)
 
         response = self._download_webpage(
-            post_url, None, 'Logging in to %s' % site,
+            post_url, None, f'Logging in to {site}',
             data=urlencode_postdata(login_form),
             headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
@@ -82,8 +82,8 @@ def is_logged(webpage):
 
         if '>I accept the new Privacy Policy<' in response:
             raise ExtractorError(
-                'Unable to login: %s asks you to accept new Privacy Policy. '
-                'Go to https://%s/ and accept.' % (site, site), expected=True)
+                f'Unable to login: {site} asks you to accept new Privacy Policy. '
+                f'Go to https://{site}/ and accept.', expected=True)
 
         # Successful login
         if is_logged(response):
@@ -93,19 +93,20 @@ def is_logged(webpage):
         message = get_element_by_class('alert', response)
         if message is not None:
             raise ExtractorError(
-                'Unable to login: %s' % clean_html(message), expected=True)
+                f'Unable to login: {clean_html(message)}', expected=True)
 
         raise ExtractorError('Unable to log in')
 
 
 class TeachableIE(TeachableBaseIE):
+    _WORKING = False
     _VALID_URL = r'''(?x)
                     (?:
-                        %shttps?://(?P<site_t>[^/]+)|
-                        https?://(?:www\.)?(?P<site>%s)
+                        {}https?://(?P<site_t>[^/]+)|
+                        https?://(?:www\.)?(?P<site>{})
                     )
                     /courses/[^/]+/lectures/(?P<id>\d+)
-                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+                    '''.format(*TeachableBaseIE._VALID_URL_SUB_TUPLE)
 
     _TESTS = [{
         'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
@@ -140,12 +141,12 @@ def _is_teachable(webpage):
             r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
             webpage)
 
-    @staticmethod
-    def _extract_url(webpage, source_url):
-        if not TeachableIE._is_teachable(webpage):
-            return
-        if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
-            return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        if cls._is_teachable(webpage):
+            if re.match(r'https?://[^/]+/(?:courses|p)', url):
+                yield f'{cls._URL_PREFIX}{url}'
+                raise cls.StopExtraction
 
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
@@ -160,7 +161,7 @@ def _real_extract(self, url):
 
         webpage = self._download_webpage(url, video_id)
 
-        wistia_urls = WistiaIE._extract_urls(webpage)
+        wistia_urls = WistiaIE._extract_embed_urls(url, webpage)
         if not wistia_urls:
             if any(re.search(p, webpage) for p in (
                     r'class=["\']lecture-contents-locked',
@@ -177,7 +178,7 @@ def _real_extract(self, url):
         chapter = None
         chapter_number = None
         section_item = self._search_regex(
-            r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id,
+            rf'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']{video_id}[^>]+>.+?</li>)',
             webpage, 'section item', default=None, group='li')
         if section_item:
             chapter_number = int_or_none(self._search_regex(
@@ -210,11 +211,11 @@ def _real_extract(self, url):
 class TeachableCourseIE(TeachableBaseIE):
     _VALID_URL = r'''(?x)
                         (?:
-                            %shttps?://(?P<site_t>[^/]+)|
-                            https?://(?:www\.)?(?P<site>%s)
+                            {}https?://(?P<site_t>[^/]+)|
+                            https?://(?:www\.)?(?P<site>{})
                         )
                         /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
-                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+                    '''.format(*TeachableBaseIE._VALID_URL_SUB_TUPLE)
     _TESTS = [{
         'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
         'info_dict': {
@@ -241,8 +242,7 @@ class TeachableCourseIE(TeachableBaseIE):
 
     @classmethod
     def suitable(cls, url):
-        return False if TeachableIE.suitable(url) else super(
-            TeachableCourseIE, cls).suitable(url)
+        return False if TeachableIE.suitable(url) else super().suitable(url)
 
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
@@ -258,7 +258,7 @@ def _real_extract(self, url):
 
         webpage = self._download_webpage(url, course_id)
 
-        url_base = 'https://%s/' % site
+        url_base = f'https://{site}/'
 
         entries = []