[ie/matchtv] Fix extractor (#10190)

[yt-dlp.git] / yt_dlp / extractor / lecturio.py
diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py

index 1b2dcef46621237fd7c7ce376165a6bc5c674606..4cfb8722418b79dbf89d9eebb4dc5bf95bef5b20 100644 (file)
--- a/yt_dlp/extractor/lecturio.py
+++ b/yt_dlp/extractor/lecturio.py
@@ -1,13 +1,10 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
  import re
  
  from .common import InfoExtractor
  from ..utils import (
+    ExtractorError,
      clean_html,
      determine_ext,
-    ExtractorError,
      float_or_none,
      int_or_none,
      str_or_none,
@@ -22,20 +19,13 @@ class LecturioBaseIE(InfoExtractor):
      _LOGIN_URL = 'https://app.lecturio.com/en/login'
      _NETRC_MACHINE = 'lecturio'
  
-    def _real_initialize(self):
-        self._login()
-
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
-
+    def _perform_login(self, username, password):
          # Sets some cookies
          _, urlh = self._download_webpage_handle(
              self._LOGIN_URL, None, 'Downloading login popup')
  
          def is_logged(url_handle):
-            return self._LOGIN_URL not in url_handle.geturl()
+            return self._LOGIN_URL not in url_handle.url
  
          # Already logged in
          if is_logged(urlh):
@@ -59,7 +49,7 @@ def is_logged(url_handle):
              r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
              'errors', default=None)
          if errors:
-            raise ExtractorError('Unable to login: %s' % errors, expected=True)
+            raise ExtractorError(f'Unable to login: {errors}', expected=True)
          raise ExtractorError('Unable to log in')
  
  
@@ -67,8 +57,8 @@ class LecturioIE(LecturioBaseIE):
      _VALID_URL = r'''(?x)
                      https://
                          (?:
-                            app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
-                            (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
+                            app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
+                            (?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
                          )
                      '''
      _TESTS = [{
@@ -83,6 +73,9 @@ class LecturioIE(LecturioBaseIE):
      }, {
          'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
          'only_matching': True,
+    }, {
+        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
+        'only_matching': True,
      }, {
          'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
          'only_matching': True,
@@ -103,7 +96,7 @@ class LecturioIE(LecturioBaseIE):
      }
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
          nt = mobj.group('nt') or mobj.group('nt_de')
          lecture_id = mobj.group('id')
          display_id = nt or lecture_id
@@ -137,7 +130,7 @@ def _real_extract(self, url):
              f = {
                  'url': file_url,
                  'format_id': label,
-                'filesize': float_or_none(filesize, invscale=1000)
+                'filesize': float_or_none(filesize, invscale=1000),
              }
              if label:
                  mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label)
@@ -147,7 +140,6 @@ def _real_extract(self, url):
                          'height': int(mobj.group(1)),
                      })
              formats.append(f)
-        self._sort_formats(formats)
  
          subtitles = {}
          automatic_captions = {}
@@ -180,7 +172,7 @@ def _real_extract(self, url):
  
  
  class LecturioCourseIE(LecturioBaseIE):
-    _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
+    _VALID_URL = r'https?://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
      _TESTS = [{
          'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
          'info_dict': {
@@ -196,7 +188,7 @@ class LecturioCourseIE(LecturioBaseIE):
      }]
  
      def _real_extract(self, url):
-        nt, course_id = re.match(self._VALID_URL, url).groups()
+        nt, course_id = self._match_valid_url(url).groups()
          display_id = nt or course_id
          api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
          course = self._download_json(
@@ -208,7 +200,7 @@ def _real_extract(self, url):
              if lecture_url:
                  lecture_url = urljoin(url, lecture_url)
              else:
-                lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id)
+                lecture_url = f'https://app.lecturio.com/#/lecture/c/{course_id}/{lecture_id}'
              entries.append(self.url_result(
                  lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
          return self.playlist_result(
@@ -217,7 +209,7 @@ def _real_extract(self, url):
  
  
  class LecturioDeCourseIE(LecturioBaseIE):
-    _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
+    _VALID_URL = r'https?://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
      _TEST = {
          'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
          'only_matching': True,