jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/udemy.py
[ie/cbc.ca:player] Support new URL format (#9561)
[yt-dlp.git] / yt_dlp / extractor / udemy.py
index 1dc2dbdc46d8143e02f79be2ba9ff71918637d51..5c296051af634b538d89ce286d67b4283126fdbc 100644 (file)
@@ -1,8 +1,9 @@
 import re
-import urllib.request
 
 from .common import InfoExtractor
-from ..compat import compat_HTTPError, compat_str, compat_urlparse
+from ..compat import compat_str, compat_urlparse
+from ..networking import Request
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     determine_ext,
     float_or_none,
     int_or_none,
     js_to_json,
-    sanitized_Request,
+    smuggle_url,
     try_get,
     unescapeHTML,
+    unsmuggle_url,
     url_or_none,
     urlencode_postdata,
 )
@@ -106,7 +108,7 @@ def _download_lecture(self, course_id, lecture_id):
             % (course_id, lecture_id),
             lecture_id, 'Downloading lecture JSON', query={
                 'fields[lecture]': 'title,description,view_html,asset',
-                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
+                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed',
             })
 
     def _handle_error(self, response):
@@ -151,11 +153,10 @@ def _download_json(self, url_or_request, *args, **kwargs):
                 headers['X-Udemy-Bearer-Token'] = cookie.value
                 headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value
 
-        if isinstance(url_or_request, urllib.request.Request):
-            for header, value in headers.items():
-                url_or_request.add_header(header, value)
+        if isinstance(url_or_request, Request):
+            url_or_request.headers.update(headers)
         else:
-            url_or_request = sanitized_Request(url_or_request, headers=headers)
+            url_or_request = Request(url_or_request, headers=headers)
 
         response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
         self._handle_error(response)
@@ -199,16 +200,19 @@ def is_logged(webpage):
 
     def _real_extract(self, url):
         lecture_id = self._match_id(url)
+        course_id = unsmuggle_url(url, {})[1].get('course_id')
 
-        webpage = self._download_webpage(url, lecture_id)
-
-        course_id, _ = self._extract_course_info(webpage, lecture_id)
+        webpage = None
+        if not course_id:
+            webpage = self._download_webpage(url, lecture_id)
+            course_id, _ = self._extract_course_info(webpage, lecture_id)
 
         try:
             lecture = self._download_lecture(course_id, lecture_id)
         except ExtractorError as e:
             # Error could possibly mean we are not enrolled in the course
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+                webpage = webpage or self._download_webpage(url, lecture_id)
                 self._enroll_course(url, webpage, course_id)
                 lecture = self._download_lecture(course_id, lecture_id)
             else:
@@ -391,7 +395,8 @@ def extract_subtitles(track_list):
                 if f.get('url'):
                     formats.append(f)
 
-        self._sort_formats(formats)
+        if not formats and asset.get('course_is_drmed'):
+            self.report_drm(video_id)
 
         return {
             'id': video_id,
@@ -405,7 +410,7 @@ def extract_subtitles(track_list):
         }
 
 
-class UdemyCourseIE(UdemyIE):
+class UdemyCourseIE(UdemyIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'udemy:course'
     _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
     _TESTS = [{
@@ -451,7 +456,9 @@ def _real_extract(self, url):
                 if lecture_id:
                     entry = {
                         '_type': 'url_transparent',
-                        'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']),
+                        'url': smuggle_url(
+                            f'https://www.udemy.com/{course_path}/learn/v4/t/lecture/{entry["id"]}',
+                            {'course_id': course_id}),
                         'title': entry.get('title'),
                         'ie_key': UdemyIE.ie_key(),
                     }