]>
Commit | Line | Data |
---|---|---|
e5de3f6c S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_urllib_parse, | |
8 | compat_urllib_request, | |
9 | ExtractorError, | |
10 | ) | |
11 | ||
12 | ||
class UdemyIE(InfoExtractor):
    """Extractor for a single Udemy lecture.

    Account credentials are mandatory: ``_real_initialize`` logs in before
    any extraction takes place and fails when no username is configured.
    """

    IE_NAME = 'udemy'
    _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
    _LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
    _NETRC_MACHINE = 'udemy'

    _TEST = {
        'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
        'md5': '98eda5b657e752cf945d8445e261b5c5',
        'info_dict': {
            'id': '160614',
            'ext': 'mp4',
            'title': 'Introduction and Installation',
            'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876',
            'duration': 579.29,
        },
        'skip': 'Requires udemy account credentials',
    }

    def _handle_error(self, response):
        """Raise ExtractorError if a JSON response carries an 'error' entry.

        Non-dict responses and dicts without a truthy 'error' value are
        ignored.
        """
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message'))
            error_data = error.get('data')
            if error_data:
                error_str += ' - %s' % error_data.get('formErrors')
            raise ExtractorError(error_str, expected=True)

    def _download_json(self, url, video_id, note='Downloading JSON metadata', *args, **kwargs):
        """Download JSON via the base class, then check it for an API error.

        Any arguments beyond ``note`` are forwarded untouched to the base
        implementation, so this override does not hide options the parent
        method accepts.
        """
        response = super(UdemyIE, self)._download_json(url, video_id, note, *args, **kwargs)
        self._handle_error(response)
        return response

    def _real_initialize(self):
        """Log in before any extraction is attempted."""
        self._login()

    def _login(self):
        """Authenticate against Udemy with the configured credentials.

        Raises ExtractorError when no username is available or when the
        login response does not contain 'returnUrl'.
        """
        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(
                'Udemy account is required, use --username and --password options to provide account credentials.',
                expected=True)

        login_popup = self._download_webpage(
            'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None,
            'Downloading login popup')

        # A popup consisting solely of this redirect snippet means there is no
        # login form to submit (presumably the session is already logged in).
        if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
            return

        csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token')

        login_form = {
            'email': username,
            'password': password,
            'csrf': csrf,
            'displayType': 'json',
            'isSubmitted': '1',
        }
        # POST data has to be bytes on Python 3; urlencode() yields text there.
        # The urlencoded payload is pure ASCII, so this is a no-op on Python 2.
        request = compat_urllib_request.Request(
            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
        response = self._download_json(request, None, 'Logging in as %s' % username)

        if 'returnUrl' not in response:
            raise ExtractorError('Unable to log in')

    def _real_extract(self, url):
        """Return the info dict for one lecture.

        Lectures whose stream URL points at a YouTube watch page are
        delegated to the 'Youtube' extractor. Raises ExtractorError when the
        lecture's asset type is not 'Video'.
        """
        mobj = re.match(self._VALID_URL, url)
        lecture_id = mobj.group('id')

        lecture = self._download_json(
            'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, lecture_id, 'Downloading lecture JSON')

        if lecture['assetType'] != 'Video':
            raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True)

        asset = lecture['asset']

        stream_url = asset['streamUrl']
        youtube_mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url)
        if youtube_mobj:
            return self.url_result(youtube_mobj.group(1), 'Youtube')

        video_id = asset['id']
        # Thumbnail, duration and description are optional metadata: a lecture
        # lacking any of them must not abort the whole extraction.
        thumbnail = asset.get('thumbnailUrl')
        duration = (asset.get('data') or {}).get('duration')

        download_url = asset['downloadUrl']

        # NOTE(review): the 'Video480p' entry is labelled '360p' here; the
        # label is kept as-is - confirm against the renditions Udemy serves.
        formats = [
            {
                'url': download_url['Video480p'][0],
                'format_id': '360p',
            },
            {
                'url': download_url['Video'][0],
                'format_id': '720p',
            },
        ]

        title = lecture['title']
        description = lecture.get('description')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats
        }
125 | ||
126 | ||
class UdemyCourseIE(UdemyIE):
    """Extractor that turns a Udemy course URL into a playlist of lectures."""

    IE_NAME = 'udemy:course'
    _VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)'
    _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<'
    _ALREADY_ENROLLED = '>You are already taking this course.<'

    @classmethod
    def suitable(cls, url):
        """Decline URLs the single-lecture extractor already claims."""
        if UdemyIE.suitable(url):
            return False
        return super(UdemyCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Enroll in the course and return a playlist of its video lectures."""
        course_path = re.match(self._VALID_URL, url).group('coursepath')

        course = self._download_json(
            'https://www.udemy.com/api-1.1/courses/%s' % course_path,
            course_path,
            'Downloading course JSON')

        course_id = int(course['id'])
        course_title = course['title']

        # Requesting the subscribe page enrolls the account; the markers below
        # only decide which status line is printed.
        enroll_page = self._download_webpage(
            'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id,
            course_id,
            'Enrolling in the course')

        if self._SUCCESSFULLY_ENROLLED in enroll_page:
            self.to_screen('%s: Successfully enrolled in' % course_id)
        elif self._ALREADY_ENROLLED in enroll_page:
            self.to_screen('%s: Already enrolled in' % course_id)

        curriculum = self._download_json(
            'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
            course_id,
            'Downloading course curriculum')

        # Only curriculum items whose assetType is 'Video' become entries.
        entries = []
        for item in curriculum:
            if item.get('assetType') != 'Video':
                continue
            lecture_url = 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, item['id'])
            entries.append(self.url_result(lecture_url, 'Udemy'))

        return self.playlist_result(entries, course_id, course_title)